From caf3f4bdb535f73c6e7b828bc98097f275f819bb Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 17 Jul 2019 22:49:08 +0900 Subject: [PATCH 001/851] h8300: fix memset return value. The return address is not as specified. Signed-off-by: Yoshinori Sato --- arch/h8300/lib/memset.S | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/h8300/lib/memset.S b/arch/h8300/lib/memset.S index 2d1abc37fd08b..df873779bb5e0 100644 --- a/arch/h8300/lib/memset.S +++ b/arch/h8300/lib/memset.S @@ -19,13 +19,15 @@ ;; c = er1(r1l) ;; count = er2 memset: - btst #0,r0l + mov.l er4,@-sp + mov.l er0,er4 + btst #0,r4l beq 2f ;; odd address 1: - mov.b r1l,@er0 - adds #1,er0 + mov.b r1l,@er4 + adds #1,er4 dec.l #1,er2 beq 6f @@ -46,8 +48,8 @@ memset: mov.b r1l,r1h mov.w r1,e1 3: - mov.l er1,@er0 - adds #4,er0 + mov.l er1,@er4 + adds #4,er4 dec.l #1,er2 bne 3b 4: @@ -55,11 +57,12 @@ memset: and.b #3,r3l beq 6f 5: - mov.b r1l,@er0 - adds #1,er0 + mov.b r1l,@er4 + adds #1,er4 dec.b r3l bne 5b 6: + mov.l @sp+,er4 rts clear_user: From 3e138fe1a4fb6aa834b46529e4faaee4ce2d5505 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 17 Jul 2019 22:55:00 +0900 Subject: [PATCH 002/851] h8300: Add missing symbol "BOOT_LINK_OFFSET" Signed-off-by: Yoshinori Sato --- arch/h8300/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index ec800e9d5aadf..14bb45644c0c8 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -48,4 +48,7 @@ config NR_CPUS int default 1 +config BOOT_LINK_OFFSET + hex "zImage link offset" + source "arch/h8300/Kconfig.cpu" From ece7be2c964c2aa5015e74628ca33c03a5ff4e47 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 23 Jul 2019 19:21:06 +0900 Subject: [PATCH 003/851] h8300: move definition of __kernel_size_t etc. to posix_types.h These types should be defined in posix_types.h, not in bitsperlong.h . 
With these defines moved, h8300-specific bitsperlong.h is no longer needed since Kbuild will automatically create a wrapper of include/uapi/asm-generic/bitsperlong.h Signed-off-by: Masahiro Yamada Signed-off-by: Yoshinori Sato --- arch/h8300/include/uapi/asm/bitsperlong.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 arch/h8300/include/uapi/asm/bitsperlong.h diff --git a/arch/h8300/include/uapi/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h new file mode 100644 index 0000000000000..0a9ffb344ea99 --- /dev/null +++ b/arch/h8300/include/uapi/asm/bitsperlong.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_H8300_BITS_PER_LONG +#define _UAPI__ASM_H8300_BITS_PER_LONG + +#include + +#if !defined(__ASSEMBLY__) +/* h8300-unknown-linux required long */ +#define __kernel_size_t __kernel_size_t +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +#endif + +#endif /* _UAPI__ASM_H8300_BITS_PER_LONG */ From de98871736825349b367275b0790e42ea4157350 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 16 Apr 2020 13:43:01 +0900 Subject: [PATCH 004/851] h8300: move definition of __kernel_size_t etc. to posix_types.h These types should be defined in posix_types.h, not in bitsperlong.h . 
With these defines moved, h8300-specific bitsperlong.h is no longer needed since Kbuild will automatically create a wrapper of include/uapi/asm-generic/bitsperlong.h Signed-off-by: Masahiro Yamada Signed-off-by: Yoshinori Sato --- arch/h8300/include/uapi/asm/bitsperlong.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 arch/h8300/include/uapi/asm/bitsperlong.h diff --git a/arch/h8300/include/uapi/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h deleted file mode 100644 index 0a9ffb344ea99..0000000000000 --- a/arch/h8300/include/uapi/asm/bitsperlong.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__ASM_H8300_BITS_PER_LONG -#define _UAPI__ASM_H8300_BITS_PER_LONG - -#include - -#if !defined(__ASSEMBLY__) -/* h8300-unknown-linux required long */ -#define __kernel_size_t __kernel_size_t -typedef unsigned long __kernel_size_t; -typedef long __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -#endif - -#endif /* _UAPI__ASM_H8300_BITS_PER_LONG */ From 5d90be1ecf44a116b503649e67e952a7ec070745 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Tue, 13 Aug 2019 19:19:35 +0900 Subject: [PATCH 005/851] h8300: Fix BOOT_LINK_OFFSET Signed-off-by: Yoshinori Sato --- arch/h8300/Kconfig | 3 --- arch/h8300/Kconfig.cpu | 4 ++++ arch/h8300/configs/edosk2674_defconfig | 10 +++------- arch/h8300/configs/h8300h-sim_defconfig | 8 ++------ arch/h8300/configs/h8s-sim_defconfig | 8 ++------ 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 14bb45644c0c8..ec800e9d5aadf 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -48,7 +48,4 @@ config NR_CPUS int default 1 -config BOOT_LINK_OFFSET - hex "zImage link offset" - source "arch/h8300/Kconfig.cpu" diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index b5e14d513e622..2b9cbaf41cd00 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -97,4 +97,8 @@ 
config OFFSET hex "Load offset" default 0 +config BOOT_LINK_OFFSET + hex "zImage link offset" + default 0x200000 + endmenu diff --git a/arch/h8300/configs/edosk2674_defconfig b/arch/h8300/configs/edosk2674_defconfig index 23791dcf6c259..bcf2edb8fff94 100644 --- a/arch/h8300/configs/edosk2674_defconfig +++ b/arch/h8300/configs/edosk2674_defconfig @@ -1,9 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_USELIB is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_UID16 is not set # CONFIG_SYSFS_SYSCALL is not set -# CONFIG_KALLSYMS is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set @@ -12,17 +10,17 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EVENTFD is not set # CONFIG_AIO is not set # CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLOB=y +CONFIG_BOOT_LINK_OFFSET=0x400000 +CONFIG_H8S_EDOSK2674=y # CONFIG_BLOCK is not set -CONFIG_H8S_SIM=y -CONFIG_H8300_BUILTIN_DTB="h8s_sim" # CONFIG_BINFMT_SCRIPT is not set CONFIG_BINFMT_FLAT=y # CONFIG_COREDUMP is not set -# CONFIG_UEVENT_HELPER is not set # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set @@ -32,9 +30,7 @@ CONFIG_BINFMT_FLAT=y # CONFIG_VT is not set # CONFIG_UNIX98_PTYS is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y -CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set diff --git a/arch/h8300/configs/h8300h-sim_defconfig b/arch/h8300/configs/h8300h-sim_defconfig index 7fc9c2f0acc00..1b90399758f3a 100644 --- a/arch/h8300/configs/h8300h-sim_defconfig +++ b/arch/h8300/configs/h8300h-sim_defconfig @@ -1,9 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_USELIB is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_UID16 is not set # CONFIG_SYSFS_SYSCALL is not set -# CONFIG_KALLSYMS is not set # 
CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set @@ -12,17 +10,17 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EVENTFD is not set # CONFIG_AIO is not set # CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLOB=y -# CONFIG_BLOCK is not set CONFIG_H8300H_SIM=y CONFIG_H8300_BUILTIN_DTB="h8300h_sim" +# CONFIG_BLOCK is not set # CONFIG_BINFMT_SCRIPT is not set CONFIG_BINFMT_FLAT=y # CONFIG_COREDUMP is not set -# CONFIG_UEVENT_HELPER is not set # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set @@ -32,9 +30,7 @@ CONFIG_BINFMT_FLAT=y # CONFIG_VT is not set # CONFIG_UNIX98_PTYS is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y -CONFIG_SERIAL_SH_SCI_EARLYCON=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set diff --git a/arch/h8300/configs/h8s-sim_defconfig b/arch/h8300/configs/h8s-sim_defconfig index 23791dcf6c259..4d46adcc21a48 100644 --- a/arch/h8300/configs/h8s-sim_defconfig +++ b/arch/h8300/configs/h8s-sim_defconfig @@ -1,9 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_USELIB is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_UID16 is not set # CONFIG_SYSFS_SYSCALL is not set -# CONFIG_KALLSYMS is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set @@ -12,17 +10,17 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EVENTFD is not set # CONFIG_AIO is not set # CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLOB=y -# CONFIG_BLOCK is not set CONFIG_H8S_SIM=y CONFIG_H8300_BUILTIN_DTB="h8s_sim" +# CONFIG_BLOCK is not set # CONFIG_BINFMT_SCRIPT is not set CONFIG_BINFMT_FLAT=y # CONFIG_COREDUMP is not set -# CONFIG_UEVENT_HELPER is not set # CONFIG_STANDALONE 
is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set @@ -32,9 +30,7 @@ CONFIG_BINFMT_FLAT=y # CONFIG_VT is not set # CONFIG_UNIX98_PTYS is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y -CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set From b2c03bb18a6dd957130ad8a37f660030e4b7932a Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:21:36 +0900 Subject: [PATCH 006/851] smc91x: remove GPIOLIB dependency. Signed-off-by: Yoshinori Sato --- drivers/net/ethernet/smsc/Kconfig | 1 - drivers/net/ethernet/smsc/smc91x.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index 9e1c3752b2004..64ca1b36b91e0 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -37,7 +37,6 @@ config SMC91X tristate "SMC 91C9x/91C1xxx support" select CRC32 select MII - depends on !OF || GPIOLIB depends on ARM || ARM64 || ATARI_ETHERNAT || COLDFIRE || \ MIPS || NIOS2 || SUPERH || XTENSA || H8300 ---help--- diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 90410f9d3b1aa..605dc17290efb 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2190,6 +2190,7 @@ static const struct of_device_id smc91x_match[] = { }; MODULE_DEVICE_TABLE(of, smc91x_match); +#if defined(CONFIG_GPIOLIB) /** * of_try_set_control_gpio - configure a gpio if it exists */ @@ -2214,6 +2215,15 @@ static int try_toggle_control_gpio(struct device *dev, return 0; } +#else +static int try_toggle_control_gpio(struct device *dev, + struct gpio_desc **desc, + const char *name, int index, + int value, unsigned int nsdelay) +{ + return 0; +} +#endif #endif /* From 2fdf50bd510fd183ee89e4fdea52e7474ed9709c Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 
2019 18:22:22 +0900 Subject: [PATCH 007/851] sh-sci: 8bit register fix. Signed-off-by: Yoshinori Sato --- drivers/tty/serial/sh-sci.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index c073aa7001c4f..d51edc40fcb18 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2965,10 +2965,7 @@ static int sci_init_single(struct platform_device *dev, port->fifosize = sci_port->params->fifosize; if (port->type == PORT_SCI) { - if (sci_port->reg_size >= 0x20) - port->regshift = 2; - else - port->regshift = 1; + port->regshift = sci_port->reg_size >> 4; } /* From 04fffbe01d2df57e600da35d3f31e3cca8ee14f4 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:23:24 +0900 Subject: [PATCH 008/851] h8300: update dts. Signed-off-by: Yoshinori Sato --- arch/h8300/boot/dts/edosk2674.dts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/h8300/boot/dts/edosk2674.dts b/arch/h8300/boot/dts/edosk2674.dts index d1733805ea67a..f9731fe8c7d09 100644 --- a/arch/h8300/boot/dts/edosk2674.dts +++ b/arch/h8300/boot/dts/edosk2674.dts @@ -61,7 +61,7 @@ compatible = "renesas,h8s-intc", "renesas,h8300-intc"; #interrupt-cells = <2>; interrupt-controller; - reg = <0xfffe00 24>; + reg = <0xfffe00 24>, <0xffff30 6>; }; bsc: memory-controller@fffec0 { @@ -79,7 +79,7 @@ timer8: timer@ffffb0 { compatible = "renesas,8bit-timer"; reg = <0xffffb0 10>; - interrupts = <72 0>; + interrupts = <72 0>, <73 0>, <74 0>; clocks = <&fclk>; clock-names = "fck"; }; @@ -105,4 +105,10 @@ clocks = <&fclk>; clock-names = "fck"; }; + ethernet: ethernet@f80000 { + compatible = "smsc,lan91c94"; + reg = <0xf80000 0xfbffff>; + reg-io-width = <1>; + interrupts = <16 0>; + }; }; From 951261bc483479d4c18ee435c35f94cccd659012 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:24:04 +0900 Subject: [PATCH 009/851] h8300_timer8: fix count mode. 
Signed-off-by: Yoshinori Sato --- drivers/clocksource/h8300_timer8.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c index 1d740a8c42ab3..5bf514c78d8b6 100644 --- a/drivers/clocksource/h8300_timer8.c +++ b/drivers/clocksource/h8300_timer8.c @@ -25,8 +25,8 @@ #define TCORB 6 #define _8TCNT 8 -#define CMIEA 6 -#define CMFA 6 +#define OVIE 5 +#define OVF 5 #define FLAG_STARTED (1 << 3) @@ -40,6 +40,7 @@ struct timer8_priv { void __iomem *mapbase; unsigned long flags; unsigned int rate; + uint16_t cnt; }; static irqreturn_t timer8_interrupt(int irq, void *dev_id) @@ -51,7 +52,8 @@ static irqreturn_t timer8_interrupt(int irq, void *dev_id) p->ced.event_handler(&p->ced); - bclr(CMFA, p->mapbase + _8TCSR); + iowrite16be(p->cnt, p->mapbase + _8TCNT); + bclr(OVF, p->mapbase + _8TCSR); return IRQ_HANDLED; } @@ -60,16 +62,14 @@ static void timer8_set_next(struct timer8_priv *p, unsigned long delta) { if (delta >= 0x10000) pr_warn("delta out of range\n"); - bclr(CMIEA, p->mapbase + _8TCR); - iowrite16be(delta, p->mapbase + TCORA); - iowrite16be(0x0000, p->mapbase + _8TCNT); - bclr(CMFA, p->mapbase + _8TCSR); - bset(CMIEA, p->mapbase + _8TCR); + p->cnt = 0x10000 - delta; + iowrite16be(p->cnt, p->mapbase + _8TCNT); + bclr(OVF, p->mapbase + _8TCSR); + bset(OVIE, p->mapbase + _8TCR); } static int timer8_enable(struct timer8_priv *p) { - iowrite16be(0xffff, p->mapbase + TCORA); iowrite16be(0x0000, p->mapbase + _8TCNT); iowrite16be(0x0c02, p->mapbase + _8TCR); @@ -177,7 +177,7 @@ static int __init h8300_8timer_init(struct device_node *node) } ret = -EINVAL; - irq = irq_of_parse_and_map(node, 0); + irq = irq_of_parse_and_map(node, 2); if (!irq) { pr_err("failed to get irq for clockevent\n"); goto unmap_reg; From 2c496a11304da8263e1dce9bdcef1ce04c027414 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:24:28 +0900 Subject: [PATCH 010/851] 
irq-renesas-h8s: fix interrupt handling. Signed-off-by: Yoshinori Sato --- drivers/irqchip/irq-renesas-h8s.c | 36 +++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-renesas-h8s.c b/drivers/irqchip/irq-renesas-h8s.c index 4e2461bae944d..b69f2c825a7f8 100644 --- a/drivers/irqchip/irq-renesas-h8s.c +++ b/drivers/irqchip/irq-renesas-h8s.c @@ -11,8 +11,9 @@ #include #include -static void *intc_baseaddr; -#define IPRA (intc_baseaddr) +static void *ipr_base; +static void *icr_base; +#define IPRA (ipr_base) static const unsigned char ipr_table[] = { 0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10, /* 16 - 23 */ @@ -41,8 +42,8 @@ static void h8s_disable_irq(struct irq_data *data) addr = IPRA + ((ipr_table[irq - 16] & 0xf0) >> 3); pos = (ipr_table[irq - 16] & 0x0f) * 4; pri = ~(0x000f << pos); - pri &= readw(addr); - writew(pri, addr); + pri &= __raw_readw(addr); + __raw_writew(pri, addr); } static void h8s_enable_irq(struct irq_data *data) @@ -55,15 +56,30 @@ static void h8s_enable_irq(struct irq_data *data) addr = IPRA + ((ipr_table[irq - 16] & 0xf0) >> 3); pos = (ipr_table[irq - 16] & 0x0f) * 4; pri = ~(0x000f << pos); - pri &= readw(addr); + pri &= __raw_readw(addr); pri |= 1 << pos; - writew(pri, addr); + __raw_writew(pri, addr); +} + +static void h8s_ack_irq(struct irq_data *data) +{ + void __iomem *isr_addr = icr_base + 4; + int irq = data->irq; + uint16_t isr; + + if (irq >= 16 && irq < 32) { + irq -= 16; + isr = __raw_readw(isr_addr); + isr &= ~(1 << irq); + __raw_writew(isr, isr_addr); + } } struct irq_chip h8s_irq_chip = { .name = "H8S-INTC", .irq_enable = h8s_enable_irq, .irq_disable = h8s_disable_irq, + .irq_ack = h8s_ack_irq, }; static __init int irq_map(struct irq_domain *h, unsigned int virq, @@ -85,14 +101,16 @@ static int __init h8s_intc_of_init(struct device_node *intc, struct irq_domain *domain; int n; - intc_baseaddr = of_iomap(intc, 0); - BUG_ON(!intc_baseaddr); + ipr_base = of_iomap(intc, 
0); + icr_base = of_iomap(intc, 1); + BUG_ON(!ipr_base || !icr_base); /* All interrupt priority is 0 (disable) */ /* IPRA to IPRK */ for (n = 0; n <= 'k' - 'a'; n++) - writew(0x0000, IPRA + (n * 2)); + __raw_writew(0x0000, IPRA + (n * 2)); + __raw_writew(0xffff, icr_base + 2); domain = irq_domain_add_linear(intc, NR_IRQS, &irq_ops, NULL); BUG_ON(!domain); irq_set_default_host(domain); From 8bd55b5fce8011d0026f7d233f7b45788f01ad0c Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 15 Apr 2020 21:59:05 +0900 Subject: [PATCH 011/851] irq-renesas-h8300h: Fix external interrupt control. Signed-off-by: Yoshinori Sato --- drivers/irqchip/irq-renesas-h8300h.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-h8300h.c b/drivers/irqchip/irq-renesas-h8300h.c index 1054d74b7edde..42e2918eee2fe 100644 --- a/drivers/irqchip/irq-renesas-h8300h.c +++ b/drivers/irqchip/irq-renesas-h8300h.c @@ -24,7 +24,10 @@ static const char ipr_bit[] = { static void __iomem *intc_baseaddr; -#define IPR (intc_baseaddr + 6) +#define ICSR (intc_baseaddr + 2) +#define IER (intc_baseaddr + 3) +#define ISR (intc_baseaddr + 4) +#define IPR (intc_baseaddr + 6) static void h8300h_disable_irq(struct irq_data *data) { @@ -38,6 +41,8 @@ static void h8300h_disable_irq(struct irq_data *data) else ctrl_bclr(bit & 7, (IPR+1)); } + if (irq < 6) + ctrl_bclr(irq, IER); } static void h8300h_enable_irq(struct irq_data *data) @@ -52,12 +57,24 @@ static void h8300h_enable_irq(struct irq_data *data) else ctrl_bset(bit & 7, (IPR+1)); } + if (irq < 6) + ctrl_bset(irq, IER); +} + +static void h8300h_ack_irq(struct irq_data *data) +{ + int bit; + int irq = data->irq - 12; + + if (irq < 6) + ctrl_bclr(irq, ISR); } struct irq_chip h8300h_irq_chip = { .name = "H8/300H-INTC", .irq_enable = h8300h_enable_irq, .irq_disable = h8300h_disable_irq, + .irq_eoi = h8300h_ack_irq, }; static int irq_map(struct irq_domain *h, unsigned int virq, From
beab3d5cb0e14e6397c4e596b3dbf1d36bf49d06 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 15 Apr 2020 21:59:50 +0900 Subject: [PATCH 012/851] irq-renesas-h8s: Fix external interrupt control. Signed-off-by: Yoshinori Sato --- drivers/irqchip/irq-renesas-h8s.c | 39 +++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/irqchip/irq-renesas-h8s.c b/drivers/irqchip/irq-renesas-h8s.c index b69f2c825a7f8..0b8d9d7ef2ee1 100644 --- a/drivers/irqchip/irq-renesas-h8s.c +++ b/drivers/irqchip/irq-renesas-h8s.c @@ -14,6 +14,8 @@ static void *ipr_base; static void *icr_base; #define IPRA (ipr_base) +#define IER (icr_base + 2) +#define ISR (icr_base + 4) static const unsigned char ipr_table[] = { 0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10, /* 16 - 23 */ @@ -37,13 +39,22 @@ static void h8s_disable_irq(struct irq_data *data) int pos; void __iomem *addr; unsigned short pri; - int irq = data->irq; + int irq = data->irq - 16; + unsigned short ier; + + if (irq < 0) + return; - addr = IPRA + ((ipr_table[irq - 16] & 0xf0) >> 3); - pos = (ipr_table[irq - 16] & 0x0f) * 4; + addr = IPRA + ((ipr_table[irq] & 0xf0) >> 3); + pos = (ipr_table[irq] & 0x0f) * 4; pri = ~(0x000f << pos); pri &= __raw_readw(addr); __raw_writew(pri, addr); + if (irq < 16) { + ier = __raw_readw(IER); + ier &= ~(1 << irq); + __raw_writew(ier, IER); + } } static void h8s_enable_irq(struct irq_data *data) @@ -51,27 +62,35 @@ static void h8s_enable_irq(struct irq_data *data) int pos; void __iomem *addr; unsigned short pri; - int irq = data->irq; + int irq = data->irq - 16; + unsigned short ier; + + if (irq < 0) + return; - addr = IPRA + ((ipr_table[irq - 16] & 0xf0) >> 3); - pos = (ipr_table[irq - 16] & 0x0f) * 4; + addr = IPRA + ((ipr_table[irq] & 0xf0) >> 3); + pos = (ipr_table[irq] & 0x0f) * 4; pri = ~(0x000f << pos); pri &= __raw_readw(addr); pri |= 1 << pos; __raw_writew(pri, addr); + if (irq < 16) { + ier = __raw_readw(IER); + ier &= ~(1 << irq); + 
__raw_writew(ier, IER); + } } static void h8s_ack_irq(struct irq_data *data) { - void __iomem *isr_addr = icr_base + 4; int irq = data->irq; uint16_t isr; if (irq >= 16 && irq < 32) { irq -= 16; - isr = __raw_readw(isr_addr); + isr = __raw_readw(ISR); isr &= ~(1 << irq); - __raw_writew(isr, isr_addr); + __raw_writew(isr, ISR); } } @@ -110,7 +129,7 @@ static int __init h8s_intc_of_init(struct device_node *intc, for (n = 0; n <= 'k' - 'a'; n++) __raw_writew(0x0000, IPRA + (n * 2)); - __raw_writew(0xffff, icr_base + 2); + __raw_writew(0xffff, IER); domain = irq_domain_add_linear(intc, NR_IRQS, &irq_ops, NULL); BUG_ON(!domain); irq_set_default_host(domain); From 35b34568912cb19c53b83d6daba96fe960941279 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Aug 2020 14:28:38 +0200 Subject: [PATCH 013/851] h8300: dts: Fix /chosen:stdout-path arch/h8300/boot/dts/h8s_sim.dts:11.3-25: Warning (chosen_node_stdout_path): /chosen:stdout-path: property is not a string arch/h8300/boot/dts/h8300h_sim.dts:11.3-25: Warning (chosen_node_stdout_path): /chosen:stdout-path: property is not a string Drop the angle brackets to fix this. A similar fix was already applied to arch/h8300/boot/dts/edosk2674.dts in commit 780ffcd51cb28717 ("h8300: register address fix"). 
Fixes: 38d6bded13084d50 ("h8300: devicetree source") Signed-off-by: Geert Uytterhoeven Reviewed-by: Masahiro Yamada Signed-off-by: Yoshinori Sato --- arch/h8300/boot/dts/h8300h_sim.dts | 2 +- arch/h8300/boot/dts/h8s_sim.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/h8300/boot/dts/h8300h_sim.dts b/arch/h8300/boot/dts/h8300h_sim.dts index 595398b9d0180..e1d4d9b7f6b40 100644 --- a/arch/h8300/boot/dts/h8300h_sim.dts +++ b/arch/h8300/boot/dts/h8300h_sim.dts @@ -8,7 +8,7 @@ chosen { bootargs = "earlyprintk=h8300-sim"; - stdout-path = <&sci0>; + stdout-path = &sci0; }; aliases { serial0 = &sci0; diff --git a/arch/h8300/boot/dts/h8s_sim.dts b/arch/h8300/boot/dts/h8s_sim.dts index 932cc3c5a81bc..4848e40e607ec 100644 --- a/arch/h8300/boot/dts/h8s_sim.dts +++ b/arch/h8300/boot/dts/h8s_sim.dts @@ -8,7 +8,7 @@ chosen { bootargs = "earlyprintk=h8300-sim"; - stdout-path = <&sci0>; + stdout-path = &sci0; }; aliases { serial0 = &sci0; From 8808515be0ed4e33de9bfdc65f4c1b547ee11065 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Aug 2020 14:29:25 +0200 Subject: [PATCH 014/851] h8300: Replace <linux/clk-provider.h> by <linux/of_clk.h> The H8/300 platform code is not a clock provider, and just needs to call of_clk_init(). Hence it can include <linux/of_clk.h> instead of <linux/clk-provider.h>.
Signed-off-by: Geert Uytterhoeven Reviewed-by: Stephen Boyd Signed-off-by: Yoshinori Sato --- arch/h8300/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 28ac88358a89a..0ecaac7dd7e93 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include From 1efbcec2ef8c037f1e801c76e4b9434ee2400be7 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 9 Dec 2020 21:34:48 +0100 Subject: [PATCH 015/851] =?UTF-8?q?coresight:=20cti:=20Reduce=20scope=20fo?= =?UTF-8?q?r=20the=20variable=20=E2=80=9Ccs=5Ffwnode=E2=80=9D=20in=20cti?= =?UTF-8?q?=5Fplat=5Fcreate=5Fconnection()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A local variable was used only within an else branch. Thus move the definition for the variable “cs_fwnode” into the corresponding code block. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-cti-platform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-cti-platform.c b/drivers/hwtracing/coresight/coresight-cti-platform.c index 98f830c6ed507..ccef04f27f12f 100644 --- a/drivers/hwtracing/coresight/coresight-cti-platform.c +++ b/drivers/hwtracing/coresight/coresight-cti-platform.c @@ -343,7 +343,6 @@ static int cti_plat_create_connection(struct device *dev, { struct cti_trig_con *tc = NULL; int cpuid = -1, err = 0; - struct fwnode_handle *cs_fwnode = NULL; struct coresight_device *csdev = NULL; const char *assoc_name = "unknown"; char cpu_name_str[16]; @@ -397,8 +396,9 @@ static int cti_plat_create_connection(struct device *dev, assoc_name = cpu_name_str; } else { /* associated device ? 
*/ - cs_fwnode = fwnode_find_reference(fwnode, - CTI_DT_CSDEV_ASSOC, 0); + struct fwnode_handle *cs_fwnode = fwnode_find_reference(fwnode, + CTI_DT_CSDEV_ASSOC, + 0); if (!IS_ERR(cs_fwnode)) { assoc_name = cti_plat_get_csdev_or_node_name(cs_fwnode, &csdev); From 7c76194275c9faa2fc3bbdfb27b266f3725aff79 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 8 Jan 2021 12:48:47 +0100 Subject: [PATCH 016/851] Revert "init/console: Use ttynull as a fallback when there is no console" This reverts commit 757055ae8dedf5333af17b3b5b4b70ba9bc9da4e. The commit caused that ttynull was used as the default console on several systems[1][2][3]. As a result, the console was blank even when a better alternative existed. It happened when there was no console configured on the command line and ttynull_init() was the first initcall calling register_console(). Or it happened when /dev/ did not exist when console_on_rootfs() was called. It was not able to open /dev/console even though a console driver was registered. It tried to add ttynull console but it obviously did not help. But ttynull became the preferred console and was used by /dev/console when it was available later. The commit tried to fix a historical problem that have been there for ages. The primary motivation was the commit 3cffa06aeef7ece30f6 ("printk/console: Allow to disable console output by using console="" or console=null"). It provided a clean solution for a workaround that was widely used and worked only by chance. This revert causes that the console="" or console=null command line options will again work only by chance. These options will cause that a particular console will be preferred and the default (tty) ones will not get enabled. There will be no console registered at all. As a result there won't be stdin, stdout, and stderr for the init process. But it worked exactly this way even before. 
The proper solution has to fulfill many conditions: + Register ttynull only when explicitly required or as the ultimate fallback. + ttynull should get associated with /dev/console but it must not become the preferred console when used as a fallback. Especially, it must still be possible to replace it by a better console later. Such a change requires clean up of the register_console() code. Otherwise, it would be even harder to follow. Especially, the use of has_preferred_console and CON_CONSDEV flag is tricky. The clean up is risky. The ordering of consoles is not well defined. And any changes tend to break existing user settings. Do the revert as the least risky solution for now. [1] https://lore.kernel.org/linux-kselftest/20201221144302.GR4077@smile.fi.intel.com/ [2] https://lore.kernel.org/lkml/d2a3b3c0-e548-7dd1-730f-59bc5c04e191@synopsys.com/ [3] https://patchwork.ozlabs.org/project/linux-um/patch/20210105120128.10854-1-thomas@m3y3r.de/ Reported-by: Andy Shevchenko Reported-by: Vineet Gupta Reported-by: Thomas Meyer Acked-by: Greg Kroah-Hartman Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210108114847.23469-1-pmladek@suse.com --- drivers/tty/Kconfig | 14 ++++++++++++++ drivers/tty/Makefile | 3 ++- drivers/tty/ttynull.c | 18 ------------------ include/linux/console.h | 3 --- init/main.c | 10 ++-------- 5 files changed, 18 insertions(+), 30 deletions(-) diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 47a6e42f0d04f..e15cd6b5bb99a 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -401,6 +401,20 @@ config MIPS_EJTAG_FDC_KGDB_CHAN help FDC channel number to use for KGDB. +config NULL_TTY + tristate "NULL TTY driver" + help + Say Y here if you want a NULL TTY which simply discards messages. + + This is useful to allow userspace applications which expect a console + device to work without modifications even when no console is + available or desired.
+ + In order to use this driver, you should redirect the console to this + TTY, or boot the kernel with console=ttynull. + + If unsure, say N. + config TRACE_ROUTER tristate "Trace data router for MIPI P1149.7 cJTAG standard" depends on TRACE_SINK diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile index 3c1c5a9240a70..b3ccae9326601 100644 --- a/drivers/tty/Makefile +++ b/drivers/tty/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_TTY) += tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o \ tty_buffer.o tty_port.o tty_mutex.o \ tty_ldsem.o tty_baudrate.o tty_jobctrl.o \ - n_null.o ttynull.o + n_null.o obj-$(CONFIG_LEGACY_PTYS) += pty.o obj-$(CONFIG_UNIX98_PTYS) += pty.o obj-$(CONFIG_AUDIT) += tty_audit.o @@ -25,6 +25,7 @@ obj-$(CONFIG_ISI) += isicom.o obj-$(CONFIG_MOXA_INTELLIO) += moxa.o obj-$(CONFIG_MOXA_SMARTIO) += mxser.o obj-$(CONFIG_NOZOMI) += nozomi.o +obj-$(CONFIG_NULL_TTY) += ttynull.o obj-$(CONFIG_ROCKETPORT) += rocket.o obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o obj-$(CONFIG_PPC_EPAPR_HV_BYTECHAN) += ehv_bytechan.o diff --git a/drivers/tty/ttynull.c b/drivers/tty/ttynull.c index eced70ec54e17..17f05b7eb6d3e 100644 --- a/drivers/tty/ttynull.c +++ b/drivers/tty/ttynull.c @@ -2,13 +2,6 @@ /* * Copyright (C) 2019 Axis Communications AB * - * The console is useful for userspace applications which expect a console - * device to work without modifications even when no console is available - * or desired. - * - * In order to use this driver, you should redirect the console to this - * TTY, or boot the kernel with console=ttynull. 
- * * Based on ttyprintk.c: * Copyright (C) 2010 Samo Pogacnik */ @@ -66,17 +59,6 @@ static struct console ttynull_console = { .device = ttynull_device, }; -void __init register_ttynull_console(void) -{ - if (!ttynull_driver) - return; - - if (add_preferred_console(ttynull_console.name, 0, NULL)) - return; - - register_console(&ttynull_console); -} - static int __init ttynull_init(void) { struct tty_driver *driver; diff --git a/include/linux/console.h b/include/linux/console.h index dbe78e8e26029..20874db50bc8a 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -186,12 +186,9 @@ extern int braille_register_console(struct console *, int index, extern int braille_unregister_console(struct console *); #ifdef CONFIG_TTY extern void console_sysfs_notify(void); -extern void register_ttynull_console(void); #else static inline void console_sysfs_notify(void) { } -static inline void register_ttynull_console(void) -{ } #endif extern bool console_suspend_enabled; diff --git a/init/main.c b/init/main.c index 6feee7f11eafc..3024c4db17a92 100644 --- a/init/main.c +++ b/init/main.c @@ -1480,14 +1480,8 @@ void __init console_on_rootfs(void) struct file *file = filp_open("/dev/console", O_RDWR, 0); if (IS_ERR(file)) { - pr_err("Warning: unable to open an initial console. Fallback to ttynull.\n"); - register_ttynull_console(); - - file = filp_open("/dev/console", O_RDWR, 0); - if (IS_ERR(file)) { - pr_err("Warning: Failed to add ttynull console. 
No stdin, stdout, and stderr for the init process!\n"); - return; - } + pr_err("Warning: unable to open an initial console.\n"); + return; } init_dup(file); init_dup(file); From b1deeeb93933d39098cf9b192ae0ac347e9bbd02 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 31 Dec 2020 23:23:01 +0000 Subject: [PATCH 017/851] sh: fix trivial misannotations Trivial misannotations in * get_user() (__gu_addr is a userland pointer there) * ip_fast_csum() (sum is __wsum, not unsigned int) * csum_and_copy_to_user() (destination is void *, not const void * - mea culpa) * __clear_user() (to is a userland pointer) * several places in kernel/traps_32.c (regs->pc is a userland pointer when regs is a userland pt_regs) * math-emu/math.c: READ() and WRITE() casts of address should be to userland pointer. No changes in code generation and those take care of the majority of noise from sparse on sh builds. Signed-off-by: Al Viro Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/include/asm/checksum_32.h | 5 +++-- arch/sh/include/asm/uaccess.h | 4 ++-- arch/sh/kernel/traps_32.c | 8 ++++---- arch/sh/math-emu/math.c | 4 ++-- arch/sh/mm/nommu.c | 4 ++-- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h index 1a391e3a76599..a6501b856f3eb 100644 --- a/arch/sh/include/asm/checksum_32.h +++ b/arch/sh/include/asm/checksum_32.h @@ -84,7 +84,8 @@ static inline __sum16 csum_fold(__wsum sum) */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { - unsigned int sum, __dummy0, __dummy1; + __wsum sum; + unsigned int __dummy0, __dummy1; __asm__ __volatile__( "mov.l @%1+, %0\n\t" @@ -197,6 +198,6 @@ static inline __wsum csum_and_copy_to_user(const void *src, { if (!access_ok(dst, len)) return 0; - return csum_partial_copy_generic((__force const void *)src, dst, len); + return csum_partial_copy_generic(src, (__force void *)dst, len); } #endif /* __ASM_SH_CHECKSUM_H */ diff --git 
a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index 73f3b48d4a34b..8867bb04b00e2 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -68,7 +68,7 @@ struct __large_struct { unsigned long buf[100]; }; ({ \ long __gu_err = -EFAULT; \ unsigned long __gu_val = 0; \ - const __typeof__(*(ptr)) *__gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ if (likely(access_ok(__gu_addr, (size)))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -124,7 +124,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) * Clear the area and return remaining number of bytes * (on failure. Usually it's 0.) */ -__kernel_size_t __clear_user(void *addr, __kernel_size_t size); +__kernel_size_t __clear_user(void __user *addr, __kernel_size_t size); #define clear_user(addr,n) \ ({ \ diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index b62ad0ba23950..b3c715bc254b2 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -490,7 +490,7 @@ asmlinkage void do_address_error(struct pt_regs *regs, inc_unaligned_user_access(); oldfs = force_uaccess_begin(); - if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1), + if (copy_from_user(&instruction, (insn_size_t __user *)(regs->pc & ~1), sizeof(instruction))) { force_uaccess_end(oldfs); goto uspace_segv; @@ -614,7 +614,7 @@ asmlinkage void do_reserved_inst(void) unsigned short inst = 0; int err; - get_user(inst, (unsigned short*)regs->pc); + get_user(inst, (unsigned short __user *)regs->pc); err = do_fpu_inst(inst, regs); if (!err) { @@ -699,9 +699,9 @@ asmlinkage void do_illegal_slot_inst(void) return; #ifdef CONFIG_SH_FPU_EMU - get_user(inst, (unsigned short *)regs->pc + 1); + get_user(inst, (unsigned short __user *)regs->pc + 1); if (!do_fpu_inst(inst, regs)) { - get_user(inst, (unsigned short *)regs->pc); + get_user(inst, (unsigned short __user *)regs->pc); if 
(!emulate_branch(inst, regs)) return; /* fault in branch.*/ diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index e8be0eca0444a..3495a48b77138 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -51,8 +51,8 @@ #define Rn (regs->regs[n]) #define Rm (regs->regs[m]) -#define WRITE(d,a) ({if(put_user(d, (typeof (d)*)a)) return -EFAULT;}) -#define READ(d,a) ({if(get_user(d, (typeof (d)*)a)) return -EFAULT;}) +#define WRITE(d,a) ({if(put_user(d, (typeof (d) __user *)a)) return -EFAULT;}) +#define READ(d,a) ({if(get_user(d, (typeof (d) __user *)a)) return -EFAULT;}) #define PACK_S(r,f) FP_PACK_SP(&r,f) #define UNPACK_S(f,r) FP_UNPACK_SP(f,&r) diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c index 8b4504413c5f6..78c4b6e6d33ba 100644 --- a/arch/sh/mm/nommu.c +++ b/arch/sh/mm/nommu.c @@ -28,9 +28,9 @@ __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n) return 0; } -__kernel_size_t __clear_user(void *to, __kernel_size_t n) +__kernel_size_t __clear_user(void __user *to, __kernel_size_t n) { - memset(to, 0, n); + memset((__force void *)to, 0, n); return 0; } From 1e5b1406fbd236dd505df4b8766d73613f5e7b91 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 22 Dec 2020 12:54:01 -0800 Subject: [PATCH 018/851] sh: check return code of request_irq request_irq is marked __must_check, but the call in shx3_prepare_cpus has a void return type, so it can't propagate failure to the caller. Follow cues from hexagon and just print an error. 
Fixes: c7936b9abcf5 ("sh: smp: Hook in to the generic IPI handler for SH-X3 SMP.") Cc: Miguel Ojeda Cc: Paul Mundt Reported-by: Guenter Roeck Signed-off-by: Nick Desaulniers Tested-by: John Paul Adrian Glaubitz Reviewed-by: Miguel Ojeda Signed-off-by: Rich Felker --- arch/sh/kernel/cpu/sh4a/smp-shx3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index f8a2bec0f260b..1261dc7b84e8b 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -73,8 +73,9 @@ static void shx3_prepare_cpus(unsigned int max_cpus) BUILD_BUG_ON(SMP_MSG_NR >= 8); for (i = 0; i < SMP_MSG_NR; i++) - request_irq(104 + i, ipi_interrupt_handler, - IRQF_PERCPU, "IPI", (void *)(long)i); + if (request_irq(104 + i, ipi_interrupt_handler, + IRQF_PERCPU, "IPI", (void *)(long)i)) + pr_err("Failed to request irq %d\n", i); for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); From e2163ce7002b6fdfed7079c4dedfd248bd95159e Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Thu, 19 Nov 2020 18:56:56 +0800 Subject: [PATCH 019/851] sh: boards: Fix the cacography in irq.c The word 'swtich' is wrong, so fix it. 
Signed-off-by: Tang Bin Reviewed-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/boards/mach-landisk/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/mach-landisk/irq.c b/arch/sh/boards/mach-landisk/irq.c index 29b8b1f852466..0b672b80c5617 100644 --- a/arch/sh/boards/mach-landisk/irq.c +++ b/arch/sh/boards/mach-landisk/irq.c @@ -26,8 +26,8 @@ enum { PCI_INTD, /* PCI int D */ ATA, /* ATA */ FATA, /* CF */ - POWER, /* Power swtich */ - BUTTON, /* Button swtich */ + POWER, /* Power switch */ + BUTTON, /* Button switch */ }; /* Vectors for LANDISK */ From 97709ef0221729c91a175f76ae3d11c8e31c48fa Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 17 Jan 2021 20:16:31 +0900 Subject: [PATCH 020/851] sh: boot: add intermediate vmlinux.bin* to targets instead of extra-y You do not need to build all of vmlinux.bin* They are built on demand as prerequisites of uImage.bin*, hence should be added to targets instead of extra-y. Signed-off-by: Masahiro Yamada Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/boot/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile index 58592dfa5cb60..dded61296c9a0 100644 --- a/arch/sh/boot/Makefile +++ b/arch/sh/boot/Makefile @@ -27,8 +27,8 @@ suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo targets := zImage vmlinux.srec romImage uImage uImage.srec uImage.gz \ - uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin -extra-y += vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ + uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin \ + vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo subdir- := compressed romimage From 7ec713058f2191d7632d9f49b6ca290ab5cbf1ed Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 17 Jan 2021 20:16:32 +0900 Subject: [PATCH 021/851] sh: boot: avoid unneeded rebuilds
under arch/sh/boot/compressed/ Even if none of source code is updated, the following are every time rebuilt: CC arch/sh/boot/compressed/cache.o SHIPPED arch/sh/boot/compressed/ashiftrt.S AS arch/sh/boot/compressed/ashiftrt.o SHIPPED arch/sh/boot/compressed/ashldi3.c CC arch/sh/boot/compressed/ashldi3.o SHIPPED arch/sh/boot/compressed/ashrsi3.S AS arch/sh/boot/compressed/ashrsi3.o SHIPPED arch/sh/boot/compressed/ashlsi3.S AS arch/sh/boot/compressed/ashlsi3.o SHIPPED arch/sh/boot/compressed/lshrsi3.S AS arch/sh/boot/compressed/lshrsi3.o LD arch/sh/boot/compressed/vmlinux OBJCOPY arch/sh/boot/zImage Add build artifacts to 'targets' as needed. I turned the library files to check-in files. It is simpler than copying from arch/sh/lib/ at build-time. Signed-off-by: Masahiro Yamada Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/boot/compressed/.gitignore | 5 ----- arch/sh/boot/compressed/Makefile | 32 ++++++++++++------------------ arch/sh/boot/compressed/ashiftrt.S | 2 ++ arch/sh/boot/compressed/ashldi3.c | 2 ++ arch/sh/boot/compressed/ashlsi3.S | 2 ++ arch/sh/boot/compressed/ashrsi3.S | 2 ++ arch/sh/boot/compressed/lshrsi3.S | 2 ++ 7 files changed, 23 insertions(+), 24 deletions(-) create mode 100644 arch/sh/boot/compressed/ashiftrt.S create mode 100644 arch/sh/boot/compressed/ashldi3.c create mode 100644 arch/sh/boot/compressed/ashlsi3.S create mode 100644 arch/sh/boot/compressed/ashrsi3.S create mode 100644 arch/sh/boot/compressed/lshrsi3.S diff --git a/arch/sh/boot/compressed/.gitignore b/arch/sh/boot/compressed/.gitignore index 37aa53057369c..cd16663bc7c84 100644 --- a/arch/sh/boot/compressed/.gitignore +++ b/arch/sh/boot/compressed/.gitignore @@ -1,7 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -ashiftrt.S -ashldi3.c -ashlsi3.S -ashrsi3.S -lshrsi3.S vmlinux.bin.* diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile index 589d2d8a573db..cf3174df7859e 100644 --- a/arch/sh/boot/compressed/Makefile +++ 
b/arch/sh/boot/compressed/Makefile @@ -5,12 +5,18 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz \ - vmlinux.bin.bz2 vmlinux.bin.lzma \ - vmlinux.bin.xz vmlinux.bin.lzo \ - head_32.o misc.o piggy.o +OBJECTS := head_32.o misc.o cache.o piggy.o \ + ashiftrt.o ashldi3.o ashrsi3.o ashlsi3.o lshrsi3.o + +# These were previously generated files. When you are building the kernel +# with O=, make sure to remove the stale files in the output tree. Otherwise, +# the build system wrongly compiles the stale ones. +ifdef building_out_of_srctree +$(shell rm -f $(addprefix $(obj)/, ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S)) +endif -OBJECTS = $(obj)/head_32.o $(obj)/misc.o $(obj)/cache.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ + vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo $(OBJECTS) GCOV_PROFILE := n @@ -33,21 +39,9 @@ ccflags-remove-$(CONFIG_MCOUNT) += -pg LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \ -T $(obj)/../../kernel/vmlinux.lds -# -# Pull in the necessary libgcc bits from the in-kernel implementation. 
-# -lib1funcs-y := ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S -lib1funcs-obj := \ - $(addsuffix .o, $(basename $(addprefix $(obj)/, $(lib1funcs-y)))) - -lib1funcs-dir := $(srctree)/arch/$(SRCARCH)/lib - -KBUILD_CFLAGS += -I$(lib1funcs-dir) -DDISABLE_BRANCH_PROFILING - -$(addprefix $(obj)/,$(lib1funcs-y)): $(obj)/%: $(lib1funcs-dir)/% FORCE - $(call cmd,shipped) +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(lib1funcs-obj) FORCE +$(obj)/vmlinux: $(addprefix $(obj)/, $(OBJECTS)) FORCE $(call if_changed,ld) $(obj)/vmlinux.bin: vmlinux FORCE diff --git a/arch/sh/boot/compressed/ashiftrt.S b/arch/sh/boot/compressed/ashiftrt.S new file mode 100644 index 0000000000000..0f3b291a3f4bc --- /dev/null +++ b/arch/sh/boot/compressed/ashiftrt.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include "../../lib/ashiftrt.S" diff --git a/arch/sh/boot/compressed/ashldi3.c b/arch/sh/boot/compressed/ashldi3.c new file mode 100644 index 0000000000000..7cebd646df839 --- /dev/null +++ b/arch/sh/boot/compressed/ashldi3.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../lib/ashldi3.c" diff --git a/arch/sh/boot/compressed/ashlsi3.S b/arch/sh/boot/compressed/ashlsi3.S new file mode 100644 index 0000000000000..e354262b275f4 --- /dev/null +++ b/arch/sh/boot/compressed/ashlsi3.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include "../../lib/ashlsi3.S" diff --git a/arch/sh/boot/compressed/ashrsi3.S b/arch/sh/boot/compressed/ashrsi3.S new file mode 100644 index 0000000000000..e564be9a4dcd8 --- /dev/null +++ b/arch/sh/boot/compressed/ashrsi3.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include "../../lib/ashrsi3.S" diff --git a/arch/sh/boot/compressed/lshrsi3.S b/arch/sh/boot/compressed/lshrsi3.S new file mode 100644 index 0000000000000..5a8281b7e5161 --- /dev/null +++ b/arch/sh/boot/compressed/lshrsi3.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ 
+#include "../../lib/lshrsi3.S" From 71ae1f7c775ed6cc9a1f31e34030b3695f3d42a1 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Thu, 26 Nov 2020 10:43:11 +0800 Subject: [PATCH 022/851] maple: fix wrong return value of maple_bus_init(). If KMEM_CACHE or maple_alloc_dev failed, the maple_bus_init() will return 0 rather than error, because the retval is not changed after KMEM_CACHE or maple_alloc_dev failed. Fixes: 17be2d2b1c33 ("sh: Add maple bus support for the SEGA Dreamcast.") Reported-by: Hulk Robot Signed-off-by: Lu Wei Acked-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- drivers/sh/maple/maple.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index e5d7fb81ad665..44a931d41a132 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -835,8 +835,10 @@ static int __init maple_bus_init(void) maple_queue_cache = KMEM_CACHE(maple_buffer, SLAB_HWCACHE_ALIGN); - if (!maple_queue_cache) + if (!maple_queue_cache) { + retval = -ENOMEM; goto cleanup_bothirqs; + } INIT_LIST_HEAD(&maple_waitq); INIT_LIST_HEAD(&maple_sentq); @@ -849,6 +851,7 @@ static int __init maple_bus_init(void) if (!mdev[i]) { while (i-- > 0) maple_free_dev(mdev[i]); + retval = -ENOMEM; goto cleanup_cache; } baseunits[i] = mdev[i]; From 258cf5f8aef48f76e1933fc9524a476d21e21e68 Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Thu, 10 Dec 2020 14:49:58 +0800 Subject: [PATCH 023/851] sh: kdump: add some attribute to function add '__iomem' for ioremap() and '__user' for copy_to_user(). 
Signed-off-by: Yejune Deng Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/kernel/crash_dump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c index a9086127b16db..5b41b59698c1e 100644 --- a/arch/sh/kernel/crash_dump.c +++ b/arch/sh/kernel/crash_dump.c @@ -26,7 +26,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { - void *vaddr; + void __iomem *vaddr; if (!csize) return 0; @@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); if (userbuf) { - if (copy_to_user(buf, (vaddr + offset), csize)) { + if (copy_to_user((void __user *)buf, (vaddr + offset), csize)) { iounmap(vaddr); return -EFAULT; } From 2882b7626f4903a8e9250b328cdf7396a6deecac Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 10 Dec 2020 13:44:22 +0100 Subject: [PATCH 024/851] sh: kernel: traps: remove unused variable When building defconfig the following warning shows up: arch/sh/kernel/traps.c: In function 'nmi_trap_handler': arch/sh/kernel/traps.c:183:15: warning: unused variable 'cpu' [-Wunused-variable] unsigned int cpu = smp_processor_id(); ^~~ Remove an unused variable 'cpu'. 
Fixes: fe3f1d5d7cd3 ("sh: Get rid of nmi_count()") Signed-off-by: Anders Roxell Reviewed-by: Geert Uytterhoeven Acked-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index f5beecdac6938..e76b221570999 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { } BUILD_TRAP_HANDLER(nmi) { - unsigned int cpu = smp_processor_id(); TRAP_HANDLER_DECL; arch_ftrace_nmi_enter(); From fbafce2f535dc8d18ef2f9e1f7a1e4672c0ea469 Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Fri, 22 Jan 2021 13:10:51 -0500 Subject: [PATCH 025/851] certs: Add EFI_CERT_X509_GUID support for dbx entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes CVE-2020-26541. The Secure Boot Forbidden Signature Database, dbx, contains a list of now revoked signatures and keys previously approved to boot with UEFI Secure Boot enabled. The dbx is capable of containing any number of EFI_CERT_X509_SHA256_GUID, EFI_CERT_SHA256_GUID, and EFI_CERT_X509_GUID entries. Currently when EFI_CERT_X509_GUID are contained in the dbx, the entries are skipped. Add support for EFI_CERT_X509_GUID dbx entries. When a EFI_CERT_X509_GUID is found, it is added as an asymmetrical key to the .blacklist keyring. Anytime the .platform keyring is used, the keys in the .blacklist keyring are referenced, if a matching key is found, the key will be rejected. [DH: Made the following changes: - Added to have a config option to enable the facility. This allows a Kconfig solution to make sure that pkcs7_validate_trust() is enabled. - Moved the functions out from the middle of the blacklist functions. - Added kerneldoc comments.] 
Signed-off-by: Eric Snowberg Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen cc: Randy Dunlap cc: Mickaël Salaün cc: Arnd Bergmann cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20200901165143.10295-1-eric.snowberg@oracle.com/ Link: https://lore.kernel.org/r/20200909172736.73003-1-eric.snowberg@oracle.com/ # v2 Link: https://lore.kernel.org/r/20200911182230.62266-1-eric.snowberg@oracle.com/ # v3 Link: https://lore.kernel.org/r/20200916004927.64276-1-eric.snowberg@oracle.com/ # v4 Link: https://lore.kernel.org/r/2660556.1610545213@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/20210122181054.32635-2-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/bc2c24e3-ed68-2521-0bf4-a1f6be4a895d@infradead.org/ Link: https://lore.kernel.org/r/20210225125638.1841436-1-arnd@kernel.org/ Link: https://lore.kernel.org/r/161428672051.677100.11064981943343605138.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161433310942.902181.4901864302675874242.stgit@warthog.procyon.org.uk/ # v2 --- certs/Kconfig | 9 ++++ certs/blacklist.c | 43 +++++++++++++++++++ certs/blacklist.h | 2 + certs/system_keyring.c | 6 +++ include/keys/system_keyring.h | 15 +++++++ .../platform_certs/keyring_handler.c | 11 +++++ 6 files changed, 86 insertions(+) diff --git a/certs/Kconfig b/certs/Kconfig index c94e93d8bccf0..76e469b56a773 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -83,4 +83,13 @@ config SYSTEM_BLACKLIST_HASH_LIST wrapper to incorporate the list into the kernel. Each should be a string of hex digits. +config SYSTEM_REVOCATION_LIST + bool "Provide system-wide ring of revocation certificates" + depends on SYSTEM_BLACKLIST_KEYRING + depends on PKCS7_MESSAGE_PARSER=y + help + If set, this allows revocation certificates to be stored in the + blacklist keyring and implements a hook whereby a PKCS#7 message can + be checked to see if it matches such a certificate. 
+ endmenu diff --git a/certs/blacklist.c b/certs/blacklist.c index bffe4c6f4a9e2..2b8644123d5fd 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -145,6 +145,49 @@ int is_binary_blacklisted(const u8 *hash, size_t hash_len) } EXPORT_SYMBOL_GPL(is_binary_blacklisted); +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/** + * add_key_to_revocation_list - Add a revocation certificate to the blacklist + * @data: The data blob containing the certificate + * @size: The size of data blob + */ +int add_key_to_revocation_list(const char *data, size_t size) +{ + key_ref_t key; + + key = key_create_or_update(make_key_ref(blacklist_keyring, true), + "asymmetric", + NULL, + data, + size, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW), + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN); + + if (IS_ERR(key)) { + pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key)); + return PTR_ERR(key); + } + + return 0; +} + +/** + * is_key_on_revocation_list - Determine if the key for a PKCS#7 message is revoked + * @pkcs7: The PKCS#7 message to check + */ +int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + int ret; + + ret = pkcs7_validate_trust(pkcs7, blacklist_keyring); + + if (ret == 0) + return -EKEYREJECTED; + + return -ENOKEY; +} +#endif + /* * Initialise the blacklist */ diff --git a/certs/blacklist.h b/certs/blacklist.h index 1efd6fa0dc608..51b320cf85749 100644 --- a/certs/blacklist.h +++ b/certs/blacklist.h @@ -1,3 +1,5 @@ #include +#include +#include extern const char __initconst *const blacklist_hashes[]; diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 4b693da488f14..ed98754d5795a 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -242,6 +242,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len, pr_devel("PKCS#7 platform keyring is not available\n"); goto error; } + + ret = is_key_on_revocation_list(pkcs7); + if (ret != -ENOKEY) { + pr_devel("PKCS#7 platform key is on revocation list\n"); + goto error; + } } ret = 
pkcs7_validate_trust(pkcs7, trusted_keys); if (ret < 0) { diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index fb8b07daa9d15..875e002a41804 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -31,6 +31,7 @@ extern int restrict_link_by_builtin_and_secondary_trusted( #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted #endif +extern struct pkcs7_message *pkcs7; #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING extern int mark_hash_blacklisted(const char *hash); extern int is_hash_blacklisted(const u8 *hash, size_t hash_len, @@ -49,6 +50,20 @@ static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len) } #endif +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern int add_key_to_revocation_list(const char *data, size_t size); +extern int is_key_on_revocation_list(struct pkcs7_message *pkcs7); +#else +static inline int add_key_to_revocation_list(const char *data, size_t size) +{ + return 0; +} +static inline int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + return -ENOKEY; +} +#endif + #ifdef CONFIG_IMA_BLACKLIST_KEYRING extern struct key *ima_blacklist_keyring; diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c index c5ba695c10e3a..5604bd57c9907 100644 --- a/security/integrity/platform_certs/keyring_handler.c +++ b/security/integrity/platform_certs/keyring_handler.c @@ -55,6 +55,15 @@ static __init void uefi_blacklist_binary(const char *source, uefi_blacklist_hash(source, data, len, "bin:", 4); } +/* + * Add an X509 cert to the revocation list. + */ +static __init void uefi_revocation_list_x509(const char *source, + const void *data, size_t len) +{ + add_key_to_revocation_list(data, len); +} + /* * Return the appropriate handler for particular signature list types found in * the UEFI db and MokListRT tables. 
@@ -76,5 +85,7 @@ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) return uefi_blacklist_x509_tbs; if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) return uefi_blacklist_binary; + if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) + return uefi_revocation_list_x509; return 0; } From 9536390dcc8cc19da6442f45c9e0ee14bcfa6dfc Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Fri, 22 Jan 2021 13:10:52 -0500 Subject: [PATCH 026/851] certs: Move load_system_certificate_list to a common function Move functionality within load_system_certificate_list to a common function, so it can be reused in the future. DH Changes: - Added inclusion of common.h to common.c (Eric [1]). Signed-off-by: Eric Snowberg Acked-by: Jarkko Sakkinen Signed-off-by: David Howells cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20200930201508.35113-2-eric.snowberg@oracle.com/ Link: https://lore.kernel.org/r/20210122181054.32635-3-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/EDA280F9-F72D-4181-93C7-CDBE95976FF7@oracle.com/ [1] Link: https://lore.kernel.org/r/161428672825.677100.7545516389752262918.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161433311696.902181.3599366124784670368.stgit@warthog.procyon.org.uk/ # v2 --- certs/Makefile | 2 +- certs/common.c | 57 ++++++++++++++++++++++++++++++++++++++++++ certs/common.h | 9 +++++++ certs/system_keyring.c | 49 +++--------------------------------- 4 files changed, 70 insertions(+), 47 deletions(-) create mode 100644 certs/common.c create mode 100644 certs/common.h diff --git a/certs/Makefile b/certs/Makefile index f4c25b67aad90..f4b90bad8690a 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -3,7 +3,7 @@ # Makefile for the linux kernel signature checking certificates. 
# -obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o +obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o diff --git a/certs/common.c b/certs/common.c new file mode 100644 index 0000000000000..16a220887a53e --- /dev/null +++ b/certs/common.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include "common.h" + +int load_certificate_list(const u8 cert_list[], + const unsigned long list_size, + const struct key *keyring) +{ + key_ref_t key; + const u8 *p, *end; + size_t plen; + + p = cert_list; + end = p + list_size; + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. + */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(keyring, 1), + "asymmetric", + NULL, + p, + plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return 0; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); + return 0; +} diff --git a/certs/common.h b/certs/common.h new file mode 100644 index 0000000000000..abdb5795936b7 --- /dev/null +++ b/certs/common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _CERT_COMMON_H +#define _CERT_COMMON_H + +int load_certificate_list(const u8 cert_list[], const unsigned long 
list_size, + const struct key *keyring); + +#endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index ed98754d5795a..0c9a4795e847b 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -16,6 +16,7 @@ #include #include #include +#include "common.h" static struct key *builtin_trusted_keys; #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING @@ -137,54 +138,10 @@ device_initcall(system_trusted_keyring_init); */ static __init int load_system_certificate_list(void) { - key_ref_t key; - const u8 *p, *end; - size_t plen; - pr_notice("Loading compiled-in X.509 certificates\n"); - p = system_certificate_list; - end = p + system_certificate_list_size; - while (p < end) { - /* Each cert begins with an ASN.1 SEQUENCE tag and must be more - * than 256 bytes in size. - */ - if (end - p < 4) - goto dodgy_cert; - if (p[0] != 0x30 && - p[1] != 0x82) - goto dodgy_cert; - plen = (p[2] << 8) | p[3]; - plen += 4; - if (plen > end - p) - goto dodgy_cert; - - key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1), - "asymmetric", - NULL, - p, - plen, - ((KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA | - KEY_ALLOC_BUILT_IN | - KEY_ALLOC_BYPASS_RESTRICTION); - if (IS_ERR(key)) { - pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", - PTR_ERR(key)); - } else { - pr_notice("Loaded X.509 cert '%s'\n", - key_ref_to_ptr(key)->description); - key_ref_put(key); - } - p += plen; - } - - return 0; - -dodgy_cert: - pr_err("Problem parsing in-kernel X.509 certificate list\n"); - return 0; + return load_certificate_list(system_certificate_list, system_certificate_list_size, + builtin_trusted_keys); } late_initcall(load_system_certificate_list); From ad33a49b42c5b5dc37b5dd0ba6159fb01cf10761 Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Fri, 22 Jan 2021 13:10:53 -0500 Subject: [PATCH 027/851] certs: Add ability to preload revocation certs Add a new Kconfig option called SYSTEM_REVOCATION_KEYS. 
If set, this option should be the filename of a PEM-formatted file containing X.509 certificates to be included in the default blacklist keyring. DH Changes: - Make the new Kconfig option depend on SYSTEM_REVOCATION_LIST. - Fix SYSTEM_REVOCATION_KEYS=n, but CONFIG_SYSTEM_REVOCATION_LIST=y[1][2]. - Use CONFIG_SYSTEM_REVOCATION_LIST for extract-cert[3]. - Use CONFIG_SYSTEM_REVOCATION_LIST for revocation_certificates.o[3]. Signed-off-by: Eric Snowberg Acked-by: Jarkko Sakkinen Signed-off-by: David Howells cc: Randy Dunlap cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20200930201508.35113-3-eric.snowberg@oracle.com/ Link: https://lore.kernel.org/r/20210122181054.32635-4-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/161428673564.677100.4112098280028451629.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161433312452.902181.4146169951896577982.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/e1c15c74-82ce-3a69-44de-a33af9b320ea@infradead.org/ [1] Link: https://lore.kernel.org/r/20210303034418.106762-1-eric.snowberg@oracle.com/ [2] Link: https://lore.kernel.org/keyrings/20210304175030.184131-1-eric.snowberg@oracle.com/ [3] --- certs/Kconfig | 8 ++++++++ certs/Makefile | 19 +++++++++++++++++-- certs/blacklist.c | 21 +++++++++++++++++++++ certs/revocation_certificates.S | 21 +++++++++++++++++++++ scripts/Makefile | 1 + 5 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 certs/revocation_certificates.S diff --git a/certs/Kconfig b/certs/Kconfig index 76e469b56a773..ab88d2a7f3c7f 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -92,4 +92,12 @@ config SYSTEM_REVOCATION_LIST blacklist keyring and implements a hook whereby a PKCS#7 message can be checked to see if it matches such a certificate.
+config SYSTEM_REVOCATION_KEYS + string "X.509 certificates to be preloaded into the system blacklist keyring" + depends on SYSTEM_REVOCATION_LIST + help + If set, this option should be the filename of a PEM-formatted file + containing X.509 certificates to be included in the default blacklist + keyring. + endmenu diff --git a/certs/Makefile b/certs/Makefile index f4b90bad8690a..b6db52ebf0beb 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -4,7 +4,8 @@ # obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o -obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o +obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o +obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o else @@ -29,7 +30,7 @@ $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREF $(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS)) endif # CONFIG_SYSTEM_TRUSTED_KEYRING -clean-files := x509_certificate_list .x509.list +clean-files := x509_certificate_list .x509.list x509_revocation_list ifeq ($(CONFIG_MODULE_SIG),y) ############################################################################### @@ -104,3 +105,17 @@ targets += signing_key.x509 $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE $(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY)) endif # CONFIG_MODULE_SIG + +ifeq ($(CONFIG_SYSTEM_REVOCATION_LIST),y) + +$(eval $(call config_filename,SYSTEM_REVOCATION_KEYS)) + +$(obj)/revocation_certificates.o: $(obj)/x509_revocation_list + +quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2)) + cmd_extract_certs = scripts/extract-cert $(2) $@ + +targets += x509_revocation_list +$(obj)/x509_revocation_list: scripts/extract-cert $(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(SYSTEM_REVOCATION_KEYS_FILENAME) FORCE + $(call 
if_changed,extract_certs,$(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_REVOCATION_KEYS)) +endif diff --git a/certs/blacklist.c b/certs/blacklist.c index 2b8644123d5fd..c9a435b15af40 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -17,9 +17,15 @@ #include #include #include "blacklist.h" +#include "common.h" static struct key *blacklist_keyring; +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern __initconst const u8 revocation_certificate_list[]; +extern __initconst const unsigned long revocation_certificate_list_size; +#endif + /* * The description must be a type prefix, a colon and then an even number of * hex digits. The hash is kept in the description. @@ -220,3 +226,18 @@ static int __init blacklist_init(void) * Must be initialised before we try and load the keys into the keyring. */ device_initcall(blacklist_init); + +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/* + * Load the compiled-in list of revocation X.509 certificates. + */ +static __init int load_revocation_certificate_list(void) +{ + if (revocation_certificate_list_size) + pr_notice("Loading compiled-in revocation X.509 certificates\n"); + + return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size, + blacklist_keyring); +} +late_initcall(load_revocation_certificate_list); +#endif diff --git a/certs/revocation_certificates.S b/certs/revocation_certificates.S new file mode 100644 index 0000000000000..f21aae8a8f0ef --- /dev/null +++ b/certs/revocation_certificates.S @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + + __INITRODATA + + .align 8 + .globl revocation_certificate_list +revocation_certificate_list: +__revocation_list_start: + .incbin "certs/x509_revocation_list" +__revocation_list_end: + + .align 8 + .globl revocation_certificate_list_size +revocation_certificate_list_size: +#ifdef CONFIG_64BIT + .quad __revocation_list_end - __revocation_list_start +#else + .long __revocation_list_end - __revocation_list_start +#endif diff 
--git a/scripts/Makefile b/scripts/Makefile index b5418ec587fbd..bd0718f7c493e 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -11,6 +11,7 @@ hostprogs-always-$(CONFIG_ASN1) += asn1_compiler hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert +hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include From e377c31f788fc98815e1ab90b5a35704ce35843a Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Fri, 22 Jan 2021 13:10:54 -0500 Subject: [PATCH 028/851] integrity: Load mokx variables into the blacklist keyring During boot the Secure Boot Forbidden Signature Database, dbx, is loaded into the blacklist keyring. Systems booted with shim have an equivalent Forbidden Signature Database called mokx. Currently mokx is only used by shim and grub, the contents are ignored by the kernel. Add the ability to load mokx into the blacklist keyring during boot. 
Signed-off-by: Eric Snowberg Suggested-by: James Bottomley Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20210122181054.32635-5-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/c33c8e3839a41e9654f41cc92c7231104931b1d7.camel@HansenPartnership.com/ Link: https://lore.kernel.org/r/161428674320.677100.12637282414018170743.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161433313205.902181.2502803393898221637.stgit@warthog.procyon.org.uk/ # v2 --- security/integrity/platform_certs/load_uefi.c | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index ee4b4c666854f..f290f78c3f301 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -132,8 +132,9 @@ static int __init load_moklist_certs(void) static int __init load_uefi_certs(void) { efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID; - void *db = NULL, *dbx = NULL; - unsigned long dbsize = 0, dbxsize = 0; + efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; + void *db = NULL, *dbx = NULL, *mokx = NULL; + unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0; efi_status_t status; int rc = 0; @@ -175,6 +176,21 @@ static int __init load_uefi_certs(void) kfree(dbx); } + mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status); + if (!mokx) { + if (status == EFI_NOT_FOUND) + pr_debug("mokx variable wasn't found\n"); + else + pr_info("Couldn't get mokx list\n"); + } else { + rc = parse_efi_signature_list("UEFI:MokListXRT", + mokx, mokxsize, + get_handler_for_dbx); + if (rc) + pr_err("Couldn't parse mokx signatures %d\n", rc); + kfree(mokx); + } + /* Load the MokListRT certs */ rc = load_moklist_certs(); From 173e84953eaaf17864a707efa2b8cd4c233b3129 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 17 Apr 2021 23:09:35 -0400 Subject: 
[PATCH 029/851] fs: fix reporting supported extra file attributes for statx() statx(2) notes that any attribute that is not indicated as supported by stx_attributes_mask has no usable value. Commits 801e523796004 ("fs: move generic stat response attr handling to vfs_getattr_nosec") and 712b2698e4c02 ("fs/stat: Define DAX statx attribute") sets STATX_ATTR_AUTOMOUNT and STATX_ATTR_DAX, respectively, without setting stx_attributes_mask, which can cause xfstests generic/532 to fail. Fix this in the same way as commit 1b9598c8fb99 ("xfs: fix reporting supported extra file attributes for statx()") Fixes: 801e523796004 ("fs: move generic stat response attr handling to vfs_getattr_nosec") Fixes: 712b2698e4c02 ("fs/stat: Define DAX statx attribute") Cc: stable@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Al Viro --- fs/stat.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/stat.c b/fs/stat.c index fbc171d038aa5..1fa38bdec1a68 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -86,12 +86,20 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, /* SB_NOATIME means filesystem supplies dummy atime value */ if (inode->i_sb->s_flags & SB_NOATIME) stat->result_mask &= ~STATX_ATIME; + + /* + * Note: If you add another clause to set an attribute flag, please + * update attributes_mask below. + */ if (IS_AUTOMOUNT(inode)) stat->attributes |= STATX_ATTR_AUTOMOUNT; if (IS_DAX(inode)) stat->attributes |= STATX_ATTR_DAX; + stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | + STATX_ATTR_DAX); + mnt_userns = mnt_user_ns(path->mnt); if (inode->i_op->getattr) return inode->i_op->getattr(mnt_userns, path, stat, From a5f7166b58cda2430123eb9bb96d60340e699ae4 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 10 May 2021 16:52:32 +0200 Subject: [PATCH 030/851] sparc: explicitly set PCI_IOBASE to 0 Instead of relying on the fallback in asm-generic/io.h which sets PCI_IOBASE 0 if it is not defined set it explicitly. 
Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/lkml/CAK8P3a3PK9zyeP4ymELtc2ZYnymECoACiigw9Za+pvSJpCk5=g@mail.gmail.com/ Signed-off-by: Arnd Bergmann --- arch/sparc/include/asm/io.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h index 2eefa526b38f0..c019e50702c11 100644 --- a/arch/sparc/include/asm/io.h +++ b/arch/sparc/include/asm/io.h @@ -1,6 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ___ASM_SPARC_IO_H #define ___ASM_SPARC_IO_H + +/* + * On LEON PCI addresses below 64k are converted to IO accesses. + * io_remap_xxx() returns a kernel virtual address in the PCI window so + * inb() doesn't need to add an offset. + */ +#define PCI_IOBASE ((void __iomem *)0) + #if defined(__sparc__) && defined(__arch64__) #include #else From 78924148a3d22e030fe8f5c1a0ce10e177856423 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 10 May 2021 16:52:33 +0200 Subject: [PATCH 031/851] risc-v: Use generic io.h helpers for nommu Without MMU support PCI_IOBASE is left undefined because PCI_IO_END is VMEMMAP_START. Nevertheless the in*()/out*() helper macros are left defined with uses of PCI_IOBASE. At the moment this only compiles because asm-generic/io.h defines PCI_IOBASE as 0 if it is undefined and so at macro expansion PCI_IOBASE is defined. This leads to compilation errors when asm-generic/io.h is changed to leave PCI_IOBASE undefined. More importantly it is currently broken at runtime, as accessing a fixed I/O port number of an ISA device on NOMMU RISC-V would turn into a NULL pointer dereference. Instead only define the in*()/out*() helper macros with MMU support and fall back to the asm-generic/io.h helper stubs otherwise. 
Signed-off-by: Niklas Schnelle Signed-off-by: Arnd Bergmann --- arch/riscv/include/asm/io.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c025a746a1486..31a8b98c0f132 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -23,12 +23,12 @@ #include /* - * I/O port access constants. + * I/O port access constants. Without MMU support leave PCI_IOBASE undefined + * and fall back to generic stubs for I/O access routines. */ #ifdef CONFIG_MMU #define IO_SPACE_LIMIT (PCI_IO_SIZE - 1) #define PCI_IOBASE ((void __iomem *)PCI_IO_START) -#endif /* CONFIG_MMU */ /* * Emulation routines for the port-mapped IO space used by some PCI drivers. @@ -145,6 +145,7 @@ __io_writes_outs(writes, u64, q, __io_bw(), __io_aw()) __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #define outsq(addr, buffer, count) __outsq((void __iomem *)addr, buffer, count) #endif +#endif /* CONFIG_MMU */ #include From 5ae6eadfdaf431f47adbdf1754f3b5a5fd638de2 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 10 May 2021 16:52:34 +0200 Subject: [PATCH 032/851] asm-generic/io.h: warn in inb() and friends with undefined PCI_IOBASE When PCI_IOBASE is not defined, it is set to 0 such that it is ignored in calls to the readX/writeX primitives. This triggers clang's -Wnull-pointer-arithmetic warning and will result in illegal accesses on platforms that do not support I/O ports. Make things explicit and silence the warning by letting inb() and friends fail with WARN_ONCE() and a 0xff... return in case PCI_IOBASE is not defined. 
Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/lkml/20210421111759.2059976-1-schnelle@linux.ibm.com/ Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 68 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index e93375c710b93..4a3af97a832dc 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -8,6 +8,7 @@ #define __ASM_GENERIC_IO_H #include /* I/O is all done through memory accesses */ +#include #include /* for memset() and memcpy() */ #include @@ -440,10 +441,6 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer, #endif #endif /* CONFIG_64BIT */ -#ifndef PCI_IOBASE -#define PCI_IOBASE ((void __iomem *)0) -#endif - #ifndef IO_SPACE_LIMIT #define IO_SPACE_LIMIT 0xffff #endif @@ -458,12 +455,17 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer, #define _inb _inb static inline u8 _inb(unsigned long addr) { +#ifdef PCI_IOBASE u8 val; __io_pbr(); val = __raw_readb(PCI_IOBASE + addr); __io_par(val); return val; +#else + WARN_ONCE(1, "No I/O port support\n"); + return ~0; +#endif } #endif @@ -471,12 +473,17 @@ static inline u8 _inb(unsigned long addr) #define _inw _inw static inline u16 _inw(unsigned long addr) { +#ifdef PCI_IOBASE u16 val; __io_pbr(); val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); __io_par(val); return val; +#else + WARN_ONCE(1, "No I/O port support\n"); + return ~0; +#endif } #endif @@ -484,12 +491,17 @@ static inline u16 _inw(unsigned long addr) #define _inl _inl static inline u32 _inl(unsigned long addr) { +#ifdef PCI_IOBASE u32 val; __io_pbr(); val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); __io_par(val); return val; +#else + WARN_ONCE(1, "No I/O port support\n"); + return ~0; +#endif } #endif @@ -497,9 +509,13 @@ static inline u32 _inl(unsigned long addr) #define _outb _outb static inline void _outb(u8 value, 
unsigned long addr) { +#ifdef PCI_IOBASE __io_pbw(); __raw_writeb(value, PCI_IOBASE + addr); __io_paw(); +#else + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -507,9 +523,13 @@ static inline void _outb(u8 value, unsigned long addr) #define _outw _outw static inline void _outw(u16 value, unsigned long addr) { +#ifdef PCI_IOBASE __io_pbw(); __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); __io_paw(); +#else + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -517,9 +537,13 @@ static inline void _outw(u16 value, unsigned long addr) #define _outl _outl static inline void _outl(u32 value, unsigned long addr) { +#ifdef PCI_IOBASE __io_pbw(); __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); __io_paw(); +#else + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -606,7 +630,12 @@ static inline void outl_p(u32 value, unsigned long addr) #define insb insb static inline void insb(unsigned long addr, void *buffer, unsigned int count) { +#ifdef PCI_IOBASE readsb(PCI_IOBASE + addr, buffer, count); +#else + memset(buffer, 0xff, count); + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -614,7 +643,12 @@ static inline void insb(unsigned long addr, void *buffer, unsigned int count) #define insw insw static inline void insw(unsigned long addr, void *buffer, unsigned int count) { +#ifdef PCI_IOBASE readsw(PCI_IOBASE + addr, buffer, count); +#else + memset(buffer, 0xff, count); + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -622,7 +656,12 @@ static inline void insw(unsigned long addr, void *buffer, unsigned int count) #define insl insl static inline void insl(unsigned long addr, void *buffer, unsigned int count) { +#ifdef PCI_IOBASE readsl(PCI_IOBASE + addr, buffer, count); +#else + memset(buffer, 0xff, count); + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -631,7 +670,11 @@ static inline void insl(unsigned long addr, void *buffer, unsigned int count) static inline void 
outsb(unsigned long addr, const void *buffer, unsigned int count) { +#ifdef PCI_IOBASE writesb(PCI_IOBASE + addr, buffer, count); +#else + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -640,7 +683,11 @@ static inline void outsb(unsigned long addr, const void *buffer, static inline void outsw(unsigned long addr, const void *buffer, unsigned int count) { +#ifdef PCI_IOBASE writesw(PCI_IOBASE + addr, buffer, count); +#else + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -649,7 +696,11 @@ static inline void outsw(unsigned long addr, const void *buffer, static inline void outsl(unsigned long addr, const void *buffer, unsigned int count) { +#ifdef PCI_IOBASE writesl(PCI_IOBASE + addr, buffer, count); +#else + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif @@ -1020,18 +1071,27 @@ static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size) #define ioport_map ioport_map static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) { +#ifdef PCI_IOBASE port &= IO_SPACE_LIMIT; return (port > MMIO_UPPER_LIMIT) ? NULL : PCI_IOBASE + port; +#else + WARN_ONCE(1, "No I/O port support\n"); + return NULL; +#endif } #define __pci_ioport_unmap __pci_ioport_unmap static inline void __pci_ioport_unmap(void __iomem *p) { +#ifdef PCI_IOBASE uintptr_t start = (uintptr_t) PCI_IOBASE; uintptr_t addr = (uintptr_t) p; if (addr >= start && addr < start + IO_SPACE_LIMIT) return; iounmap(p); +#else + WARN_ONCE(1, "No I/O port support\n"); +#endif } #endif From ded39fc4a5b4ecba82e610d80ef86326b67741dd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 10 May 2021 12:26:25 +0200 Subject: [PATCH 033/851] docs: driver-api: fpga: avoid using UTF-8 chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While UTF-8 characters can be used at the Linux documentation, the best is to use them only when ASCII doesn't offer a good replacement. 
So, replace the occurrences of the following UTF-8 characters: - U+2014 ('—'): EM DASH Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Moritz Fischer --- Documentation/driver-api/fpga/fpga-bridge.rst | 10 +++++----- Documentation/driver-api/fpga/fpga-mgr.rst | 12 +++++------ .../driver-api/fpga/fpga-programming.rst | 8 ++++---- Documentation/driver-api/fpga/fpga-region.rst | 20 +++++++++---------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst index 198aadafd3e7d..8d650b4e2ce6d 100644 --- a/Documentation/driver-api/fpga/fpga-bridge.rst +++ b/Documentation/driver-api/fpga/fpga-bridge.rst @@ -4,11 +4,11 @@ FPGA Bridge API to implement a new FPGA bridge ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* struct fpga_bridge — The FPGA Bridge structure -* struct fpga_bridge_ops — Low level Bridge driver ops -* devm_fpga_bridge_create() — Allocate and init a bridge struct -* fpga_bridge_register() — Register a bridge -* fpga_bridge_unregister() — Unregister a bridge +* struct fpga_bridge - The FPGA Bridge structure +* struct fpga_bridge_ops - Low level Bridge driver ops +* devm_fpga_bridge_create() - Allocate and init a bridge struct +* fpga_bridge_register() - Register a bridge +* fpga_bridge_unregister() - Unregister a bridge .. kernel-doc:: include/linux/fpga/fpga-bridge.h :functions: fpga_bridge diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst index 917ee22db429d..4d926b452cb35 100644 --- a/Documentation/driver-api/fpga/fpga-mgr.rst +++ b/Documentation/driver-api/fpga/fpga-mgr.rst @@ -101,12 +101,12 @@ in state. API for implementing a new FPGA Manager driver ---------------------------------------------- -* ``fpga_mgr_states`` — Values for :c:expr:`fpga_manager->state`. 
-* struct fpga_manager — the FPGA manager struct -* struct fpga_manager_ops — Low level FPGA manager driver ops -* devm_fpga_mgr_create() — Allocate and init a manager struct -* fpga_mgr_register() — Register an FPGA manager -* fpga_mgr_unregister() — Unregister an FPGA manager +* ``fpga_mgr_states`` - Values for :c:expr:`fpga_manager->state`. +* struct fpga_manager - the FPGA manager struct +* struct fpga_manager_ops - Low level FPGA manager driver ops +* devm_fpga_mgr_create() - Allocate and init a manager struct +* fpga_mgr_register() - Register an FPGA manager +* fpga_mgr_unregister() - Unregister an FPGA manager .. kernel-doc:: include/linux/fpga/fpga-mgr.h :functions: fpga_mgr_states diff --git a/Documentation/driver-api/fpga/fpga-programming.rst b/Documentation/driver-api/fpga/fpga-programming.rst index 002392dab04f7..fb4da4240e961 100644 --- a/Documentation/driver-api/fpga/fpga-programming.rst +++ b/Documentation/driver-api/fpga/fpga-programming.rst @@ -84,10 +84,10 @@ will generate that list. Here's some sample code of what to do next:: API for programming an FPGA --------------------------- -* fpga_region_program_fpga() — Program an FPGA -* fpga_image_info() — Specifies what FPGA image to program -* fpga_image_info_alloc() — Allocate an FPGA image info struct -* fpga_image_info_free() — Free an FPGA image info struct +* fpga_region_program_fpga() - Program an FPGA +* fpga_image_info() - Specifies what FPGA image to program +* fpga_image_info_alloc() - Allocate an FPGA image info struct +* fpga_image_info_free() - Free an FPGA image info struct .. kernel-doc:: drivers/fpga/fpga-region.c :functions: fpga_region_program_fpga diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst index 363a8171ab0a5..2636a27c11b24 100644 --- a/Documentation/driver-api/fpga/fpga-region.rst +++ b/Documentation/driver-api/fpga/fpga-region.rst @@ -45,19 +45,19 @@ An example of usage can be seen in the probe function of [#f2]_. 
API to add a new FPGA region ---------------------------- -* struct fpga_region — The FPGA region struct -* devm_fpga_region_create() — Allocate and init a region struct -* fpga_region_register() — Register an FPGA region -* fpga_region_unregister() — Unregister an FPGA region +* struct fpga_region - The FPGA region struct +* devm_fpga_region_create() - Allocate and init a region struct +* fpga_region_register() - Register an FPGA region +* fpga_region_unregister() - Unregister an FPGA region The FPGA region's probe function will need to get a reference to the FPGA Manager it will be using to do the programming. This usually would happen during the region's probe function. -* fpga_mgr_get() — Get a reference to an FPGA manager, raise ref count -* of_fpga_mgr_get() — Get a reference to an FPGA manager, raise ref count, +* fpga_mgr_get() - Get a reference to an FPGA manager, raise ref count +* of_fpga_mgr_get() - Get a reference to an FPGA manager, raise ref count, given a device node. -* fpga_mgr_put() — Put an FPGA manager +* fpga_mgr_put() - Put an FPGA manager The FPGA region will need to specify which bridges to control while programming the FPGA. The region driver can build a list of bridges during probe time @@ -66,11 +66,11 @@ the list of bridges to program just before programming (:c:expr:`fpga_region->get_bridges`). The FPGA bridge framework supplies the following APIs to handle building or tearing down that list. -* fpga_bridge_get_to_list() — Get a ref of an FPGA bridge, add it to a +* fpga_bridge_get_to_list() - Get a ref of an FPGA bridge, add it to a list -* of_fpga_bridge_get_to_list() — Get a ref of an FPGA bridge, add it to a +* of_fpga_bridge_get_to_list() - Get a ref of an FPGA bridge, add it to a list, given a device node -* fpga_bridges_put() — Given a list of bridges, put them +* fpga_bridges_put() - Given a list of bridges, put them .. 
kernel-doc:: include/linux/fpga/fpga-region.h :functions: fpga_region From cba7dcd8878e04647c47c829846bbde91e67ca2f Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Mon, 17 May 2021 16:35:57 +0800 Subject: [PATCH 034/851] 9p/trans_virtio: Remove sysfs file on probe failure This ensures we don't leak the sysfs file if we failed to allocate chan->vc_wq during probe. Link: http://lkml.kernel.org/r/20210517083557.172-1-xieyongji@bytedance.com Fixes: 86c8437383ac ("net/9p: Add sysfs mount_tag file for virtio 9P device") Signed-off-by: Xie Yongji Signed-off-by: Dominique Martinet --- net/9p/trans_virtio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 93f2f86548826..7bcaa46165fe9 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -610,7 +610,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); if (!chan->vc_wq) { err = -ENOMEM; - goto out_free_tag; + goto out_remove_file; } init_waitqueue_head(chan->vc_wq); chan->ring_bufs_avail = 1; @@ -628,6 +628,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) return 0; +out_remove_file: + sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr); out_free_tag: kfree(tag); out_free_vq: From 9c7aad3aa55fa8eb203c394d6f428ab3d28bea70 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 19 May 2021 09:30:56 -0700 Subject: [PATCH 035/851] fpga: fix spelling mistakes Run the fpga subsystem through aspell. 
Signed-off-by: Tom Rix Reviewed-by: Fernando Pacheco Signed-off-by: Moritz Fischer --- Documentation/fpga/dfl.rst | 4 ++-- drivers/fpga/altera-cvp.c | 2 +- drivers/fpga/dfl-fme-pr.c | 2 +- drivers/fpga/dfl-n3000-nios.c | 2 +- drivers/fpga/dfl.h | 2 +- drivers/fpga/fpga-bridge.c | 4 ++-- drivers/fpga/zynq-fpga.c | 6 +++--- include/linux/fpga/fpga-mgr.h | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/fpga/dfl.rst b/Documentation/fpga/dfl.rst index f3a1223f2517e..ccc33f199df2a 100644 --- a/Documentation/fpga/dfl.rst +++ b/Documentation/fpga/dfl.rst @@ -10,7 +10,7 @@ Authors: - Xu Yilun The Device Feature List (DFL) FPGA framework (and drivers according to -this framework) hides the very details of low layer hardwares and provides +this framework) hides the very details of low layer hardware and provides unified interfaces to userspace. Applications could use these interfaces to configure, enumerate, open and access FPGA accelerators on platforms which implement the DFL in the device memory. Besides this, the DFL framework @@ -205,7 +205,7 @@ given Device Feature Lists and create platform devices for feature devices also abstracts operations for the private features and exposes common ops to feature device drivers. -The FPGA DFL Device could be different hardwares, e.g. PCIe device, platform +The FPGA DFL Device could be different hardware, e.g. PCIe device, platform device and etc. Its driver module is always loaded first once the device is created by the system. This driver plays an infrastructural role in the driver architecture. 
It locates the DFLs in the device memory, handles them diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c index 4e0edb60bfba6..ccf4546eff297 100644 --- a/drivers/fpga/altera-cvp.c +++ b/drivers/fpga/altera-cvp.c @@ -346,7 +346,7 @@ static int altera_cvp_write_init(struct fpga_manager *mgr, } if (val & VSE_CVP_STATUS_CFG_RDY) { - dev_warn(&mgr->dev, "CvP already started, teardown first\n"); + dev_warn(&mgr->dev, "CvP already started, tear down first\n"); ret = altera_cvp_teardown(mgr, info); if (ret) return ret; diff --git a/drivers/fpga/dfl-fme-pr.c b/drivers/fpga/dfl-fme-pr.c index 1194c0e850e07..d61ce9a188792 100644 --- a/drivers/fpga/dfl-fme-pr.c +++ b/drivers/fpga/dfl-fme-pr.c @@ -148,7 +148,7 @@ static int fme_pr(struct platform_device *pdev, unsigned long arg) /* * it allows userspace to reset the PR region's logic by disabling and - * reenabling the bridge to clear things out between accleration runs. + * reenabling the bridge to clear things out between acceleration runs. * so no need to hold the bridges after partial reconfiguration. */ if (region->get_bridges) diff --git a/drivers/fpga/dfl-n3000-nios.c b/drivers/fpga/dfl-n3000-nios.c index 7a95366f6516f..9ddf1d1d392f3 100644 --- a/drivers/fpga/dfl-n3000-nios.c +++ b/drivers/fpga/dfl-n3000-nios.c @@ -461,7 +461,7 @@ static int n3000_nios_poll_stat_timeout(void __iomem *base, u64 *v) * We don't use the time based timeout here for performance. * * The regbus read/write is on the critical path of Intel PAC N3000 - * image programing. The time based timeout checking will add too much + * image programming. The time based timeout checking will add too much * overhead on it. Usually the state changes in 1 or 2 loops on the * test server, and we set 10000 times loop here for safety. 
*/ diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h index 2b82c96ba56c7..dac9c3d45e6c3 100644 --- a/drivers/fpga/dfl.h +++ b/drivers/fpga/dfl.h @@ -232,7 +232,7 @@ struct dfl_feature_irq_ctx { * @id: sub feature id. * @resource_index: each sub feature has one mmio resource for its registers. * this index is used to find its mmio resource from the - * feature dev (platform device)'s reources. + * feature dev (platform device)'s resources. * @ioaddr: mapped mmio resource address. * @irq_ctx: interrupt context list. * @nr_irqs: number of interrupt contexts. diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c index e9266b2a357f6..e07a619df5329 100644 --- a/drivers/fpga/fpga-bridge.c +++ b/drivers/fpga/fpga-bridge.c @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(fpga_bridges_put); * * Get an exclusive reference to the bridge and and it to the list. * - * Return 0 for success, error code from of_fpga_bridge_get() othewise. + * Return 0 for success, error code from of_fpga_bridge_get() otherwise. */ int of_fpga_bridge_get_to_list(struct device_node *np, struct fpga_image_info *info, @@ -260,7 +260,7 @@ EXPORT_SYMBOL_GPL(of_fpga_bridge_get_to_list); * * Get an exclusive reference to the bridge and and it to the list. * - * Return 0 for success, error code from fpga_bridge_get() othewise. + * Return 0 for success, error code from fpga_bridge_get() otherwise. */ int fpga_bridge_get_to_list(struct device *dev, struct fpga_image_info *info, diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index 07fa8d9ec6750..9b75bd4f93d8e 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -192,7 +192,7 @@ static void zynq_step_dma(struct zynq_fpga_priv *priv) /* Once the first transfer is queued we can turn on the ISR, future * calls to zynq_step_dma will happen from the ISR context. 
The - * dma_lock spinlock guarentees this handover is done coherently, the + * dma_lock spinlock guarantees this handover is done coherently, the * ISR enable is put at the end to avoid another CPU spinning in the * ISR on this lock. */ @@ -267,7 +267,7 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, ctrl = zynq_fpga_read(priv, CTRL_OFFSET); if (!(ctrl & CTRL_SEC_EN_MASK)) { dev_err(&mgr->dev, - "System not secure, can't use crypted bitstreams\n"); + "System not secure, can't use encrypted bitstreams\n"); err = -EINVAL; goto out_err; } @@ -344,7 +344,7 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, /* set configuration register with following options: * - enable PCAP interface - * - set throughput for maximum speed (if bistream not crypted) + * - set throughput for maximum speed (if bistream not encrypted) * - set CPU in user mode */ ctrl = zynq_fpga_read(priv, CTRL_OFFSET); diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 2bc3030a69e54..3a32b8e201857 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -110,7 +110,7 @@ struct fpga_image_info { * @initial_header_size: Maximum number of bytes that should be passed into write_init * @state: returns an enum value of the FPGA's state * @status: returns status of the FPGA, including reconfiguration error code - * @write_init: prepare the FPGA to receive confuration data + * @write_init: prepare the FPGA to receive configuration data * @write: write count bytes of configuration data to the FPGA * @write_sg: write the scatter list of configuration data to the FPGA * @write_complete: set FPGA to operating state after writing is done From 92c5ddbc93abddab737df19655787ca354a3a397 Mon Sep 17 00:00:00 2001 From: Navin Sankar Velliangiri Date: Fri, 28 May 2021 20:35:57 +0530 Subject: [PATCH 036/851] fpga: fpga-bridge: removed repeated word Removed repeated word and. Reported by checkpatch. 
Signed-off-by: Navin Sankar Velliangiri Signed-off-by: Moritz Fischer --- drivers/fpga/fpga-bridge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c index e07a619df5329..05c6d4f2d043f 100644 --- a/drivers/fpga/fpga-bridge.c +++ b/drivers/fpga/fpga-bridge.c @@ -228,7 +228,7 @@ EXPORT_SYMBOL_GPL(fpga_bridges_put); * @info: fpga image specific information * @bridge_list: list of FPGA bridges * - * Get an exclusive reference to the bridge and and it to the list. + * Get an exclusive reference to the bridge and it to the list. * * Return 0 for success, error code from of_fpga_bridge_get() otherwise. */ @@ -258,7 +258,7 @@ EXPORT_SYMBOL_GPL(of_fpga_bridge_get_to_list); * @info: fpga image specific information * @bridge_list: list of FPGA bridges * - * Get an exclusive reference to the bridge and and it to the list. + * Get an exclusive reference to the bridge and it to the list. * * Return 0 for success, error code from fpga_bridge_get() otherwise. */ From 495fb48dbd9bcbe15859e086edd24519a6bd2961 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 1 Jun 2021 15:00:09 +0800 Subject: [PATCH 037/851] mfd: wm831x: Use DEFINE_RES_IRQ_NAMED() and DEFINE_RES_IRQ() to simplify code No functional change. 
Signed-off-by: Zhen Lei Acked-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm831x-core.c | 248 ++++++-------------------------------- 1 file changed, 36 insertions(+), 212 deletions(-) diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index c31809b17547e..d2f444d2ae78f 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -622,18 +622,8 @@ static const struct resource wm831x_dcdc1_resources[] = { .end = WM831X_DC1_DVS_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_DC1, - .end = WM831X_IRQ_UV_DC1, - .flags = IORESOURCE_IRQ, - }, - { - .name = "HC", - .start = WM831X_IRQ_HC_DC1, - .end = WM831X_IRQ_HC_DC1, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_DC1, "UV"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_HC_DC1, "HC"), }; @@ -643,18 +633,8 @@ static const struct resource wm831x_dcdc2_resources[] = { .end = WM831X_DC2_DVS_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_DC2, - .end = WM831X_IRQ_UV_DC2, - .flags = IORESOURCE_IRQ, - }, - { - .name = "HC", - .start = WM831X_IRQ_HC_DC2, - .end = WM831X_IRQ_HC_DC2, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_DC2, "UV"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_HC_DC2, "HC"), }; static const struct resource wm831x_dcdc3_resources[] = { @@ -663,12 +643,7 @@ static const struct resource wm831x_dcdc3_resources[] = { .end = WM831X_DC3_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_DC3, - .end = WM831X_IRQ_UV_DC3, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_DC3, "UV"), }; static const struct resource wm831x_dcdc4_resources[] = { @@ -677,12 +652,7 @@ static const struct resource wm831x_dcdc4_resources[] = { .end = WM831X_DC4_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_DC4, - .end = WM831X_IRQ_UV_DC4, - .flags = IORESOURCE_IRQ, - }, + 
DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_DC4, "UV"), }; static const struct resource wm8320_dcdc4_buck_resources[] = { @@ -691,12 +661,7 @@ static const struct resource wm8320_dcdc4_buck_resources[] = { .end = WM832X_DC4_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_DC4, - .end = WM831X_IRQ_UV_DC4, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_DC4, "UV"), }; static const struct resource wm831x_gpio_resources[] = { @@ -713,11 +678,7 @@ static const struct resource wm831x_isink1_resources[] = { .end = WM831X_CURRENT_SINK_1, .flags = IORESOURCE_REG, }, - { - .start = WM831X_IRQ_CS1, - .end = WM831X_IRQ_CS1, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ(WM831X_IRQ_CS1), }; static const struct resource wm831x_isink2_resources[] = { @@ -726,11 +687,7 @@ static const struct resource wm831x_isink2_resources[] = { .end = WM831X_CURRENT_SINK_2, .flags = IORESOURCE_REG, }, - { - .start = WM831X_IRQ_CS2, - .end = WM831X_IRQ_CS2, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ(WM831X_IRQ_CS2), }; static const struct resource wm831x_ldo1_resources[] = { @@ -739,12 +696,7 @@ static const struct resource wm831x_ldo1_resources[] = { .end = WM831X_LDO1_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO1, - .end = WM831X_IRQ_UV_LDO1, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO1, "UV"), }; static const struct resource wm831x_ldo2_resources[] = { @@ -753,12 +705,7 @@ static const struct resource wm831x_ldo2_resources[] = { .end = WM831X_LDO2_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO2, - .end = WM831X_IRQ_UV_LDO2, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO2, "UV"), }; static const struct resource wm831x_ldo3_resources[] = { @@ -767,12 +714,7 @@ static const struct resource wm831x_ldo3_resources[] = { .end = WM831X_LDO3_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = 
"UV", - .start = WM831X_IRQ_UV_LDO3, - .end = WM831X_IRQ_UV_LDO3, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO3, "UV"), }; static const struct resource wm831x_ldo4_resources[] = { @@ -781,12 +723,7 @@ static const struct resource wm831x_ldo4_resources[] = { .end = WM831X_LDO4_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO4, - .end = WM831X_IRQ_UV_LDO4, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO4, "UV"), }; static const struct resource wm831x_ldo5_resources[] = { @@ -795,12 +732,7 @@ static const struct resource wm831x_ldo5_resources[] = { .end = WM831X_LDO5_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO5, - .end = WM831X_IRQ_UV_LDO5, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO5, "UV"), }; static const struct resource wm831x_ldo6_resources[] = { @@ -809,12 +741,7 @@ static const struct resource wm831x_ldo6_resources[] = { .end = WM831X_LDO6_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO6, - .end = WM831X_IRQ_UV_LDO6, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO6, "UV"), }; static const struct resource wm831x_ldo7_resources[] = { @@ -823,12 +750,7 @@ static const struct resource wm831x_ldo7_resources[] = { .end = WM831X_LDO7_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO7, - .end = WM831X_IRQ_UV_LDO7, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO7, "UV"), }; static const struct resource wm831x_ldo8_resources[] = { @@ -837,12 +759,7 @@ static const struct resource wm831x_ldo8_resources[] = { .end = WM831X_LDO8_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO8, - .end = WM831X_IRQ_UV_LDO8, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO8, "UV"), }; static const struct resource 
wm831x_ldo9_resources[] = { @@ -851,12 +768,7 @@ static const struct resource wm831x_ldo9_resources[] = { .end = WM831X_LDO9_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO9, - .end = WM831X_IRQ_UV_LDO9, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO9, "UV"), }; static const struct resource wm831x_ldo10_resources[] = { @@ -865,12 +777,7 @@ static const struct resource wm831x_ldo10_resources[] = { .end = WM831X_LDO10_SLEEP_CONTROL, .flags = IORESOURCE_REG, }, - { - .name = "UV", - .start = WM831X_IRQ_UV_LDO10, - .end = WM831X_IRQ_UV_LDO10, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_UV_LDO10, "UV"), }; static const struct resource wm831x_ldo11_resources[] = { @@ -882,96 +789,27 @@ static const struct resource wm831x_ldo11_resources[] = { }; static const struct resource wm831x_on_resources[] = { - { - .start = WM831X_IRQ_ON, - .end = WM831X_IRQ_ON, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ(WM831X_IRQ_ON), }; static const struct resource wm831x_power_resources[] = { - { - .name = "SYSLO", - .start = WM831X_IRQ_PPM_SYSLO, - .end = WM831X_IRQ_PPM_SYSLO, - .flags = IORESOURCE_IRQ, - }, - { - .name = "PWR SRC", - .start = WM831X_IRQ_PPM_PWR_SRC, - .end = WM831X_IRQ_PPM_PWR_SRC, - .flags = IORESOURCE_IRQ, - }, - { - .name = "USB CURR", - .start = WM831X_IRQ_PPM_USB_CURR, - .end = WM831X_IRQ_PPM_USB_CURR, - .flags = IORESOURCE_IRQ, - }, - { - .name = "BATT HOT", - .start = WM831X_IRQ_CHG_BATT_HOT, - .end = WM831X_IRQ_CHG_BATT_HOT, - .flags = IORESOURCE_IRQ, - }, - { - .name = "BATT COLD", - .start = WM831X_IRQ_CHG_BATT_COLD, - .end = WM831X_IRQ_CHG_BATT_COLD, - .flags = IORESOURCE_IRQ, - }, - { - .name = "BATT FAIL", - .start = WM831X_IRQ_CHG_BATT_FAIL, - .end = WM831X_IRQ_CHG_BATT_FAIL, - .flags = IORESOURCE_IRQ, - }, - { - .name = "OV", - .start = WM831X_IRQ_CHG_OV, - .end = WM831X_IRQ_CHG_OV, - .flags = IORESOURCE_IRQ, - }, - { - .name = "END", - .start = 
WM831X_IRQ_CHG_END, - .end = WM831X_IRQ_CHG_END, - .flags = IORESOURCE_IRQ, - }, - { - .name = "TO", - .start = WM831X_IRQ_CHG_TO, - .end = WM831X_IRQ_CHG_TO, - .flags = IORESOURCE_IRQ, - }, - { - .name = "MODE", - .start = WM831X_IRQ_CHG_MODE, - .end = WM831X_IRQ_CHG_MODE, - .flags = IORESOURCE_IRQ, - }, - { - .name = "START", - .start = WM831X_IRQ_CHG_START, - .end = WM831X_IRQ_CHG_START, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_PPM_SYSLO, "SYSLO"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_PPM_PWR_SRC, "PWR SRC"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_PPM_USB_CURR, "USB CURR"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_CHG_BATT_HOT, "BATT HOT"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_CHG_BATT_COLD, "BATT COLD"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_CHG_BATT_FAIL, "BATT FAIL"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_CHG_OV, "OV"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_CHG_END, "END"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_CHG_TO, "TO"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_CHG_MODE, "MODE"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_CHG_START, "START"), }; static const struct resource wm831x_rtc_resources[] = { - { - .name = "PER", - .start = WM831X_IRQ_RTC_PER, - .end = WM831X_IRQ_RTC_PER, - .flags = IORESOURCE_IRQ, - }, - { - .name = "ALM", - .start = WM831X_IRQ_RTC_ALM, - .end = WM831X_IRQ_RTC_ALM, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_RTC_PER, "PER"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_RTC_ALM, "ALM"), }; static const struct resource wm831x_status1_resources[] = { @@ -991,26 +829,12 @@ static const struct resource wm831x_status2_resources[] = { }; static const struct resource wm831x_touch_resources[] = { - { - .name = "TCHPD", - .start = WM831X_IRQ_TCHPD, - .end = WM831X_IRQ_TCHPD, - .flags = IORESOURCE_IRQ, - }, - { - .name = "TCHDATA", - .start = WM831X_IRQ_TCHDATA, - .end = WM831X_IRQ_TCHDATA, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_TCHPD, "TCHPD"), + DEFINE_RES_IRQ_NAMED(WM831X_IRQ_TCHDATA, "TCHDATA"), }; static const struct 
resource wm831x_wdt_resources[] = { - { - .start = WM831X_IRQ_WDOG_TO, - .end = WM831X_IRQ_WDOG_TO, - .flags = IORESOURCE_IRQ, - }, + DEFINE_RES_IRQ(WM831X_IRQ_WDOG_TO), }; static const struct mfd_cell wm8310_devs[] = { From 6928e7277734412dee205bbe83d27a7ab047e044 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 7 Jun 2021 13:56:20 +0800 Subject: [PATCH 038/851] NTB: Fix an error code in ntb_msit_probe() When the value of nm->isr_ctx is false, the value of ret is 0. So, we set ret to -ENOMEM to indicate this error. Clean up smatch warning: drivers/ntb/test/ntb_msi_test.c:373 ntb_msit_probe() warn: missing error code 'ret'. Reported-by: Abaci Robot Signed-off-by: Yang Li Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_msi_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c index 7095ecd6223a7..4e18e08776c98 100644 --- a/drivers/ntb/test/ntb_msi_test.c +++ b/drivers/ntb/test/ntb_msi_test.c @@ -369,8 +369,10 @@ static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb) if (ret) goto remove_dbgfs; - if (!nm->isr_ctx) + if (!nm->isr_ctx) { + ret = -ENOMEM; goto remove_dbgfs; + } ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); From f4d655a6e49b577de9c68c16b91937f70fdd0aed Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 7 Jun 2021 16:40:36 +0800 Subject: [PATCH 039/851] NTB: perf: Fix an error code in perf_setup_inbuf() When the function IS_ALIGNED() returns false, the value of ret is 0. So, we set ret to -EINVAL to indicate this error. Clean up smatch warning: drivers/ntb/test/ntb_perf.c:602 perf_setup_inbuf() warn: missing error code 'ret'. 
Reported-by: Abaci Robot Signed-off-by: Yang Li Reviewed-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 89df1350fefd8..65e1e5cf1b29a 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -598,6 +598,7 @@ static int perf_setup_inbuf(struct perf_peer *peer) return -ENOMEM; } if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) { + ret = -EINVAL; dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n"); goto err_free_inbuf; } From 28293b6c68cd93178f021f51b1b504f3ec33f463 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 Jun 2021 12:21:28 +0100 Subject: [PATCH 040/851] ntb: ntb_pingpong: remove redundant initialization of variables msg_data and spad_data The variables msg_data and spad_data are being initialized with values that are never read, they are being updated later on. The initializations are redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_pingpong.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index 2164e8492772d..8aeca79140509 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -187,7 +187,7 @@ static void pp_ping(struct pp_ctx *pp) static void pp_pong(struct pp_ctx *pp) { - u32 msg_data = -1, spad_data = -1; + u32 msg_data, spad_data; int pidx = 0; /* Read pong data */ From a9437e51fdf824e59e15fdf2642774fd185258e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?= Date: Sat, 5 Jun 2021 00:31:46 +0800 Subject: [PATCH 041/851] clocksource/drivers/ingenic: Add SMP/SMT support for sysost driver. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OST in Ingenic XBurst®2 SoCs, such as the X2000 and X2100, has a global timer and two or four percpu timers. Add support for the percpu timers. Signed-off-by: 周琰杰 (Zhou Yanjie) Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1622824306-30987-3-git-send-email-zhouyanjie@wanyeetech.com --- drivers/clocksource/ingenic-sysost.c | 315 ++++++++++++++++++++------- 1 file changed, 236 insertions(+), 79 deletions(-) diff --git a/drivers/clocksource/ingenic-sysost.c b/drivers/clocksource/ingenic-sysost.c index a129840f14f96..6f080e4043900 100644 --- a/drivers/clocksource/ingenic-sysost.c +++ b/drivers/clocksource/ingenic-sysost.c @@ -4,6 +4,7 @@ * Copyright (c) 2020 周琰杰 (Zhou Yanjie) */ +#include #include #include #include @@ -13,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -21,10 +24,14 @@ /* OST register offsets */ #define OST_REG_OSTCCR 0x00 +#define OST_REG_OSTER 0x04 #define OST_REG_OSTCR 0x08 #define OST_REG_OSTFR 0x0c +#define OST_REG_OSTCNTH 0x0c #define OST_REG_OSTMR 0x10 +#define OST_REG_OSTCNTL 0x10 #define OST_REG_OST1DFR 0x14 +#define OST_REG_OSTCNTB 0x14 #define OST_REG_OST1CNT 0x18 #define OST_REG_OST2CNTL 0x20 #define OST_REG_OSTCNT2HBUF 0x24 @@ -55,13 +62,23 @@ #define OSTECR_OST1ENC BIT(0) #define OSTECR_OST2ENC BIT(1) +enum ingenic_ost_version { + ID_X1000, + ID_X2000, +}; + struct ingenic_soc_info { + enum ingenic_ost_version version; + const struct ingenic_ost_clk_info *clk_info; + unsigned int num_channels; + unsigned int base_offset; }; struct ingenic_ost_clk_info { struct clk_init_data init_data; - u8 ostccr_reg; + unsigned int idx; + u32 ostcntl_reg; }; struct ingenic_ost_clk { @@ -71,15 +88,27 @@ struct ingenic_ost_clk { const struct ingenic_ost_clk_info *info; }; +struct ingenic_ost_timer { + void __iomem *base; + unsigned int cpu; + unsigned int channel; + struct clock_event_device cevt; + struct
ingenic_ost *ost; + struct clk *clk; + char name[20]; +}; + struct ingenic_ost { void __iomem *base; const struct ingenic_soc_info *soc_info; - struct clk *clk, *percpu_timer_clk, *global_timer_clk; - struct clock_event_device cevt; + struct clk *clk, *global_timer_clk; + struct device_node *np; struct clocksource cs; - char name[20]; struct clk_hw_onecell_data *clocks; + struct ingenic_ost_timer __percpu *timers; + + int irq; }; static struct ingenic_ost *ingenic_ost; @@ -94,11 +123,12 @@ static unsigned long ingenic_ost_percpu_timer_recalc_rate(struct clk_hw *hw, { struct ingenic_ost_clk *ost_clk = to_ost_clk(hw); const struct ingenic_ost_clk_info *info = ost_clk->info; + struct ingenic_ost_timer *timer = per_cpu_ptr(ost_clk->ost->timers, info->idx); unsigned int prescale; - prescale = readl(ost_clk->ost->base + info->ostccr_reg); + prescale = readl(timer->base + OST_REG_OSTCCR); - prescale = (prescale & OSTCCR_PRESCALE1_MASK) >> OSTCCR_PRESCALE1_LSB; + prescale = FIELD_GET(OSTCCR_PRESCALE1_MASK, prescale); return parent_rate >> (prescale * 2); } @@ -108,11 +138,15 @@ static unsigned long ingenic_ost_global_timer_recalc_rate(struct clk_hw *hw, { struct ingenic_ost_clk *ost_clk = to_ost_clk(hw); const struct ingenic_ost_clk_info *info = ost_clk->info; + struct ingenic_ost_timer *timer = per_cpu_ptr(ost_clk->ost->timers, info->idx); unsigned int prescale; - prescale = readl(ost_clk->ost->base + info->ostccr_reg); + prescale = readl(timer->base + OST_REG_OSTCCR); - prescale = (prescale & OSTCCR_PRESCALE2_MASK) >> OSTCCR_PRESCALE2_LSB; + if (ost_clk->ost->soc_info->version >= ID_X2000) + prescale = FIELD_GET(OSTCCR_PRESCALE1_MASK, prescale); + else + prescale = FIELD_GET(OSTCCR_PRESCALE2_MASK, prescale); return parent_rate >> (prescale * 2); } @@ -147,12 +181,13 @@ static int ingenic_ost_percpu_timer_set_rate(struct clk_hw *hw, unsigned long re { struct ingenic_ost_clk *ost_clk = to_ost_clk(hw); const struct ingenic_ost_clk_info *info = ost_clk->info; + struct 
ingenic_ost_timer *timer = per_cpu_ptr(ost_clk->ost->timers, info->idx); u8 prescale = ingenic_ost_get_prescale(parent_rate, req_rate); int val; - val = readl(ost_clk->ost->base + info->ostccr_reg); + val = readl(timer->base + OST_REG_OSTCCR); val = (val & ~OSTCCR_PRESCALE1_MASK) | (prescale << OSTCCR_PRESCALE1_LSB); - writel(val, ost_clk->ost->base + info->ostccr_reg); + writel(val, timer->base + OST_REG_OSTCCR); return 0; } @@ -162,12 +197,18 @@ static int ingenic_ost_global_timer_set_rate(struct clk_hw *hw, unsigned long re { struct ingenic_ost_clk *ost_clk = to_ost_clk(hw); const struct ingenic_ost_clk_info *info = ost_clk->info; + struct ingenic_ost_timer *timer = per_cpu_ptr(ost_clk->ost->timers, info->idx); u8 prescale = ingenic_ost_get_prescale(parent_rate, req_rate); int val; - val = readl(ost_clk->ost->base + info->ostccr_reg); - val = (val & ~OSTCCR_PRESCALE2_MASK) | (prescale << OSTCCR_PRESCALE2_LSB); - writel(val, ost_clk->ost->base + info->ostccr_reg); + val = readl(timer->base + OST_REG_OSTCCR); + + if (ost_clk->ost->soc_info->version >= ID_X2000) + val = (val & ~OSTCCR_PRESCALE1_MASK) | (prescale << OSTCCR_PRESCALE1_LSB); + else + val = (val & ~OSTCCR_PRESCALE2_MASK) | (prescale << OSTCCR_PRESCALE2_LSB); + + writel(val, timer->base + OST_REG_OSTCCR); return 0; } @@ -195,7 +236,42 @@ static const struct ingenic_ost_clk_info x1000_ost_clk_info[] = { .ops = &ingenic_ost_percpu_timer_ops, .flags = CLK_SET_RATE_UNGATE, }, - .ostccr_reg = OST_REG_OSTCCR, + .idx = 0, + }, + + [OST_CLK_GLOBAL_TIMER] = { + .init_data = { + .name = "global timer", + .parent_names = ingenic_ost_clk_parents, + .num_parents = ARRAY_SIZE(ingenic_ost_clk_parents), + .ops = &ingenic_ost_global_timer_ops, + .flags = CLK_SET_RATE_UNGATE, + }, + .ostcntl_reg = OST_REG_OST2CNTL, + }, +}; + +static const struct ingenic_ost_clk_info x2000_ost_clk_info[] = { + [OST_CLK_PERCPU_TIMER0] = { + .init_data = { + .name = "percpu timer0", + .parent_names = ingenic_ost_clk_parents, + .num_parents 
= ARRAY_SIZE(ingenic_ost_clk_parents), + .ops = &ingenic_ost_percpu_timer_ops, + .flags = CLK_SET_RATE_UNGATE, + }, + .idx = 0, + }, + + [OST_CLK_PERCPU_TIMER1] = { + .init_data = { + .name = "percpu timer1", + .parent_names = ingenic_ost_clk_parents, + .num_parents = ARRAY_SIZE(ingenic_ost_clk_parents), + .ops = &ingenic_ost_percpu_timer_ops, + .flags = CLK_SET_RATE_UNGATE, + }, + .idx = 1, }, [OST_CLK_GLOBAL_TIMER] = { @@ -206,7 +282,7 @@ static const struct ingenic_ost_clk_info x1000_ost_clk_info[] = { .ops = &ingenic_ost_global_timer_ops, .flags = CLK_SET_RATE_UNGATE, }, - .ostccr_reg = OST_REG_OSTCCR, + .ostcntl_reg = OST_REG_OSTCNTL, }, }; @@ -215,7 +291,7 @@ static u64 notrace ingenic_ost_global_timer_read_cntl(void) struct ingenic_ost *ost = ingenic_ost; unsigned int count; - count = readl(ost->base + OST_REG_OST2CNTL); + count = readl(ost->base + ost->soc_info->clk_info->ostcntl_reg); return count; } @@ -225,16 +301,21 @@ static u64 notrace ingenic_ost_clocksource_read(struct clocksource *cs) return ingenic_ost_global_timer_read_cntl(); } -static inline struct ingenic_ost *to_ingenic_ost(struct clock_event_device *evt) +static inline struct ingenic_ost_timer * +to_ingenic_ost_timer(struct clock_event_device *evt) { - return container_of(evt, struct ingenic_ost, cevt); + return container_of(evt, struct ingenic_ost_timer, cevt); } static int ingenic_ost_cevt_set_state_shutdown(struct clock_event_device *evt) { - struct ingenic_ost *ost = to_ingenic_ost(evt); + struct ingenic_ost_timer *timer = to_ingenic_ost_timer(evt); + struct ingenic_ost *ost = timer->ost; - writel(OSTECR_OST1ENC, ost->base + OST_REG_OSTECR); + if (ost->soc_info->version >= ID_X2000) + writel(0, timer->base + OST_REG_OSTER); + else + writel(OSTECR_OST1ENC, timer->base + OST_REG_OSTECR); return 0; } @@ -242,26 +323,34 @@ static int ingenic_ost_cevt_set_state_shutdown(struct clock_event_device *evt) static int ingenic_ost_cevt_set_next(unsigned long next, struct clock_event_device *evt) { - 
struct ingenic_ost *ost = to_ingenic_ost(evt); - - writel((u32)~OSTFR_FFLAG, ost->base + OST_REG_OSTFR); - writel(next, ost->base + OST_REG_OST1DFR); - writel(OSTCR_OST1CLR, ost->base + OST_REG_OSTCR); - writel(OSTESR_OST1ENS, ost->base + OST_REG_OSTESR); - writel((u32)~OSTMR_FMASK, ost->base + OST_REG_OSTMR); + struct ingenic_ost_timer *timer = to_ingenic_ost_timer(evt); + struct ingenic_ost *ost = timer->ost; + + writel((u32)~OSTFR_FFLAG, timer->base + OST_REG_OSTFR); + writel(next, timer->base + OST_REG_OST1DFR); + writel(OSTCR_OST1CLR, timer->base + OST_REG_OSTCR); + + if (ost->soc_info->version >= ID_X2000) { + writel(OSTESR_OST1ENS, timer->base + OST_REG_OSTER); + } else { + writel(OSTESR_OST1ENS, timer->base + OST_REG_OSTESR); + writel((u32)~OSTMR_FMASK, timer->base + OST_REG_OSTMR); + } return 0; } static irqreturn_t ingenic_ost_cevt_cb(int irq, void *dev_id) { - struct clock_event_device *evt = dev_id; - struct ingenic_ost *ost = to_ingenic_ost(evt); + struct ingenic_ost_timer *timer = dev_id; + struct ingenic_ost *ost = timer->ost; - writel(OSTECR_OST1ENC, ost->base + OST_REG_OSTECR); + if (ost->soc_info->version >= ID_X2000) + writel(0, timer->base + OST_REG_OSTER); + else + writel(OSTECR_OST1ENC, timer->base + OST_REG_OSTECR); - if (evt->event_handler) - evt->event_handler(evt); + timer->cevt.event_handler(&timer->cevt); return IRQ_HANDLED; } @@ -271,6 +360,7 @@ static int __init ingenic_ost_register_clock(struct ingenic_ost *ost, struct clk_hw_onecell_data *clocks) { struct ingenic_ost_clk *ost_clk; + struct ingenic_ost_timer *timer = per_cpu_ptr(ost->timers, info->idx); int val, err; ost_clk = kzalloc(sizeof(*ost_clk), GFP_KERNEL); @@ -283,9 +373,9 @@ static int __init ingenic_ost_register_clock(struct ingenic_ost *ost, ost_clk->ost = ost; /* Reset clock divider */ - val = readl(ost->base + info->ostccr_reg); - val &= ~(OSTCCR_PRESCALE1_MASK | OSTCCR_PRESCALE2_MASK); - writel(val, ost->base + info->ostccr_reg); + val = readl(timer->base + 
OST_REG_OSTCCR); + val &= ~(OSTCCR_PRESCALE1_MASK); + writel(val, timer->base + OST_REG_OSTCCR); err = clk_hw_register(NULL, &ost_clk->hw); if (err) { @@ -309,57 +399,51 @@ static struct clk * __init ingenic_ost_get_clock(struct device_node *np, int id) return of_clk_get_from_provider(&args); } -static int __init ingenic_ost_percpu_timer_init(struct device_node *np, - struct ingenic_ost *ost) +static int __init ingenic_ost_setup_cevt(unsigned int cpu) { - unsigned int timer_virq, channel = OST_CLK_PERCPU_TIMER; + struct ingenic_ost *ost = ingenic_ost; + struct ingenic_ost_timer *timer = this_cpu_ptr(ost->timers); unsigned long rate; int err; - ost->percpu_timer_clk = ingenic_ost_get_clock(np, channel); - if (IS_ERR(ost->percpu_timer_clk)) - return PTR_ERR(ost->percpu_timer_clk); + timer->clk = ingenic_ost_get_clock(ost->np, timer->channel); + if (IS_ERR(timer->clk)) + return PTR_ERR(timer->clk); - err = clk_prepare_enable(ost->percpu_timer_clk); + err = clk_prepare_enable(timer->clk); if (err) goto err_clk_put; - rate = clk_get_rate(ost->percpu_timer_clk); + rate = clk_get_rate(timer->clk); if (!rate) { err = -EINVAL; goto err_clk_disable; } - timer_virq = of_irq_get(np, 0); - if (!timer_virq) { - err = -EINVAL; - goto err_clk_disable; - } + snprintf(timer->name, sizeof(timer->name), "OST percpu timer%u", cpu); - snprintf(ost->name, sizeof(ost->name), "OST percpu timer"); + /* Unmask full comparison match interrupt */ + writel((u32)~OSTMR_FMASK, timer->base + OST_REG_OSTMR); - err = request_irq(timer_virq, ingenic_ost_cevt_cb, IRQF_TIMER, - ost->name, &ost->cevt); - if (err) - goto err_irq_dispose_mapping; + timer->cpu = smp_processor_id(); + timer->cevt.cpumask = cpumask_of(smp_processor_id()); + timer->cevt.features = CLOCK_EVT_FEAT_ONESHOT; + timer->cevt.name = timer->name; + timer->cevt.rating = 400; + timer->cevt.set_state_shutdown = ingenic_ost_cevt_set_state_shutdown; + timer->cevt.set_next_event = ingenic_ost_cevt_set_next; - ost->cevt.cpumask = 
cpumask_of(smp_processor_id()); - ost->cevt.features = CLOCK_EVT_FEAT_ONESHOT; - ost->cevt.name = ost->name; - ost->cevt.rating = 400; - ost->cevt.set_state_shutdown = ingenic_ost_cevt_set_state_shutdown; - ost->cevt.set_next_event = ingenic_ost_cevt_set_next; + clockevents_config_and_register(&timer->cevt, rate, 4, 0xffffffff); - clockevents_config_and_register(&ost->cevt, rate, 4, 0xffffffff); + if (ost->soc_info->version >= ID_X2000) + enable_percpu_irq(ost->irq, IRQ_TYPE_NONE); return 0; -err_irq_dispose_mapping: - irq_dispose_mapping(timer_virq); err_clk_disable: - clk_disable_unprepare(ost->percpu_timer_clk); + clk_disable_unprepare(timer->clk); err_clk_put: - clk_put(ost->percpu_timer_clk); + clk_put(timer->clk); return err; } @@ -385,11 +469,14 @@ static int __init ingenic_ost_global_timer_init(struct device_node *np, goto err_clk_disable; } - /* Clear counter CNT registers */ - writel(OSTCR_OST2CLR, ost->base + OST_REG_OSTCR); - - /* Enable OST channel */ - writel(OSTESR_OST2ENS, ost->base + OST_REG_OSTESR); + /* Clear counter CNT registers and enable OST channel */ + if (ost->soc_info->version >= ID_X2000) { + writel(OSTCR_OST1CLR, ost->base + OST_REG_OSTCR); + writel(OSTESR_OST1ENS, ost->base + OST_REG_OSTER); + } else { + writel(OSTCR_OST2CLR, ost->base + OST_REG_OSTCR); + writel(OSTESR_OST2ENS, ost->base + OST_REG_OSTESR); + } cs->name = "ingenic-ost"; cs->rating = 400; @@ -411,18 +498,33 @@ static int __init ingenic_ost_global_timer_init(struct device_node *np, } static const struct ingenic_soc_info x1000_soc_info = { + .version = ID_X1000, + .clk_info = x1000_ost_clk_info, + .num_channels = 2, }; +static const struct ingenic_soc_info x2000_soc_info = { + .version = ID_X2000, + .clk_info = x2000_ost_clk_info, + + .num_channels = 3, + .base_offset = 0x100, +}; + static const struct of_device_id __maybe_unused ingenic_ost_of_matches[] __initconst = { { .compatible = "ingenic,x1000-ost", .data = &x1000_soc_info }, + { .compatible = "ingenic,x2000-ost", 
.data = &x2000_soc_info }, { /* sentinel */ } }; static int __init ingenic_ost_probe(struct device_node *np) { const struct of_device_id *id = of_match_node(ingenic_ost_of_matches, np); + struct ingenic_ost_timer *timer; struct ingenic_ost *ost; + void __iomem *base; + unsigned int cpu; unsigned int i; int ret; @@ -430,18 +532,43 @@ static int __init ingenic_ost_probe(struct device_node *np) if (!ost) return -ENOMEM; + ost->timers = alloc_percpu(struct ingenic_ost_timer); + if (!ost->timers) { + ret = -ENOMEM; + goto err_free_ost; + } + + ost->np = np; + ost->soc_info = id->data; + ost->base = of_io_request_and_map(np, 0, of_node_full_name(np)); if (IS_ERR(ost->base)) { pr_err("%s: Failed to map OST registers\n", __func__); ret = PTR_ERR(ost->base); - goto err_free_ost; + goto err_free_timers; + } + + if (ost->soc_info->version >= ID_X2000) { + base = of_io_request_and_map(np, 1, of_node_full_name(np)); + if (IS_ERR(base)) { + pr_err("%s: Failed to map OST registers\n", __func__); + ret = PTR_ERR(base); + goto err_free_timers; + } + } + + ost->irq = irq_of_parse_and_map(np, 0); + if (ost->irq < 0) { + pr_crit("%s: Cannot to get OST IRQ\n", __func__); + ret = ost->irq; + goto err_free_timers; } ost->clk = of_clk_get_by_name(np, "ost"); if (IS_ERR(ost->clk)) { - ret = PTR_ERR(ost->clk); pr_crit("%s: Cannot get OST clock\n", __func__); - goto err_free_ost; + ret = PTR_ERR(ost->clk); + goto err_free_timers; } ret = clk_prepare_enable(ost->clk); @@ -450,8 +577,6 @@ static int __init ingenic_ost_probe(struct device_node *np) goto err_put_clk; } - ost->soc_info = id->data; - ost->clocks = kzalloc(struct_size(ost->clocks, hws, ost->soc_info->num_channels), GFP_KERNEL); if (!ost->clocks) { @@ -461,8 +586,21 @@ static int __init ingenic_ost_probe(struct device_node *np) ost->clocks->num = ost->soc_info->num_channels; - for (i = 0; i < ost->clocks->num; i++) { - ret = ingenic_ost_register_clock(ost, i, &x1000_ost_clk_info[i], ost->clocks); + for (cpu = 0; cpu < 
num_possible_cpus(); cpu++) { + timer = per_cpu_ptr(ost->timers, cpu); + + if (ost->soc_info->version >= ID_X2000) + timer->base = base + ost->soc_info->base_offset * cpu; + else + timer->base = ost->base; + + timer->ost = ost; + timer->cpu = cpu; + timer->channel = OST_CLK_PERCPU_TIMER + cpu; + } + + for (i = 0; i < num_possible_cpus() + 1; i++) { + ret = ingenic_ost_register_clock(ost, i, &ost->soc_info->clk_info[i], ost->clocks); if (ret) { pr_crit("%s: Cannot register clock %d\n", __func__, i); goto err_unregister_ost_clocks; @@ -488,6 +626,8 @@ static int __init ingenic_ost_probe(struct device_node *np) clk_disable_unprepare(ost->clk); err_put_clk: clk_put(ost->clk); +err_free_timers: + free_percpu(ost->timers); err_free_ost: kfree(ost); return ret; @@ -513,13 +653,29 @@ static int __init ingenic_ost_init(struct device_node *np) ret = ingenic_ost_global_timer_init(np, ost); if (ret) { - pr_crit("%s: Unable to init global timer: %x\n", __func__, ret); + pr_crit("%s: Unable to init global timer: %d\n", __func__, ret); goto err_free_ingenic_ost; } - ret = ingenic_ost_percpu_timer_init(np, ost); - if (ret) + if (ost->soc_info->version >= ID_X2000) + ret = request_percpu_irq(ost->irq, ingenic_ost_cevt_cb, + "OST percpu timer", ost->timers); + else + ret = request_irq(ost->irq, ingenic_ost_cevt_cb, IRQF_TIMER, + "OST percpu timer", ost->timers); + + if (ret) { + pr_crit("%s: Unable to request percpu IRQ: %d\n", __func__, ret); + goto err_ost_global_timer_cleanup; + } + + /* Setup clock events on each CPU core */ + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "Ingenic XBurst: online", + ingenic_ost_setup_cevt, NULL); + if (ret < 0) { + pr_crit("%s: Unable to init percpu timers: %d\n", __func__, ret); goto err_ost_global_timer_cleanup; + } /* Register the sched_clock at the end as there's no way to undo it */ rate = clk_get_rate(ost->global_timer_clk); @@ -537,3 +693,4 @@ static int __init ingenic_ost_init(struct device_node *np) } TIMER_OF_DECLARE(x1000_ost, 
"ingenic,x1000-ost", ingenic_ost_init); +TIMER_OF_DECLARE(x2000_ost, "ingenic,x2000-ost", ingenic_ost_init); From 81782af0dbc37b03a66fc1cb26b715b07331408a Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Tue, 6 Apr 2021 15:00:44 +0200 Subject: [PATCH 042/851] clocksource/drivers/arm_global_timer: Implement rate compensation whenever source clock changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds rate change notification support for the parent clock; should that clock change, then we try to adjust our prescaler in order to compensate (i.e. we adjust to still get the same timer frequency). This is loosely based on what is done in timer-cadence-ttc. timer-sun5i, mips-gic-timer and smp_twd.c also seem to look at their parent clock rate and to perform some kind of adjustment whenever needed. In this particular case we have only one single counter and prescaler for all clocksource, clockevent and timer_delay, and we just update it for all (i.e. we don't let it go and call clockevents_update_freq() to notify the kernel that our rate has changed). Note that there is apparently no other way to fix things up, because once we call register_current_timer_delay(), specifying the timer rate, it seems that that rate is not supposed to change ever. In order for this mechanism to work, we have to make assumptions about how much the parent clock is expected to eventually decrease from its initial rate, and set our initial prescaler to a value that we can eventually decrease enough to compensate. We provide an option in Kconfig for this. In case we end up in a situation in which we are not able to compensate the parent clock change, we fail, returning NOTIFY_BAD. This fixes a real-world problem with Zynq arch not being able to use this driver and CPU_FREQ at the same time (because ARM global timer is fed by the CPU clock, which may keep changing when CPU_FREQ is enabled).
Signed-off-by: Andrea Merello Cc: Patrice Chotard Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: Michal Simek Cc: Sören Brinkmann Reviewed-by: Patrice Chotard Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210406130045.15491-2-andrea.merello@gmail.com --- drivers/clocksource/Kconfig | 13 +++ drivers/clocksource/arm_global_timer.c | 122 +++++++++++++++++++++++-- 2 files changed, 125 insertions(+), 10 deletions(-) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 39aa21d01e054..19fc5f8883e03 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -358,6 +358,19 @@ config ARM_GLOBAL_TIMER help This option enables support for the ARM global timer unit. +config ARM_GT_INITIAL_PRESCALER_VAL + int "ARM global timer initial prescaler value" + default 1 + depends on ARM_GLOBAL_TIMER + help + When the ARM global timer initializes, its current rate is declared + to the kernel and maintained forever. Should its parent clock + change, the driver tries to fix the timer's internal prescaler. + On some machs (e.g. Zynq) the initial prescaler value thus poses + bounds about how much the parent clock is allowed to decrease or + increase wrt the initial clock value. + This affects CPU_FREQ max delta from the initial frequency.
+ config ARM_TIMER_SP804 bool "Support for Dual Timer SP804 module" if COMPILE_TEST depends on GENERIC_SCHED_CLOCK && CLKDEV_LOOKUP diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 88b2d38a7a61a..60a8047fd32e5 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -31,6 +31,10 @@ #define GT_CONTROL_COMP_ENABLE BIT(1) /* banked */ #define GT_CONTROL_IRQ_ENABLE BIT(2) /* banked */ #define GT_CONTROL_AUTO_INC BIT(3) /* banked */ +#define GT_CONTROL_PRESCALER_SHIFT 8 +#define GT_CONTROL_PRESCALER_MAX 0xF +#define GT_CONTROL_PRESCALER_MASK (GT_CONTROL_PRESCALER_MAX << \ + GT_CONTROL_PRESCALER_SHIFT) #define GT_INT_STATUS 0x0c #define GT_INT_STATUS_EVENT_FLAG BIT(0) @@ -39,6 +43,7 @@ #define GT_COMP1 0x14 #define GT_AUTO_INC 0x18 +#define MAX_F_ERR 50 /* * We are expecting to be clocked by the ARM peripheral clock. * @@ -46,7 +51,8 @@ * the units for all operations. */ static void __iomem *gt_base; -static unsigned long gt_clk_rate; +struct notifier_block gt_clk_rate_change_nb; +static u32 gt_psv_new, gt_psv_bck, gt_target_rate; static int gt_ppi; static struct clock_event_device __percpu *gt_evt; @@ -96,7 +102,10 @@ static void gt_compare_set(unsigned long delta, int periodic) unsigned long ctrl; counter += delta; - ctrl = GT_CONTROL_TIMER_ENABLE; + ctrl = readl(gt_base + GT_CONTROL); + ctrl &= ~(GT_CONTROL_COMP_ENABLE | GT_CONTROL_IRQ_ENABLE | + GT_CONTROL_AUTO_INC | GT_CONTROL_AUTO_INC); + ctrl |= GT_CONTROL_TIMER_ENABLE; writel_relaxed(ctrl, gt_base + GT_CONTROL); writel_relaxed(lower_32_bits(counter), gt_base + GT_COMP0); writel_relaxed(upper_32_bits(counter), gt_base + GT_COMP1); @@ -123,7 +132,7 @@ static int gt_clockevent_shutdown(struct clock_event_device *evt) static int gt_clockevent_set_periodic(struct clock_event_device *evt) { - gt_compare_set(DIV_ROUND_CLOSEST(gt_clk_rate, HZ), 1); + gt_compare_set(DIV_ROUND_CLOSEST(gt_target_rate, HZ), 1); return 0; } @@ -177,7 
+186,7 @@ static int gt_starting_cpu(unsigned int cpu) clk->cpumask = cpumask_of(cpu); clk->rating = 300; clk->irq = gt_ppi; - clockevents_config_and_register(clk, gt_clk_rate, + clockevents_config_and_register(clk, gt_target_rate, 1, 0xffffffff); enable_percpu_irq(clk->irq, IRQ_TYPE_NONE); return 0; @@ -232,9 +241,28 @@ static struct delay_timer gt_delay_timer = { .read_current_timer = gt_read_long, }; +static void gt_write_presc(u32 psv) +{ + u32 reg; + + reg = readl(gt_base + GT_CONTROL); + reg &= ~GT_CONTROL_PRESCALER_MASK; + reg |= psv << GT_CONTROL_PRESCALER_SHIFT; + writel(reg, gt_base + GT_CONTROL); +} + +static u32 gt_read_presc(void) +{ + u32 reg; + + reg = readl(gt_base + GT_CONTROL); + reg &= GT_CONTROL_PRESCALER_MASK; + return reg >> GT_CONTROL_PRESCALER_SHIFT; +} + static void __init gt_delay_timer_init(void) { - gt_delay_timer.freq = gt_clk_rate; + gt_delay_timer.freq = gt_target_rate; register_current_timer_delay(&gt_delay_timer); } @@ -243,18 +271,81 @@ static int __init gt_clocksource_init(void) writel(0, gt_base + GT_CONTROL); writel(0, gt_base + GT_COUNTER0); writel(0, gt_base + GT_COUNTER1); - /* enables timer on all the cores */ - writel(GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL); + /* set prescaler and enable timer on all the cores */ + writel(((CONFIG_ARM_GT_INITIAL_PRESCALER_VAL - 1) << + GT_CONTROL_PRESCALER_SHIFT) + | GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL); #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK - sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate); + sched_clock_register(gt_sched_clock_read, 64, gt_target_rate); #endif - return clocksource_register_hz(&gt_clocksource, gt_clk_rate); + return clocksource_register_hz(&gt_clocksource, gt_target_rate); +} + +static int gt_clk_rate_change_cb(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct clk_notifier_data *ndata = data; + + switch (event) { + case PRE_RATE_CHANGE: + { + int psv; + + psv = DIV_ROUND_CLOSEST(ndata->new_rate, + gt_target_rate);
+ + if (abs(gt_target_rate - (ndata->new_rate / psv)) > MAX_F_ERR) + return NOTIFY_BAD; + + psv--; + + /* prescaler within legal range? */ + if (psv < 0 || psv > GT_CONTROL_PRESCALER_MAX) + return NOTIFY_BAD; + + /* + * store timer clock ctrl register so we can restore it in case + * of an abort. + */ + gt_psv_bck = gt_read_presc(); + gt_psv_new = psv; + /* scale down: adjust divider in post-change notification */ + if (ndata->new_rate < ndata->old_rate) + return NOTIFY_DONE; + + /* scale up: adjust divider now - before frequency change */ + gt_write_presc(psv); + break; + } + case POST_RATE_CHANGE: + /* scale up: pre-change notification did the adjustment */ + if (ndata->new_rate > ndata->old_rate) + return NOTIFY_OK; + + /* scale down: adjust divider now - after frequency change */ + gt_write_presc(gt_psv_new); + break; + + case ABORT_RATE_CHANGE: + /* we have to undo the adjustment in case we scale up */ + if (ndata->new_rate < ndata->old_rate) + return NOTIFY_OK; + + /* restore original register value */ + gt_write_presc(gt_psv_bck); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_DONE; } static int __init global_timer_of_register(struct device_node *np) { struct clk *gt_clk; + static unsigned long gt_clk_rate; int err = 0; /* @@ -292,11 +383,20 @@ static int __init global_timer_of_register(struct device_node *np) } gt_clk_rate = clk_get_rate(gt_clk); + gt_target_rate = gt_clk_rate / CONFIG_ARM_GT_INITIAL_PRESCALER_VAL; + gt_clk_rate_change_nb.notifier_call = + gt_clk_rate_change_cb; + err = clk_notifier_register(gt_clk, &gt_clk_rate_change_nb); + if (err) { + pr_warn("Unable to register clock notifier\n"); + goto out_clk; + } + gt_evt = alloc_percpu(struct clock_event_device); if (!gt_evt) { pr_warn("global-timer: can't allocate memory\n"); err = -ENOMEM; - goto out_clk; + goto out_clk_nb; } err = request_percpu_irq(gt_ppi, gt_clockevent_interrupt, @@ -326,6 +426,8 @@ static int __init global_timer_of_register(struct device_node *np)
free_percpu_irq(gt_ppi, gt_evt); out_free: free_percpu(gt_evt); +out_clk_nb: + clk_notifier_unregister(gt_clk, &gt_clk_rate_change_nb); out_clk: clk_disable_unprepare(gt_clk); out_unmap: From 2766f93a1f959b4b3c71382a9277d269bd1ae3de Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Tue, 6 Apr 2021 15:00:45 +0200 Subject: [PATCH 043/851] arm: zynq: don't disable CONFIG_ARM_GLOBAL_TIMER due to CONFIG_CPU_FREQ anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now ARM global timer driver could work even if its source clock rate changes, so we don't need to disable that driver when cpu frequency scaling is in use. This causes Zynq arch to get support for timer delay and get_cycles(). Signed-off-by: Andrea Merello Cc: Patrice Chotard Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: Michal Simek Cc: Sören Brinkmann Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210406130045.15491-3-andrea.merello@gmail.com --- arch/arm/mach-zynq/Kconfig | 2 +- drivers/clocksource/Kconfig | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-zynq/Kconfig b/arch/arm/mach-zynq/Kconfig index 43fb941dcd073..a56748d671c43 100644 --- a/arch/arm/mach-zynq/Kconfig +++ b/arch/arm/mach-zynq/Kconfig @@ -6,7 +6,7 @@ config ARCH_ZYNQ select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC - select ARM_GLOBAL_TIMER if !CPU_FREQ + select ARM_GLOBAL_TIMER select CADENCE_TTC_TIMER select HAVE_ARM_SCU if SMP select HAVE_ARM_TWD if SMP diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 19fc5f8883e03..9fa28237715a3 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -360,6 +360,7 @@ config ARM_GLOBAL_TIMER config ARM_GT_INITIAL_PRESCALER_VAL int "ARM global timer initial prescaler value" + default 2 if ARCH_ZYNQ default 1 depends on ARM_GLOBAL_TIMER help From e0c208f61165a7078859ea83eb2412cc9785eee6 Mon Sep 17 00:00:00 2001 From:
Zou Wei Date: Sat, 12 Jun 2021 17:27:26 +0800 Subject: [PATCH 044/851] clocksource/drivers/arm_global_timer: Make symbol 'gt_clk_rate_change_nb' static The sparse tool complains as follows: drivers/clocksource/arm_global_timer.c:54:23: warning: symbol 'gt_clk_rate_change_nb' was not declared. Should it be static? This symbol is not used outside of arm_global_timer.c, so mark it static. Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1623490046-37972-1-git-send-email-zou_wei@huawei.com --- drivers/clocksource/arm_global_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 60a8047fd32e5..68b1d144a4128 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -51,7 +51,7 @@ * the units for all operations. */ static void __iomem *gt_base; -struct notifier_block gt_clk_rate_change_nb; +static struct notifier_block gt_clk_rate_change_nb; static u32 gt_psv_new, gt_psv_bck, gt_target_rate; static int gt_ppi; static struct clock_event_device __percpu *gt_evt; From 6f64c8159af9a4c46aea60344d5ab66a9bb24bc0 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 15 Jun 2021 19:54:40 +0800 Subject: [PATCH 045/851] clocksource/drivers/arm_global_timer: Remove duplicated argument in arm_global_timer Fix the following coccicheck warning: drivers/clocksource/arm_global_timer.c:107:4-23: duplicated argument to & or | Signed-off-by: Wan Jiabing Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210615115440.8881-1-wanjiabing@vivo.com --- drivers/clocksource/arm_global_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 68b1d144a4128..44a61dc6f9320 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -104,7 +104,7 
@@ static void gt_compare_set(unsigned long delta, int periodic) counter += delta; ctrl = readl(gt_base + GT_CONTROL); ctrl &= ~(GT_CONTROL_COMP_ENABLE | GT_CONTROL_IRQ_ENABLE | - GT_CONTROL_AUTO_INC | GT_CONTROL_AUTO_INC); + GT_CONTROL_AUTO_INC); ctrl |= GT_CONTROL_TIMER_ENABLE; writel_relaxed(ctrl, gt_base + GT_CONTROL); writel_relaxed(lower_32_bits(counter), gt_base + GT_COMP0); From 16d7181bb1b5b67dffdda4e2e63dfab0c09b94fc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 19 May 2021 11:33:01 +0200 Subject: [PATCH 046/851] USB: cdnsp: drop irq-flags initialisations There's no need to initialise irq-flags variables before saving the interrupt state. Acked-by: Felipe Balbi Cc: Pawel Laszczak Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210519093303.10789-2-johan@kernel.org Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 56707b6b0f57c..4fddc78f732fa 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1151,7 +1151,7 @@ static int cdnsp_gadget_ep_set_halt(struct usb_ep *ep, int value) struct cdnsp_ep *pep = to_cdnsp_ep(ep); struct cdnsp_device *pdev = pep->pdev; struct cdnsp_request *preq; - unsigned long flags = 0; + unsigned long flags; int ret; spin_lock_irqsave(&pdev->lock, flags); @@ -1176,7 +1176,7 @@ static int cdnsp_gadget_ep_set_wedge(struct usb_ep *ep) { struct cdnsp_ep *pep = to_cdnsp_ep(ep); struct cdnsp_device *pdev = pep->pdev; - unsigned long flags = 0; + unsigned long flags; int ret; spin_lock_irqsave(&pdev->lock, flags); From 8d1250d6c515adb988f153df8e5e66c003d20303 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 16 Jun 2021 17:54:36 +0200 Subject: [PATCH 047/851] ARM: dts: zynq: add NAND flash controller node Recently, a driver for the ARM Primecell PL35x static memory controller (including NAND controller) was added in linux. 
Add the corresponding device tree node. Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20210616155437.27378-3-michael@walle.cc Signed-off-by: Michal Simek --- arch/arm/boot/dts/zynq-7000.dtsi | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi index df9ad831cf05f..47c2a4b14c065 100644 --- a/arch/arm/boot/dts/zynq-7000.dtsi +++ b/arch/arm/boot/dts/zynq-7000.dtsi @@ -252,6 +252,27 @@ #size-cells = <0>; }; + smcc: memory-controller@e000e000 { + compatible = "arm,pl353-smc-r2p1", "arm,primecell"; + reg = <0xe000e000 0x0001000>; + status = "disabled"; + clock-names = "memclk", "apb_pclk"; + clocks = <&clkc 11>, <&clkc 44>; + ranges = <0x0 0x0 0xe1000000 0x1000000 /* Nand CS region */ + 0x1 0x0 0xe2000000 0x2000000 /* SRAM/NOR CS0 region */ + 0x2 0x0 0xe4000000 0x2000000>; /* SRAM/NOR CS1 region */ + #address-cells = <2>; + #size-cells = <1>; + + nfc0: nand-controller@0,0 { + compatible = "arm,pl353-nand-r2p1"; + reg = <0 0 0x1000000>; + status = "disabled"; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + sdhci0: mmc@e0100000 { compatible = "arasan,sdhci-8.9a"; status = "disabled"; From 6584dce74292de07fef64078f5b4bff8899508d6 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 16 Jun 2021 17:54:37 +0200 Subject: [PATCH 048/851] ARM: dts: ebaz4205: enable NAND support The board features a 128MiB NAND chip and recently linux gained support for the NAND controller on the Zynq SoC. Thus add the corresponding devicetree nodes. 
Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20210616155437.27378-4-michael@walle.cc Signed-off-by: Michal Simek --- arch/arm/boot/dts/zynq-ebaz4205.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm/boot/dts/zynq-ebaz4205.dts b/arch/arm/boot/dts/zynq-ebaz4205.dts index b0b836aedd765..53fa6dbfd8fdf 100644 --- a/arch/arm/boot/dts/zynq-ebaz4205.dts +++ b/arch/arm/boot/dts/zynq-ebaz4205.dts @@ -48,6 +48,14 @@ pinctrl-0 = <&pinctrl_gpio0_default>; }; +&nfc0 { + status = "okay"; + + nand@0 { + reg = <0>; + }; +}; + &pinctrl0 { pinctrl_gpio0_default: gpio0-default { mux { @@ -118,6 +126,10 @@ }; }; +&smcc { + status = "okay"; +}; + &sdhci0 { status = "okay"; disable-wp; From 438c451d5ad1951212f864d766b84b182a3f27f7 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 16 Jun 2021 17:54:35 +0200 Subject: [PATCH 049/851] ARM: configs: multi_v7: enable PL35x NAND controller After years, linux finally got a driver for the PL35x NAND controller found on the Xilinx Zynq-7000 SoC for example. Enable support for this driver. 
Signed-off-by: Michael Walle Link: https://lore.kernel.org/r/20210616155437.27378-2-michael@walle.cc Signed-off-by: Michal Simek --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 52a0400fdd926..8d5ec5986b42b 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -215,6 +215,7 @@ CONFIG_MTD_NAND_GPMI_NAND=y CONFIG_MTD_NAND_VF610_NFC=y CONFIG_MTD_NAND_DAVINCI=y CONFIG_MTD_NAND_STM32_FMC2=y +CONFIG_MTD_NAND_PL35X=y CONFIG_MTD_SPI_NOR=y CONFIG_SPI_ASPEED_SMC=m CONFIG_MTD_UBI=y From 956df1bb0ab8bb823541e66d186ed65559541b69 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 18 Jun 2021 16:28:58 +0800 Subject: [PATCH 050/851] usb: chipidea: host: fix port index underflow and UBSAN complains If wIndex is 0 (and it often is), these calculations underflow and UBSAN complains, here resolve this by not decrementing the index when it is equal to 0, this copies the solution from commit 85e3990bea49 ("USB: EHCI: avoid undefined pointer arithmetic and placate UBSAN") Reported-by: Zhipeng Wang Signed-off-by: Li Jun Link: https://lore.kernel.org/r/1624004938-2399-1-git-send-email-jun.li@nxp.com Signed-off-by: Peter Chen --- drivers/usb/chipidea/host.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index e86d13c04bdbe..bdc3885c0d493 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -240,15 +240,18 @@ static int ci_ehci_hub_control( ) { struct ehci_hcd *ehci = hcd_to_ehci(hcd); + unsigned int ports = HCS_N_PORTS(ehci->hcs_params); u32 __iomem *status_reg; - u32 temp; + u32 temp, port_index; unsigned long flags; int retval = 0; bool done = false; struct device *dev = hcd->self.controller; struct ci_hdrc *ci = dev_get_drvdata(dev); - status_reg = &ehci->regs->port_status[(wIndex & 0xff) - 1]; + port_index = wIndex 
& 0xff; + port_index -= (port_index > 0); + status_reg = &ehci->regs->port_status[port_index]; spin_lock_irqsave(&ehci->lock, flags); @@ -260,6 +263,11 @@ static int ci_ehci_hub_control( } if (typeReq == SetPortFeature && wValue == USB_PORT_FEAT_SUSPEND) { + if (!wIndex || wIndex > ports) { + retval = -EPIPE; + goto done; + } + temp = ehci_readl(ehci, status_reg); if ((temp & PORT_PE) == 0 || (temp & PORT_RESET) != 0) { retval = -EPIPE; @@ -288,7 +296,7 @@ static int ci_ehci_hub_control( ehci_writel(ehci, temp, status_reg); } - set_bit((wIndex & 0xff) - 1, &ehci->suspended_ports); + set_bit(port_index, &ehci->suspended_ports); goto done; } From d97176f6040f8ab11d574ea3437557bc11d1d19a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Fri, 25 Jun 2021 09:42:09 +0200 Subject: [PATCH 051/851] fpga: dfl: pci: add device IDs for Silicom N501x PAC cards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the approved PCI Express Device IDs for the Silicom PAC N5010 and N5011 cards (aka. Silicom Lightning Creek cards). The N5010 features an FPGA that manages/interfaces four QSFP ports, and allows on-board custom packet processing/filtering/routing, based on logic loaded with user-provided FPGA bitstreams. The N5011 cards adds a PCIe switch that exposes, in addition to the FPGA itself, two Intel E810 (aka Columbiaville) ethernet controllers. With this, packets can be forwarded from the FPGA to the host for further processing. 
Signed-off-by: Martin Hundebøll Acked-by: Wu Hao Signed-off-by: Moritz Fischer --- drivers/fpga/dfl-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/fpga/dfl-pci.c b/drivers/fpga/dfl-pci.c index b44523ea8c91a..4d68719e608f7 100644 --- a/drivers/fpga/dfl-pci.c +++ b/drivers/fpga/dfl-pci.c @@ -74,6 +74,9 @@ static void cci_pci_free_irq(struct pci_dev *pcidev) #define PCIE_DEVICE_ID_PF_DSC_1_X 0x09C4 #define PCIE_DEVICE_ID_INTEL_PAC_N3000 0x0B30 #define PCIE_DEVICE_ID_INTEL_PAC_D5005 0x0B2B +#define PCIE_DEVICE_ID_SILICOM_PAC_N5010 0x1000 +#define PCIE_DEVICE_ID_SILICOM_PAC_N5011 0x1001 + /* VF Device */ #define PCIE_DEVICE_ID_VF_INT_5_X 0xBCBF #define PCIE_DEVICE_ID_VF_INT_6_X 0xBCC1 @@ -90,6 +93,8 @@ static struct pci_device_id cci_pcie_id_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_PAC_N3000),}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_PAC_D5005),}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_PAC_D5005_VF),}, + {PCI_DEVICE(PCI_VENDOR_ID_SILICOM_DENMARK, PCIE_DEVICE_ID_SILICOM_PAC_N5010),}, + {PCI_DEVICE(PCI_VENDOR_ID_SILICOM_DENMARK, PCIE_DEVICE_ID_SILICOM_PAC_N5011),}, {0,} }; MODULE_DEVICE_TABLE(pci, cci_pcie_id_tbl); From c5d4fb2539cad2e62c5a3f0d8237613c394f297e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 29 Jun 2021 12:37:00 +0200 Subject: [PATCH 052/851] pstore/blk: Use "%lu" to format unsigned long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit: fs/pstore/blk.c: In function ‘__best_effort_init’: include/linux/kern_levels.h:5:18: warning: format ‘%zu’ expects argument of type ‘size_t’, but argument 3 has type ‘long unsigned int’ [-Wformat=] 5 | #define KERN_SOH "\001" /* ASCII Start Of Header */ | ^~~~~~ include/linux/kern_levels.h:14:19: note: in expansion of macro ‘KERN_SOH’ 14 | #define KERN_INFO KERN_SOH "6" /* informational */ | ^~~~~~~~ include/linux/printk.h:373:9: note: in expansion of macro ‘KERN_INFO’ 373 | 
printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~~~~ fs/pstore/blk.c:314:3: note: in expansion of macro ‘pr_info’ 314 | pr_info("attached %s (%zu) (no dedicated panic_write!)\n", | ^~~~~~~ Fixes: 7bb9557b48fcabaa ("pstore/blk: Use the normal block device I/O path") Signed-off-by: Geert Uytterhoeven Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210629103700.1935012-1-geert@linux-m68k.org --- fs/pstore/blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 04ce58c939a0b..6093088de49fd 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -311,7 +311,7 @@ static int __init __best_effort_init(void) if (ret) kfree(best_effort_dev); else - pr_info("attached %s (%zu) (no dedicated panic_write!)\n", + pr_info("attached %s (%lu) (no dedicated panic_write!)\n", blkdev, best_effort_dev->zone.total_size); return ret; From bd71c861f8dc21560f72d640e51222f47be61eb4 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Fri, 2 Jul 2021 17:18:30 +0800 Subject: [PATCH 053/851] fcntl: fix potential deadlocks for &fown_struct.lock Syzbot reports a potential deadlock in do_fcntl: ======================================================== WARNING: possible irq lock inversion dependency detected 5.12.0-syzkaller #0 Not tainted -------------------------------------------------------- syz-executor132/8391 just changed the state of lock: ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: f_getown_ex fs/fcntl.c:211 [inline] ffff888015967bf8 (&f->f_owner.lock){.+..}-{2:2}, at: do_fcntl+0x8b4/0x1200 fs/fcntl.c:395 but this lock was taken by another, HARDIRQ-safe lock in the past: (&dev->event_lock){-...}-{2:2} and interrupts could create inverse lock ordering between them. 
other info that might help us debug this: Chain exists of: &dev->event_lock --> &new->fa_lock --> &f->f_owner.lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&f->f_owner.lock); local_irq_disable(); lock(&dev->event_lock); lock(&new->fa_lock); lock(&dev->event_lock); *** DEADLOCK *** This happens because there is a lock hierarchy of &dev->event_lock --> &new->fa_lock --> &f->f_owner.lock from the following call chain: input_inject_event(): spin_lock_irqsave(&dev->event_lock,...); input_handle_event(): input_pass_values(): input_to_handler(): evdev_events(): evdev_pass_values(): spin_lock(&client->buffer_lock); __pass_event(): kill_fasync(): kill_fasync_rcu(): read_lock(&fa->fa_lock); send_sigio(): read_lock_irqsave(&fown->lock,...); However, since &dev->event_lock is HARDIRQ-safe, interrupts have to be disabled while grabbing &f->f_owner.lock, otherwise we invert the lock hierarchy. Hence, we replace calls to read_lock/read_unlock on &f->f_owner.lock, with read_lock_irq/read_unlock_irq. 
Reported-and-tested-by: syzbot+e6d5398a02c516ce5e70@syzkaller.appspotmail.com Signed-off-by: Desmond Cheong Zhi Xi Signed-off-by: Jeff Layton --- fs/fcntl.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index dfc72f15be7fc..cf9e81dfa615f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -150,7 +150,8 @@ void f_delown(struct file *filp) pid_t f_getown(struct file *filp) { pid_t pid = 0; - read_lock(&filp->f_owner.lock); + + read_lock_irq(&filp->f_owner.lock); rcu_read_lock(); if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { pid = pid_vnr(filp->f_owner.pid); @@ -158,7 +159,7 @@ pid_t f_getown(struct file *filp) pid = -pid; } rcu_read_unlock(); - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock); return pid; } @@ -208,7 +209,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) struct f_owner_ex owner = {}; int ret = 0; - read_lock(&filp->f_owner.lock); + read_lock_irq(&filp->f_owner.lock); rcu_read_lock(); if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) owner.pid = pid_vnr(filp->f_owner.pid); @@ -231,7 +232,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg) ret = -EINVAL; break; } - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock); if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); @@ -249,10 +250,10 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) uid_t src[2]; int err; - read_lock(&filp->f_owner.lock); + read_lock_irq(&filp->f_owner.lock); src[0] = from_kuid(user_ns, filp->f_owner.uid); src[1] = from_kuid(user_ns, filp->f_owner.euid); - read_unlock(&filp->f_owner.lock); + read_unlock_irq(&filp->f_owner.lock); err = put_user(src[0], &dst[0]); err |= put_user(src[1], &dst[1]); From e1cc6e8c1969a598119b7a5578a08f6d9d0d14c4 Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Fri, 2 Jul 2021 17:18:31 +0800 Subject: [PATCH 054/851] fcntl: fix potential deadlock for &fasync_struct.fa_lock 
There is an existing lock hierarchy of &dev->event_lock --> &fasync_struct.fa_lock --> &f->f_owner.lock from the following call chain: input_inject_event(): spin_lock_irqsave(&dev->event_lock,...); input_handle_event(): input_pass_values(): input_to_handler(): evdev_events(): evdev_pass_values(): spin_lock(&client->buffer_lock); __pass_event(): kill_fasync(): kill_fasync_rcu(): read_lock(&fa->fa_lock); send_sigio(): read_lock_irqsave(&fown->lock,...); &dev->event_lock is HARDIRQ-safe, so interrupts have to be disabled while grabbing &fasync_struct.fa_lock, otherwise we invert the lock hierarchy. However, since kill_fasync which calls kill_fasync_rcu is an exported symbol, it may not necessarily be called with interrupts disabled. As kill_fasync_rcu may be called with interrupts disabled (for example, in the call chain above), we replace calls to read_lock/read_unlock on &fasync_struct.fa_lock in kill_fasync_rcu with read_lock_irqsave/read_unlock_irqrestore. Signed-off-by: Desmond Cheong Zhi Xi Signed-off-by: Jeff Layton --- fs/fcntl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index cf9e81dfa615f..887db4918a899 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -1004,13 +1004,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; + unsigned long flags; if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } - read_lock(&fa->fa_lock); + read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a @@ -1019,7 +1020,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } - read_unlock(&fa->fa_lock); + read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } } From 
ed708e124ac22a5cc72097b5e8ef393c2eaaba65 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 23 Jun 2021 09:02:47 +0200 Subject: [PATCH 055/851] usb: cdns3: Fixed incorrect gadget state For delayed status phase, the usb_gadget->state was set to USB_STATE_ADDRESS and it has never been updated to USB_STATE_CONFIGURED. Patch updates the gadget state to correct USB_STATE_CONFIGURED. As a result of this bug the controller was not able to enter to Test Mode while using MSC function. Cc: Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210623070247.46151-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdns3-ep0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/cdns3/cdns3-ep0.c b/drivers/usb/cdns3/cdns3-ep0.c index 9a17802275d51..ec5bfd8944c36 100644 --- a/drivers/usb/cdns3/cdns3-ep0.c +++ b/drivers/usb/cdns3/cdns3-ep0.c @@ -731,6 +731,7 @@ static int cdns3_gadget_ep0_queue(struct usb_ep *ep, request->actual = 0; priv_dev->status_completion_no_call = true; priv_dev->pending_status_request = request; + usb_gadget_set_state(&priv_dev->gadget, USB_STATE_CONFIGURED); spin_unlock_irqrestore(&priv_dev->lock, flags); /* From 186e4f2798192bb383ec6c2d5effff69208b8142 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 25 Jun 2021 12:25:02 +0200 Subject: [PATCH 056/851] usb: cdnsp: Fix incorrect supported maximum speed Driver had hardcoded in initialization maximum supported speed to USB_SPEED_SUPER_PLUS but it should consider the speed returned from usb_get_maximum_speed function. 
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210625102502.26336-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c083985e387b2..cf03ba79a3553 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1881,7 +1881,7 @@ static int __cdnsp_gadget_init(struct cdns *cdns) pdev->gadget.name = "cdnsp-gadget"; pdev->gadget.speed = USB_SPEED_UNKNOWN; pdev->gadget.sg_supported = 1; - pdev->gadget.max_speed = USB_SPEED_SUPER_PLUS; + pdev->gadget.max_speed = max_speed; pdev->gadget.lpm_capable = 1; pdev->setup_buf = kzalloc(CDNSP_EP0_SETUP_SIZE, GFP_KERNEL); From af66f73a09f58ac7eb2a2df9b68e76d29d6faac8 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 23 Jun 2021 09:27:28 +0200 Subject: [PATCH 057/851] usb: cdnsp: Fixed issue with ZLP The condition "if (need_zero_pkt && zero_len_trb)" was always false and it caused that TRB for ZLP was not prepared. Fix causes that after preparing last TRB in TD, the driver prepares additional TD with ZLP when a ZLP is required. 
Cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210623072728.41275-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-ring.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 68972746e3636..1b1438457fb04 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1932,15 +1932,13 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) } if (enqd_len + trb_buff_len >= full_len) { - if (need_zero_pkt && zero_len_trb) { - zero_len_trb = true; - } else { - field &= ~TRB_CHAIN; - field |= TRB_IOC; - more_trbs_coming = false; - need_zero_pkt = false; - preq->td.last_trb = ring->enqueue; - } + if (need_zero_pkt) + zero_len_trb = !zero_len_trb; + + field &= ~TRB_CHAIN; + field |= TRB_IOC; + more_trbs_coming = false; + preq->td.last_trb = ring->enqueue; } /* Only set interrupt on short packet for OUT endpoints. */ @@ -1955,7 +1953,7 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); - cdnsp_queue_trb(pdev, ring, more_trbs_coming | need_zero_pkt, + cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb, lower_32_bits(send_addr), upper_32_bits(send_addr), length_field, From 42c4417937beaf68a9ab07e95cf8634c7080174c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 22 Jun 2021 21:37:48 +0200 Subject: [PATCH 058/851] usb: cdnsp: Fix the IMAN_IE_SET and IMAN_IE_CLEAR macro. IMAN_IE is BIT(1), so these macro are respectively equivalent to BIT(1) and 0, whatever the value of 'p'. The purpose was to set and reset a single bit in 'p'. Fix these macros to do that correctly. 
Fixes: e93e58d27402 ("usb: cdnsp: Device side header file for CDNSP driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/d12bfcc9cbffb89e27b120668821b3c4f09b6755.1624390584.git.christophe.jaillet@wanadoo.fr Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 783ca8ffde007..f740fa6089d85 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -383,8 +383,8 @@ struct cdnsp_intr_reg { #define IMAN_IE BIT(1) #define IMAN_IP BIT(0) /* bits 2:31 need to be preserved */ -#define IMAN_IE_SET(p) (((p) & IMAN_IE) | 0x2) -#define IMAN_IE_CLEAR(p) (((p) & IMAN_IE) & ~(0x2)) +#define IMAN_IE_SET(p) ((p) | IMAN_IE) +#define IMAN_IE_CLEAR(p) ((p) & ~IMAN_IE) /* IMOD - Interrupter Moderation Register - irq_control bitmasks. */ /* From d637af9fd6b818c1431631f937bf0751a80afe76 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 5 Apr 2021 04:58:39 +0200 Subject: [PATCH 059/851] kallsyms: support big kernel symbols (2-byte lengths) Rust symbols can become quite long due to namespacing introduced by modules, types, traits, generics, etc. Increasing to 255 is not enough in some cases, and therefore we need to introduce 2-byte lengths to the symbol table. We call these "big" symbols. In order to avoid increasing all lengths to 2 bytes (since most of them only require 1 byte, including many Rust ones), we use length zero to mark "big" symbols in the table. 
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Signed-off-by: Miguel Ojeda --- kernel/kallsyms.c | 7 +++++++ scripts/kallsyms.c | 31 ++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index c851ca0ed3576..9d0c23e1993c1 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -73,6 +73,13 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, */ off += len + 1; + /* If zero, it is a "big" symbol, so a two byte length follows. */ + if (len == 0) { + len = (data[0] << 8) | data[1]; + data += 2; + off += len + 2; + } + /* * For every byte on the compressed symbol data, copy the table * entry for that byte. diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 54ad86d137849..bcdabee13aab5 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -470,12 +470,37 @@ static void write_src(void) if ((i & 0xFF) == 0) markers[i >> 8] = off; - printf("\t.byte 0x%02x", table[i]->len); + /* + * There cannot be any symbol of length zero -- we use that + * to mark a "big" symbol (and it doesn't make sense anyway). + */ + if (table[i]->len == 0) { + fprintf(stderr, "kallsyms failure: " + "unexpected zero symbol length\n"); + exit(EXIT_FAILURE); + } + + /* Only lengths that fit in up to two bytes are supported. */ + if (table[i]->len > 0xFFFF) { + fprintf(stderr, "kallsyms failure: " + "unexpected huge symbol length\n"); + exit(EXIT_FAILURE); + } + + if (table[i]->len <= 0xFF) { + /* Most symbols use a single byte for the length. */ + printf("\t.byte 0x%02x", table[i]->len); + off += table[i]->len + 1; + } else { + /* "Big" symbols use a zero and then two bytes. 
*/ + printf("\t.byte 0x00, 0x%02x, 0x%02x", + (table[i]->len >> 8) & 0xFF, + table[i]->len & 0xFF); + off += table[i]->len + 3; + } for (k = 0; k < table[i]->len; k++) printf(", 0x%02x", table[i]->sym[k]); printf("\n"); - - off += table[i]->len + 1; } printf("\n"); From f2f6175186f4ccd33a710f4cd557676a5cc60153 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 5 Apr 2021 05:03:50 +0200 Subject: [PATCH 060/851] kallsyms: increase maximum kernel symbol length to 512 Rust symbols can become quite long due to namespacing introduced by modules, types, traits, generics, etc. For instance, for: pub mod my_module { pub struct MyType; pub struct MyGenericType<T>(T); pub trait MyTrait { fn my_method() -> u32; } impl MyTrait for MyGenericType<MyType> { fn my_method() -> u32 { 42 } } } generates a symbol of length 96 when using the upcoming v0 mangling scheme: _RNvXNtCshGpAVYOtgW1_7example9my_moduleINtB2_13MyGenericTypeNtB2_6MyTypeENtB2_7MyTrait9my_method At the moment, Rust symbols may reach up to 300 in length. Setting 512 as the maximum seems like a reasonable choice to keep some headroom. 
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Signed-off-by: Miguel Ojeda --- include/linux/kallsyms.h | 2 +- kernel/livepatch/core.c | 4 ++-- scripts/kallsyms.c | 2 +- tools/include/linux/kallsyms.h | 2 +- tools/include/linux/lockdep.h | 2 +- tools/lib/perf/include/perf/event.h | 2 +- tools/lib/symbol/kallsyms.h | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 465060acc9816..5cdc6903abca3 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -14,7 +14,7 @@ #include -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 335d988bd8111..73874e5edfda6 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -213,7 +213,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, * we use the smallest/strictest upper bound possible (56, based on * the current definition of MODULE_NAME_LEN) to prevent overflows. 
*/ - BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 128); + BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 512); relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ @@ -227,7 +227,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, /* Format: .klp.sym.sym_objname.sym_name,sympos */ cnt = sscanf(strtab + sym->st_name, - ".klp.sym.%55[^.].%127[^,],%lu", + ".klp.sym.%55[^.].%511[^,],%lu", sym_objname, sym_name, &sympos); if (cnt != 3) { pr_err("symbol %s has an incorrectly formatted name\n", diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index bcdabee13aab5..9bab5f55ade3d 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -27,7 +27,7 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 struct sym_entry { unsigned long long addr; diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index efb6c3f5f2a9a..5a37ccbec54fb 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -6,7 +6,7 @@ #include #include -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 struct module; diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h index e56997288f2b0..d9c163f3ab242 100644 --- a/tools/include/linux/lockdep.h +++ b/tools/include/linux/lockdep.h @@ -47,7 +47,7 @@ static inline int debug_locks_off(void) #define task_pid_nr(tsk) ((tsk)->pid) -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 #define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) #define pr_err(format, ...) 
fprintf (stderr, format, ## __VA_ARGS__) #define pr_warn pr_err diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 4d0c02ba3f7d3..095d60144a70c 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -95,7 +95,7 @@ struct perf_record_throttle { }; #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif struct perf_record_ksymbol { diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h index 72ab9870454ba..542f9b059c3bd 100644 --- a/tools/lib/symbol/kallsyms.h +++ b/tools/lib/symbol/kallsyms.h @@ -7,7 +7,7 @@ #include #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif static inline u8 kallsyms2elf_binding(char type) From e17f1b1f3e01a1a054ae10397915710571426f36 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 5 Apr 2021 21:21:05 +0200 Subject: [PATCH 061/851] Makefile: generate `CLANG_FLAGS` even in GCC builds To support Rust under GCC-built kernels, we need to save the flags that would have been passed if the kernel was being compiled with Clang. The reason is that `bindgen` -- the tool we use to generate Rust bindings to the C side of the kernel -- relies on `libclang` to parse C. Ideally: - `bindgen` would support a GCC backend (requested at [1]), - or the Clang driver would be perfectly compatible with GCC, including plugins. Unlikely, of course, but perhaps a big subset of configs may be possible to guarantee to be kept compatible nevertheless. This is also the reason why GCC builds are very experimental and some configurations may not work (e.g. `GCC_PLUGIN_RANDSTRUCT`). However, we keep GCC builds working (for some example configs) in the CI to avoid diverging/regressing further, so that we are better prepared for the future when a solution might become available. 
[1] https://github.com/rust-lang/rust-bindgen/issues/1949 Link: https://github.com/Rust-for-Linux/linux/issues/167 Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Signed-off-by: Miguel Ojeda --- Makefile | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 0565caea0362a..6e823d8bd6459 100644 --- a/Makefile +++ b/Makefile @@ -573,18 +573,23 @@ endif # and from include/config/auto.conf.cmd to detect the compiler upgrade. CC_VERSION_TEXT = $(subst $(pound),,$(shell $(CC) --version 2>/dev/null | head -n 1)) -ifneq ($(findstring clang,$(CC_VERSION_TEXT)),) +TENTATIVE_CLANG_FLAGS := -Werror=unknown-warning-option + ifneq ($(CROSS_COMPILE),) -CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) +TENTATIVE_CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) endif ifeq ($(LLVM_IAS),1) -CLANG_FLAGS += -integrated-as +TENTATIVE_CLANG_FLAGS += -integrated-as else -CLANG_FLAGS += -no-integrated-as +TENTATIVE_CLANG_FLAGS += -no-integrated-as GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) -CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) +TENTATIVE_CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) endif -CLANG_FLAGS += -Werror=unknown-warning-option + +export TENTATIVE_CLANG_FLAGS + +ifneq ($(findstring clang,$(CC_VERSION_TEXT)),) +CLANG_FLAGS += $(TENTATIVE_CLANG_FLAGS) KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_AFLAGS += $(CLANG_FLAGS) export CLANG_FLAGS From 360299269d2ec4a62573bf16fe777b74fd9cb728 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:38:57 +0200 Subject: [PATCH 062/851] vsprintf: add new `%pA` format specifier This patch adds a format specifier `%pA` to 
`vsprintf` which formats a pointer as `core::fmt::Arguments`. Doing so allows us to directly format to the internal buffer of `printf`, so we do not have to use a temporary buffer on the stack to pre-assemble the message on the Rust side. This specifier is intended only to be used from Rust and not for C, so `checkpatch.pl` is intentionally unchanged to catch any misuse. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- lib/vsprintf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f0c35d9b65bff..e7afe95400494 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2182,6 +2182,10 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +#ifdef CONFIG_RUST +char *rust_fmt_argument(char* buf, char* end, void *ptr); +#endif + /* Disable pointer hashing if requested */ bool no_hash_pointers __ro_after_init; EXPORT_SYMBOL_GPL(no_hash_pointers); @@ -2335,6 +2339,10 @@ early_param("no_hash_pointers", no_hash_pointers_enable); * * Note: The default behaviour 
(unadorned %p) is to hash the address, * rendering it useful as a unique identifier. + * + * There is also a '%pA' format specifier, but it is only intended to be used + * from Rust code to format core::fmt::Arguments. Do *not* use it from C. + * See rust/kernel/print.rs for details. */ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, @@ -2407,6 +2415,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); case 'f': return fwnode_string(buf, end, ptr, spec, fmt + 1); +#ifdef CONFIG_RUST + case 'A': + return rust_fmt_argument(buf, end, ptr); +#endif case 'x': return pointer_string(buf, end, ptr, spec); case 'e': From 4de916fef3568612b713880c8ea99c10bfb22a31 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 16:52:41 +0200 Subject: [PATCH 063/851] rust: add C helpers This source file contains forwarders to C macros and inlined functions. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/helpers.c | 235 +++++++++++++++++++++++++++++++++++++++++++++++++ 
1 file changed, 235 insertions(+) create mode 100644 rust/helpers.c diff --git a/rust/helpers.c b/rust/helpers.c new file mode 100644 index 0000000000000..a6e98abb13efd --- /dev/null +++ b/rust/helpers.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void rust_helper_BUG(void) +{ + BUG(); +} + +unsigned long rust_helper_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return copy_from_user(to, from, n); +} + +unsigned long rust_helper_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return copy_to_user(to, from, n); +} + +unsigned long rust_helper_clear_user(void __user *to, unsigned long n) +{ + return clear_user(to, n); +} + +void rust_helper_spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_SPINLOCK + __spin_lock_init(lock, name, key); +#else + spin_lock_init(lock); +#endif +} +EXPORT_SYMBOL_GPL(rust_helper_spin_lock_init); + +void rust_helper_spin_lock(spinlock_t *lock) +{ + spin_lock(lock); +} +EXPORT_SYMBOL_GPL(rust_helper_spin_lock); + +void rust_helper_spin_unlock(spinlock_t *lock) +{ + spin_unlock(lock); +} +EXPORT_SYMBOL_GPL(rust_helper_spin_unlock); + +void rust_helper_init_wait(struct wait_queue_entry *wq_entry) +{ + init_wait(wq_entry); +} +EXPORT_SYMBOL_GPL(rust_helper_init_wait); + +int rust_helper_signal_pending(struct task_struct *t) +{ + return signal_pending(t); +} +EXPORT_SYMBOL_GPL(rust_helper_signal_pending); + +struct page *rust_helper_alloc_pages(gfp_t gfp_mask, unsigned int order) +{ + return alloc_pages(gfp_mask, order); +} +EXPORT_SYMBOL_GPL(rust_helper_alloc_pages); + +void *rust_helper_kmap(struct page *page) +{ + return kmap(page); +} +EXPORT_SYMBOL_GPL(rust_helper_kmap); + +void rust_helper_kunmap(struct page *page) +{ + return kunmap(page); +} +EXPORT_SYMBOL_GPL(rust_helper_kunmap); + +int 
rust_helper_cond_resched(void) +{ + return cond_resched(); +} +EXPORT_SYMBOL_GPL(rust_helper_cond_resched); + +size_t rust_helper_copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_from_iter(addr, bytes, i); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_from_iter); + +size_t rust_helper_copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter(addr, bytes, i); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_to_iter); + +bool rust_helper_is_err(__force const void *ptr) +{ + return IS_ERR(ptr); +} +EXPORT_SYMBOL_GPL(rust_helper_is_err); + +long rust_helper_ptr_err(__force const void *ptr) +{ + return PTR_ERR(ptr); +} +EXPORT_SYMBOL_GPL(rust_helper_ptr_err); + +const char *rust_helper_errname(int err) +{ + return errname(err); +} + +void rust_helper_mutex_lock(struct mutex *lock) +{ + mutex_lock(lock); +} +EXPORT_SYMBOL_GPL(rust_helper_mutex_lock); + +void * +rust_helper_platform_get_drvdata(const struct platform_device *pdev) +{ + return platform_get_drvdata(pdev); +} +EXPORT_SYMBOL_GPL(rust_helper_platform_get_drvdata); + +void +rust_helper_platform_set_drvdata(struct platform_device *pdev, + void *data) +{ + return platform_set_drvdata(pdev, data); +} +EXPORT_SYMBOL_GPL(rust_helper_platform_set_drvdata); + +refcount_t rust_helper_refcount_new(void) +{ + return (refcount_t)REFCOUNT_INIT(1); +} +EXPORT_SYMBOL_GPL(rust_helper_refcount_new); + +void rust_helper_refcount_inc(refcount_t *r) +{ + refcount_inc(r); +} +EXPORT_SYMBOL_GPL(rust_helper_refcount_inc); + +bool rust_helper_refcount_dec_and_test(refcount_t *r) +{ + return refcount_dec_and_test(r); +} +EXPORT_SYMBOL_GPL(rust_helper_refcount_dec_and_test); + +void rust_helper_rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + rb_link_node(node, parent, rb_link); +} +EXPORT_SYMBOL_GPL(rust_helper_rb_link_node); + +struct task_struct *rust_helper_get_current(void) +{ + return current; +} +EXPORT_SYMBOL_GPL(rust_helper_get_current); + 
+void rust_helper_get_task_struct(struct task_struct * t) +{ + get_task_struct(t); +} +EXPORT_SYMBOL_GPL(rust_helper_get_task_struct); + +void rust_helper_put_task_struct(struct task_struct * t) +{ + put_task_struct(t); +} +EXPORT_SYMBOL_GPL(rust_helper_put_task_struct); + +int rust_helper_security_binder_set_context_mgr(struct task_struct *mgr) +{ + return security_binder_set_context_mgr(mgr); +} +EXPORT_SYMBOL_GPL(rust_helper_security_binder_set_context_mgr); + +int rust_helper_security_binder_transaction(struct task_struct *from, + struct task_struct *to) +{ + return security_binder_transaction(from, to); +} +EXPORT_SYMBOL_GPL(rust_helper_security_binder_transaction); + +int rust_helper_security_binder_transfer_binder(struct task_struct *from, + struct task_struct *to) +{ + return security_binder_transfer_binder(from, to); +} +EXPORT_SYMBOL_GPL(rust_helper_security_binder_transfer_binder); + +int rust_helper_security_binder_transfer_file(struct task_struct *from, + struct task_struct *to, + struct file *file) +{ + return security_binder_transfer_file(from, to, file); +} +EXPORT_SYMBOL_GPL(rust_helper_security_binder_transfer_file); + +/* We use bindgen's --size_t-is-usize option to bind the C size_t type + * as the Rust usize type, so we can use it in contexts where Rust + * expects a usize like slice (array) indices. usize is defined to be + * the same as C's uintptr_t type (can hold any pointer) but not + * necessarily the same as size_t (can hold the size of any single + * object). Most modern platforms use the same concrete integer type for + * both of them, but in case we find ourselves on a platform where + * that's not true, fail early instead of risking ABI or + * integer-overflow issues. + * + * If your platform fails this assertion, it means that you are in + * danger of integer-overflow bugs (even if you attempt to remove + * --size_t-is-usize). 
It may be easiest to change the kernel ABI on + * your platform such that size_t matches uintptr_t (i.e., to increase + * size_t, because uintptr_t has to be at least as big as size_t). +*/ +static_assert( + sizeof(size_t) == sizeof(uintptr_t) && + __alignof__(size_t) == __alignof__(uintptr_t), + "Rust code expects C size_t to match Rust usize" +); From 2f9cf7636de242fd8c42e95f5a6fba00bade5452 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 16:54:11 +0200 Subject: [PATCH 064/851] rust: add `compiler_builtins` crate Rust provides `compiler_builtins` as a port of LLVM's `compiler-rt`. Since we do not need the vast majority of them, we avoid the dependency by providing our own crate. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/compiler_builtins.rs | 146 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 rust/compiler_builtins.rs diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs new file mode 100644 index 0000000000000..cb4bbf7be4e31 --- /dev/null +++ b/rust/compiler_builtins.rs @@ -0,0 +1,146 @@ 
+// SPDX-License-Identifier: GPL-2.0 + +//! Our own `compiler_builtins`. +//! +//! Rust provides [`compiler_builtins`] as a port of LLVM's [`compiler-rt`]. +//! Since we do not need the vast majority of them, we avoid the dependency +//! by providing this file. +//! +//! At the moment, some builtins are required that should not be. For instance, +//! [`core`] has floating-point functionality which we should not be compiling +//! in. We will work with upstream [`core`] to provide feature flags to disable +//! the parts we do not need. For the moment, we define them to [`panic!`] at +//! runtime for simplicity to catch mistakes, instead of performing surgery +//! on `core.o`. +//! +//! In any case, all these symbols are weakened to ensure we do not override +//! those that may be provided by the rest of the kernel. +//! +//! [`compiler_builtins`]: https://github.com/rust-lang/compiler-builtins +//! [`compiler-rt`]: https://compiler-rt.llvm.org/ + +#![feature(compiler_builtins)] +#![compiler_builtins] +#![no_builtins] +#![no_std] + +macro_rules! 
define_panicking_intrinsics( + ($reason: tt, { $($ident: ident, )* }) => { + $( + #[doc(hidden)] + #[no_mangle] + pub extern "C" fn $ident() { + panic!($reason); + } + )* + } +); + +define_panicking_intrinsics!("`f32` should not be used", { + __addsf3, + __addsf3vfp, + __aeabi_fcmpeq, + __aeabi_ul2f, + __divsf3, + __divsf3vfp, + __eqsf2, + __eqsf2vfp, + __fixsfdi, + __fixsfsi, + __fixsfti, + __fixunssfdi, + __fixunssfsi, + __fixunssfti, + __floatdisf, + __floatsisf, + __floattisf, + __floatundisf, + __floatunsisf, + __floatuntisf, + __gesf2, + __gesf2vfp, + __gtsf2, + __gtsf2vfp, + __lesf2, + __lesf2vfp, + __ltsf2, + __ltsf2vfp, + __mulsf3, + __mulsf3vfp, + __nesf2, + __nesf2vfp, + __powisf2, + __subsf3, + __subsf3vfp, + __unordsf2, +}); + +define_panicking_intrinsics!("`f64` should not be used", { + __adddf3, + __adddf3vfp, + __aeabi_dcmpeq, + __aeabi_ul2d, + __divdf3, + __divdf3vfp, + __eqdf2, + __eqdf2vfp, + __fixdfdi, + __fixdfsi, + __fixdfti, + __fixunsdfdi, + __fixunsdfsi, + __fixunsdfti, + __floatdidf, + __floatsidf, + __floattidf, + __floatundidf, + __floatunsidf, + __floatuntidf, + __gedf2, + __gedf2vfp, + __gtdf2, + __gtdf2vfp, + __ledf2, + __ledf2vfp, + __ltdf2, + __ltdf2vfp, + __muldf3, + __muldf3vfp, + __nedf2, + __nedf2vfp, + __powidf2, + __subdf3, + __subdf3vfp, + __unorddf2, +}); + +define_panicking_intrinsics!("`i128` should not be used", { + __ashrti3, + __muloti4, + __multi3, +}); + +define_panicking_intrinsics!("`u128` should not be used", { + __ashlti3, + __lshrti3, + __udivmodti4, + __udivti3, + __umodti3, +}); + +#[cfg(target_arch = "arm")] +define_panicking_intrinsics!("`u64` division/modulo should not be used", { + __aeabi_uldivmod, + __mulodi4, +}); + +extern "C" { + fn rust_helper_BUG() -> !; +} + +#[panic_handler] +fn panic(_info: &core::panic::PanicInfo<'_>) -> ! 
{ + unsafe { + rust_helper_BUG(); + } +} From 25ad6376a155a5fabdc6a0006c22955a72e65574 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:02:21 +0200 Subject: [PATCH 065/851] rust: add `alloc` crate This crate is a subset of the Rust standard library `alloc`, with some additions on top. This is needed because upstream support for fallible allocations is a work in progress (i.e. the `try_*` versions of methods which return a `Result` instead of panicking). Having the library in-tree also gives us a bit more freedom to experiment with new interfaces and allows us to iterate quickly. Eventually, the goal is to have everything the kernel needs in upstream `alloc` and drop it from the kernel tree. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/alloc/README.md | 32 + rust/alloc/alloc.rs | 425 ++++ rust/alloc/borrow.rs | 493 +++++ rust/alloc/boxed.rs | 1728 +++++++++++++++ rust/alloc/collections/mod.rs | 116 + rust/alloc/fmt.rs | 587 ++++++ rust/alloc/lib.rs | 197 ++ rust/alloc/macros.rs | 128 ++ rust/alloc/prelude/mod.rs | 17 + rust/alloc/prelude/v1.rs | 16 + 
rust/alloc/raw_vec.rs | 612 ++++++ rust/alloc/rc.rs | 2539 ++++++++++++++++++++++ rust/alloc/slice.rs | 1271 +++++++++++ rust/alloc/str.rs | 614 ++++++ rust/alloc/string.rs | 2847 +++++++++++++++++++++++++ rust/alloc/sync.rs | 2631 +++++++++++++++++++++++ rust/alloc/vec/drain.rs | 157 ++ rust/alloc/vec/drain_filter.rs | 145 ++ rust/alloc/vec/into_iter.rs | 296 +++ rust/alloc/vec/is_zero.rs | 106 + rust/alloc/vec/mod.rs | 3255 +++++++++++++++++++++++++++++ rust/alloc/vec/partial_eq.rs | 49 + rust/alloc/vec/set_len_on_drop.rs | 30 + rust/alloc/vec/spec_extend.rs | 170 ++ 24 files changed, 18461 insertions(+) create mode 100644 rust/alloc/README.md create mode 100644 rust/alloc/alloc.rs create mode 100644 rust/alloc/borrow.rs create mode 100644 rust/alloc/boxed.rs create mode 100644 rust/alloc/collections/mod.rs create mode 100644 rust/alloc/fmt.rs create mode 100644 rust/alloc/lib.rs create mode 100644 rust/alloc/macros.rs create mode 100644 rust/alloc/prelude/mod.rs create mode 100644 rust/alloc/prelude/v1.rs create mode 100644 rust/alloc/raw_vec.rs create mode 100644 rust/alloc/rc.rs create mode 100644 rust/alloc/slice.rs create mode 100644 rust/alloc/str.rs create mode 100644 rust/alloc/string.rs create mode 100644 rust/alloc/sync.rs create mode 100644 rust/alloc/vec/drain.rs create mode 100644 rust/alloc/vec/drain_filter.rs create mode 100644 rust/alloc/vec/into_iter.rs create mode 100644 rust/alloc/vec/is_zero.rs create mode 100644 rust/alloc/vec/mod.rs create mode 100644 rust/alloc/vec/partial_eq.rs create mode 100644 rust/alloc/vec/set_len_on_drop.rs create mode 100644 rust/alloc/vec/spec_extend.rs diff --git a/rust/alloc/README.md b/rust/alloc/README.md new file mode 100644 index 0000000000000..a1bcc2cef0e69 --- /dev/null +++ b/rust/alloc/README.md @@ -0,0 +1,32 @@ +# `alloc` + +These source files come from the Rust standard library, hosted in +the https://github.com/rust-lang/rust repository. 
For copyright +details, see https://github.com/rust-lang/rust/blob/master/COPYRIGHT. + +Please note that these files should be kept as close as possible to +upstream. In general, only additions should be performed (e.g. new +methods). Eventually, changes should make it into upstream so that, +at some point, this fork can be dropped from the kernel tree. + + +## Rationale + +On one hand, kernel folks wanted to keep `alloc` in-tree to have more +freedom in both workflow and actual features if actually needed +(e.g. receiver types if we ended up using them), which is reasonable. + +On the other hand, Rust folks wanted to keep `alloc` as close to +upstream as possible and avoid as much divergence as possible, which +is also reasonable. + +We agreed on a middle-ground: we would keep a subset of `alloc` +in-tree that would be as small and as close as possible to upstream. +Then, upstream can start adding the functions that we add to `alloc` +etc., until we reach a point where the kernel already knows exactly +what it needs in `alloc` and all the new methods are merged into +upstream, so that we can drop `alloc` from the kernel tree and go back +to using the upstream one. + +By doing this, the kernel can go a bit faster now, and Rust can +slowly incorporate and discuss the changes as needed. diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs new file mode 100644 index 0000000000000..a59d64ffb36d5 --- /dev/null +++ b/rust/alloc/alloc.rs @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Memory allocation APIs + +#![stable(feature = "alloc_module", since = "1.28.0")] + +#[cfg(not(test))] +use core::intrinsics; +use core::intrinsics::{min_align_of_val, size_of_val}; + +use core::ptr::Unique; +#[cfg(not(test))] +use core::ptr::{self, NonNull}; + +#[stable(feature = "alloc_module", since = "1.28.0")] +#[doc(inline)] +pub use core::alloc::*; + +#[cfg(test)] +mod tests; + +extern "Rust" { + // These are the magic symbols to call the global allocator. 
rustc generates + // them to call `__rg_alloc` etc. if there is a `#[global_allocator]` attribute + // (the code expanding that attribute macro generates those functions), or to call + // the default implementations in libstd (`__rdl_alloc` etc. in `library/std/src/alloc.rs`) + // otherwise. + // The rustc fork of LLVM also special-cases these function names to be able to optimize them + // like `malloc`, `realloc`, and `free`, respectively. + #[rustc_allocator] + #[rustc_allocator_nounwind] + fn __rust_alloc(size: usize, align: usize) -> *mut u8; + #[rustc_allocator_nounwind] + fn __rust_dealloc(ptr: *mut u8, size: usize, align: usize); + #[rustc_allocator_nounwind] + fn __rust_realloc(ptr: *mut u8, old_size: usize, align: usize, new_size: usize) -> *mut u8; + #[rustc_allocator_nounwind] + fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8; +} + +/// The global memory allocator. +/// +/// This type implements the [`Allocator`] trait by forwarding calls +/// to the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// Note: while this type is unstable, the functionality it provides can be +/// accessed through the [free functions in `alloc`](self#functions). +#[unstable(feature = "allocator_api", issue = "32838")] +#[derive(Copy, Clone, Default, Debug)] +#[cfg(not(test))] +pub struct Global; + +#[cfg(test)] +pub use std::alloc::Global; + +/// Allocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::alloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `alloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::alloc`]. 
+/// +/// # Examples +/// +/// ``` +/// use std::alloc::{alloc, dealloc, Layout}; +/// +/// unsafe { +/// let layout = Layout::new::(); +/// let ptr = alloc(layout); +/// +/// *(ptr as *mut u16) = 42; +/// assert_eq!(*(ptr as *mut u16), 42); +/// +/// dealloc(ptr, layout); +/// } +/// ``` +#[stable(feature = "global_alloc", since = "1.28.0")] +#[inline] +pub unsafe fn alloc(layout: Layout) -> *mut u8 { + unsafe { __rust_alloc(layout.size(), layout.align()) } +} + +/// Deallocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::dealloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `dealloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::dealloc`]. +#[stable(feature = "global_alloc", since = "1.28.0")] +#[inline] +pub unsafe fn dealloc(ptr: *mut u8, layout: Layout) { + unsafe { __rust_dealloc(ptr, layout.size(), layout.align()) } +} + +/// Reallocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::realloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `realloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::realloc`]. +#[stable(feature = "global_alloc", since = "1.28.0")] +#[inline] +pub unsafe fn realloc(ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + unsafe { __rust_realloc(ptr, layout.size(), layout.align(), new_size) } +} + +/// Allocate zero-initialized memory with the global allocator. 
+/// +/// This function forwards calls to the [`GlobalAlloc::alloc_zeroed`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `alloc_zeroed` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::alloc_zeroed`]. +/// +/// # Examples +/// +/// ``` +/// use std::alloc::{alloc_zeroed, dealloc, Layout}; +/// +/// unsafe { +/// let layout = Layout::new::(); +/// let ptr = alloc_zeroed(layout); +/// +/// assert_eq!(*(ptr as *mut u16), 0); +/// +/// dealloc(ptr, layout); +/// } +/// ``` +#[stable(feature = "global_alloc", since = "1.28.0")] +#[inline] +pub unsafe fn alloc_zeroed(layout: Layout) -> *mut u8 { + unsafe { __rust_alloc_zeroed(layout.size(), layout.align()) } +} + +#[cfg(not(test))] +impl Global { + #[inline] + fn alloc_impl(&self, layout: Layout, zeroed: bool) -> Result, AllocError> { + match layout.size() { + 0 => Ok(NonNull::slice_from_raw_parts(layout.dangling(), 0)), + // SAFETY: `layout` is non-zero in size, + size => unsafe { + let raw_ptr = if zeroed { alloc_zeroed(layout) } else { alloc(layout) }; + let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?; + Ok(NonNull::slice_from_raw_parts(ptr, size)) + }, + } + } + + // SAFETY: Same as `Allocator::grow` + #[inline] + unsafe fn grow_impl( + &self, + ptr: NonNull, + old_layout: Layout, + new_layout: Layout, + zeroed: bool, + ) -> Result, AllocError> { + debug_assert!( + new_layout.size() >= old_layout.size(), + "`new_layout.size()` must be greater than or equal to `old_layout.size()`" + ); + + match old_layout.size() { + 0 => self.alloc_impl(new_layout, zeroed), + + // SAFETY: `new_size` is non-zero as `old_size` is greater than or equal to `new_size` + // as required by safety conditions. 
Other conditions must be upheld by the caller + old_size if old_layout.align() == new_layout.align() => unsafe { + let new_size = new_layout.size(); + + // `realloc` probably checks for `new_size >= old_layout.size()` or something similar. + intrinsics::assume(new_size >= old_layout.size()); + + let raw_ptr = realloc(ptr.as_ptr(), old_layout, new_size); + let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?; + if zeroed { + raw_ptr.add(old_size).write_bytes(0, new_size - old_size); + } + Ok(NonNull::slice_from_raw_parts(ptr, new_size)) + }, + + // SAFETY: because `new_layout.size()` must be greater than or equal to `old_size`, + // both the old and new memory allocation are valid for reads and writes for `old_size` + // bytes. Also, because the old allocation wasn't yet deallocated, it cannot overlap + // `new_ptr`. Thus, the call to `copy_nonoverlapping` is safe. The safety contract + // for `dealloc` must be upheld by the caller. + old_size => unsafe { + let new_ptr = self.alloc_impl(new_layout, zeroed)?; + ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_mut_ptr(), old_size); + self.deallocate(ptr, old_layout); + Ok(new_ptr) + }, + } + } +} + +#[unstable(feature = "allocator_api", issue = "32838")] +#[cfg(not(test))] +unsafe impl Allocator for Global { + #[inline] + fn allocate(&self, layout: Layout) -> Result, AllocError> { + self.alloc_impl(layout, false) + } + + #[inline] + fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { + self.alloc_impl(layout, true) + } + + #[inline] + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + if layout.size() != 0 { + // SAFETY: `layout` is non-zero in size, + // other conditions must be upheld by the caller + unsafe { dealloc(ptr.as_ptr(), layout) } + } + } + + #[inline] + unsafe fn grow( + &self, + ptr: NonNull, + old_layout: Layout, + new_layout: Layout, + ) -> Result, AllocError> { + // SAFETY: all conditions must be upheld by the caller + unsafe { self.grow_impl(ptr, old_layout, new_layout, 
false) } + } + + #[inline] + unsafe fn grow_zeroed( + &self, + ptr: NonNull, + old_layout: Layout, + new_layout: Layout, + ) -> Result, AllocError> { + // SAFETY: all conditions must be upheld by the caller + unsafe { self.grow_impl(ptr, old_layout, new_layout, true) } + } + + #[inline] + unsafe fn shrink( + &self, + ptr: NonNull, + old_layout: Layout, + new_layout: Layout, + ) -> Result, AllocError> { + debug_assert!( + new_layout.size() <= old_layout.size(), + "`new_layout.size()` must be smaller than or equal to `old_layout.size()`" + ); + + match new_layout.size() { + // SAFETY: conditions must be upheld by the caller + 0 => unsafe { + self.deallocate(ptr, old_layout); + Ok(NonNull::slice_from_raw_parts(new_layout.dangling(), 0)) + }, + + // SAFETY: `new_size` is non-zero. Other conditions must be upheld by the caller + new_size if old_layout.align() == new_layout.align() => unsafe { + // `realloc` probably checks for `new_size <= old_layout.size()` or something similar. + intrinsics::assume(new_size <= old_layout.size()); + + let raw_ptr = realloc(ptr.as_ptr(), old_layout, new_size); + let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?; + Ok(NonNull::slice_from_raw_parts(ptr, new_size)) + }, + + // SAFETY: because `new_size` must be smaller than or equal to `old_layout.size()`, + // both the old and new memory allocation are valid for reads and writes for `new_size` + // bytes. Also, because the old allocation wasn't yet deallocated, it cannot overlap + // `new_ptr`. Thus, the call to `copy_nonoverlapping` is safe. The safety contract + // for `dealloc` must be upheld by the caller. + new_size => unsafe { + let new_ptr = self.allocate(new_layout)?; + ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_mut_ptr(), new_size); + self.deallocate(ptr, old_layout); + Ok(new_ptr) + }, + } + } +} + +/// The allocator for unique pointers. +// This function must not unwind. If it does, MIR codegen will fail. 
+#[cfg(all(not(no_global_oom_handling), not(test)))] +#[lang = "exchange_malloc"] +#[inline] +unsafe fn exchange_malloc(size: usize, align: usize) -> *mut u8 { + let layout = unsafe { Layout::from_size_align_unchecked(size, align) }; + match Global.allocate(layout) { + Ok(ptr) => ptr.as_mut_ptr(), + Err(_) => handle_alloc_error(layout), + } +} + +#[cfg_attr(not(test), lang = "box_free")] +#[inline] +// This signature has to be the same as `Box`, otherwise an ICE will happen. +// When an additional parameter to `Box` is added (like `A: Allocator`), this has to be added here as +// well. +// For example if `Box` is changed to `struct Box(Unique, A)`, +// this function has to be changed to `fn box_free(Unique, A)` as well. +pub(crate) unsafe fn box_free(ptr: Unique, alloc: A) { + unsafe { + let size = size_of_val(ptr.as_ref()); + let align = min_align_of_val(ptr.as_ref()); + let layout = Layout::from_size_align_unchecked(size, align); + alloc.deallocate(ptr.cast().into(), layout) + } +} + +// # Allocation error handler + +#[cfg(not(no_global_oom_handling))] +extern "Rust" { + // This is the magic symbol to call the global alloc error handler. rustc generates + // it to call `__rg_oom` if there is a `#[alloc_error_handler]`, or to call the + // default implementations below (`__rdl_oom`) otherwise. + #[rustc_allocator_nounwind] + fn __rust_alloc_error_handler(size: usize, align: usize) -> !; +} + +/// Abort on memory allocation error or failure. +/// +/// Callers of memory allocation APIs wishing to abort computation +/// in response to an allocation error are encouraged to call this function, +/// rather than directly invoking `panic!` or similar. +/// +/// The default behavior of this function is to print a message to standard error +/// and abort the process. +/// It can be replaced with [`set_alloc_error_hook`] and [`take_alloc_error_hook`]. 
+/// +/// [`set_alloc_error_hook`]: ../../std/alloc/fn.set_alloc_error_hook.html +/// [`take_alloc_error_hook`]: ../../std/alloc/fn.take_alloc_error_hook.html +#[stable(feature = "global_alloc", since = "1.28.0")] +#[cfg(all(not(no_global_oom_handling), not(test)))] +#[rustc_allocator_nounwind] +#[cold] +pub fn handle_alloc_error(layout: Layout) -> ! { + unsafe { + __rust_alloc_error_handler(layout.size(), layout.align()); + } +} + +// For alloc test `std::alloc::handle_alloc_error` can be used directly. +#[cfg(all(not(no_global_oom_handling), test))] +pub use std::alloc::handle_alloc_error; + +#[cfg(all(not(no_global_oom_handling), not(any(target_os = "hermit", test))))] +#[doc(hidden)] +#[allow(unused_attributes)] +#[unstable(feature = "alloc_internals", issue = "none")] +pub mod __alloc_error_handler { + use crate::alloc::Layout; + + // called via generated `__rust_alloc_error_handler` + + // if there is no `#[alloc_error_handler]` + #[rustc_std_internal_symbol] + pub unsafe extern "C" fn __rdl_oom(size: usize, _align: usize) -> ! { + panic!("memory allocation of {} bytes failed", size) + } + + // if there is a `#[alloc_error_handler]` + #[rustc_std_internal_symbol] + pub unsafe extern "C" fn __rg_oom(size: usize, align: usize) -> ! { + let layout = unsafe { Layout::from_size_align_unchecked(size, align) }; + extern "Rust" { + #[lang = "oom"] + fn oom_impl(layout: Layout) -> !; + } + unsafe { oom_impl(layout) } + } +} + +/// Specialize clones into pre-allocated, uninitialized memory. +/// Used by `Box::clone` and `Rc`/`Arc::make_mut`. +pub(crate) trait WriteCloneIntoRaw: Sized { + unsafe fn write_clone_into_raw(&self, target: *mut Self); +} + +impl WriteCloneIntoRaw for T { + #[inline] + default unsafe fn write_clone_into_raw(&self, target: *mut Self) { + // Having allocated *first* may allow the optimizer to create + // the cloned value in-place, skipping the local and move. 
+ unsafe { target.write(self.clone()) }; + } +} + +impl WriteCloneIntoRaw for T { + #[inline] + unsafe fn write_clone_into_raw(&self, target: *mut Self) { + // We can always copy in-place, without ever involving a local value. + unsafe { target.copy_from_nonoverlapping(self, 1) }; + } +} diff --git a/rust/alloc/borrow.rs b/rust/alloc/borrow.rs new file mode 100644 index 0000000000000..beaf7b330f2b0 --- /dev/null +++ b/rust/alloc/borrow.rs @@ -0,0 +1,493 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A module for working with borrowed data. + +#![stable(feature = "rust1", since = "1.0.0")] + +use core::cmp::Ordering; +use core::hash::{Hash, Hasher}; +use core::ops::Deref; +#[cfg(not(no_global_oom_handling))] +use core::ops::{Add, AddAssign}; + +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::borrow::{Borrow, BorrowMut}; + +use crate::fmt; +#[cfg(not(no_global_oom_handling))] +use crate::string::String; + +use Cow::*; + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, B: ?Sized> Borrow for Cow<'a, B> +where + B: ToOwned, + ::Owned: 'a, +{ + fn borrow(&self) -> &B { + &**self + } +} + +/// A generalization of `Clone` to borrowed data. +/// +/// Some types make it possible to go from borrowed to owned, usually by +/// implementing the `Clone` trait. But `Clone` works only for going from `&T` +/// to `T`. The `ToOwned` trait generalizes `Clone` to construct owned data +/// from any borrow of a given type. +#[cfg_attr(not(test), rustc_diagnostic_item = "ToOwned")] +#[stable(feature = "rust1", since = "1.0.0")] +pub trait ToOwned { + /// The resulting type after obtaining ownership. + #[stable(feature = "rust1", since = "1.0.0")] + type Owned: Borrow; + + /// Creates owned data from borrowed data, usually by cloning. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s: &str = "a"; + /// let ss: String = s.to_owned(); + /// + /// let v: &[i32] = &[1, 2]; + /// let vv: Vec = v.to_owned(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "cloning is often expensive and is not expected to have side effects"] + fn to_owned(&self) -> Self::Owned; + + /// Uses borrowed data to replace owned data, usually by cloning. + /// + /// This is borrow-generalized version of `Clone::clone_from`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// # #![feature(toowned_clone_into)] + /// let mut s: String = String::new(); + /// "hello".clone_into(&mut s); + /// + /// let mut v: Vec = Vec::new(); + /// [1, 2][..].clone_into(&mut v); + /// ``` + #[unstable(feature = "toowned_clone_into", reason = "recently added", issue = "41263")] + fn clone_into(&self, target: &mut Self::Owned) { + *target = self.to_owned(); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ToOwned for T +where + T: Clone, +{ + type Owned = T; + fn to_owned(&self) -> T { + self.clone() + } + + fn clone_into(&self, target: &mut T) { + target.clone_from(self); + } +} + +/// A clone-on-write smart pointer. +/// +/// The type `Cow` is a smart pointer providing clone-on-write functionality: it +/// can enclose and provide immutable access to borrowed data, and clone the +/// data lazily when mutation or ownership is required. The type is designed to +/// work with general borrowed data via the `Borrow` trait. +/// +/// `Cow` implements `Deref`, which means that you can call +/// non-mutating methods directly on the data it encloses. If mutation +/// is desired, `to_mut` will obtain a mutable reference to an owned +/// value, cloning if necessary. 
+/// +/// If you need reference-counting pointers, note that +/// [`Rc::make_mut`][crate::rc::Rc::make_mut] and +/// [`Arc::make_mut`][crate::sync::Arc::make_mut] can provide clone-on-write +/// functionality as well. +/// +/// # Examples +/// +/// ``` +/// use std::borrow::Cow; +/// +/// fn abs_all(input: &mut Cow<[i32]>) { +/// for i in 0..input.len() { +/// let v = input[i]; +/// if v < 0 { +/// // Clones into a vector if not already owned. +/// input.to_mut()[i] = -v; +/// } +/// } +/// } +/// +/// // No clone occurs because `input` doesn't need to be mutated. +/// let slice = [0, 1, 2]; +/// let mut input = Cow::from(&slice[..]); +/// abs_all(&mut input); +/// +/// // Clone occurs because `input` needs to be mutated. +/// let slice = [-1, 0, 1]; +/// let mut input = Cow::from(&slice[..]); +/// abs_all(&mut input); +/// +/// // No clone occurs because `input` is already owned. +/// let mut input = Cow::from(vec![-1, 0, 1]); +/// abs_all(&mut input); +/// ``` +/// +/// Another example showing how to keep `Cow` in a struct: +/// +/// ``` +/// use std::borrow::Cow; +/// +/// struct Items<'a, X: 'a> where [X]: ToOwned<Owned = Vec<X>> { +/// values: Cow<'a, [X]>, +/// } +/// +/// impl<'a, X: Clone + 'a> Items<'a, X> where [X]: ToOwned<Owned = Vec<X>> { +/// fn new(v: Cow<'a, [X]>) -> Self { +/// Items { values: v } +/// } +/// } +/// +/// // Creates a container from borrowed values of a slice +/// let readonly = [1, 2]; +/// let borrowed = Items::new((&readonly[..]).into()); +/// match borrowed { +/// Items { values: Cow::Borrowed(b) } => println!("borrowed {:?}", b), +/// _ => panic!("expect borrowed value"), +/// } +/// +/// let mut clone_on_write = borrowed; +/// // Mutates the data from slice into owned vec and pushes a new value on top +/// clone_on_write.values.to_mut().push(3); +/// println!("clone_on_write = {:?}", clone_on_write.values); +/// +/// // The data was mutated. Let's check it out.
+/// match clone_on_write { +/// Items { values: Cow::Owned(_) } => println!("clone_on_write contains owned data"), +/// _ => panic!("expect owned data"), +/// } +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +pub enum Cow<'a, B: ?Sized + 'a> +where + B: ToOwned, +{ + /// Borrowed data. + #[stable(feature = "rust1", since = "1.0.0")] + Borrowed(#[stable(feature = "rust1", since = "1.0.0")] &'a B), + + /// Owned data. + #[stable(feature = "rust1", since = "1.0.0")] + Owned(#[stable(feature = "rust1", since = "1.0.0")] ::Owned), +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for Cow<'_, B> { + fn clone(&self) -> Self { + match *self { + Borrowed(b) => Borrowed(b), + Owned(ref o) => { + let b: &B = o.borrow(); + Owned(b.to_owned()) + } + } + } + + fn clone_from(&mut self, source: &Self) { + match (self, source) { + (&mut Owned(ref mut dest), &Owned(ref o)) => o.borrow().clone_into(dest), + (t, s) => *t = s.clone(), + } + } +} + +impl Cow<'_, B> { + /// Returns true if the data is borrowed, i.e. if `to_mut` would require additional work. + /// + /// # Examples + /// + /// ``` + /// #![feature(cow_is_borrowed)] + /// use std::borrow::Cow; + /// + /// let cow = Cow::Borrowed("moo"); + /// assert!(cow.is_borrowed()); + /// + /// let bull: Cow<'_, str> = Cow::Owned("...moo?".to_string()); + /// assert!(!bull.is_borrowed()); + /// ``` + #[unstable(feature = "cow_is_borrowed", issue = "65143")] + #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")] + pub const fn is_borrowed(&self) -> bool { + match *self { + Borrowed(_) => true, + Owned(_) => false, + } + } + + /// Returns true if the data is owned, i.e. if `to_mut` would be a no-op. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(cow_is_borrowed)] + /// use std::borrow::Cow; + /// + /// let cow: Cow<'_, str> = Cow::Owned("moo".to_string()); + /// assert!(cow.is_owned()); + /// + /// let bull = Cow::Borrowed("...moo?"); + /// assert!(!bull.is_owned()); + /// ``` + #[unstable(feature = "cow_is_borrowed", issue = "65143")] + #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")] + pub const fn is_owned(&self) -> bool { + !self.is_borrowed() + } + + /// Acquires a mutable reference to the owned form of the data. + /// + /// Clones the data if it is not already owned. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let mut cow = Cow::Borrowed("foo"); + /// cow.to_mut().make_ascii_uppercase(); + /// + /// assert_eq!( + /// cow, + /// Cow::Owned(String::from("FOO")) as Cow + /// ); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn to_mut(&mut self) -> &mut ::Owned { + match *self { + Borrowed(borrowed) => { + *self = Owned(borrowed.to_owned()); + match *self { + Borrowed(..) => unreachable!(), + Owned(ref mut owned) => owned, + } + } + Owned(ref mut owned) => owned, + } + } + + /// Extracts the owned data. + /// + /// Clones the data if it is not already owned. 
+ /// + /// # Examples + /// + /// Calling `into_owned` on a `Cow::Borrowed` clones the underlying data + /// and becomes a `Cow::Owned`: + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let s = "Hello world!"; + /// let cow = Cow::Borrowed(s); + /// + /// assert_eq!( + /// cow.into_owned(), + /// String::from(s) + /// ); + /// ``` + /// + /// Calling `into_owned` on a `Cow::Owned` is a no-op: + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let s = "Hello world!"; + /// let cow: Cow = Cow::Owned(String::from(s)); + /// + /// assert_eq!( + /// cow.into_owned(), + /// String::from(s) + /// ); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_owned(self) -> ::Owned { + match self { + Borrowed(borrowed) => borrowed.to_owned(), + Owned(owned) => owned, + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Deref for Cow<'_, B> { + type Target = B; + + fn deref(&self) -> &B { + match *self { + Borrowed(borrowed) => borrowed, + Owned(ref owned) => owned.borrow(), + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Cow<'_, B> where B: Eq + ToOwned {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Cow<'_, B> +where + B: Ord + ToOwned, +{ + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, 'b, B: ?Sized, C: ?Sized> PartialEq> for Cow<'a, B> +where + B: PartialEq + ToOwned, + C: ToOwned, +{ + #[inline] + fn eq(&self, other: &Cow<'b, C>) -> bool { + PartialEq::eq(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, B: ?Sized> PartialOrd for Cow<'a, B> +where + B: PartialOrd + ToOwned, +{ + #[inline] + fn partial_cmp(&self, other: &Cow<'a, B>) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Cow<'_, B> +where + B: fmt::Debug + ToOwned, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) 
-> fmt::Result { + match *self { + Borrowed(ref b) => fmt::Debug::fmt(b, f), + Owned(ref o) => fmt::Debug::fmt(o, f), + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for Cow<'_, B> +where + B: fmt::Display + ToOwned, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Borrowed(ref b) => fmt::Display::fmt(b, f), + Owned(ref o) => fmt::Display::fmt(o, f), + } + } +} + +#[stable(feature = "default", since = "1.11.0")] +impl Default for Cow<'_, B> +where + B: ToOwned, +{ + /// Creates an owned Cow<'a, B> with the default value for the contained owned value. + fn default() -> Self { + Owned(::Owned::default()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Cow<'_, B> +where + B: Hash + ToOwned, +{ + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(&**self, state) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Cow<'_, T> { + fn as_ref(&self) -> &T { + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> Add<&'a str> for Cow<'a, str> { + type Output = Cow<'a, str>; + + #[inline] + fn add(mut self, rhs: &'a str) -> Self::Output { + self += rhs; + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> Add> for Cow<'a, str> { + type Output = Cow<'a, str>; + + #[inline] + fn add(mut self, rhs: Cow<'a, str>) -> Self::Output { + self += rhs; + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> AddAssign<&'a str> for Cow<'a, str> { + fn add_assign(&mut self, rhs: &'a str) { + if self.is_empty() { + *self = Cow::Borrowed(rhs) + } else if !rhs.is_empty() { + if let Cow::Borrowed(lhs) = *self { + let mut s = String::with_capacity(lhs.len() + rhs.len()); + s.push_str(lhs); + *self = Cow::Owned(s); + } + self.to_mut().push_str(rhs); + } + } +} + +#[cfg(not(no_global_oom_handling))] 
+#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> AddAssign> for Cow<'a, str> { + fn add_assign(&mut self, rhs: Cow<'a, str>) { + if self.is_empty() { + *self = rhs + } else if !rhs.is_empty() { + if let Cow::Borrowed(lhs) = *self { + let mut s = String::with_capacity(lhs.len() + rhs.len()); + s.push_str(lhs); + *self = Cow::Owned(s); + } + self.to_mut().push_str(&rhs); + } + } +} diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs new file mode 100644 index 0000000000000..df6a77eea06b6 --- /dev/null +++ b/rust/alloc/boxed.rs @@ -0,0 +1,1728 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A pointer type for heap allocation. +//! +//! [`Box`], casually referred to as a 'box', provides the simplest form of +//! heap allocation in Rust. Boxes provide ownership for this allocation, and +//! drop their contents when they go out of scope. Boxes also ensure that they +//! never allocate more than `isize::MAX` bytes. +//! +//! # Examples +//! +//! Move a value from the stack to the heap by creating a [`Box`]: +//! +//! ``` +//! let val: u8 = 5; +//! let boxed: Box = Box::new(val); +//! ``` +//! +//! Move a value from a [`Box`] back to the stack by [dereferencing]: +//! +//! ``` +//! let boxed: Box = Box::new(5); +//! let val: u8 = *boxed; +//! ``` +//! +//! Creating a recursive data structure: +//! +//! ``` +//! #[derive(Debug)] +//! enum List { +//! Cons(T, Box>), +//! Nil, +//! } +//! +//! let list: List = List::Cons(1, Box::new(List::Cons(2, Box::new(List::Nil)))); +//! println!("{:?}", list); +//! ``` +//! +//! This will print `Cons(1, Cons(2, Nil))`. +//! +//! Recursive structures must be boxed, because if the definition of `Cons` +//! looked like this: +//! +//! ```compile_fail,E0072 +//! # enum List { +//! Cons(T, List), +//! # } +//! ``` +//! +//! It wouldn't work. This is because the size of a `List` depends on how many +//! elements are in the list, and so we don't know how much memory to allocate +//! for a `Cons`. 
By introducing a [`Box`], which has a defined size, we know how +//! big `Cons` needs to be. +//! +//! # Memory layout +//! +//! For non-zero-sized values, a [`Box`] will use the [`Global`] allocator for +//! its allocation. It is valid to convert both ways between a [`Box`] and a +//! raw pointer allocated with the [`Global`] allocator, given that the +//! [`Layout`] used with the allocator is correct for the type. More precisely, +//! a `value: *mut T` that has been allocated with the [`Global`] allocator +//! with `Layout::for_value(&*value)` may be converted into a box using +//! [`Box::<T>::from_raw(value)`]. Conversely, the memory backing a `value: *mut +//! T` obtained from [`Box::<T>::into_raw`] may be deallocated using the +//! [`Global`] allocator with [`Layout::for_value(&*value)`]. +//! +//! For zero-sized values, the `Box` pointer still has to be [valid] for reads +//! and writes and sufficiently aligned. In particular, casting any aligned +//! non-zero integer literal to a raw pointer produces a valid pointer, but a +//! pointer pointing into previously allocated memory that since got freed is +//! not valid. The recommended way to build a Box to a ZST if `Box::new` cannot +//! be used is to use [`ptr::NonNull::dangling`]. +//! +//! So long as `T: Sized`, a `Box<T>` is guaranteed to be represented +//! as a single pointer and is also ABI-compatible with C pointers +//! (i.e. the C type `T*`). This means that if you have extern "C" +//! Rust functions that will be called from C, you can define those +//! Rust functions using `Box<T>` types, and use `T*` as corresponding +//! type on the C side. As an example, consider this C header which +//! declares functions that create and destroy some kind of `Foo` +//! value: +//! +//! ```c +//! /* C header */ +//! +//! /* Returns ownership to the caller */ +//! struct Foo* foo_new(void); +//! +//! /* Takes ownership from the caller; no-op when invoked with null */ +//! void foo_delete(struct Foo*); +//! ``` +//! +//!
These two functions might be implemented in Rust as follows. Here, the +//! `struct Foo*` type from C is translated to `Box<Foo>`, which captures +//! the ownership constraints. Note also that the nullable argument to +//! `foo_delete` is represented in Rust as `Option<Box<Foo>>`, since `Box<Foo>` +//! cannot be null. +//! +//! ``` +//! #[repr(C)] +//! pub struct Foo; +//! +//! #[no_mangle] +//! pub extern "C" fn foo_new() -> Box<Foo> { +//! Box::new(Foo) +//! } +//! +//! #[no_mangle] +//! pub extern "C" fn foo_delete(_: Option<Box<Foo>>) {} +//! ``` +//! +//! Even though `Box<T>` has the same representation and C ABI as a C pointer, +//! this does not mean that you can convert an arbitrary `T*` into a `Box<T>` +//! and expect things to work. `Box<T>` values will always be fully aligned, +//! non-null pointers. Moreover, the destructor for `Box<T>` will attempt to +//! free the value with the global allocator. In general, the best practice +//! is to only use `Box<T>` for pointers that originated from the global +//! allocator. +//! +//! **Important.** At least at present, you should avoid using +//! `Box<T>` types for functions that are defined in C but invoked +//! from Rust. In those cases, you should directly mirror the C types +//! as closely as possible. Using types like `Box<T>` where the C +//! definition is just using `T*` can lead to undefined behavior, as +//! described in [rust-lang/unsafe-code-guidelines#198][ucg#198]. +//! +//! [ucg#198]: https://github.com/rust-lang/unsafe-code-guidelines/issues/198 +//! [dereferencing]: core::ops::Deref +//! [`Box::<T>::from_raw(value)`]: Box::from_raw +//! [`Global`]: crate::alloc::Global +//! [`Layout`]: crate::alloc::Layout +//! [`Layout::for_value(&*value)`]: crate::alloc::Layout::for_value +//!
[valid]: ptr#safety + +#![stable(feature = "rust1", since = "1.0.0")] + +use core::any::Any; +use core::borrow; +use core::cmp::Ordering; +use core::convert::{From, TryFrom}; +use core::fmt; +use core::future::Future; +use core::hash::{Hash, Hasher}; +#[cfg(not(no_global_oom_handling))] +use core::iter::FromIterator; +use core::iter::{FusedIterator, Iterator}; +use core::marker::{Unpin, Unsize}; +use core::mem; +use core::ops::{ + CoerceUnsized, Deref, DerefMut, DispatchFromDyn, Generator, GeneratorState, Receiver, +}; +use core::pin::Pin; +use core::ptr::{self, Unique}; +use core::stream::Stream; +use core::task::{Context, Poll}; + +#[cfg(not(no_global_oom_handling))] +use crate::alloc::{handle_alloc_error, WriteCloneIntoRaw}; +use crate::alloc::{AllocError, Allocator, Global, Layout}; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::Cow; +#[cfg(not(no_global_oom_handling))] +use crate::raw_vec::RawVec; +#[cfg(not(no_global_oom_handling))] +use crate::str::from_boxed_utf8_unchecked; +#[cfg(not(no_global_oom_handling))] +use crate::vec::Vec; + +/// A pointer type for heap allocation. +/// +/// See the [module-level documentation](../../std/boxed/index.html) for more. +#[lang = "owned_box"] +#[fundamental] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Box< + T: ?Sized, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +>(Unique, A); + +impl Box { + /// Allocates memory on the heap and then places `x` into it. + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// let five = Box::new(5); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline(always)] + #[doc(alias = "alloc")] + #[doc(alias = "malloc")] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new(x: T) -> Self { + box x + } + + /// Constructs a new box with uninitialized contents. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut five = Box::::new_uninit(); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub fn new_uninit() -> Box> { + Self::new_uninit_in(Global) + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let zero = Box::::new_zeroed(); + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[inline] + #[doc(alias = "calloc")] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed() -> Box> { + Self::new_zeroed_in(Global) + } + + /// Constructs a new `Pin>`. If `T` does not implement `Unpin`, then + /// `x` will be pinned in memory and unable to be moved. + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "pin", since = "1.33.0")] + #[inline(always)] + pub fn pin(x: T) -> Pin> { + (box x).into() + } + + /// Allocates memory on the heap then places `x` into it, + /// returning an error if the allocation fails + /// + /// This doesn't actually allocate if `T` is zero-sized. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// let five = Box::try_new(5)?; + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn try_new(x: T) -> Result { + Self::try_new_in(x, Global) + } + + /// Constructs a new box with uninitialized contents on the heap, + /// returning an error if the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// let mut five = Box::::try_new_uninit()?; + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub fn try_new_uninit() -> Result>, AllocError> { + Box::try_new_uninit_in(Global) + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes on the heap + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// let zero = Box::::try_new_zeroed()?; + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub fn try_new_zeroed() -> Result>, AllocError> { + Box::try_new_zeroed_in(Global) + } +} + +impl Box { + /// Allocates memory in the given allocator then places `x` into it. + /// + /// This doesn't actually allocate if `T` is zero-sized. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let five = Box::new_in(5, System); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn new_in(x: T, alloc: A) -> Self { + let mut boxed = Self::new_uninit_in(alloc); + unsafe { + boxed.as_mut_ptr().write(x); + boxed.assume_init() + } + } + + /// Allocates memory in the given allocator then places `x` into it, + /// returning an error if the allocation fails + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let five = Box::try_new_in(5, System)?; + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn try_new_in(x: T, alloc: A) -> Result { + let mut boxed = Self::try_new_uninit_in(alloc)?; + unsafe { + boxed.as_mut_ptr().write(x); + Ok(boxed.assume_init()) + } + } + + /// Constructs a new box with uninitialized contents in the provided allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut five = Box::::new_uninit_in(System); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[cfg(not(no_global_oom_handling))] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit_in(alloc: A) -> Box, A> { + let layout = Layout::new::>(); + // NOTE: Prefer match over unwrap_or_else since closure sometimes not inlineable. + // That would make code size bigger. 
+ match Box::try_new_uninit_in(alloc) { + Ok(m) => m, + Err(_) => handle_alloc_error(layout), + } + } + + /// Constructs a new box with uninitialized contents in the provided allocator, + /// returning an error if the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut five = Box::::try_new_uninit_in(System)?; + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn try_new_uninit_in(alloc: A) -> Result, A>, AllocError> { + let layout = Layout::new::>(); + let ptr = alloc.allocate(layout)?.cast(); + unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) } + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes in the provided allocator. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let zero = Box::::new_zeroed_in(System); + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + #[cfg(not(no_global_oom_handling))] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed_in(alloc: A) -> Box, A> { + let layout = Layout::new::>(); + // NOTE: Prefer match over unwrap_or_else since closure sometimes not inlineable. + // That would make code size bigger. 
+ match Box::try_new_zeroed_in(alloc) { + Ok(m) => m, + Err(_) => handle_alloc_error(layout), + } + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes in the provided allocator, + /// returning an error if the allocation fails, + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let zero = Box::::try_new_zeroed_in(System)?; + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn try_new_zeroed_in(alloc: A) -> Result, A>, AllocError> { + let layout = Layout::new::>(); + let ptr = alloc.allocate_zeroed(layout)?.cast(); + unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) } + } + + /// Constructs a new `Pin>`. If `T` does not implement `Unpin`, then + /// `x` will be pinned in memory and unable to be moved. + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline(always)] + pub fn pin_in(x: T, alloc: A) -> Pin + where + A: 'static, + { + Self::new_in(x, alloc).into() + } + + /// Converts a `Box` into a `Box<[T]>` + /// + /// This conversion does not allocate on the heap and happens in place. + #[unstable(feature = "box_into_boxed_slice", issue = "71582")] + pub fn into_boxed_slice(boxed: Self) -> Box<[T], A> { + let (raw, alloc) = Box::into_raw_with_allocator(boxed); + unsafe { Box::from_raw_in(raw as *mut [T; 1], alloc) } + } + + /// Consumes the `Box`, returning the wrapped value. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(box_into_inner)] + /// + /// let c = Box::new(5); + /// + /// assert_eq!(Box::into_inner(c), 5); + /// ``` + #[unstable(feature = "box_into_inner", issue = "80437")] + #[inline] + pub fn into_inner(boxed: Self) -> T { + *boxed + } +} + +impl Box<[T]> { + /// Constructs a new boxed slice with uninitialized contents. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut values = Box::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit_slice(len: usize) -> Box<[mem::MaybeUninit]> { + unsafe { RawVec::with_capacity(len).into_box(len) } + } + + /// Constructs a new boxed slice with uninitialized contents, with the memory + /// being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let values = Box::<[u32]>::new_zeroed_slice(3); + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed_slice(len: usize) -> Box<[mem::MaybeUninit]> { + unsafe { RawVec::with_capacity_zeroed(len).into_box(len) } + } +} + +impl Box<[T], A> { + /// Constructs a new boxed slice with uninitialized contents in the provided allocator. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut values = Box::<[u32], _>::new_uninit_slice_in(3, System); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit_slice_in(len: usize, alloc: A) -> Box<[mem::MaybeUninit], A> { + unsafe { RawVec::with_capacity_in(len, alloc).into_box(len) } + } + + /// Constructs a new boxed slice with uninitialized contents in the provided allocator, + /// with the memory being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let values = Box::<[u32], _>::new_zeroed_slice_in(3, System); + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed_slice_in(len: usize, alloc: A) -> Box<[mem::MaybeUninit], A> { + unsafe { RawVec::with_capacity_zeroed_in(len, alloc).into_box(len) } + } +} + +impl Box, A> { + /// Converts to `Box`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the value + /// really is in an initialized state. 
+ /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. + /// + /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut five = Box::::new_uninit(); + /// + /// let five: Box = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub unsafe fn assume_init(self) -> Box { + let (raw, alloc) = Box::into_raw_with_allocator(self); + unsafe { Box::from_raw_in(raw as *mut T, alloc) } + } +} + +impl Box<[mem::MaybeUninit], A> { + /// Converts to `Box<[T], A>`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the values + /// really are in an initialized state. + /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. + /// + /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut values = Box::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub unsafe fn assume_init(self) -> Box<[T], A> { + let (raw, alloc) = Box::into_raw_with_allocator(self); + unsafe { Box::from_raw_in(raw as *mut [T], alloc) } + } +} + +impl Box { + /// Constructs a box from a raw pointer. + /// + /// After calling this function, the raw pointer is owned by the + /// resulting `Box`. 
Specifically, the `Box` destructor will call + /// the destructor of `T` and free the allocated memory. For this + /// to be safe, the memory must have been allocated in accordance + /// with the [memory layout] used by `Box` . + /// + /// # Safety + /// + /// This function is unsafe because improper use may lead to + /// memory problems. For example, a double-free may occur if the + /// function is called twice on the same raw pointer. + /// + /// The safety conditions are described in the [memory layout] section. + /// + /// # Examples + /// + /// Recreate a `Box` which was previously converted to a raw pointer + /// using [`Box::into_raw`]: + /// ``` + /// let x = Box::new(5); + /// let ptr = Box::into_raw(x); + /// let x = unsafe { Box::from_raw(ptr) }; + /// ``` + /// Manually create a `Box` from scratch by using the global allocator: + /// ``` + /// use std::alloc::{alloc, Layout}; + /// + /// unsafe { + /// let ptr = alloc(Layout::new::()) as *mut i32; + /// // In general .write is required to avoid attempting to destruct + /// // the (uninitialized) previous contents of `ptr`, though for this + /// // simple example `*ptr = 5` would have worked as well. + /// ptr.write(5); + /// let x = Box::from_raw(ptr); + /// } + /// ``` + /// + /// [memory layout]: self#memory-layout + /// [`Layout`]: crate::Layout + #[stable(feature = "box_raw", since = "1.4.0")] + #[inline] + pub unsafe fn from_raw(raw: *mut T) -> Self { + unsafe { Self::from_raw_in(raw, Global) } + } +} + +impl Box { + /// Constructs a box from a raw pointer in the given allocator. + /// + /// After calling this function, the raw pointer is owned by the + /// resulting `Box`. Specifically, the `Box` destructor will call + /// the destructor of `T` and free the allocated memory. For this + /// to be safe, the memory must have been allocated in accordance + /// with the [memory layout] used by `Box` . 
+ /// + /// # Safety + /// + /// This function is unsafe because improper use may lead to + /// memory problems. For example, a double-free may occur if the + /// function is called twice on the same raw pointer. + /// + /// + /// # Examples + /// + /// Recreate a `Box` which was previously converted to a raw pointer + /// using [`Box::into_raw_with_allocator`]: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let x = Box::new_in(5, System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// let x = unsafe { Box::from_raw_in(ptr, alloc) }; + /// ``` + /// Manually create a `Box` from scratch by using the system allocator: + /// ``` + /// #![feature(allocator_api, slice_ptr_get)] + /// + /// use std::alloc::{Allocator, Layout, System}; + /// + /// unsafe { + /// let ptr = System.allocate(Layout::new::())?.as_mut_ptr() as *mut i32; + /// // In general .write is required to avoid attempting to destruct + /// // the (uninitialized) previous contents of `ptr`, though for this + /// // simple example `*ptr = 5` would have worked as well. + /// ptr.write(5); + /// let x = Box::from_raw_in(ptr, System); + /// } + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [memory layout]: self#memory-layout + /// [`Layout`]: crate::Layout + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub unsafe fn from_raw_in(raw: *mut T, alloc: A) -> Self { + Box(unsafe { Unique::new_unchecked(raw) }, alloc) + } + + /// Consumes the `Box`, returning a wrapped raw pointer. + /// + /// The pointer will be properly aligned and non-null. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Box`. In particular, the + /// caller should properly destroy `T` and release the memory, taking + /// into account the [memory layout] used by `Box`. 
The easiest way to + /// do this is to convert the raw pointer back into a `Box` with the + /// [`Box::from_raw`] function, allowing the `Box` destructor to perform + /// the cleanup. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::into_raw(b)` instead of `b.into_raw()`. This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// Converting the raw pointer back into a `Box` with [`Box::from_raw`] + /// for automatic cleanup: + /// ``` + /// let x = Box::new(String::from("Hello")); + /// let ptr = Box::into_raw(x); + /// let x = unsafe { Box::from_raw(ptr) }; + /// ``` + /// Manual cleanup by explicitly running the destructor and deallocating + /// the memory: + /// ``` + /// use std::alloc::{dealloc, Layout}; + /// use std::ptr; + /// + /// let x = Box::new(String::from("Hello")); + /// let p = Box::into_raw(x); + /// unsafe { + /// ptr::drop_in_place(p); + /// dealloc(p as *mut u8, Layout::new::()); + /// } + /// ``` + /// + /// [memory layout]: self#memory-layout + #[stable(feature = "box_raw", since = "1.4.0")] + #[inline] + pub fn into_raw(b: Self) -> *mut T { + Self::into_raw_with_allocator(b).0 + } + + /// Consumes the `Box`, returning a wrapped raw pointer and the allocator. + /// + /// The pointer will be properly aligned and non-null. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Box`. In particular, the + /// caller should properly destroy `T` and release the memory, taking + /// into account the [memory layout] used by `Box`. The easiest way to + /// do this is to convert the raw pointer back into a `Box` with the + /// [`Box::from_raw_in`] function, allowing the `Box` destructor to perform + /// the cleanup. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::into_raw_with_allocator(b)` instead of `b.into_raw_with_allocator()`. 
This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// Converting the raw pointer back into a `Box` with [`Box::from_raw_in`] + /// for automatic cleanup: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let x = Box::new_in(String::from("Hello"), System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// let x = unsafe { Box::from_raw_in(ptr, alloc) }; + /// ``` + /// Manual cleanup by explicitly running the destructor and deallocating + /// the memory: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::{Allocator, Layout, System}; + /// use std::ptr::{self, NonNull}; + /// + /// let x = Box::new_in(String::from("Hello"), System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// unsafe { + /// ptr::drop_in_place(ptr); + /// let non_null = NonNull::new_unchecked(ptr); + /// alloc.deallocate(non_null.cast(), Layout::new::()); + /// } + /// ``` + /// + /// [memory layout]: self#memory-layout + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn into_raw_with_allocator(b: Self) -> (*mut T, A) { + let (leaked, alloc) = Box::into_unique(b); + (leaked.as_ptr(), alloc) + } + + #[unstable( + feature = "ptr_internals", + issue = "none", + reason = "use `Box::leak(b).into()` or `Unique::from(Box::leak(b))` instead" + )] + #[inline] + #[doc(hidden)] + pub fn into_unique(b: Self) -> (Unique, A) { + // Box is recognized as a "unique pointer" by Stacked Borrows, but internally it is a + // raw pointer for the type system. Turning it directly into a raw pointer would not be + // recognized as "releasing" the unique pointer to permit aliased raw accesses, + // so all raw pointer methods have to go through `Box::leak`. Turning *that* to a raw pointer + // behaves correctly. + let alloc = unsafe { ptr::read(&b.1) }; + (Unique::from(Box::leak(b)), alloc) + } + + /// Returns a reference to the underlying allocator. 
+ /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::allocator(&b)` instead of `b.allocator()`. This + /// is so that there is no conflict with a method on the inner type. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(b: &Self) -> &A { + &b.1 + } + + /// Consumes and leaks the `Box`, returning a mutable reference, + /// `&'a mut T`. Note that the type `T` must outlive the chosen lifetime + /// `'a`. If the type has only static references, or none at all, then this + /// may be chosen to be `'static`. + /// + /// This function is mainly useful for data that lives for the remainder of + /// the program's life. Dropping the returned reference will cause a memory + /// leak. If this is not acceptable, the reference should first be wrapped + /// with the [`Box::from_raw`] function producing a `Box`. This `Box` can + /// then be dropped which will properly destroy `T` and release the + /// allocated memory. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::leak(b)` instead of `b.leak()`. This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// + /// Simple usage: + /// + /// ``` + /// let x = Box::new(41); + /// let static_ref: &'static mut usize = Box::leak(x); + /// *static_ref += 1; + /// assert_eq!(*static_ref, 42); + /// ``` + /// + /// Unsized data: + /// + /// ``` + /// let x = vec![1, 2, 3].into_boxed_slice(); + /// let static_ref = Box::leak(x); + /// static_ref[0] = 4; + /// assert_eq!(*static_ref, [4, 2, 3]); + /// ``` + #[stable(feature = "box_leak", since = "1.26.0")] + #[inline] + pub fn leak<'a>(b: Self) -> &'a mut T + where + A: 'a, + { + unsafe { &mut *mem::ManuallyDrop::new(b).0.as_ptr() } + } + + /// Converts a `Box` into a `Pin>` + /// + /// This conversion does not allocate on the heap and happens in place. + /// + /// This is also available via [`From`]. 
+ #[unstable(feature = "box_into_pin", issue = "62370")] + pub fn into_pin(boxed: Self) -> Pin + where + A: 'static, + { + // It's not possible to move or replace the insides of a `Pin>` + // when `T: !Unpin`, so it's safe to pin it directly without any + // additional requirements. + unsafe { Pin::new_unchecked(boxed) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for Box { + fn drop(&mut self) { + // FIXME: Do nothing, drop is currently performed by compiler. + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for Box { + /// Creates a `Box`, with the `Default` value for T. + fn default() -> Self { + box T::default() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for Box<[T]> { + fn default() -> Self { + Box::<[T; 0]>::new([]) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "default_box_extra", since = "1.17.0")] +impl Default for Box { + fn default() -> Self { + unsafe { from_boxed_utf8_unchecked(Default::default()) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for Box { + /// Returns a new box with a `clone()` of this box's contents. + /// + /// # Examples + /// + /// ``` + /// let x = Box::new(5); + /// let y = x.clone(); + /// + /// // The value is the same + /// assert_eq!(x, y); + /// + /// // But they are unique objects + /// assert_ne!(&*x as *const i32, &*y as *const i32); + /// ``` + #[inline] + fn clone(&self) -> Self { + // Pre-allocate memory to allow writing the cloned value directly. + let mut boxed = Self::new_uninit_in(self.1.clone()); + unsafe { + (**self).write_clone_into_raw(boxed.as_mut_ptr()); + boxed.assume_init() + } + } + + /// Copies `source`'s contents into `self` without creating a new allocation. 
+ /// + /// # Examples + /// + /// ``` + /// let x = Box::new(5); + /// let mut y = Box::new(10); + /// let yp: *const i32 = &*y; + /// + /// y.clone_from(&x); + /// + /// // The value is the same + /// assert_eq!(x, y); + /// + /// // And no allocation occurred + /// assert_eq!(yp, &*y); + /// ``` + #[inline] + fn clone_from(&mut self, source: &Self) { + (**self).clone_from(&(**source)); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_slice_clone", since = "1.3.0")] +impl Clone for Box { + fn clone(&self) -> Self { + // this makes a copy of the data + let buf: Box<[u8]> = self.as_bytes().into(); + unsafe { from_boxed_utf8_unchecked(buf) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for Box { + #[inline] + fn eq(&self, other: &Self) -> bool { + PartialEq::eq(&**self, &**other) + } + #[inline] + fn ne(&self, other: &Self) -> bool { + PartialEq::ne(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for Box { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } + #[inline] + fn lt(&self, other: &Self) -> bool { + PartialOrd::lt(&**self, &**other) + } + #[inline] + fn le(&self, other: &Self) -> bool { + PartialOrd::le(&**self, &**other) + } + #[inline] + fn ge(&self, other: &Self) -> bool { + PartialOrd::ge(&**self, &**other) + } + #[inline] + fn gt(&self, other: &Self) -> bool { + PartialOrd::gt(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Box { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Box {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Box { + fn hash(&self, state: &mut H) { + (**self).hash(state); + } +} + +#[stable(feature = "indirect_hasher_impl", since = "1.22.0")] +impl Hasher for Box { + fn finish(&self) -> u64 { + (**self).finish() + } + fn 
write(&mut self, bytes: &[u8]) { + (**self).write(bytes) + } + fn write_u8(&mut self, i: u8) { + (**self).write_u8(i) + } + fn write_u16(&mut self, i: u16) { + (**self).write_u16(i) + } + fn write_u32(&mut self, i: u32) { + (**self).write_u32(i) + } + fn write_u64(&mut self, i: u64) { + (**self).write_u64(i) + } + fn write_u128(&mut self, i: u128) { + (**self).write_u128(i) + } + fn write_usize(&mut self, i: usize) { + (**self).write_usize(i) + } + fn write_i8(&mut self, i: i8) { + (**self).write_i8(i) + } + fn write_i16(&mut self, i: i16) { + (**self).write_i16(i) + } + fn write_i32(&mut self, i: i32) { + (**self).write_i32(i) + } + fn write_i64(&mut self, i: i64) { + (**self).write_i64(i) + } + fn write_i128(&mut self, i: i128) { + (**self).write_i128(i) + } + fn write_isize(&mut self, i: isize) { + (**self).write_isize(i) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_for_ptrs", since = "1.6.0")] +impl From for Box { + /// Converts a `T` into a `Box` + /// + /// The conversion allocates on the heap and moves `t` + /// from the stack into it. + /// + /// # Examples + /// ```rust + /// let x = 5; + /// let boxed = Box::new(5); + /// + /// assert_eq!(Box::from(x), boxed); + /// ``` + fn from(t: T) -> Self { + Box::new(t) + } +} + +#[stable(feature = "pin", since = "1.33.0")] +impl From> for Pin> +where + A: 'static, +{ + /// Converts a `Box` into a `Pin>` + /// + /// This conversion does not allocate on the heap and happens in place. + fn from(boxed: Box) -> Self { + Box::into_pin(boxed) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_slice", since = "1.17.0")] +impl From<&[T]> for Box<[T]> { + /// Converts a `&[T]` into a `Box<[T]>` + /// + /// This conversion allocates on the heap + /// and performs a copy of `slice`. 
+ /// + /// # Examples + /// ```rust + /// // create a &[u8] which will be used to create a Box<[u8]> + /// let slice: &[u8] = &[104, 101, 108, 108, 111]; + /// let boxed_slice: Box<[u8]> = Box::from(slice); + /// + /// println!("{:?}", boxed_slice); + /// ``` + fn from(slice: &[T]) -> Box<[T]> { + let len = slice.len(); + let buf = RawVec::with_capacity(len); + unsafe { + ptr::copy_nonoverlapping(slice.as_ptr(), buf.ptr(), len); + buf.into_box(slice.len()).assume_init() + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl From> for Box<[T]> { + #[inline] + fn from(cow: Cow<'_, [T]>) -> Box<[T]> { + match cow { + Cow::Borrowed(slice) => Box::from(slice), + Cow::Owned(slice) => Box::from(slice), + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_slice", since = "1.17.0")] +impl From<&str> for Box { + /// Converts a `&str` into a `Box` + /// + /// This conversion allocates on the heap + /// and performs a copy of `s`. + /// + /// # Examples + /// ```rust + /// let boxed: Box = Box::from("hello"); + /// println!("{}", boxed); + /// ``` + #[inline] + fn from(s: &str) -> Box { + unsafe { from_boxed_utf8_unchecked(Box::from(s.as_bytes())) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl From> for Box { + #[inline] + fn from(cow: Cow<'_, str>) -> Box { + match cow { + Cow::Borrowed(s) => Box::from(s), + Cow::Owned(s) => Box::from(s), + } + } +} + +#[stable(feature = "boxed_str_conv", since = "1.19.0")] +impl From> for Box<[u8], A> { + /// Converts a `Box` into a `Box<[u8]>` + /// + /// This conversion does not allocate on the heap and happens in place. 
+ /// + /// # Examples + /// ```rust + /// // create a Box which will be used to create a Box<[u8]> + /// let boxed: Box = Box::from("hello"); + /// let boxed_str: Box<[u8]> = Box::from(boxed); + /// + /// // create a &[u8] which will be used to create a Box<[u8]> + /// let slice: &[u8] = &[104, 101, 108, 108, 111]; + /// let boxed_slice = Box::from(slice); + /// + /// assert_eq!(boxed_slice, boxed_str); + /// ``` + #[inline] + fn from(s: Box) -> Self { + let (raw, alloc) = Box::into_raw_with_allocator(s); + unsafe { Box::from_raw_in(raw as *mut [u8], alloc) } + } +} + +#[stable(feature = "box_from_array", since = "1.45.0")] +impl From<[T; N]> for Box<[T]> { + /// Converts a `[T; N]` into a `Box<[T]>` + /// + /// This conversion moves the array to newly heap-allocated memory. + /// + /// # Examples + /// ```rust + /// let boxed: Box<[u8]> = Box::from([4, 2]); + /// println!("{:?}", boxed); + /// ``` + fn from(array: [T; N]) -> Box<[T]> { + box array + } +} + +#[stable(feature = "boxed_slice_try_from", since = "1.43.0")] +impl TryFrom> for Box<[T; N]> { + type Error = Box<[T]>; + + fn try_from(boxed_slice: Box<[T]>) -> Result { + if boxed_slice.len() == N { + Ok(unsafe { Box::from_raw(Box::into_raw(boxed_slice) as *mut [T; N]) }) + } else { + Err(boxed_slice) + } + } +} + +impl Box { + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + /// Attempt to downcast the box to a concrete type. 
+ /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box) { + /// if let Ok(string) = value.downcast::() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + pub fn downcast(self) -> Result, Self> { + if self.is::() { + unsafe { + let (raw, alloc): (*mut dyn Any, _) = Box::into_raw_with_allocator(self); + Ok(Box::from_raw_in(raw as *mut T, alloc)) + } + } else { + Err(self) + } + } +} + +impl Box { + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + /// Attempt to downcast the box to a concrete type. + /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box) { + /// if let Ok(string) = value.downcast::() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + pub fn downcast(self) -> Result, Self> { + if self.is::() { + unsafe { + let (raw, alloc): (*mut (dyn Any + Send), _) = Box::into_raw_with_allocator(self); + Ok(Box::from_raw_in(raw as *mut T, alloc)) + } + } else { + Err(self) + } + } +} + +impl Box { + #[inline] + #[stable(feature = "box_send_sync_any_downcast", since = "1.51.0")] + /// Attempt to downcast the box to a concrete type. 
+ /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box) { + /// if let Ok(string) = value.downcast::() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + pub fn downcast(self) -> Result, Self> { + if self.is::() { + unsafe { + let (raw, alloc): (*mut (dyn Any + Send + Sync), _) = + Box::into_raw_with_allocator(self); + Ok(Box::from_raw_in(raw as *mut T, alloc)) + } + } else { + Err(self) + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for Box { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Box { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Pointer for Box { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's not possible to extract the inner Uniq directly from the Box, + // instead we cast it to a *const which aliases the Unique + let ptr: *const T = &**self; + fmt::Pointer::fmt(&ptr, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Deref for Box { + type Target = T; + + fn deref(&self) -> &T { + &**self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl DerefMut for Box { + fn deref_mut(&mut self) -> &mut T { + &mut **self + } +} + +#[unstable(feature = "receiver_trait", issue = "none")] +impl Receiver for Box {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for Box { + type Item = I::Item; + fn next(&mut self) -> Option { + (**self).next() + } + fn size_hint(&self) -> (usize, Option) { + (**self).size_hint() + } + fn nth(&mut self, n: usize) -> Option { + (**self).nth(n) + } + fn last(self) -> Option { + 
BoxIter::last(self) + } +} + +trait BoxIter { + type Item; + fn last(self) -> Option; +} + +impl BoxIter for Box { + type Item = I::Item; + default fn last(self) -> Option { + #[inline] + fn some(_: Option, x: T) -> Option { + Some(x) + } + + self.fold(None, some) + } +} + +/// Specialization for sized `I`s that uses `I`s implementation of `last()` +/// instead of the default. +#[stable(feature = "rust1", since = "1.0.0")] +impl BoxIter for Box { + fn last(self) -> Option { + (*self).last() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl DoubleEndedIterator for Box { + fn next_back(&mut self) -> Option { + (**self).next_back() + } + fn nth_back(&mut self, n: usize) -> Option { + (**self).nth_back(n) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ExactSizeIterator for Box { + fn len(&self) -> usize { + (**self).len() + } + fn is_empty(&self) -> bool { + (**self).is_empty() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for Box {} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl + ?Sized, A: Allocator> FnOnce for Box { + type Output = >::Output; + + extern "rust-call" fn call_once(self, args: Args) -> Self::Output { + >::call_once(*self, args) + } +} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl + ?Sized, A: Allocator> FnMut for Box { + extern "rust-call" fn call_mut(&mut self, args: Args) -> Self::Output { + >::call_mut(self, args) + } +} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl + ?Sized, A: Allocator> Fn for Box { + extern "rust-call" fn call(&self, args: Args) -> Self::Output { + >::call(self, args) + } +} + +#[unstable(feature = "coerce_unsized", issue = "27732")] +impl, U: ?Sized, A: Allocator> CoerceUnsized> for Box {} + +#[unstable(feature = "dispatch_from_dyn", issue = "none")] +impl, U: ?Sized> DispatchFromDyn> for Box {} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "boxed_slice_from_iter", since = "1.32.0")] +impl 
FromIterator for Box<[I]> { + fn from_iter>(iter: T) -> Self { + iter.into_iter().collect::>().into_boxed_slice() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_slice_clone", since = "1.3.0")] +impl Clone for Box<[T], A> { + fn clone(&self) -> Self { + let alloc = Box::allocator(self).clone(); + self.to_vec_in(alloc).into_boxed_slice() + } + + fn clone_from(&mut self, other: &Self) { + if self.len() == other.len() { + self.clone_from_slice(&other); + } else { + *self = other.clone(); + } + } +} + +#[stable(feature = "box_borrow", since = "1.1.0")] +impl borrow::Borrow for Box { + fn borrow(&self) -> &T { + &**self + } +} + +#[stable(feature = "box_borrow", since = "1.1.0")] +impl borrow::BorrowMut for Box { + fn borrow_mut(&mut self) -> &mut T { + &mut **self + } +} + +#[stable(since = "1.5.0", feature = "smart_ptr_as_ref")] +impl AsRef for Box { + fn as_ref(&self) -> &T { + &**self + } +} + +#[stable(since = "1.5.0", feature = "smart_ptr_as_ref")] +impl AsMut for Box { + fn as_mut(&mut self) -> &mut T { + &mut **self + } +} + +/* Nota bene + * + * We could have chosen not to add this impl, and instead have written a + * function of Pin> to Pin. Such a function would not be sound, + * because Box implements Unpin even when T does not, as a result of + * this impl. + * + * We chose this API instead of the alternative for a few reasons: + * - Logically, it is helpful to understand pinning in regard to the + * memory region being pointed to. For this reason none of the + * standard library pointer types support projecting through a pin + * (Box is the only pointer type in std for which this would be + * safe.) + * - It is in practice very useful to have Box be unconditionally + * Unpin because of trait objects, for which the structural auto + * trait functionality does not apply (e.g., Box would + * otherwise not be Unpin). 
+ * + * Another type with the same semantics as Box but only a conditional + * implementation of `Unpin` (where `T: Unpin`) would be valid/safe, and + * could have a method to project a Pin from it. + */ +#[stable(feature = "pin", since = "1.33.0")] +impl Unpin for Box where A: 'static {} + +#[unstable(feature = "generator_trait", issue = "43122")] +impl + Unpin, R, A: Allocator> Generator for Box +where + A: 'static, +{ + type Yield = G::Yield; + type Return = G::Return; + + fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState { + G::resume(Pin::new(&mut *self), arg) + } +} + +#[unstable(feature = "generator_trait", issue = "43122")] +impl, R, A: Allocator> Generator for Pin> +where + A: 'static, +{ + type Yield = G::Yield; + type Return = G::Return; + + fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState { + G::resume((*self).as_mut(), arg) + } +} + +#[stable(feature = "futures_api", since = "1.36.0")] +impl Future for Box +where + A: 'static, +{ + type Output = F::Output; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + F::poll(Pin::new(&mut *self), cx) + } +} + +#[unstable(feature = "async_stream", issue = "79024")] +impl Stream for Box { + type Item = S::Item; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut **self).poll_next(cx) + } + + fn size_hint(&self) -> (usize, Option) { + (**self).size_hint() + } +} diff --git a/rust/alloc/collections/mod.rs b/rust/alloc/collections/mod.rs new file mode 100644 index 0000000000000..2970fe44a21bf --- /dev/null +++ b/rust/alloc/collections/mod.rs @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Collection types. 
+ +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(no_global_oom_handling))] +pub mod binary_heap; +#[cfg(not(no_global_oom_handling))] +mod btree; +#[cfg(not(no_global_oom_handling))] +pub mod linked_list; +#[cfg(not(no_global_oom_handling))] +pub mod vec_deque; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod btree_map { + //! A map based on a B-Tree. + #[stable(feature = "rust1", since = "1.0.0")] + pub use super::btree::map::*; +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod btree_set { + //! A set based on a B-Tree. + #[stable(feature = "rust1", since = "1.0.0")] + pub use super::btree::set::*; +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use binary_heap::BinaryHeap; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use btree_map::BTreeMap; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use btree_set::BTreeSet; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use linked_list::LinkedList; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use vec_deque::VecDeque; + +use crate::alloc::{Layout, LayoutError}; +use core::fmt::Display; + +/// The error type for `try_reserve` methods. +#[derive(Clone, PartialEq, Eq, Debug)] +#[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] +pub enum TryReserveError { + /// Error due to the computed capacity exceeding the collection's maximum + /// (usually `isize::MAX` bytes). 
+ CapacityOverflow, + + /// The memory allocator returned an error + AllocError { + /// The layout of allocation request that failed + layout: Layout, + + #[doc(hidden)] + #[unstable( + feature = "container_error_extra", + issue = "none", + reason = "\ + Enable exposing the allocator’s custom error value \ + if an associated type is added in the future: \ + https://github.com/rust-lang/wg-allocators/issues/23" + )] + non_exhaustive: (), + }, +} + +#[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] +impl From for TryReserveError { + #[inline] + fn from(_: LayoutError) -> Self { + TryReserveError::CapacityOverflow + } +} + +#[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] +impl Display for TryReserveError { + fn fmt( + &self, + fmt: &mut core::fmt::Formatter<'_>, + ) -> core::result::Result<(), core::fmt::Error> { + fmt.write_str("memory allocation failed")?; + let reason = match &self { + TryReserveError::CapacityOverflow => { + " because the computed capacity exceeded the collection's maximum" + } + TryReserveError::AllocError { .. } => " because the memory allocator returned a error", + }; + fmt.write_str(reason) + } +} + +/// An intermediate trait for specialization of `Extend`. +#[doc(hidden)] +trait SpecExtend { + /// Extends `self` with the contents of the given iterator. + fn spec_extend(&mut self, iter: I); +} diff --git a/rust/alloc/fmt.rs b/rust/alloc/fmt.rs new file mode 100644 index 0000000000000..9c4e0b2f2111d --- /dev/null +++ b/rust/alloc/fmt.rs @@ -0,0 +1,587 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Utilities for formatting and printing `String`s. +//! +//! This module contains the runtime support for the [`format!`] syntax extension. +//! This macro is implemented in the compiler to emit calls to this module in +//! order to format arguments at runtime into strings. +//! +//! # Usage +//! +//! The [`format!`] macro is intended to be familiar to those coming from C's +//! 
`printf`/`fprintf` functions or Python's `str.format` function. +//! +//! Some examples of the [`format!`] extension are: +//! +//! ``` +//! format!("Hello"); // => "Hello" +//! format!("Hello, {}!", "world"); // => "Hello, world!" +//! format!("The number is {}", 1); // => "The number is 1" +//! format!("{:?}", (3, 4)); // => "(3, 4)" +//! format!("{value}", value=4); // => "4" +//! format!("{} {}", 1, 2); // => "1 2" +//! format!("{:04}", 42); // => "0042" with leading zeros +//! format!("{:#?}", (100, 200)); // => "( +//! // 100, +//! // 200, +//! // )" +//! ``` +//! +//! From these, you can see that the first argument is a format string. It is +//! required by the compiler for this to be a string literal; it cannot be a +//! variable passed in (in order to perform validity checking). The compiler +//! will then parse the format string and determine if the list of arguments +//! provided is suitable to pass to this format string. +//! +//! To convert a single value to a string, use the [`to_string`] method. This +//! will use the [`Display`] formatting trait. +//! +//! ## Positional parameters +//! +//! Each formatting argument is allowed to specify which value argument it's +//! referencing, and if omitted it is assumed to be "the next argument". For +//! example, the format string `{} {} {}` would take three parameters, and they +//! would be formatted in the same order as they're given. The format string +//! `{2} {1} {0}`, however, would format arguments in reverse order. +//! +//! Things can get a little tricky once you start intermingling the two types of +//! positional specifiers. The "next argument" specifier can be thought of as an +//! iterator over the argument. Each time a "next argument" specifier is seen, +//! the iterator advances. This leads to behavior like this: +//! +//! ``` +//! format!("{1} {} {0} {}", 1, 2); // => "2 1 1 2" +//! ``` +//! +//! The internal iterator over the argument has not been advanced by the time +//! 
the first `{}` is seen, so it prints the first argument. Then upon reaching +//! the second `{}`, the iterator has advanced forward to the second argument. +//! Essentially, parameters that explicitly name their argument do not affect +//! parameters that do not name an argument in terms of positional specifiers. +//! +//! A format string is required to use all of its arguments, otherwise it is a +//! compile-time error. You may refer to the same argument more than once in the +//! format string. +//! +//! ## Named parameters +//! +//! Rust itself does not have a Python-like equivalent of named parameters to a +//! function, but the [`format!`] macro is a syntax extension that allows it to +//! leverage named parameters. Named parameters are listed at the end of the +//! argument list and have the syntax: +//! +//! ```text +//! identifier '=' expression +//! ``` +//! +//! For example, the following [`format!`] expressions all use named arguments: +//! +//! ``` +//! format!("{argument}", argument = "test"); // => "test" +//! format!("{name} {}", 1, name = 2); // => "2 1" +//! format!("{a} {c} {b}", a="a", b='b', c=3); // => "a 3 b" +//! ``` +//! +//! It is not valid to put positional parameters (those without names) after +//! arguments that have names. Like with positional parameters, it is not +//! valid to provide named parameters that are unused by the format string. +//! +//! # Formatting Parameters +//! +//! Each argument being formatted can be transformed by a number of formatting +//! parameters (corresponding to `format_spec` in [the syntax](#syntax)). These +//! parameters affect the string representation of what's being formatted. +//! +//! ## Width +//! +//! ``` +//! // All of these print "Hello x    !" +//! println!("Hello {:5}!", "x"); +//! println!("Hello {:1$}!", "x", 5); +//! println!("Hello {1:0$}!", 5, "x"); +//! println!("Hello {:width$}!", "x", width = 5); +//! ``` +//! +//! This is a parameter for the "minimum width" that the format should take up.
+//! If the value's string does not fill up this many characters, then the +//! padding specified by fill/alignment will be used to take up the required +//! space (see below). +//! +//! The value for the width can also be provided as a [`usize`] in the list of +//! parameters by adding a postfix `$`, indicating that the second argument is +//! a [`usize`] specifying the width. +//! +//! Referring to an argument with the dollar syntax does not affect the "next +//! argument" counter, so it's usually a good idea to refer to arguments by +//! position, or use named arguments. +//! +//! ## Fill/Alignment +//! +//! ``` +//! assert_eq!(format!("Hello {:<5}!", "x"), "Hello x    !"); +//! assert_eq!(format!("Hello {:-<5}!", "x"), "Hello x----!"); +//! assert_eq!(format!("Hello {:^5}!", "x"), "Hello   x  !"); +//! assert_eq!(format!("Hello {:>5}!", "x"), "Hello     x!"); +//! ``` +//! +//! The optional fill character and alignment is provided normally in conjunction with the +//! [`width`](#width) parameter. It must be defined before `width`, right after the `:`. +//! This indicates that if the value being formatted is smaller than +//! `width` some extra characters will be printed around it. +//! Filling comes in the following variants for different alignments: +//! +//! * `[fill]<` - the argument is left-aligned in `width` columns +//! * `[fill]^` - the argument is center-aligned in `width` columns +//! * `[fill]>` - the argument is right-aligned in `width` columns +//! +//! The default [fill/alignment](#fillalignment) for non-numerics is a space and +//! left-aligned. The +//! default for numeric formatters is also a space character but with right-alignment. If +//! the `0` flag (see below) is specified for numerics, then the implicit fill character is +//! `0`. +//! +//! Note that alignment may not be implemented by some types. In particular, it +//! is not generally implemented for the `Debug` trait. A good way to ensure +//!
padding is applied is to format your input, then pad this resulting string +//! to obtain your output: +//! +//! ``` +//! println!("Hello {:^15}!", format!("{:?}", Some("hi"))); // => "Hello Some("hi") !" +//! ``` +//! +//! ## Sign/`#`/`0` +//! +//! ``` +//! assert_eq!(format!("Hello {:+}!", 5), "Hello +5!"); +//! assert_eq!(format!("{:#x}!", 27), "0x1b!"); +//! assert_eq!(format!("Hello {:05}!", 5), "Hello 00005!"); +//! assert_eq!(format!("Hello {:05}!", -5), "Hello -0005!"); +//! assert_eq!(format!("{:#010x}!", 27), "0x0000001b!"); +//! ``` +//! +//! These are all flags altering the behavior of the formatter. +//! +//! * `+` - This is intended for numeric types and indicates that the sign +//! should always be printed. Positive signs are never printed by +//! default, and the negative sign is only printed by default for signed values. +//! This flag indicates that the correct sign (`+` or `-`) should always be printed. +//! * `-` - Currently not used +//! * `#` - This flag indicates that the "alternate" form of printing should +//! be used. The alternate forms are: +//! * `#?` - pretty-print the [`Debug`] formatting (adds linebreaks and indentation) +//! * `#x` - precedes the argument with a `0x` +//! * `#X` - precedes the argument with a `0x` +//! * `#b` - precedes the argument with a `0b` +//! * `#o` - precedes the argument with a `0o` +//! * `0` - This is used to indicate for integer formats that the padding to `width` should +//! both be done with a `0` character as well as be sign-aware. A format +//! like `{:08}` would yield `00000001` for the integer `1`, while the +//! same format would yield `-0000001` for the integer `-1`. Notice that +//! the negative version has one fewer zero than the positive version. +//! Note that padding zeros are always placed after the sign (if any) +//! and before the digits. When used together with the `#` flag, a similar +//! rule applies: padding zeros are inserted after the prefix but before +//! the digits. 
The prefix is included in the total width. +//! +//! ## Precision +//! +//! For non-numeric types, this can be considered a "maximum width". If the resulting string is +//! longer than this width, then it is truncated down to this many characters and that truncated +//! value is emitted with proper `fill`, `alignment` and `width` if those parameters are set. +//! +//! For integral types, this is ignored. +//! +//! For floating-point types, this indicates how many digits after the decimal point should be +//! printed. +//! +//! There are three possible ways to specify the desired `precision`: +//! +//! 1. An integer `.N`: +//! +//! the integer `N` itself is the precision. +//! +//! 2. An integer or name followed by dollar sign `.N$`: +//! +//! use format *argument* `N` (which must be a `usize`) as the precision. +//! +//! 3. An asterisk `.*`: +//! +//! `.*` means that this `{...}` is associated with *two* format inputs rather than one: the +//! first input holds the `usize` precision, and the second holds the value to print. Note that +//! in this case, if one uses the format string `{<arg>:<spec>.*}`, then the `<arg>` part refers +//! to the *value* to print, and the `precision` must come in the input preceding `<arg>`. +//! +//! For example, the following calls all print the same thing `Hello x is 0.01000`: +//! +//! ``` +//! // Hello {arg 0 ("x")} is {arg 1 (0.01) with precision specified inline (5)} +//! println!("Hello {0} is {1:.5}", "x", 0.01); +//! +//! // Hello {arg 1 ("x")} is {arg 2 (0.01) with precision specified in arg 0 (5)} +//! println!("Hello {1} is {2:.0$}", 5, "x", 0.01); +//! +//! // Hello {arg 0 ("x")} is {arg 2 (0.01) with precision specified in arg 1 (5)} +//! println!("Hello {0} is {2:.1$}", "x", 5, 0.01); +//! +//! // Hello {next arg ("x")} is {second of next two args (0.01) with precision +//! // specified in first of next two args (5)} +//! println!("Hello {} is {:.*}", "x", 5, 0.01); +//! +//! // Hello {next arg ("x")} is {arg 2 (0.01) with precision +//!
// specified in its predecessor (5)} +//! println!("Hello {} is {2:.*}", "x", 5, 0.01); +//! +//! // Hello {next arg ("x")} is {arg "number" (0.01) with precision specified +//! // in arg "prec" (5)} +//! println!("Hello {} is {number:.prec$}", "x", prec = 5, number = 0.01); +//! ``` +//! +//! While these: +//! +//! ``` +//! println!("{}, `{name:.*}` has 3 fractional digits", "Hello", 3, name=1234.56); +//! println!("{}, `{name:.*}` has 3 characters", "Hello", 3, name="1234.56"); +//! println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56"); +//! ``` +//! +//! print three significantly different things: +//! +//! ```text +//! Hello, `1234.560` has 3 fractional digits +//! Hello, `123` has 3 characters +//! Hello, ` 123` has 3 right-aligned characters +//! ``` +//! +//! ## Localization +//! +//! In some programming languages, the behavior of string formatting functions +//! depends on the operating system's locale setting. The format functions +//! provided by Rust's standard library do not have any concept of locale and +//! will produce the same results on all systems regardless of user +//! configuration. +//! +//! For example, the following code will always print `1.5` even if the system +//! locale uses a decimal separator other than a dot. +//! +//! ``` +//! println!("The value is {}", 1.5); +//! ``` +//! +//! # Escaping +//! +//! The literal characters `{` and `}` may be included in a string by preceding +//! them with the same character. For example, the `{` character is escaped with +//! `{{` and the `}` character is escaped with `}}`. +//! +//! ``` +//! assert_eq!(format!("Hello {{}}"), "Hello {}"); +//! assert_eq!(format!("{{ Hello"), "{ Hello"); +//! ``` +//! +//! # Syntax +//! +//! To summarize, here you can find the full grammar of format strings. +//! The syntax for the formatting language used is drawn from other languages, +//! so it should not be too alien. Arguments are formatted with Python-like +//! 
syntax, meaning that arguments are surrounded by `{}` instead of the C-like +//! `%`. The actual grammar for the formatting syntax is: +//! +//! ```text +//! format_string := text [ maybe_format text ] * +//! maybe_format := '{' '{' | '}' '}' | format +//! format := '{' [ argument ] [ ':' format_spec ] '}' +//! argument := integer | identifier +//! +//! format_spec := [[fill]align][sign]['#']['0'][width]['.' precision]type +//! fill := character +//! align := '<' | '^' | '>' +//! sign := '+' | '-' +//! width := count +//! precision := count | '*' +//! type := '' | '?' | 'x?' | 'X?' | identifier +//! count := parameter | integer +//! parameter := argument '$' +//! ``` +//! In the above grammar, `text` may not contain any `'{'` or `'}'` characters. +//! +//! # Formatting traits +//! +//! When requesting that an argument be formatted with a particular type, you +//! are actually requesting that an argument ascribes to a particular trait. +//! This allows multiple actual types to be formatted via `{:x}` (like [`i8`] as +//! well as [`isize`]). The current mapping of types to traits is: +//! +//! * *nothing* ⇒ [`Display`] +//! * `?` ⇒ [`Debug`] +//! * `x?` ⇒ [`Debug`] with lower-case hexadecimal integers +//! * `X?` ⇒ [`Debug`] with upper-case hexadecimal integers +//! * `o` ⇒ [`Octal`] +//! * `x` ⇒ [`LowerHex`] +//! * `X` ⇒ [`UpperHex`] +//! * `p` ⇒ [`Pointer`] +//! * `b` ⇒ [`Binary`] +//! * `e` ⇒ [`LowerExp`] +//! * `E` ⇒ [`UpperExp`] +//! +//! What this means is that any type of argument which implements the +//! [`fmt::Binary`][`Binary`] trait can then be formatted with `{:b}`. Implementations +//! are provided for these traits for a number of primitive types by the +//! standard library as well. If no format is specified (as in `{}` or `{:6}`), +//! then the format trait used is the [`Display`] trait. +//! +//! When implementing a format trait for your own type, you will have to +//! implement a method of the signature: +//! +//! ``` +//! 
# #![allow(dead_code)] +//! # use std::fmt; +//! # struct Foo; // our custom type +//! # impl fmt::Display for Foo { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! # write!(f, "testing, testing") +//! # } } +//! ``` +//! +//! Your type will be passed as `self` by-reference, and then the function +//! should emit output into the `f.buf` stream. It is up to each format trait +//! implementation to correctly adhere to the requested formatting parameters. +//! The values of these parameters will be listed in the fields of the +//! [`Formatter`] struct. In order to help with this, the [`Formatter`] struct also +//! provides some helper methods. +//! +//! Additionally, the return value of this function is [`fmt::Result`] which is a +//! type alias of [`Result`]`<(), `[`std::fmt::Error`]`>`. Formatting implementations +//! should ensure that they propagate errors from the [`Formatter`] (e.g., when +//! calling [`write!`]). However, they should never return errors spuriously. That +//! is, a formatting implementation must and may only return an error if the +//! passed-in [`Formatter`] returns an error. This is because, contrary to what +//! the function signature might suggest, string formatting is an infallible +//! operation. This function only returns a result because writing to the +//! underlying stream might fail and it must provide a way to propagate the fact +//! that an error has occurred back up the stack. +//! +//! An example of implementing the formatting traits would look +//! like: +//! +//! ``` +//! use std::fmt; +//! +//! #[derive(Debug)] +//! struct Vector2D { +//! x: isize, +//! y: isize, +//! } +//! +//! impl fmt::Display for Vector2D { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! // The `f` value implements the `Write` trait, which is what the +//! // write! macro is expecting. Note that this formatting ignores the +//! // various flags provided to format strings. +//! write!(f, "({}, {})", self.x, self.y) +//! 
} +//! } +//! +//! // Different traits allow different forms of output of a type. The meaning +//! // of this format is to print the magnitude of a vector. +//! impl fmt::Binary for Vector2D { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! let magnitude = (self.x * self.x + self.y * self.y) as f64; +//! let magnitude = magnitude.sqrt(); +//! +//! // Respect the formatting flags by using the helper method +//! // `pad_integral` on the Formatter object. See the method +//! // documentation for details, and the function `pad` can be used +//! // to pad strings. +//! let decimals = f.precision().unwrap_or(3); +//! let string = format!("{:.*}", decimals, magnitude); +//! f.pad_integral(true, "", &string) +//! } +//! } +//! +//! fn main() { +//! let myvector = Vector2D { x: 3, y: 4 }; +//! +//! println!("{}", myvector); // => "(3, 4)" +//! println!("{:?}", myvector); // => "Vector2D { x: 3, y: 4 }" +//! println!("{:10.3b}", myvector); // => "     5.000" +//! } +//! ``` +//! +//! ### `fmt::Display` vs `fmt::Debug` +//! +//! These two formatting traits have distinct purposes: +//! +//! - [`fmt::Display`][`Display`] implementations assert that the type can be faithfully +//! represented as a UTF-8 string at all times. It is **not** expected that +//! all types implement the [`Display`] trait. +//! - [`fmt::Debug`][`Debug`] implementations should be implemented for **all** public types. +//! Output will typically represent the internal state as faithfully as possible. +//! The purpose of the [`Debug`] trait is to facilitate debugging Rust code. In +//! most cases, using `#[derive(Debug)]` is sufficient and recommended. +//! +//! Some examples of the output from both traits: +//! +//! ``` +//! assert_eq!(format!("{} {:?}", 3, 4), "3 4"); +//! assert_eq!(format!("{} {:?}", 'a', 'b'), "a 'b'"); +//! assert_eq!(format!("{} {:?}", "foo\n", "bar\n"), "foo\n \"bar\\n\""); +//! ``` +//! +//! # Related macros +//! +//!
There are a number of related macros in the [`format!`] family. The ones that +//! are currently implemented are: +//! +//! ```ignore (only-for-syntax-highlight) +//! format! // described above +//! write! // first argument is a &mut io::Write, the destination +//! writeln! // same as write but appends a newline +//! print! // the format string is printed to the standard output +//! println! // same as print but appends a newline +//! eprint! // the format string is printed to the standard error +//! eprintln! // same as eprint but appends a newline +//! format_args! // described below. +//! ``` +//! +//! ### `write!` +//! +//! This and [`writeln!`] are two macros which are used to emit the format string +//! to a specified stream. This is used to prevent intermediate allocations of +//! format strings and instead directly write the output. Under the hood, this +//! function is actually invoking the [`write_fmt`] function defined on the +//! [`std::io::Write`] trait. Example usage is: +//! +//! ``` +//! # #![allow(unused_must_use)] +//! use std::io::Write; +//! let mut w = Vec::new(); +//! write!(&mut w, "Hello {}!", "world"); +//! ``` +//! +//! ### `print!` +//! +//! This and [`println!`] emit their output to stdout. Similarly to the [`write!`] +//! macro, the goal of these macros is to avoid intermediate allocations when +//! printing output. Example usage is: +//! +//! ``` +//! print!("Hello {}!", "world"); +//! println!("I have a newline {}", "character at the end"); +//! ``` +//! ### `eprint!` +//! +//! The [`eprint!`] and [`eprintln!`] macros are identical to +//! [`print!`] and [`println!`], respectively, except they emit their +//! output to stderr. +//! +//! ### `format_args!` +//! +//! This is a curious macro used to safely pass around +//! an opaque object describing the format string. This object +//! does not require any heap allocations to create, and it only +//! references information on the stack. Under the hood, all of +//! 
the related macros are implemented in terms of this. First +//! off, some example usage is: +//! +//! ``` +//! # #![allow(unused_must_use)] +//! use std::fmt; +//! use std::io::{self, Write}; +//! +//! let mut some_writer = io::stdout(); +//! write!(&mut some_writer, "{}", format_args!("print with a {}", "macro")); +//! +//! fn my_fmt_fn(args: fmt::Arguments) { +//! write!(&mut io::stdout(), "{}", args); +//! } +//! my_fmt_fn(format_args!(", or a {} too", "function")); +//! ``` +//! +//! The result of the [`format_args!`] macro is a value of type [`fmt::Arguments`]. +//! This structure can then be passed to the [`write`] and [`format`] functions +//! inside this module in order to process the format string. +//! The goal of this macro is to even further prevent intermediate allocations +//! when dealing with formatting strings. +//! +//! For example, a logging library could use the standard formatting syntax, but +//! it would internally pass around this structure until it has been determined +//! where output should go to. +//! +//! [`fmt::Result`]: Result +//! [`Result`]: core::result::Result +//! [`std::fmt::Error`]: Error +//! [`write!`]: core::write +//! [`write`]: core::write +//! [`format!`]: crate::format +//! [`to_string`]: crate::string::ToString +//! [`writeln!`]: core::writeln +//! [`write_fmt`]: ../../std/io/trait.Write.html#method.write_fmt +//! [`std::io::Write`]: ../../std/io/trait.Write.html +//! [`print!`]: ../../std/macro.print.html +//! [`println!`]: ../../std/macro.println.html +//! [`eprint!`]: ../../std/macro.eprint.html +//! [`eprintln!`]: ../../std/macro.eprintln.html +//! [`format_args!`]: core::format_args +//! [`fmt::Arguments`]: Arguments +//! 
[`format`]: crate::format + +#![stable(feature = "rust1", since = "1.0.0")] + +#[unstable(feature = "fmt_internals", issue = "none")] +pub use core::fmt::rt; +#[stable(feature = "fmt_flags_align", since = "1.28.0")] +pub use core::fmt::Alignment; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::Error; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{write, ArgumentV1, Arguments}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Binary, Octal}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Debug, Display}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Formatter, Result, Write}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{LowerExp, UpperExp}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{LowerHex, Pointer, UpperHex}; + +#[cfg(not(no_global_oom_handling))] +use crate::string; + +/// The `format` function takes an [`Arguments`] struct and returns the resulting +/// formatted string. +/// +/// The [`Arguments`] instance can be created with the [`format_args!`] macro. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use std::fmt; +/// +/// let s = fmt::format(format_args!("Hello, {}!", "world")); +/// assert_eq!(s, "Hello, world!"); +/// ``` +/// +/// Please note that using [`format!`] might be preferable. 
+/// Example: +/// +/// ``` +/// let s = format!("Hello, {}!", "world"); +/// assert_eq!(s, "Hello, world!"); +/// ``` +/// +/// [`format_args!`]: core::format_args +/// [`format!`]: crate::format +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn format(args: Arguments<'_>) -> string::String { + let capacity = args.estimated_capacity(); + let mut output = string::String::with_capacity(capacity); + output.write_fmt(args).expect("a formatting trait implementation returned an error"); + output +} diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs new file mode 100644 index 0000000000000..f109e7902b201 --- /dev/null +++ b/rust/alloc/lib.rs @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! # The Rust core allocation and collections library +//! +//! This library provides smart pointers and collections for managing +//! heap-allocated values. +//! +//! This library, like libcore, normally doesn’t need to be used directly +//! since its contents are re-exported in the [`std` crate](../std/index.html). +//! Crates that use the `#![no_std]` attribute however will typically +//! not depend on `std`, so they’d use this crate instead. +//! +//! ## Boxed values +//! +//! The [`Box`] type is a smart pointer type. There can only be one owner of a +//! [`Box`], and the owner can decide to mutate the contents, which live on the +//! heap. +//! +//! This type can be sent among threads efficiently as the size of a `Box` value +//! is the same as that of a pointer. Tree-like data structures are often built +//! with boxes because each node often has only one owner, the parent. +//! +//! ## Reference counted pointers +//! +//! The [`Rc`] type is a non-threadsafe reference-counted pointer type intended +//! for sharing memory within a thread. An [`Rc`] pointer wraps a type, `T`, and +//! only allows access to `&T`, a shared reference. +//! +//! 
This type is useful when inherited mutability (such as using [`Box`]) is too +//! constraining for an application, and is often paired with the [`Cell`] or +//! [`RefCell`] types in order to allow mutation. +//! +//! ## Atomically reference counted pointers +//! +//! The [`Arc`] type is the threadsafe equivalent of the [`Rc`] type. It +//! provides all the same functionality of [`Rc`], except it requires that the +//! contained type `T` is shareable. Additionally, [`Arc`][`Arc`] is itself +//! sendable while [`Rc`][`Rc`] is not. +//! +//! This type allows for shared access to the contained data, and is often +//! paired with synchronization primitives such as mutexes to allow mutation of +//! shared resources. +//! +//! ## Collections +//! +//! Implementations of the most common general purpose data structures are +//! defined in this library. They are re-exported through the +//! [standard collections library](../std/collections/index.html). +//! +//! ## Heap interfaces +//! +//! The [`alloc`](alloc/index.html) module defines the low-level interface to the +//! default global allocator. It is not compatible with the libc allocator API. +//! +//! [`Arc`]: sync +//! [`Box`]: boxed +//! [`Cell`]: core::cell +//! [`Rc`]: rc +//! 
[`RefCell`]: core::cell + +#![allow(unused_attributes)] +#![stable(feature = "alloc", since = "1.36.0")] +#![doc( + html_playground_url = "https://play.rust-lang.org/", + issue_tracker_base_url = "https://github.com/rust-lang/rust/issues/", + test(no_crate_inject, attr(allow(unused_variables), deny(warnings))) +)] +#![no_std] +#![needs_allocator] +#![warn(deprecated_in_future)] +#![warn(missing_docs)] +#![warn(missing_debug_implementations)] +#![allow(explicit_outlives_requirements)] +#![deny(unsafe_op_in_unsafe_fn)] +#![feature(rustc_allow_const_fn_unstable)] +#![cfg_attr(not(test), feature(generator_trait))] +#![cfg_attr(test, feature(test))] +#![cfg_attr(test, feature(new_uninit))] +#![feature(allocator_api)] +#![feature(array_chunks)] +#![feature(array_methods)] +#![feature(array_windows)] +#![feature(allow_internal_unstable)] +#![feature(arbitrary_self_types)] +#![feature(async_stream)] +#![feature(box_patterns)] +#![feature(box_syntax)] +#![feature(cfg_sanitize)] +#![feature(cfg_target_has_atomic)] +#![feature(coerce_unsized)] +#![cfg_attr(not(no_global_oom_handling), feature(const_btree_new))] +#![feature(const_fn_trait_bound)] +#![feature(cow_is_borrowed)] +#![feature(const_cow_is_borrowed)] +#![feature(destructuring_assignment)] +#![feature(dispatch_from_dyn)] +#![feature(core_intrinsics)] +#![feature(dropck_eyepatch)] +#![feature(exact_size_is_empty)] +#![feature(exclusive_range_pattern)] +#![feature(extend_one)] +#![feature(fmt_internals)] +#![feature(fn_traits)] +#![feature(fundamental)] +#![feature(inplace_iteration)] +// Technically, this is a bug in rustdoc: rustdoc sees the documentation on `#[lang = slice_alloc]` +// blocks is for `&[T]`, which also has documentation using this feature in `core`, and gets mad +// that the feature-gate isn't enabled. Ideally, it wouldn't check for the feature gate for docs +// from other crates, but since this can only appear for lang items, it doesn't seem worth fixing. 
+#![feature(intra_doc_pointers)] +#![feature(iter_zip)] +#![feature(lang_items)] +#![feature(layout_for_ptr)] +#![feature(maybe_uninit_ref)] +#![feature(negative_impls)] +#![feature(never_type)] +#![feature(nll)] +#![feature(nonnull_slice_from_raw_parts)] +#![feature(auto_traits)] +#![feature(option_result_unwrap_unchecked)] +#![feature(pattern)] +#![feature(ptr_internals)] +#![feature(rustc_attrs)] +#![feature(receiver_trait)] +#![feature(min_specialization)] +#![feature(set_ptr_value)] +#![feature(slice_ptr_get)] +#![feature(slice_ptr_len)] +#![feature(slice_range)] +#![feature(staged_api)] +#![feature(str_internals)] +#![feature(trusted_len)] +#![feature(unboxed_closures)] +#![feature(unicode_internals)] +#![feature(unsize)] +#![feature(unsized_fn_params)] +#![feature(allocator_internals)] +#![feature(slice_partition_dedup)] +#![feature(maybe_uninit_extra, maybe_uninit_slice, maybe_uninit_uninit_array)] +#![feature(alloc_layout_extra)] +#![feature(trusted_random_access)] +#![cfg_attr(bootstrap, feature(try_trait))] +#![cfg_attr(not(bootstrap), feature(try_trait_v2))] +#![feature(min_type_alias_impl_trait)] +#![feature(associated_type_bounds)] +#![feature(slice_group_by)] +#![feature(decl_macro)] +#![feature(bindings_after_at)] +// Allow testing this library + +#[cfg(test)] +#[macro_use] +extern crate std; +#[cfg(test)] +extern crate test; + +// Module with internal macros used by other modules (needs to be included before other modules). +#[macro_use] +mod macros; + +// Heaps provided for low-level allocation strategies + +pub mod alloc; + +// Primitive types using the heaps above + +// Need to conditionally define the mod from `boxed.rs` to avoid +// duplicating the lang-items when building in test cfg; but also need +// to allow code to have `use boxed::Box;` declarations. 
+#[cfg(not(test))] +pub mod boxed; +#[cfg(test)] +mod boxed { + pub use std::boxed::Box; +} +pub mod borrow; +pub mod collections; +pub mod fmt; +pub mod prelude; +pub mod raw_vec; +pub mod rc; +pub mod slice; +pub mod str; +pub mod string; +#[cfg(target_has_atomic = "ptr")] +pub mod sync; +#[cfg(all(not(no_global_oom_handling), target_has_atomic = "ptr"))] +pub mod task; +#[cfg(test)] +mod tests; +pub mod vec; + +#[doc(hidden)] +#[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] +pub mod __export { + pub use core::format_args; +} diff --git a/rust/alloc/macros.rs b/rust/alloc/macros.rs new file mode 100644 index 0000000000000..1dea4ec36c3e6 --- /dev/null +++ b/rust/alloc/macros.rs @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +/// Creates a [`Vec`] containing the arguments. +/// +/// `vec!` allows `Vec`s to be defined with the same syntax as array expressions. +/// There are two forms of this macro: +/// +/// - Create a [`Vec`] containing a given list of elements: +/// +/// ``` +/// let v = vec![1, 2, 3]; +/// assert_eq!(v[0], 1); +/// assert_eq!(v[1], 2); +/// assert_eq!(v[2], 3); +/// ``` +/// +/// - Create a [`Vec`] from a given element and size: +/// +/// ``` +/// let v = vec![1; 3]; +/// assert_eq!(v, [1, 1, 1]); +/// ``` +/// +/// Note that unlike array expressions this syntax supports all elements +/// which implement [`Clone`] and the number of elements doesn't have to be +/// a constant. +/// +/// This will use `clone` to duplicate an expression, so one should be careful +/// using this with types having a nonstandard `Clone` implementation. For +/// example, `vec![Rc::new(1); 5]` will create a vector of five references +/// to the same boxed integer value, not five references pointing to independently +/// boxed integers. +/// +/// Also, note that `vec![expr; 0]` is allowed, and produces an empty vector. 
+/// This will still evaluate `expr`, however, and immediately drop the resulting value, so +/// be mindful of side effects. +/// +/// [`Vec`]: crate::vec::Vec +#[cfg(not(test))] +#[doc(alias = "alloc")] +#[doc(alias = "malloc")] +#[macro_export] +#[stable(feature = "rust1", since = "1.0.0")] +#[allow_internal_unstable(box_syntax, liballoc_internals)] +macro_rules! vec { + () => ( + $crate::__rust_force_expr!($crate::vec::Vec::new()) + ); + ($elem:expr; $n:expr) => ( + $crate::__rust_force_expr!($crate::vec::from_elem($elem, $n)) + ); + ($($x:expr),+ $(,)?) => ( + $crate::__rust_force_expr!(<[_]>::into_vec(box [$($x),+])) + ); +} + +// HACK(japaric): with cfg(test) the inherent `[T]::into_vec` method, which is +// required for this macro definition, is not available. Instead use the +// `slice::into_vec` function which is only available with cfg(test) +// NB see the slice::hack module in slice.rs for more information +#[cfg(test)] +macro_rules! vec { + () => ( + $crate::vec::Vec::new() + ); + ($elem:expr; $n:expr) => ( + $crate::vec::from_elem($elem, $n) + ); + ($($x:expr),*) => ( + $crate::slice::into_vec(box [$($x),*]) + ); + ($($x:expr,)*) => (vec![$($x),*]) +} + +/// Creates a `String` using interpolation of runtime expressions. +/// +/// The first argument `format!` receives is a format string. This must be a string +/// literal. The power of the formatting string is in the `{}`s contained. +/// +/// Additional parameters passed to `format!` replace the `{}`s within the +/// formatting string in the order given unless named or positional parameters +/// are used; see [`std::fmt`] for more information. +/// +/// A common use for `format!` is concatenation and interpolation of strings. +/// The same convention is used with [`print!`] and [`write!`] macros, +/// depending on the intended destination of the string. +/// +/// To convert a single value to a string, use the [`to_string`] method. This +/// will use the [`Display`] formatting trait. 
+/// +/// [`std::fmt`]: ../std/fmt/index.html +/// [`print!`]: ../std/macro.print.html +/// [`write!`]: core::write +/// [`to_string`]: crate::string::ToString +/// [`Display`]: core::fmt::Display +/// +/// # Panics +/// +/// `format!` panics if a formatting trait implementation returns an error. +/// This indicates an incorrect implementation +/// since `fmt::Write for String` never returns an error itself. +/// +/// # Examples +/// +/// ``` +/// format!("test"); +/// format!("hello {}", "world!"); +/// format!("x = {}, y = {y}", 10, y = 30); +/// ``` +#[macro_export] +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "format_macro")] +macro_rules! format { + ($($arg:tt)*) => {{ + let res = $crate::fmt::format($crate::__export::format_args!($($arg)*)); + res + }} +} + +/// Force AST node to an expression to improve diagnostics in pattern position. +#[doc(hidden)] +#[macro_export] +#[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] +macro_rules! __rust_force_expr { + ($e:expr) => { + $e + }; +} diff --git a/rust/alloc/prelude/mod.rs b/rust/alloc/prelude/mod.rs new file mode 100644 index 0000000000000..a64a1843760eb --- /dev/null +++ b/rust/alloc/prelude/mod.rs @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! The alloc Prelude +//! +//! The purpose of this module is to alleviate imports of commonly-used +//! items of the `alloc` crate by adding a glob import to the top of modules: +//! +//! ``` +//! # #![allow(unused_imports)] +//! #![feature(alloc_prelude)] +//! extern crate alloc; +//! use alloc::prelude::v1::*; +//! ``` + +#![unstable(feature = "alloc_prelude", issue = "58935")] + +pub mod v1; diff --git a/rust/alloc/prelude/v1.rs b/rust/alloc/prelude/v1.rs new file mode 100644 index 0000000000000..48d75431c0d13 --- /dev/null +++ b/rust/alloc/prelude/v1.rs @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! 
The first version of the prelude of `alloc` crate. +//! +//! See the [module-level documentation](../index.html) for more. + +#![unstable(feature = "alloc_prelude", issue = "58935")] + +#[unstable(feature = "alloc_prelude", issue = "58935")] +pub use crate::borrow::ToOwned; +#[unstable(feature = "alloc_prelude", issue = "58935")] +pub use crate::boxed::Box; +#[unstable(feature = "alloc_prelude", issue = "58935")] +pub use crate::string::{String, ToString}; +#[unstable(feature = "alloc_prelude", issue = "58935")] +pub use crate::vec::Vec; diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs new file mode 100644 index 0000000000000..629dbd3927d12 --- /dev/null +++ b/rust/alloc/raw_vec.rs @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#![unstable(feature = "raw_vec_internals", reason = "implementation detail", issue = "none")] +#![doc(hidden)] + +use core::alloc::LayoutError; +use core::cmp; +use core::intrinsics; +use core::mem::{self, ManuallyDrop, MaybeUninit}; +use core::ops::Drop; +use core::ptr::{self, NonNull, Unique}; +use core::slice; + +#[cfg(not(no_global_oom_handling))] +use crate::alloc::handle_alloc_error; +use crate::alloc::{Allocator, Global, Layout}; +use crate::boxed::Box; +use crate::collections::TryReserveError::{self, *}; + +#[cfg(test)] +mod tests; + +#[allow(dead_code)] +enum AllocInit { + /// The contents of the new memory are uninitialized. + Uninitialized, + /// The new memory is guaranteed to be zeroed. + Zeroed, +} + +/// A low-level utility for more ergonomically allocating, reallocating, and deallocating +/// a buffer of memory on the heap without having to worry about all the corner cases +/// involved. This type is excellent for building your own data structures like Vec and VecDeque. +/// In particular: +/// +/// * Produces `Unique::dangling()` on zero-sized types. +/// * Produces `Unique::dangling()` on zero-length allocations. +/// * Avoids freeing `Unique::dangling()`. 
+/// * Catches all overflows in capacity computations (promotes them to "capacity overflow" panics). +/// * Guards against 32-bit systems allocating more than isize::MAX bytes. +/// * Guards against overflowing your length. +/// * Calls `handle_alloc_error` for fallible allocations. +/// * Contains a `ptr::Unique` and thus endows the user with all related benefits. +/// * Uses the excess returned from the allocator to use the largest available capacity. +/// +/// This type does not in any way inspect the memory that it manages. When dropped it *will* +/// free its memory, but it *won't* try to drop its contents. It is up to the user of `RawVec` +/// to handle the actual things *stored* inside of a `RawVec`. +/// +/// Note that the excess of zero-sized types is always infinite, so `capacity()` always returns +/// `usize::MAX`. This means that you need to be careful when round-tripping this type with a +/// `Box<[T]>`, since `capacity()` won't yield the length. +#[allow(missing_debug_implementations)] +pub struct RawVec { + ptr: Unique, + cap: usize, + alloc: A, +} + +impl RawVec { + /// HACK(Centril): This exists because stable `const fn` can only call stable `const fn`, so + /// they cannot call `Self::new()`. + /// + /// If you change `RawVec::new` or dependencies, please take care to not introduce anything + /// that would truly const-call something unstable. + pub const NEW: Self = Self::new(); + + /// Creates the biggest possible `RawVec` (on the system heap) + /// without allocating. If `T` has positive size, then this makes a + /// `RawVec` with capacity `0`. If `T` is zero-sized, then it makes a + /// `RawVec` with capacity `usize::MAX`. Useful for implementing + /// delayed allocation. + pub const fn new() -> Self { + Self::new_in(Global) + } + + /// Creates a `RawVec` (on the system heap) with exactly the + /// capacity and alignment requirements for a `[T; capacity]`. 
This is + /// equivalent to calling `RawVec::new` when `capacity` is `0` or `T` is + /// zero-sized. Note that if `T` is zero-sized this means you will + /// *not* get a `RawVec` with the requested capacity. + /// + /// # Panics + /// + /// Panics if the requested capacity exceeds `isize::MAX` bytes. + /// + /// # Aborts + /// + /// Aborts on OOM. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_in(capacity, Global) + } + + /// Tries to create a `RawVec` (on the system heap) with exactly the + /// capacity and alignment requirements for a `[T; capacity]`. This is + /// equivalent to calling `RawVec::new` when `capacity` is `0` or `T` is + /// zero-sized. Note that if `T` is zero-sized this means you will + /// *not* get a `RawVec` with the requested capacity. + #[inline] + pub fn try_with_capacity(capacity: usize) -> Result { + Self::try_with_capacity_in(capacity, Global) + } + + /// Like `with_capacity`, but guarantees the buffer is zeroed. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn with_capacity_zeroed(capacity: usize) -> Self { + Self::with_capacity_zeroed_in(capacity, Global) + } + + /// Reconstitutes a `RawVec` from a pointer and capacity. + /// + /// # Safety + /// + /// The `ptr` must be allocated (on the system heap), and with the given `capacity`. + /// The `capacity` cannot exceed `isize::MAX` for sized types. (only a concern on 32-bit + /// systems). ZST vectors may have a capacity up to `usize::MAX`. + /// If the `ptr` and `capacity` come from a `RawVec`, then this is guaranteed. + #[inline] + pub unsafe fn from_raw_parts(ptr: *mut T, capacity: usize) -> Self { + unsafe { Self::from_raw_parts_in(ptr, capacity, Global) } + } +} + +impl RawVec { + // Tiny Vecs are dumb. Skip to: + // - 8 if the element size is 1, because any heap allocator is likely + // to round up a request of less than 8 bytes to at least 8 bytes. 
+ // - 4 if elements are moderate-sized (<= 1 KiB). + // - 1 otherwise, to avoid wasting too much space for very short Vecs. + const MIN_NON_ZERO_CAP: usize = if mem::size_of::() == 1 { + 8 + } else if mem::size_of::() <= 1024 { + 4 + } else { + 1 + }; + + /// Like `new`, but parameterized over the choice of allocator for + /// the returned `RawVec`. + #[rustc_allow_const_fn_unstable(const_fn)] + pub const fn new_in(alloc: A) -> Self { + // `cap: 0` means "unallocated". zero-sized types are ignored. + Self { ptr: Unique::dangling(), cap: 0, alloc } + } + + /// Like `with_capacity`, but parameterized over the choice of + /// allocator for the returned `RawVec`. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { + Self::allocate_in(capacity, AllocInit::Uninitialized, alloc) + } + + /// Like `try_with_capacity`, but parameterized over the choice of + /// allocator for the returned `RawVec`. + #[inline] + pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result { + Self::try_allocate_in(capacity, AllocInit::Uninitialized, alloc) + } + + /// Like `with_capacity_zeroed`, but parameterized over the choice + /// of allocator for the returned `RawVec`. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn with_capacity_zeroed_in(capacity: usize, alloc: A) -> Self { + Self::allocate_in(capacity, AllocInit::Zeroed, alloc) + } + + /// Converts a `Box<[T]>` into a `RawVec`. + pub fn from_box(slice: Box<[T], A>) -> Self { + unsafe { + let (slice, alloc) = Box::into_raw_with_allocator(slice); + RawVec::from_raw_parts_in(slice.as_mut_ptr(), slice.len(), alloc) + } + } + + /// Converts the entire buffer into `Box<[MaybeUninit]>` with the specified `len`. + /// + /// Note that this will correctly reconstitute any `cap` changes + /// that may have been performed. (See description of type for details.) 
+ /// + /// # Safety + /// + /// * `len` must be greater than or equal to the most recently requested capacity, and + /// * `len` must be less than or equal to `self.capacity()`. + /// + /// Note, that the requested capacity and `self.capacity()` could differ, as + /// an allocator could overallocate and return a greater memory block than requested. + pub unsafe fn into_box(self, len: usize) -> Box<[MaybeUninit], A> { + // Sanity-check one half of the safety requirement (we cannot check the other half). + debug_assert!( + len <= self.capacity(), + "`len` must be smaller than or equal to `self.capacity()`" + ); + + let me = ManuallyDrop::new(self); + unsafe { + let slice = slice::from_raw_parts_mut(me.ptr() as *mut MaybeUninit, len); + Box::from_raw_in(slice, ptr::read(&me.alloc)) + } + } + + #[cfg(not(no_global_oom_handling))] + fn allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Self { + if mem::size_of::() == 0 { + Self::new_in(alloc) + } else { + // We avoid `unwrap_or_else` here because it bloats the amount of + // LLVM IR generated. 
+ let layout = match Layout::array::(capacity) { + Ok(layout) => layout, + Err(_) => capacity_overflow(), + }; + match alloc_guard(layout.size()) { + Ok(_) => {} + Err(_) => capacity_overflow(), + } + let result = match init { + AllocInit::Uninitialized => alloc.allocate(layout), + AllocInit::Zeroed => alloc.allocate_zeroed(layout), + }; + let ptr = match result { + Ok(ptr) => ptr, + Err(_) => handle_alloc_error(layout), + }; + + Self { + ptr: unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }, + cap: Self::capacity_from_bytes(ptr.len()), + alloc, + } + } + } + + fn try_allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Result { + if mem::size_of::() == 0 { + return Ok(Self::new_in(alloc)); + } + + let layout = Layout::array::(capacity)?; + alloc_guard(layout.size())?; + let result = match init { + AllocInit::Uninitialized => alloc.allocate(layout), + AllocInit::Zeroed => alloc.allocate_zeroed(layout), + }; + let ptr = match result { + Ok(ptr) => ptr, + Err(_) => return Err(TryReserveError::AllocError { layout, non_exhaustive: () }), + }; + + Ok(Self { + ptr: unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }, + cap: Self::capacity_from_bytes(ptr.len()), + alloc, + }) + } + + /// Reconstitutes a `RawVec` from a pointer, capacity, and allocator. + /// + /// # Safety + /// + /// The `ptr` must be allocated (via the given allocator `alloc`), and with the given + /// `capacity`. + /// The `capacity` cannot exceed `isize::MAX` for sized types. (only a concern on 32-bit + /// systems). ZST vectors may have a capacity up to `usize::MAX`. + /// If the `ptr` and `capacity` come from a `RawVec` created via `alloc`, then this is + /// guaranteed. + #[inline] + pub unsafe fn from_raw_parts_in(ptr: *mut T, capacity: usize, alloc: A) -> Self { + Self { ptr: unsafe { Unique::new_unchecked(ptr) }, cap: capacity, alloc } + } + + /// Gets a raw pointer to the start of the allocation. Note that this is + /// `Unique::dangling()` if `capacity == 0` or `T` is zero-sized. 
In the former case, you must + /// be careful. + #[inline] + pub fn ptr(&self) -> *mut T { + self.ptr.as_ptr() + } + + /// Gets the capacity of the allocation. + /// + /// This will always be `usize::MAX` if `T` is zero-sized. + #[inline(always)] + pub fn capacity(&self) -> usize { + if mem::size_of::() == 0 { usize::MAX } else { self.cap } + } + + /// Returns a shared reference to the allocator backing this `RawVec`. + pub fn allocator(&self) -> &A { + &self.alloc + } + + fn current_memory(&self) -> Option<(NonNull, Layout)> { + if mem::size_of::() == 0 || self.cap == 0 { + None + } else { + // We have an allocated chunk of memory, so we can bypass runtime + // checks to get our current layout. + unsafe { + let align = mem::align_of::(); + let size = mem::size_of::() * self.cap; + let layout = Layout::from_size_align_unchecked(size, align); + Some((self.ptr.cast().into(), layout)) + } + } + } + + /// Ensures that the buffer contains at least enough space to hold `len + + /// additional` elements. If it doesn't already have enough capacity, will + /// reallocate enough space plus comfortable slack space to get amortized + /// *O*(1) behavior. Will limit this behavior if it would needlessly cause + /// itself to panic. + /// + /// If `len` exceeds `self.capacity()`, this may fail to actually allocate + /// the requested space. This is not really unsafe, but the unsafe + /// code *you* write that relies on the behavior of this function may break. + /// + /// This is ideal for implementing a bulk-push operation like `extend`. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Aborts + /// + /// Aborts on OOM. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(raw_vec_internals)] + /// # extern crate alloc; + /// # use std::ptr; + /// # use alloc::raw_vec::RawVec; + /// struct MyVec { + /// buf: RawVec, + /// len: usize, + /// } + /// + /// impl MyVec { + /// pub fn push_all(&mut self, elems: &[T]) { + /// self.buf.reserve(self.len, elems.len()); + /// // reserve would have aborted or panicked if the len exceeded + /// // `isize::MAX` so this is safe to do unchecked now. + /// for x in elems { + /// unsafe { + /// ptr::write(self.buf.ptr().add(self.len), x.clone()); + /// } + /// self.len += 1; + /// } + /// } + /// } + /// # fn main() { + /// # let mut vector = MyVec { buf: RawVec::new(), len: 0 }; + /// # vector.push_all(&[1, 3, 5, 7, 9]); + /// # } + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn reserve(&mut self, len: usize, additional: usize) { + // Callers expect this function to be very cheap when there is already sufficient capacity. + // Therefore, we move all the resizing and error-handling logic from grow_amortized and + // handle_reserve behind a call, while making sure that this function is likely to be + // inlined as just a comparison and a call if the comparison fails. + #[cold] + fn do_reserve_and_handle( + slf: &mut RawVec, + len: usize, + additional: usize, + ) { + handle_reserve(slf.grow_amortized(len, additional)); + } + + if self.needs_to_grow(len, additional) { + do_reserve_and_handle(self, len, additional); + } + } + + /// The same as `reserve`, but returns on errors instead of panicking or aborting. + pub fn try_reserve(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { + if self.needs_to_grow(len, additional) { + self.grow_amortized(len, additional) + } else { + Ok(()) + } + } + + /// Ensures that the buffer contains at least enough space to hold `len + + /// additional` elements. If it doesn't already, will reallocate the + /// minimum possible amount of memory necessary. 
Generally this will be + /// exactly the amount of memory necessary, but in principle the allocator + /// is free to give back more than we asked for. + /// + /// If `len` exceeds `self.capacity()`, this may fail to actually allocate + /// the requested space. This is not really unsafe, but the unsafe code + /// *you* write that relies on the behavior of this function may break. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Aborts + /// + /// Aborts on OOM. + #[cfg(not(no_global_oom_handling))] + pub fn reserve_exact(&mut self, len: usize, additional: usize) { + handle_reserve(self.try_reserve_exact(len, additional)); + } + + /// The same as `reserve_exact`, but returns on errors instead of panicking or aborting. + pub fn try_reserve_exact( + &mut self, + len: usize, + additional: usize, + ) -> Result<(), TryReserveError> { + if self.needs_to_grow(len, additional) { self.grow_exact(len, additional) } else { Ok(()) } + } + + /// Shrinks the allocation down to the specified amount. If the given amount + /// is 0, actually completely deallocates. + /// + /// # Panics + /// + /// Panics if the given amount is *larger* than the current capacity. + /// + /// # Aborts + /// + /// Aborts on OOM. + #[cfg(not(no_global_oom_handling))] + pub fn shrink_to_fit(&mut self, amount: usize) { + handle_reserve(self.shrink(amount)); + } + + /// Tries to shrink the allocation down to the specified amount. If the given amount + /// is 0, actually completely deallocates. + /// + /// # Panics + /// + /// Panics if the given amount is *larger* than the current capacity. + pub fn try_shrink_to_fit(&mut self, amount: usize) -> Result<(), TryReserveError> { + self.shrink(amount) + } +} + +impl RawVec { + /// Returns if the buffer needs to grow to fulfill the needed extra capacity. + /// Mainly used to make inlining reserve-calls possible without inlining `grow`. 
+ fn needs_to_grow(&self, len: usize, additional: usize) -> bool { + additional > self.capacity().wrapping_sub(len) + } + + fn capacity_from_bytes(excess: usize) -> usize { + debug_assert_ne!(mem::size_of::(), 0); + excess / mem::size_of::() + } + + fn set_ptr(&mut self, ptr: NonNull<[u8]>) { + self.ptr = unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }; + self.cap = Self::capacity_from_bytes(ptr.len()); + } + + // This method is usually instantiated many times. So we want it to be as + // small as possible, to improve compile times. But we also want as much of + // its contents to be statically computable as possible, to make the + // generated code run faster. Therefore, this method is carefully written + // so that all of the code that depends on `T` is within it, while as much + // of the code that doesn't depend on `T` as possible is in functions that + // are non-generic over `T`. + fn grow_amortized(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { + // This is ensured by the calling contexts. + debug_assert!(additional > 0); + + if mem::size_of::() == 0 { + // Since we return a capacity of `usize::MAX` when `elem_size` is + // 0, getting to here necessarily means the `RawVec` is overfull. + return Err(CapacityOverflow); + } + + // Nothing we can really do about these checks, sadly. + let required_cap = len.checked_add(additional).ok_or(CapacityOverflow)?; + + // This guarantees exponential growth. The doubling cannot overflow + // because `cap <= isize::MAX` and the type of `cap` is `usize`. + let cap = cmp::max(self.cap * 2, required_cap); + let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap); + + let new_layout = Layout::array::(cap); + + // `finish_grow` is non-generic over `T`. 
+ let ptr = finish_grow(new_layout, self.current_memory(), &mut self.alloc)?; + self.set_ptr(ptr); + Ok(()) + } + + // The constraints on this method are much the same as those on + // `grow_amortized`, but this method is usually instantiated less often so + // it's less critical. + fn grow_exact(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { + if mem::size_of::() == 0 { + // Since we return a capacity of `usize::MAX` when the type size is + // 0, getting to here necessarily means the `RawVec` is overfull. + return Err(CapacityOverflow); + } + + let cap = len.checked_add(additional).ok_or(CapacityOverflow)?; + let new_layout = Layout::array::(cap); + + // `finish_grow` is non-generic over `T`. + let ptr = finish_grow(new_layout, self.current_memory(), &mut self.alloc)?; + self.set_ptr(ptr); + Ok(()) + } + + fn shrink(&mut self, amount: usize) -> Result<(), TryReserveError> { + assert!(amount <= self.capacity(), "Tried to shrink to a larger capacity"); + + let (ptr, layout) = if let Some(mem) = self.current_memory() { mem } else { return Ok(()) }; + let new_size = amount * mem::size_of::(); + + let ptr = unsafe { + let new_layout = Layout::from_size_align_unchecked(new_size, layout.align()); + self.alloc.shrink(ptr, layout, new_layout).map_err(|_| TryReserveError::AllocError { + layout: new_layout, + non_exhaustive: (), + })? + }; + self.set_ptr(ptr); + Ok(()) + } +} + +// This function is outside `RawVec` to minimize compile times. See the comment +// above `RawVec::grow_amortized` for details. (The `A` parameter isn't +// significant, because the number of different `A` types seen in practice is +// much smaller than the number of `T` types.) +#[inline(never)] +fn finish_grow( + new_layout: Result, + current_memory: Option<(NonNull, Layout)>, + alloc: &mut A, +) -> Result, TryReserveError> +where + A: Allocator, +{ + // Check for the error here to minimize the size of `RawVec::grow_*`. 
+ let new_layout = new_layout.map_err(|_| CapacityOverflow)?; + + alloc_guard(new_layout.size())?; + + let memory = if let Some((ptr, old_layout)) = current_memory { + debug_assert_eq!(old_layout.align(), new_layout.align()); + unsafe { + // The allocator checks for alignment equality + intrinsics::assume(old_layout.align() == new_layout.align()); + alloc.grow(ptr, old_layout, new_layout) + } + } else { + alloc.allocate(new_layout) + }; + + memory.map_err(|_| AllocError { layout: new_layout, non_exhaustive: () }) +} + +unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawVec { + /// Frees the memory owned by the `RawVec` *without* trying to drop its contents. + fn drop(&mut self) { + if let Some((ptr, layout)) = self.current_memory() { + unsafe { self.alloc.deallocate(ptr, layout) } + } + } +} + +// Central function for reserve error handling. +#[cfg(not(no_global_oom_handling))] +#[inline] +fn handle_reserve(result: Result<(), TryReserveError>) { + match result { + Err(CapacityOverflow) => capacity_overflow(), + Err(AllocError { layout, .. }) => handle_alloc_error(layout), + Ok(()) => { /* yay */ } + } +} + +// We need to guarantee the following: +// * We don't ever allocate `> isize::MAX` byte-size objects. +// * We don't overflow `usize::MAX` and actually allocate too little. +// +// On 64-bit we just need to check for overflow since trying to allocate +// `> isize::MAX` bytes will surely fail. On 32-bit and 16-bit we need to add +// an extra guard for this in case we're running on a platform which can use +// all 4GB in user-space, e.g., PAE or x32. + +#[inline] +fn alloc_guard(alloc_size: usize) -> Result<(), TryReserveError> { + if usize::BITS < 64 && alloc_size > isize::MAX as usize { + Err(CapacityOverflow) + } else { + Ok(()) + } +} + +// One central function responsible for reporting capacity overflows. 
This'll +// ensure that the code generation related to these panics is minimal as there's +// only one location which panics rather than a bunch throughout the module. +#[cfg(not(no_global_oom_handling))] +fn capacity_overflow() -> ! { + panic!("capacity overflow"); +} diff --git a/rust/alloc/rc.rs b/rust/alloc/rc.rs new file mode 100644 index 0000000000000..7344cd9a449ef --- /dev/null +++ b/rust/alloc/rc.rs @@ -0,0 +1,2539 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Single-threaded reference-counting pointers. 'Rc' stands for 'Reference +//! Counted'. +//! +//! The type [`Rc`][`Rc`] provides shared ownership of a value of type `T`, +//! allocated in the heap. Invoking [`clone`][clone] on [`Rc`] produces a new +//! pointer to the same allocation in the heap. When the last [`Rc`] pointer to a +//! given allocation is destroyed, the value stored in that allocation (often +//! referred to as "inner value") is also dropped. +//! +//! Shared references in Rust disallow mutation by default, and [`Rc`] +//! is no exception: you cannot generally obtain a mutable reference to +//! something inside an [`Rc`]. If you need mutability, put a [`Cell`] +//! or [`RefCell`] inside the [`Rc`]; see [an example of mutability +//! inside an `Rc`][mutability]. +//! +//! [`Rc`] uses non-atomic reference counting. This means that overhead is very +//! low, but an [`Rc`] cannot be sent between threads, and consequently [`Rc`] +//! does not implement [`Send`][send]. As a result, the Rust compiler +//! will check *at compile time* that you are not sending [`Rc`]s between +//! threads. If you need multi-threaded, atomic reference counting, use +//! [`sync::Arc`][arc]. +//! +//! The [`downgrade`][downgrade] method can be used to create a non-owning +//! [`Weak`] pointer. A [`Weak`] pointer can be [`upgrade`][upgrade]d +//! to an [`Rc`], but this will return [`None`] if the value stored in the allocation has +//! already been dropped. 
In other words, `Weak` pointers do not keep the value +//! inside the allocation alive; however, they *do* keep the allocation +//! (the backing store for the inner value) alive. +//! +//! A cycle between [`Rc`] pointers will never be deallocated. For this reason, +//! [`Weak`] is used to break cycles. For example, a tree could have strong +//! [`Rc`] pointers from parent nodes to children, and [`Weak`] pointers from +//! children back to their parents. +//! +//! `Rc` automatically dereferences to `T` (via the [`Deref`] trait), +//! so you can call `T`'s methods on a value of type [`Rc`][`Rc`]. To avoid name +//! clashes with `T`'s methods, the methods of [`Rc`][`Rc`] itself are associated +//! functions, called using [fully qualified syntax]: +//! +//! ``` +//! use std::rc::Rc; +//! +//! let my_rc = Rc::new(()); +//! Rc::downgrade(&my_rc); +//! ``` +//! +//! `Rc`'s implementations of traits like `Clone` may also be called using +//! fully qualified syntax. Some people prefer to use fully qualified syntax, +//! while others prefer using method-call syntax. +//! +//! ``` +//! use std::rc::Rc; +//! +//! let rc = Rc::new(()); +//! // Method-call syntax +//! let rc2 = rc.clone(); +//! // Fully qualified syntax +//! let rc3 = Rc::clone(&rc); +//! ``` +//! +//! [`Weak`][`Weak`] does not auto-dereference to `T`, because the inner value may have +//! already been dropped. +//! +//! # Cloning references +//! +//! Creating a new reference to the same allocation as an existing reference counted pointer +//! is done using the `Clone` trait implemented for [`Rc`][`Rc`] and [`Weak`][`Weak`]. +//! +//! ``` +//! use std::rc::Rc; +//! +//! let foo = Rc::new(vec![1.0, 2.0, 3.0]); +//! // The two syntaxes below are equivalent. +//! let a = foo.clone(); +//! let b = Rc::clone(&foo); +//! // a and b both point to the same memory location as foo. +//! ``` +//! +//! The `Rc::clone(&from)` syntax is the most idiomatic because it conveys more explicitly +//! the meaning of the code. 
In the example above, this syntax makes it easier to see that +//! this code is creating a new reference rather than copying the whole content of foo. +//! +//! # Examples +//! +//! Consider a scenario where a set of `Gadget`s are owned by a given `Owner`. +//! We want to have our `Gadget`s point to their `Owner`. We can't do this with +//! unique ownership, because more than one gadget may belong to the same +//! `Owner`. [`Rc`] allows us to share an `Owner` between multiple `Gadget`s, +//! and have the `Owner` remain allocated as long as any `Gadget` points at it. +//! +//! ``` +//! use std::rc::Rc; +//! +//! struct Owner { +//! name: String, +//! // ...other fields +//! } +//! +//! struct Gadget { +//! id: i32, +//! owner: Rc, +//! // ...other fields +//! } +//! +//! fn main() { +//! // Create a reference-counted `Owner`. +//! let gadget_owner: Rc = Rc::new( +//! Owner { +//! name: "Gadget Man".to_string(), +//! } +//! ); +//! +//! // Create `Gadget`s belonging to `gadget_owner`. Cloning the `Rc` +//! // gives us a new pointer to the same `Owner` allocation, incrementing +//! // the reference count in the process. +//! let gadget1 = Gadget { +//! id: 1, +//! owner: Rc::clone(&gadget_owner), +//! }; +//! let gadget2 = Gadget { +//! id: 2, +//! owner: Rc::clone(&gadget_owner), +//! }; +//! +//! // Dispose of our local variable `gadget_owner`. +//! drop(gadget_owner); +//! +//! // Despite dropping `gadget_owner`, we're still able to print out the name +//! // of the `Owner` of the `Gadget`s. This is because we've only dropped a +//! // single `Rc`, not the `Owner` it points to. As long as there are +//! // other `Rc` pointing at the same `Owner` allocation, it will remain +//! // live. The field projection `gadget1.owner.name` works because +//! // `Rc` automatically dereferences to `Owner`. +//! println!("Gadget {} owned by {}", gadget1.id, gadget1.owner.name); +//! println!("Gadget {} owned by {}", gadget2.id, gadget2.owner.name); +//! +//! 
// At the end of the function, `gadget1` and `gadget2` are destroyed, and +//! // with them the last counted references to our `Owner`. Gadget Man now +//! // gets destroyed as well. +//! } +//! ``` +//! +//! If our requirements change, and we also need to be able to traverse from +//! `Owner` to `Gadget`, we will run into problems. An [`Rc`] pointer from `Owner` +//! to `Gadget` introduces a cycle. This means that their +//! reference counts can never reach 0, and the allocation will never be destroyed: +//! a memory leak. In order to get around this, we can use [`Weak`] +//! pointers. +//! +//! Rust actually makes it somewhat difficult to produce this loop in the first +//! place. In order to end up with two values that point at each other, one of +//! them needs to be mutable. This is difficult because [`Rc`] enforces +//! memory safety by only giving out shared references to the value it wraps, +//! and these don't allow direct mutation. We need to wrap the part of the +//! value we wish to mutate in a [`RefCell`], which provides *interior +//! mutability*: a method to achieve mutability through a shared reference. +//! [`RefCell`] enforces Rust's borrowing rules at runtime. +//! +//! ``` +//! use std::rc::Rc; +//! use std::rc::Weak; +//! use std::cell::RefCell; +//! +//! struct Owner { +//! name: String, +//! gadgets: RefCell>>, +//! // ...other fields +//! } +//! +//! struct Gadget { +//! id: i32, +//! owner: Rc, +//! // ...other fields +//! } +//! +//! fn main() { +//! // Create a reference-counted `Owner`. Note that we've put the `Owner`'s +//! // vector of `Gadget`s inside a `RefCell` so that we can mutate it through +//! // a shared reference. +//! let gadget_owner: Rc = Rc::new( +//! Owner { +//! name: "Gadget Man".to_string(), +//! gadgets: RefCell::new(vec![]), +//! } +//! ); +//! +//! // Create `Gadget`s belonging to `gadget_owner`, as before. +//! let gadget1 = Rc::new( +//! Gadget { +//! id: 1, +//! owner: Rc::clone(&gadget_owner), +//! } +//! 
); +//! let gadget2 = Rc::new( +//! Gadget { +//! id: 2, +//! owner: Rc::clone(&gadget_owner), +//! } +//! ); +//! +//! // Add the `Gadget`s to their `Owner`. +//! { +//! let mut gadgets = gadget_owner.gadgets.borrow_mut(); +//! gadgets.push(Rc::downgrade(&gadget1)); +//! gadgets.push(Rc::downgrade(&gadget2)); +//! +//! // `RefCell` dynamic borrow ends here. +//! } +//! +//! // Iterate over our `Gadget`s, printing their details out. +//! for gadget_weak in gadget_owner.gadgets.borrow().iter() { +//! +//! // `gadget_weak` is a `Weak`. Since `Weak` pointers can't +//! // guarantee the allocation still exists, we need to call +//! // `upgrade`, which returns an `Option>`. +//! // +//! // In this case we know the allocation still exists, so we simply +//! // `unwrap` the `Option`. In a more complicated program, you might +//! // need graceful error handling for a `None` result. +//! +//! let gadget = gadget_weak.upgrade().unwrap(); +//! println!("Gadget {} owned by {}", gadget.id, gadget.owner.name); +//! } +//! +//! // At the end of the function, `gadget_owner`, `gadget1`, and `gadget2` +//! // are destroyed. There are now no strong (`Rc`) pointers to the +//! // gadgets, so they are destroyed. This zeroes the reference count on +//! // Gadget Man, so he gets destroyed as well. +//! } +//! ``` +//! +//! [clone]: Clone::clone +//! [`Cell`]: core::cell::Cell +//! [`RefCell`]: core::cell::RefCell +//! [send]: core::marker::Send +//! [arc]: crate::sync::Arc +//! [`Deref`]: core::ops::Deref +//! [downgrade]: Rc::downgrade +//! [upgrade]: Weak::upgrade +//! [mutability]: core::cell#introducing-mutability-inside-of-something-immutable +//! 
[fully qualified syntax]: https://doc.rust-lang.org/book/ch19-03-advanced-traits.html#fully-qualified-syntax-for-disambiguation-calling-methods-with-the-same-name + +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(test))] +use crate::boxed::Box; +#[cfg(test)] +use std::boxed::Box; + +use core::any::Any; +use core::borrow; +use core::cell::Cell; +use core::cmp::Ordering; +use core::convert::{From, TryFrom}; +use core::fmt; +use core::hash::{Hash, Hasher}; +use core::intrinsics::abort; +#[cfg(not(no_global_oom_handling))] +use core::iter; +use core::marker::{self, PhantomData, Unpin, Unsize}; +#[cfg(not(no_global_oom_handling))] +use core::mem::size_of_val; +use core::mem::{self, align_of_val_raw, forget}; +use core::ops::{CoerceUnsized, Deref, DispatchFromDyn, Receiver}; +#[cfg(not(no_global_oom_handling))] +use core::pin::Pin; +use core::ptr::{self, NonNull}; +#[cfg(not(no_global_oom_handling))] +use core::slice::from_raw_parts_mut; + +#[cfg(not(no_global_oom_handling))] +use crate::alloc::handle_alloc_error; +#[cfg(not(no_global_oom_handling))] +use crate::alloc::{box_free, WriteCloneIntoRaw}; +use crate::alloc::{AllocError, Allocator, Global, Layout}; +use crate::borrow::{Cow, ToOwned}; +#[cfg(not(no_global_oom_handling))] +use crate::string::String; +#[cfg(not(no_global_oom_handling))] +use crate::vec::Vec; + +#[cfg(test)] +mod tests; + +// This is repr(C) to future-proof against possible field-reordering, which +// would interfere with otherwise safe [into|from]_raw() of transmutable +// inner types. +#[repr(C)] +struct RcBox { + strong: Cell, + weak: Cell, + value: T, +} + +/// A single-threaded reference-counting pointer. 'Rc' stands for 'Reference +/// Counted'. +/// +/// See the [module-level documentation](./index.html) for more details. +/// +/// The inherent methods of `Rc` are all associated functions, which means +/// that you have to call them as e.g., [`Rc::get_mut(&mut value)`][get_mut] instead of +/// `value.get_mut()`. 
This avoids conflicts with methods of the inner type `T`. +/// +/// [get_mut]: Rc::get_mut +#[cfg_attr(not(test), rustc_diagnostic_item = "Rc")] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Rc { + ptr: NonNull>, + phantom: PhantomData>, +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl !marker::Send for Rc {} +#[stable(feature = "rust1", since = "1.0.0")] +impl !marker::Sync for Rc {} + +#[unstable(feature = "coerce_unsized", issue = "27732")] +impl, U: ?Sized> CoerceUnsized> for Rc {} + +#[unstable(feature = "dispatch_from_dyn", issue = "none")] +impl, U: ?Sized> DispatchFromDyn> for Rc {} + +impl Rc { + #[inline(always)] + fn inner(&self) -> &RcBox { + // This unsafety is ok because while this Rc is alive we're guaranteed + // that the inner pointer is valid. + unsafe { self.ptr.as_ref() } + } + + fn from_inner(ptr: NonNull>) -> Self { + Self { ptr, phantom: PhantomData } + } + + unsafe fn from_ptr(ptr: *mut RcBox) -> Self { + Self::from_inner(unsafe { NonNull::new_unchecked(ptr) }) + } +} + +impl Rc { + /// Constructs a new `Rc`. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new(value: T) -> Rc { + // There is an implicit weak pointer owned by all the strong + // pointers, which ensures that the weak destructor never frees + // the allocation while the strong destructor is running, even + // if the weak pointer is stored inside the strong one. + Self::from_inner( + Box::leak(box RcBox { strong: Cell::new(1), weak: Cell::new(1), value }).into(), + ) + } + + /// Constructs a new `Rc` using a weak reference to itself. Attempting + /// to upgrade the weak reference before this function returns will result + /// in a `None` value. However, the weak reference may be cloned freely and + /// stored for use at a later time. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(arc_new_cyclic)] + /// #![allow(dead_code)] + /// use std::rc::{Rc, Weak}; + /// + /// struct Gadget { + /// self_weak: Weak, + /// // ... more fields + /// } + /// impl Gadget { + /// pub fn new() -> Rc { + /// Rc::new_cyclic(|self_weak| { + /// Gadget { self_weak: self_weak.clone(), /* ... */ } + /// }) + /// } + /// } + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "arc_new_cyclic", issue = "75861")] + pub fn new_cyclic(data_fn: impl FnOnce(&Weak) -> T) -> Rc { + // Construct the inner in the "uninitialized" state with a single + // weak reference. + let uninit_ptr: NonNull<_> = Box::leak(box RcBox { + strong: Cell::new(0), + weak: Cell::new(1), + value: mem::MaybeUninit::::uninit(), + }) + .into(); + + let init_ptr: NonNull> = uninit_ptr.cast(); + + let weak = Weak { ptr: init_ptr }; + + // It's important we don't give up ownership of the weak pointer, or + // else the memory might be freed by the time `data_fn` returns. If + // we really wanted to pass ownership, we could create an additional + // weak pointer for ourselves, but this would result in additional + // updates to the weak reference count which might not be necessary + // otherwise. + let data = data_fn(&weak); + + unsafe { + let inner = init_ptr.as_ptr(); + ptr::write(ptr::addr_of_mut!((*inner).value), data); + + let prev_value = (*inner).strong.get(); + debug_assert_eq!(prev_value, 0, "No prior strong references should exist"); + (*inner).strong.set(1); + } + + let strong = Rc::from_inner(init_ptr); + + // Strong references should collectively own a shared weak reference, + // so don't run the destructor for our old weak reference. + mem::forget(weak); + strong + } + + /// Constructs a new `Rc` with uninitialized contents. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::rc::Rc; + /// + /// let mut five = Rc::::new_uninit(); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// Rc::get_mut_unchecked(&mut five).as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit() -> Rc> { + unsafe { + Rc::from_ptr(Rc::allocate_for_layout( + Layout::new::(), + |layout| Global.allocate(layout), + |mem| mem as *mut RcBox>, + )) + } + } + + /// Constructs a new `Rc` with uninitialized contents, with the memory + /// being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and + /// incorrect usage of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// use std::rc::Rc; + /// + /// let zero = Rc::::new_zeroed(); + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed() -> Rc> { + unsafe { + Rc::from_ptr(Rc::allocate_for_layout( + Layout::new::(), + |layout| Global.allocate_zeroed(layout), + |mem| mem as *mut RcBox>, + )) + } + } + + /// Constructs a new `Rc`, returning an error if the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// use std::rc::Rc; + /// + /// let five = Rc::try_new(5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn try_new(value: T) -> Result, AllocError> { + // There is an implicit weak pointer owned by all the strong + // pointers, which ensures that the weak destructor never frees + // the allocation while the strong destructor is 
running, even + // if the weak pointer is stored inside the strong one. + Ok(Self::from_inner( + Box::leak(Box::try_new(RcBox { strong: Cell::new(1), weak: Cell::new(1), value })?) + .into(), + )) + } + + /// Constructs a new `Rc` with uninitialized contents, returning an error if the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::rc::Rc; + /// + /// let mut five = Rc::::try_new_uninit()?; + /// + /// let five = unsafe { + /// // Deferred initialization: + /// Rc::get_mut_unchecked(&mut five).as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn try_new_uninit() -> Result>, AllocError> { + unsafe { + Ok(Rc::from_ptr(Rc::try_allocate_for_layout( + Layout::new::(), + |layout| Global.allocate(layout), + |mem| mem as *mut RcBox>, + )?)) + } + } + + /// Constructs a new `Rc` with uninitialized contents, with the memory + /// being filled with `0` bytes, returning an error if the allocation fails + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and + /// incorrect usage of this method. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::rc::Rc; + /// + /// let zero = Rc::::try_new_zeroed()?; + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + //#[unstable(feature = "new_uninit", issue = "63291")] + pub fn try_new_zeroed() -> Result>, AllocError> { + unsafe { + Ok(Rc::from_ptr(Rc::try_allocate_for_layout( + Layout::new::(), + |layout| Global.allocate_zeroed(layout), + |mem| mem as *mut RcBox>, + )?)) + } + } + /// Constructs a new `Pin>`. If `T` does not implement `Unpin`, then + /// `value` will be pinned in memory and unable to be moved. + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "pin", since = "1.33.0")] + pub fn pin(value: T) -> Pin> { + unsafe { Pin::new_unchecked(Rc::new(value)) } + } + + /// Returns the inner value, if the `Rc` has exactly one strong reference. + /// + /// Otherwise, an [`Err`] is returned with the same `Rc` that was + /// passed in. + /// + /// This will succeed even if there are outstanding weak references. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let x = Rc::new(3); + /// assert_eq!(Rc::try_unwrap(x), Ok(3)); + /// + /// let x = Rc::new(4); + /// let _y = Rc::clone(&x); + /// assert_eq!(*Rc::try_unwrap(x).unwrap_err(), 4); + /// ``` + #[inline] + #[stable(feature = "rc_unique", since = "1.4.0")] + pub fn try_unwrap(this: Self) -> Result { + if Rc::strong_count(&this) == 1 { + unsafe { + let val = ptr::read(&*this); // copy the contained object + + // Indicate to Weaks that they can't be promoted by decrementing + // the strong count, and then remove the implicit "strong weak" + // pointer while also handling drop logic by just crafting a + // fake Weak. 
+ this.inner().dec_strong(); + let _weak = Weak { ptr: this.ptr }; + forget(this); + Ok(val) + } + } else { + Err(this) + } + } +} + +impl Rc<[T]> { + /// Constructs a new reference-counted slice with uninitialized contents. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::rc::Rc; + /// + /// let mut values = Rc::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// Rc::get_mut_unchecked(&mut values)[0].as_mut_ptr().write(1); + /// Rc::get_mut_unchecked(&mut values)[1].as_mut_ptr().write(2); + /// Rc::get_mut_unchecked(&mut values)[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit_slice(len: usize) -> Rc<[mem::MaybeUninit]> { + unsafe { Rc::from_ptr(Rc::allocate_for_slice(len)) } + } + + /// Constructs a new reference-counted slice with uninitialized contents, with the memory being + /// filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and + /// incorrect usage of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// use std::rc::Rc; + /// + /// let values = Rc::<[u32]>::new_zeroed_slice(3); + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed_slice(len: usize) -> Rc<[mem::MaybeUninit]> { + unsafe { + Rc::from_ptr(Rc::allocate_for_layout( + Layout::array::(len).unwrap(), + |layout| Global.allocate_zeroed(layout), + |mem| { + ptr::slice_from_raw_parts_mut(mem as *mut T, len) + as *mut RcBox<[mem::MaybeUninit]> + }, + )) + } + } +} + +impl Rc> { + /// Converts to `Rc`. 
+ /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the inner value + /// really is in an initialized state. + /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. + /// + /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::rc::Rc; + /// + /// let mut five = Rc::::new_uninit(); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// Rc::get_mut_unchecked(&mut five).as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub unsafe fn assume_init(self) -> Rc { + Rc::from_inner(mem::ManuallyDrop::new(self).ptr.cast()) + } +} + +impl Rc<[mem::MaybeUninit]> { + /// Converts to `Rc<[T]>`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the inner value + /// really is in an initialized state. + /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. 
+ /// + /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::rc::Rc; + /// + /// let mut values = Rc::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// Rc::get_mut_unchecked(&mut values)[0].as_mut_ptr().write(1); + /// Rc::get_mut_unchecked(&mut values)[1].as_mut_ptr().write(2); + /// Rc::get_mut_unchecked(&mut values)[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub unsafe fn assume_init(self) -> Rc<[T]> { + unsafe { Rc::from_ptr(mem::ManuallyDrop::new(self).ptr.as_ptr() as _) } + } +} + +impl Rc { + /// Consumes the `Rc`, returning the wrapped pointer. + /// + /// To avoid a memory leak the pointer must be converted back to an `Rc` using + /// [`Rc::from_raw`][from_raw]. + /// + /// [from_raw]: Rc::from_raw + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let x = Rc::new("hello".to_owned()); + /// let x_ptr = Rc::into_raw(x); + /// assert_eq!(unsafe { &*x_ptr }, "hello"); + /// ``` + #[stable(feature = "rc_raw", since = "1.17.0")] + pub fn into_raw(this: Self) -> *const T { + let ptr = Self::as_ptr(&this); + mem::forget(this); + ptr + } + + /// Provides a raw pointer to the data. + /// + /// The counts are not affected in any way and the `Rc` is not consumed. The pointer is valid + /// for as long there are strong counts in the `Rc`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let x = Rc::new("hello".to_owned()); + /// let y = Rc::clone(&x); + /// let x_ptr = Rc::as_ptr(&x); + /// assert_eq!(x_ptr, Rc::as_ptr(&y)); + /// assert_eq!(unsafe { &*x_ptr }, "hello"); + /// ``` + #[stable(feature = "weak_into_raw", since = "1.45.0")] + pub fn as_ptr(this: &Self) -> *const T { + let ptr: *mut RcBox = NonNull::as_ptr(this.ptr); + + // SAFETY: This cannot go through Deref::deref or Rc::inner because + // this is required to retain raw/mut provenance such that e.g. `get_mut` can + // write through the pointer after the Rc is recovered through `from_raw`. + unsafe { ptr::addr_of_mut!((*ptr).value) } + } + + /// Constructs an `Rc` from a raw pointer. + /// + /// The raw pointer must have been previously returned by a call to + /// [`Rc::into_raw`][into_raw] where `U` must have the same size + /// and alignment as `T`. This is trivially true if `U` is `T`. + /// Note that if `U` is not `T` but has the same size and alignment, this is + /// basically like transmuting references of different types. See + /// [`mem::transmute`][transmute] for more information on what + /// restrictions apply in this case. + /// + /// The user of `from_raw` has to make sure a specific value of `T` is only + /// dropped once. + /// + /// This function is unsafe because improper use may lead to memory unsafety, + /// even if the returned `Rc` is never accessed. + /// + /// [into_raw]: Rc::into_raw + /// [transmute]: core::mem::transmute + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let x = Rc::new("hello".to_owned()); + /// let x_ptr = Rc::into_raw(x); + /// + /// unsafe { + /// // Convert back to an `Rc` to prevent leak. + /// let x = Rc::from_raw(x_ptr); + /// assert_eq!(&*x, "hello"); + /// + /// // Further calls to `Rc::from_raw(x_ptr)` would be memory-unsafe. + /// } + /// + /// // The memory was freed when `x` went out of scope above, so `x_ptr` is now dangling! 
+ /// ``` + #[stable(feature = "rc_raw", since = "1.17.0")] + pub unsafe fn from_raw(ptr: *const T) -> Self { + let offset = unsafe { data_offset(ptr) }; + + // Reverse the offset to find the original RcBox. + let rc_ptr = + unsafe { (ptr as *mut RcBox).set_ptr_value((ptr as *mut u8).offset(-offset)) }; + + unsafe { Self::from_ptr(rc_ptr) } + } + + /// Creates a new [`Weak`] pointer to this allocation. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// let weak_five = Rc::downgrade(&five); + /// ``` + #[stable(feature = "rc_weak", since = "1.4.0")] + pub fn downgrade(this: &Self) -> Weak { + this.inner().inc_weak(); + // Make sure we do not create a dangling Weak + debug_assert!(!is_dangling(this.ptr.as_ptr())); + Weak { ptr: this.ptr } + } + + /// Gets the number of [`Weak`] pointers to this allocation. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// let _weak_five = Rc::downgrade(&five); + /// + /// assert_eq!(1, Rc::weak_count(&five)); + /// ``` + #[inline] + #[stable(feature = "rc_counts", since = "1.15.0")] + pub fn weak_count(this: &Self) -> usize { + this.inner().weak() - 1 + } + + /// Gets the number of strong (`Rc`) pointers to this allocation. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// let _also_five = Rc::clone(&five); + /// + /// assert_eq!(2, Rc::strong_count(&five)); + /// ``` + #[inline] + #[stable(feature = "rc_counts", since = "1.15.0")] + pub fn strong_count(this: &Self) -> usize { + this.inner().strong() + } + + /// Increments the strong reference count on the `Rc` associated with the + /// provided pointer by one. + /// + /// # Safety + /// + /// The pointer must have been obtained through `Rc::into_raw`, and the + /// associated `Rc` instance must be valid (i.e. the strong count must be at + /// least 1) for the duration of this method. 
+ /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// unsafe { + /// let ptr = Rc::into_raw(five); + /// Rc::increment_strong_count(ptr); + /// + /// let five = Rc::from_raw(ptr); + /// assert_eq!(2, Rc::strong_count(&five)); + /// } + /// ``` + #[inline] + #[stable(feature = "rc_mutate_strong_count", since = "1.53.0")] + pub unsafe fn increment_strong_count(ptr: *const T) { + // Retain Rc, but don't touch refcount by wrapping in ManuallyDrop + let rc = unsafe { mem::ManuallyDrop::new(Rc::::from_raw(ptr)) }; + // Now increase refcount, but don't drop new refcount either + let _rc_clone: mem::ManuallyDrop<_> = rc.clone(); + } + + /// Decrements the strong reference count on the `Rc` associated with the + /// provided pointer by one. + /// + /// # Safety + /// + /// The pointer must have been obtained through `Rc::into_raw`, and the + /// associated `Rc` instance must be valid (i.e. the strong count must be at + /// least 1) when invoking this method. This method can be used to release + /// the final `Rc` and backing storage, but **should not** be called after + /// the final `Rc` has been released. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// unsafe { + /// let ptr = Rc::into_raw(five); + /// Rc::increment_strong_count(ptr); + /// + /// let five = Rc::from_raw(ptr); + /// assert_eq!(2, Rc::strong_count(&five)); + /// Rc::decrement_strong_count(ptr); + /// assert_eq!(1, Rc::strong_count(&five)); + /// } + /// ``` + #[inline] + #[stable(feature = "rc_mutate_strong_count", since = "1.53.0")] + pub unsafe fn decrement_strong_count(ptr: *const T) { + unsafe { mem::drop(Rc::from_raw(ptr)) }; + } + + /// Returns `true` if there are no other `Rc` or [`Weak`] pointers to + /// this allocation. 
+ #[inline] + fn is_unique(this: &Self) -> bool { + Rc::weak_count(this) == 0 && Rc::strong_count(this) == 1 + } + + /// Returns a mutable reference into the given `Rc`, if there are + /// no other `Rc` or [`Weak`] pointers to the same allocation. + /// + /// Returns [`None`] otherwise, because it is not safe to + /// mutate a shared value. + /// + /// See also [`make_mut`][make_mut], which will [`clone`][clone] + /// the inner value when there are other pointers. + /// + /// [make_mut]: Rc::make_mut + /// [clone]: Clone::clone + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let mut x = Rc::new(3); + /// *Rc::get_mut(&mut x).unwrap() = 4; + /// assert_eq!(*x, 4); + /// + /// let _y = Rc::clone(&x); + /// assert!(Rc::get_mut(&mut x).is_none()); + /// ``` + #[inline] + #[stable(feature = "rc_unique", since = "1.4.0")] + pub fn get_mut(this: &mut Self) -> Option<&mut T> { + if Rc::is_unique(this) { unsafe { Some(Rc::get_mut_unchecked(this)) } } else { None } + } + + /// Returns a mutable reference into the given `Rc`, + /// without any check. + /// + /// See also [`get_mut`], which is safe and does appropriate checks. + /// + /// [`get_mut`]: Rc::get_mut + /// + /// # Safety + /// + /// Any other `Rc` or [`Weak`] pointers to the same allocation must not be dereferenced + /// for the duration of the returned borrow. + /// This is trivially the case if no such pointers exist, + /// for example immediately after `Rc::new`. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(get_mut_unchecked)] + /// + /// use std::rc::Rc; + /// + /// let mut x = Rc::new(String::new()); + /// unsafe { + /// Rc::get_mut_unchecked(&mut x).push_str("foo") + /// } + /// assert_eq!(*x, "foo"); + /// ``` + #[inline] + #[unstable(feature = "get_mut_unchecked", issue = "63292")] + pub unsafe fn get_mut_unchecked(this: &mut Self) -> &mut T { + // We are careful to *not* create a reference covering the "count" fields, as + // this would conflict with accesses to the reference counts (e.g. by `Weak`). + unsafe { &mut (*this.ptr.as_ptr()).value } + } + + #[inline] + #[stable(feature = "ptr_eq", since = "1.17.0")] + /// Returns `true` if the two `Rc`s point to the same allocation + /// (in a vein similar to [`ptr::eq`]). + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// let same_five = Rc::clone(&five); + /// let other_five = Rc::new(5); + /// + /// assert!(Rc::ptr_eq(&five, &same_five)); + /// assert!(!Rc::ptr_eq(&five, &other_five)); + /// ``` + /// + /// [`ptr::eq`]: core::ptr::eq + pub fn ptr_eq(this: &Self, other: &Self) -> bool { + this.ptr.as_ptr() == other.ptr.as_ptr() + } +} + +impl Rc { + /// Makes a mutable reference into the given `Rc`. + /// + /// If there are other `Rc` pointers to the same allocation, then `make_mut` will + /// [`clone`] the inner value to a new allocation to ensure unique ownership. This is also + /// referred to as clone-on-write. + /// + /// If there are no other `Rc` pointers to this allocation, then [`Weak`] + /// pointers to this allocation will be disassociated. + /// + /// See also [`get_mut`], which will fail rather than cloning. 
+ /// + /// [`clone`]: Clone::clone + /// [`get_mut`]: Rc::get_mut + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let mut data = Rc::new(5); + /// + /// *Rc::make_mut(&mut data) += 1; // Won't clone anything + /// let mut other_data = Rc::clone(&data); // Won't clone inner data + /// *Rc::make_mut(&mut data) += 1; // Clones inner data + /// *Rc::make_mut(&mut data) += 1; // Won't clone anything + /// *Rc::make_mut(&mut other_data) *= 2; // Won't clone anything + /// + /// // Now `data` and `other_data` point to different allocations. + /// assert_eq!(*data, 8); + /// assert_eq!(*other_data, 12); + /// ``` + /// + /// [`Weak`] pointers will be disassociated: + /// + /// ``` + /// use std::rc::Rc; + /// + /// let mut data = Rc::new(75); + /// let weak = Rc::downgrade(&data); + /// + /// assert!(75 == *data); + /// assert!(75 == *weak.upgrade().unwrap()); + /// + /// *Rc::make_mut(&mut data) += 1; + /// + /// assert!(76 == *data); + /// assert!(weak.upgrade().is_none()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rc_unique", since = "1.4.0")] + pub fn make_mut(this: &mut Self) -> &mut T { + if Rc::strong_count(this) != 1 { + // Gotta clone the data, there are other Rcs. + // Pre-allocate memory to allow writing the cloned value directly. 
+ let mut rc = Self::new_uninit(); + unsafe { + let data = Rc::get_mut_unchecked(&mut rc); + (**this).write_clone_into_raw(data.as_mut_ptr()); + *this = rc.assume_init(); + } + } else if Rc::weak_count(this) != 0 { + // Can just steal the data, all that's left is Weaks + let mut rc = Self::new_uninit(); + unsafe { + let data = Rc::get_mut_unchecked(&mut rc); + data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1); + + this.inner().dec_strong(); + // Remove implicit strong-weak ref (no need to craft a fake + // Weak here -- we know other Weaks can clean up for us) + this.inner().dec_weak(); + ptr::write(this, rc.assume_init()); + } + } + // This unsafety is ok because we're guaranteed that the pointer + // returned is the *only* pointer that will ever be returned to T. Our + // reference count is guaranteed to be 1 at this point, and we required + // the `Rc` itself to be `mut`, so we're returning the only possible + // reference to the allocation. + unsafe { &mut this.ptr.as_mut().value } + } +} + +impl Rc { + #[inline] + #[stable(feature = "rc_downcast", since = "1.29.0")] + /// Attempt to downcast the `Rc` to a concrete type. + /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// use std::rc::Rc; + /// + /// fn print_if_string(value: Rc) { + /// if let Ok(string) = value.downcast::() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Rc::new(my_string)); + /// print_if_string(Rc::new(0i8)); + /// ``` + pub fn downcast(self) -> Result, Rc> { + if (*self).is::() { + let ptr = self.ptr.cast::>(); + forget(self); + Ok(Rc::from_inner(ptr)) + } else { + Err(self) + } + } +} + +impl Rc { + /// Allocates an `RcBox` with sufficient space for + /// a possibly-unsized inner value where the value has the layout provided. 
+ /// + /// The function `mem_to_rcbox` is called with the data pointer + /// and must return back a (potentially fat)-pointer for the `RcBox`. + #[cfg(not(no_global_oom_handling))] + unsafe fn allocate_for_layout( + value_layout: Layout, + allocate: impl FnOnce(Layout) -> Result, AllocError>, + mem_to_rcbox: impl FnOnce(*mut u8) -> *mut RcBox, + ) -> *mut RcBox { + // Calculate layout using the given value layout. + // Previously, layout was calculated on the expression + // `&*(ptr as *const RcBox)`, but this created a misaligned + // reference (see #54908). + let layout = Layout::new::>().extend(value_layout).unwrap().0.pad_to_align(); + unsafe { + Rc::try_allocate_for_layout(value_layout, allocate, mem_to_rcbox) + .unwrap_or_else(|_| handle_alloc_error(layout)) + } + } + + /// Allocates an `RcBox` with sufficient space for + /// a possibly-unsized inner value where the value has the layout provided, + /// returning an error if allocation fails. + /// + /// The function `mem_to_rcbox` is called with the data pointer + /// and must return back a (potentially fat)-pointer for the `RcBox`. + #[inline] + unsafe fn try_allocate_for_layout( + value_layout: Layout, + allocate: impl FnOnce(Layout) -> Result, AllocError>, + mem_to_rcbox: impl FnOnce(*mut u8) -> *mut RcBox, + ) -> Result<*mut RcBox, AllocError> { + // Calculate layout using the given value layout. + // Previously, layout was calculated on the expression + // `&*(ptr as *const RcBox)`, but this created a misaligned + // reference (see #54908). + let layout = Layout::new::>().extend(value_layout).unwrap().0.pad_to_align(); + + // Allocate for the layout. 
+ let ptr = allocate(layout)?; + + // Initialize the RcBox + let inner = mem_to_rcbox(ptr.as_non_null_ptr().as_ptr()); + unsafe { + debug_assert_eq!(Layout::for_value(&*inner), layout); + + ptr::write(&mut (*inner).strong, Cell::new(1)); + ptr::write(&mut (*inner).weak, Cell::new(1)); + } + + Ok(inner) + } + + /// Allocates an `RcBox` with sufficient space for an unsized inner value + #[cfg(not(no_global_oom_handling))] + unsafe fn allocate_for_ptr(ptr: *const T) -> *mut RcBox { + // Allocate for the `RcBox` using the given value. + unsafe { + Self::allocate_for_layout( + Layout::for_value(&*ptr), + |layout| Global.allocate(layout), + |mem| (ptr as *mut RcBox).set_ptr_value(mem), + ) + } + } + + #[cfg(not(no_global_oom_handling))] + fn from_box(v: Box) -> Rc { + unsafe { + let (box_unique, alloc) = Box::into_unique(v); + let bptr = box_unique.as_ptr(); + + let value_size = size_of_val(&*bptr); + let ptr = Self::allocate_for_ptr(bptr); + + // Copy value as bytes + ptr::copy_nonoverlapping( + bptr as *const T as *const u8, + &mut (*ptr).value as *mut _ as *mut u8, + value_size, + ); + + // Free the allocation without dropping its contents + box_free(box_unique, alloc); + + Self::from_ptr(ptr) + } + } +} + +impl Rc<[T]> { + /// Allocates an `RcBox<[T]>` with the given length. 
+ #[cfg(not(no_global_oom_handling))] + unsafe fn allocate_for_slice(len: usize) -> *mut RcBox<[T]> { + unsafe { + Self::allocate_for_layout( + Layout::array::(len).unwrap(), + |layout| Global.allocate(layout), + |mem| ptr::slice_from_raw_parts_mut(mem as *mut T, len) as *mut RcBox<[T]>, + ) + } + } + + /// Copy elements from slice into newly allocated Rc<\[T\]> + /// + /// Unsafe because the caller must either take ownership or bind `T: Copy` + #[cfg(not(no_global_oom_handling))] + unsafe fn copy_from_slice(v: &[T]) -> Rc<[T]> { + unsafe { + let ptr = Self::allocate_for_slice(v.len()); + ptr::copy_nonoverlapping(v.as_ptr(), &mut (*ptr).value as *mut [T] as *mut T, v.len()); + Self::from_ptr(ptr) + } + } + + /// Constructs an `Rc<[T]>` from an iterator known to be of a certain size. + /// + /// Behavior is undefined should the size be wrong. + #[cfg(not(no_global_oom_handling))] + unsafe fn from_iter_exact(iter: impl iter::Iterator, len: usize) -> Rc<[T]> { + // Panic guard while cloning T elements. + // In the event of a panic, elements that have been written + // into the new RcBox will be dropped, then the memory freed. + struct Guard { + mem: NonNull, + elems: *mut T, + layout: Layout, + n_elems: usize, + } + + impl Drop for Guard { + fn drop(&mut self) { + unsafe { + let slice = from_raw_parts_mut(self.elems, self.n_elems); + ptr::drop_in_place(slice); + + Global.deallocate(self.mem, self.layout); + } + } + } + + unsafe { + let ptr = Self::allocate_for_slice(len); + + let mem = ptr as *mut _ as *mut u8; + let layout = Layout::for_value(&*ptr); + + // Pointer to first element + let elems = &mut (*ptr).value as *mut [T] as *mut T; + + let mut guard = Guard { mem: NonNull::new_unchecked(mem), elems, layout, n_elems: 0 }; + + for (i, item) in iter.enumerate() { + ptr::write(elems.add(i), item); + guard.n_elems += 1; + } + + // All clear. Forget the guard so it doesn't free the new RcBox. 
+ forget(guard); + + Self::from_ptr(ptr) + } + } +} + +/// Specialization trait used for `From<&[T]>`. +trait RcFromSlice { + fn from_slice(slice: &[T]) -> Self; +} + +#[cfg(not(no_global_oom_handling))] +impl RcFromSlice for Rc<[T]> { + #[inline] + default fn from_slice(v: &[T]) -> Self { + unsafe { Self::from_iter_exact(v.iter().cloned(), v.len()) } + } +} + +#[cfg(not(no_global_oom_handling))] +impl RcFromSlice for Rc<[T]> { + #[inline] + fn from_slice(v: &[T]) -> Self { + unsafe { Rc::copy_from_slice(v) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Deref for Rc { + type Target = T; + + #[inline(always)] + fn deref(&self) -> &T { + &self.inner().value + } +} + +#[unstable(feature = "receiver_trait", issue = "none")] +impl Receiver for Rc {} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T: ?Sized> Drop for Rc { + /// Drops the `Rc`. + /// + /// This will decrement the strong reference count. If the strong reference + /// count reaches zero then the only other references (if any) are + /// [`Weak`], so we `drop` the inner value. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// struct Foo; + /// + /// impl Drop for Foo { + /// fn drop(&mut self) { + /// println!("dropped!"); + /// } + /// } + /// + /// let foo = Rc::new(Foo); + /// let foo2 = Rc::clone(&foo); + /// + /// drop(foo); // Doesn't print anything + /// drop(foo2); // Prints "dropped!" + /// ``` + fn drop(&mut self) { + unsafe { + self.inner().dec_strong(); + if self.inner().strong() == 0 { + // destroy the contained object + ptr::drop_in_place(Self::get_mut_unchecked(self)); + + // remove the implicit "strong weak" pointer now that we've + // destroyed the contents. + self.inner().dec_weak(); + + if self.inner().weak() == 0 { + Global.deallocate(self.ptr.cast(), Layout::for_value(self.ptr.as_ref())); + } + } + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for Rc { + /// Makes a clone of the `Rc` pointer. 
+ /// + /// This creates another pointer to the same allocation, increasing the + /// strong reference count. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// let _ = Rc::clone(&five); + /// ``` + #[inline] + fn clone(&self) -> Rc { + self.inner().inc_strong(); + Self::from_inner(self.ptr) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for Rc { + /// Creates a new `Rc`, with the `Default` value for `T`. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let x: Rc = Default::default(); + /// assert_eq!(*x, 0); + /// ``` + #[inline] + fn default() -> Rc { + Rc::new(Default::default()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +trait RcEqIdent { + fn eq(&self, other: &Rc) -> bool; + fn ne(&self, other: &Rc) -> bool; +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl RcEqIdent for Rc { + #[inline] + default fn eq(&self, other: &Rc) -> bool { + **self == **other + } + + #[inline] + default fn ne(&self, other: &Rc) -> bool { + **self != **other + } +} + +// Hack to allow specializing on `Eq` even though `Eq` has a method. +#[rustc_unsafe_specialization_marker] +pub(crate) trait MarkerEq: PartialEq {} + +impl MarkerEq for T {} + +/// We're doing this specialization here, and not as a more general optimization on `&T`, because it +/// would otherwise add a cost to all equality checks on refs. We assume that `Rc`s are used to +/// store large values, that are slow to clone, but also heavy to check for equality, causing this +/// cost to pay off more easily. It's also more likely to have two `Rc` clones, that point to +/// the same value, than two `&T`s. +/// +/// We can only do this when `T: Eq` as a `PartialEq` might be deliberately irreflexive. 
+#[stable(feature = "rust1", since = "1.0.0")] +impl RcEqIdent for Rc { + #[inline] + fn eq(&self, other: &Rc) -> bool { + Rc::ptr_eq(self, other) || **self == **other + } + + #[inline] + fn ne(&self, other: &Rc) -> bool { + !Rc::ptr_eq(self, other) && **self != **other + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for Rc { + /// Equality for two `Rc`s. + /// + /// Two `Rc`s are equal if their inner values are equal, even if they are + /// stored in different allocations. + /// + /// If `T` also implements `Eq` (implying reflexivity of equality), + /// two `Rc`s that point to the same allocation are + /// always equal. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// assert!(five == Rc::new(5)); + /// ``` + #[inline] + fn eq(&self, other: &Rc) -> bool { + RcEqIdent::eq(self, other) + } + + /// Inequality for two `Rc`s. + /// + /// Two `Rc`s are unequal if their inner values are unequal. + /// + /// If `T` also implements `Eq` (implying reflexivity of equality), + /// two `Rc`s that point to the same allocation are + /// never unequal. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// assert!(five != Rc::new(6)); + /// ``` + #[inline] + fn ne(&self, other: &Rc) -> bool { + RcEqIdent::ne(self, other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Rc {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for Rc { + /// Partial comparison for two `Rc`s. + /// + /// The two are compared by calling `partial_cmp()` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// use std::cmp::Ordering; + /// + /// let five = Rc::new(5); + /// + /// assert_eq!(Some(Ordering::Less), five.partial_cmp(&Rc::new(6))); + /// ``` + #[inline(always)] + fn partial_cmp(&self, other: &Rc) -> Option { + (**self).partial_cmp(&**other) + } + + /// Less-than comparison for two `Rc`s.
+ /// + /// The two are compared by calling `<` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// assert!(five < Rc::new(6)); + /// ``` + #[inline(always)] + fn lt(&self, other: &Rc) -> bool { + **self < **other + } + + /// 'Less than or equal to' comparison for two `Rc`s. + /// + /// The two are compared by calling `<=` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// assert!(five <= Rc::new(5)); + /// ``` + #[inline(always)] + fn le(&self, other: &Rc) -> bool { + **self <= **other + } + + /// Greater-than comparison for two `Rc`s. + /// + /// The two are compared by calling `>` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// assert!(five > Rc::new(4)); + /// ``` + #[inline(always)] + fn gt(&self, other: &Rc) -> bool { + **self > **other + } + + /// 'Greater than or equal to' comparison for two `Rc`s. + /// + /// The two are compared by calling `>=` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// assert!(five >= Rc::new(5)); + /// ``` + #[inline(always)] + fn ge(&self, other: &Rc) -> bool { + **self >= **other + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Rc { + /// Comparison for two `Rc`s. + /// + /// The two are compared by calling `cmp()` on their inner values. 
+ /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// use std::cmp::Ordering; + /// + /// let five = Rc::new(5); + /// + /// assert_eq!(Ordering::Less, five.cmp(&Rc::new(6))); + /// ``` + #[inline] + fn cmp(&self, other: &Rc) -> Ordering { + (**self).cmp(&**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Rc { + fn hash(&self, state: &mut H) { + (**self).hash(state); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for Rc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Rc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Pointer for Rc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Pointer::fmt(&(&**self as *const T), f) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_for_ptrs", since = "1.6.0")] +impl From for Rc { + /// Converts a generic type `T` into a `Rc` + /// + /// The conversion allocates on the heap and moves `t` + /// from the stack into it. + /// + /// # Example + /// ```rust + /// # use std::rc::Rc; + /// let x = 5; + /// let rc = Rc::new(5); + /// + /// assert_eq!(Rc::from(x), rc); + /// ``` + fn from(t: T) -> Self { + Rc::new(t) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From<&[T]> for Rc<[T]> { + /// Allocate a reference-counted slice and fill it by cloning `v`'s items. 
+ /// + /// # Example + /// + /// ``` + /// # use std::rc::Rc; + /// let original: &[i32] = &[1, 2, 3]; + /// let shared: Rc<[i32]> = Rc::from(original); + /// assert_eq!(&[1, 2, 3], &shared[..]); + /// ``` + #[inline] + fn from(v: &[T]) -> Rc<[T]> { + >::from_slice(v) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From<&str> for Rc { + /// Allocate a reference-counted string slice and copy `v` into it. + /// + /// # Example + /// + /// ``` + /// # use std::rc::Rc; + /// let shared: Rc = Rc::from("statue"); + /// assert_eq!("statue", &shared[..]); + /// ``` + #[inline] + fn from(v: &str) -> Rc { + let rc = Rc::<[u8]>::from(v.as_bytes()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const str) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From for Rc { + /// Allocate a reference-counted string slice and copy `v` into it. + /// + /// # Example + /// + /// ``` + /// # use std::rc::Rc; + /// let original: String = "statue".to_owned(); + /// let shared: Rc = Rc::from(original); + /// assert_eq!("statue", &shared[..]); + /// ``` + #[inline] + fn from(v: String) -> Rc { + Rc::from(&v[..]) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From> for Rc { + /// Move a boxed object to a new, reference counted, allocation. + /// + /// # Example + /// + /// ``` + /// # use std::rc::Rc; + /// let original: Box = Box::new(1); + /// let shared: Rc = Rc::from(original); + /// assert_eq!(1, *shared); + /// ``` + #[inline] + fn from(v: Box) -> Rc { + Rc::from_box(v) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From> for Rc<[T]> { + /// Allocate a reference-counted slice and move `v`'s items into it. 
+ /// + /// # Example + /// + /// ``` + /// # use std::rc::Rc; + /// let original: Vec<i32> = vec![1, 2, 3]; + /// let shared: Rc<[i32]> = Rc::from(original); + /// assert_eq!(&[1, 2, 3], &shared[..]); + /// ``` + #[inline] + fn from(mut v: Vec) -> Rc<[T]> { + unsafe { + let rc = Rc::copy_from_slice(&v); + + // Allow the Vec to free its memory, but not destroy its contents + v.set_len(0); + + rc + } + } +} + +#[stable(feature = "shared_from_cow", since = "1.45.0")] +impl<'a, B> From> for Rc +where + B: ToOwned + ?Sized, + Rc: From<&'a B> + From, +{ + /// Create a reference-counted pointer from + /// a clone-on-write pointer by copying its content. + /// + /// # Example + /// + /// ```rust + /// # use std::rc::Rc; + /// # use std::borrow::Cow; + /// let cow: Cow<str> = Cow::Borrowed("eggplant"); + /// let shared: Rc<str> = Rc::from(cow); + /// assert_eq!("eggplant", &shared[..]); + /// ``` + #[inline] + fn from(cow: Cow<'a, B>) -> Rc { + match cow { + Cow::Borrowed(s) => Rc::from(s), + Cow::Owned(s) => Rc::from(s), + } + } +} + +#[stable(feature = "boxed_slice_try_from", since = "1.43.0")] +impl TryFrom> for Rc<[T; N]> { + type Error = Rc<[T]>; + + fn try_from(boxed_slice: Rc<[T]>) -> Result { + if boxed_slice.len() == N { + Ok(unsafe { Rc::from_raw(Rc::into_raw(boxed_slice) as *mut [T; N]) }) + } else { + Err(boxed_slice) + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_iter", since = "1.37.0")] +impl iter::FromIterator for Rc<[T]> { + /// Takes each element in the `Iterator` and collects it into an `Rc<[T]>`. + /// + /// # Performance characteristics + /// + /// ## The general case + /// + /// In the general case, collecting into `Rc<[T]>` is done by first + /// collecting into a `Vec`.
That is, when writing the following: + /// + /// ```rust + /// # use std::rc::Rc; + /// let evens: Rc<[u8]> = (0..10).filter(|&x| x % 2 == 0).collect(); + /// # assert_eq!(&*evens, &[0, 2, 4, 6, 8]); + /// ``` + /// + /// this behaves as if we wrote: + /// + /// ```rust + /// # use std::rc::Rc; + /// let evens: Rc<[u8]> = (0..10).filter(|&x| x % 2 == 0) + /// .collect::>() // The first set of allocations happens here. + /// .into(); // A second allocation for `Rc<[T]>` happens here. + /// # assert_eq!(&*evens, &[0, 2, 4, 6, 8]); + /// ``` + /// + /// This will allocate as many times as needed for constructing the `Vec` + /// and then it will allocate once for turning the `Vec` into the `Rc<[T]>`. + /// + /// ## Iterators of known length + /// + /// When your `Iterator` implements `TrustedLen` and is of an exact size, + /// a single allocation will be made for the `Rc<[T]>`. For example: + /// + /// ```rust + /// # use std::rc::Rc; + /// let evens: Rc<[u8]> = (0..10).collect(); // Just a single allocation happens here. + /// # assert_eq!(&*evens, &*(0..10).collect::>()); + /// ``` + fn from_iter>(iter: I) -> Self { + ToRcSlice::to_rc_slice(iter.into_iter()) + } +} + +/// Specialization trait used for collecting into `Rc<[T]>`. +#[cfg(not(no_global_oom_handling))] +trait ToRcSlice: Iterator + Sized { + fn to_rc_slice(self) -> Rc<[T]>; +} + +#[cfg(not(no_global_oom_handling))] +impl> ToRcSlice for I { + default fn to_rc_slice(self) -> Rc<[T]> { + self.collect::>().into() + } +} + +#[cfg(not(no_global_oom_handling))] +impl> ToRcSlice for I { + fn to_rc_slice(self) -> Rc<[T]> { + // This is the case for a `TrustedLen` iterator. + let (low, high) = self.size_hint(); + if let Some(high) = high { + debug_assert_eq!( + low, + high, + "TrustedLen iterator's size hint is not exact: {:?}", + (low, high) + ); + + unsafe { + // SAFETY: We need to ensure that the iterator has an exact length and we have. 
+ Rc::from_iter_exact(self, low) + } + } else { + // TrustedLen contract guarantees that `upper_bound == None` implies an iterator + // length exceeding `usize::MAX`. + // The default implementation would collect into a vec which would panic. + // Thus we panic here immediately without invoking `Vec` code. + panic!("capacity overflow"); + } + } +} + +/// `Weak` is a version of [`Rc`] that holds a non-owning reference to the +/// managed allocation. The allocation is accessed by calling [`upgrade`] on the `Weak` +/// pointer, which returns an [`Option`]`<`[`Rc`]`>`. +/// +/// Since a `Weak` reference does not count towards ownership, it will not +/// prevent the value stored in the allocation from being dropped, and `Weak` itself makes no +/// guarantees about the value still being present. Thus it may return [`None`] +/// when [`upgrade`]d. Note however that a `Weak` reference *does* prevent the allocation +/// itself (the backing store) from being deallocated. +/// +/// A `Weak` pointer is useful for keeping a temporary reference to the allocation +/// managed by [`Rc`] without preventing its inner value from being dropped. It is also used to +/// prevent circular references between [`Rc`] pointers, since mutual owning references +/// would never allow either [`Rc`] to be dropped. For example, a tree could +/// have strong [`Rc`] pointers from parent nodes to children, and `Weak` +/// pointers from children back to their parents. +/// +/// The typical way to obtain a `Weak` pointer is to call [`Rc::downgrade`]. +/// +/// [`upgrade`]: Weak::upgrade +#[stable(feature = "rc_weak", since = "1.4.0")] +pub struct Weak { + // This is a `NonNull` to allow optimizing the size of this type in enums, + // but it is not necessarily a valid pointer. + // `Weak::new` sets this to `usize::MAX` so that it doesn’t need + // to allocate space on the heap. That's not a value a real pointer + // will ever have because RcBox has alignment at least 2.
+ // This is only possible when `T: Sized`; unsized `T` never dangle. + ptr: NonNull>, +} + +#[stable(feature = "rc_weak", since = "1.4.0")] +impl !marker::Send for Weak {} +#[stable(feature = "rc_weak", since = "1.4.0")] +impl !marker::Sync for Weak {} + +#[unstable(feature = "coerce_unsized", issue = "27732")] +impl, U: ?Sized> CoerceUnsized> for Weak {} + +#[unstable(feature = "dispatch_from_dyn", issue = "none")] +impl, U: ?Sized> DispatchFromDyn> for Weak {} + +impl Weak { + /// Constructs a new `Weak`, without allocating any memory. + /// Calling [`upgrade`] on the return value always gives [`None`]. + /// + /// [`upgrade`]: Weak::upgrade + /// + /// # Examples + /// + /// ``` + /// use std::rc::Weak; + /// + /// let empty: Weak = Weak::new(); + /// assert!(empty.upgrade().is_none()); + /// ``` + #[stable(feature = "downgraded_weak", since = "1.10.0")] + pub fn new() -> Weak { + Weak { ptr: NonNull::new(usize::MAX as *mut RcBox).expect("MAX is not 0") } + } +} + +pub(crate) fn is_dangling(ptr: *mut T) -> bool { + let address = ptr as *mut () as usize; + address == usize::MAX +} + +/// Helper type to allow accessing the reference counts without +/// making any assertions about the data field. +struct WeakInner<'a> { + weak: &'a Cell, + strong: &'a Cell, +} + +impl Weak { + /// Returns a raw pointer to the object `T` pointed to by this `Weak`. + /// + /// The pointer is valid only if there are some strong references. The pointer may be dangling, + /// unaligned or even [`null`] otherwise. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// use std::ptr; + /// + /// let strong = Rc::new("hello".to_owned()); + /// let weak = Rc::downgrade(&strong); + /// // Both point to the same object + /// assert!(ptr::eq(&*strong, weak.as_ptr())); + /// // The strong here keeps it alive, so we can still access the object. + /// assert_eq!("hello", unsafe { &*weak.as_ptr() }); + /// + /// drop(strong); + /// // But not any more. 
We can do weak.as_ptr(), but accessing the pointer would lead to + /// // undefined behaviour. + /// // assert_eq!("hello", unsafe { &*weak.as_ptr() }); + /// ``` + /// + /// [`null`]: core::ptr::null + #[stable(feature = "rc_as_ptr", since = "1.45.0")] + pub fn as_ptr(&self) -> *const T { + let ptr: *mut RcBox = NonNull::as_ptr(self.ptr); + + if is_dangling(ptr) { + // If the pointer is dangling, we return the sentinel directly. This cannot be + // a valid payload address, as the payload is at least as aligned as RcBox (usize). + ptr as *const T + } else { + // SAFETY: if is_dangling returns false, then the pointer is dereferencable. + // The payload may be dropped at this point, and we have to maintain provenance, + // so use raw pointer manipulation. + unsafe { ptr::addr_of_mut!((*ptr).value) } + } + } + + /// Consumes the `Weak` and turns it into a raw pointer. + /// + /// This converts the weak pointer into a raw pointer, while still preserving the ownership of + /// one weak reference (the weak count is not modified by this operation). It can be turned + /// back into the `Weak` with [`from_raw`]. + /// + /// The same restrictions of accessing the target of the pointer as with + /// [`as_ptr`] apply. + /// + /// # Examples + /// + /// ``` + /// use std::rc::{Rc, Weak}; + /// + /// let strong = Rc::new("hello".to_owned()); + /// let weak = Rc::downgrade(&strong); + /// let raw = weak.into_raw(); + /// + /// assert_eq!(1, Rc::weak_count(&strong)); + /// assert_eq!("hello", unsafe { &*raw }); + /// + /// drop(unsafe { Weak::from_raw(raw) }); + /// assert_eq!(0, Rc::weak_count(&strong)); + /// ``` + /// + /// [`from_raw`]: Weak::from_raw + /// [`as_ptr`]: Weak::as_ptr + #[stable(feature = "weak_into_raw", since = "1.45.0")] + pub fn into_raw(self) -> *const T { + let result = self.as_ptr(); + mem::forget(self); + result + } + + /// Converts a raw pointer previously created by [`into_raw`] back into `Weak`. 
+ /// + /// This can be used to safely get a strong reference (by calling [`upgrade`] + /// later) or to deallocate the weak count by dropping the `Weak`. + /// + /// It takes ownership of one weak reference (with the exception of pointers created by [`new`], + /// as these don't own anything; the method still works on them). + /// + /// # Safety + /// + /// The pointer must have originated from the [`into_raw`] and must still own its potential + /// weak reference. + /// + /// It is allowed for the strong count to be 0 at the time of calling this. Nevertheless, this + /// takes ownership of one weak reference currently represented as a raw pointer (the weak + /// count is not modified by this operation) and therefore it must be paired with a previous + /// call to [`into_raw`]. + /// + /// # Examples + /// + /// ``` + /// use std::rc::{Rc, Weak}; + /// + /// let strong = Rc::new("hello".to_owned()); + /// + /// let raw_1 = Rc::downgrade(&strong).into_raw(); + /// let raw_2 = Rc::downgrade(&strong).into_raw(); + /// + /// assert_eq!(2, Rc::weak_count(&strong)); + /// + /// assert_eq!("hello", &*unsafe { Weak::from_raw(raw_1) }.upgrade().unwrap()); + /// assert_eq!(1, Rc::weak_count(&strong)); + /// + /// drop(strong); + /// + /// // Decrement the last weak count. + /// assert!(unsafe { Weak::from_raw(raw_2) }.upgrade().is_none()); + /// ``` + /// + /// [`into_raw`]: Weak::into_raw + /// [`upgrade`]: Weak::upgrade + /// [`new`]: Weak::new + #[stable(feature = "weak_into_raw", since = "1.45.0")] + pub unsafe fn from_raw(ptr: *const T) -> Self { + // See Weak::as_ptr for context on how the input pointer is derived. + + let ptr = if is_dangling(ptr as *mut T) { + // This is a dangling Weak. + ptr as *mut RcBox + } else { + // Otherwise, we're guaranteed the pointer came from a nondangling Weak. + // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. 
+ let offset = unsafe { data_offset(ptr) }; + // Thus, we reverse the offset to get the whole RcBox. + // SAFETY: the pointer originated from a Weak, so this offset is safe. + unsafe { (ptr as *mut RcBox).set_ptr_value((ptr as *mut u8).offset(-offset)) } + }; + + // SAFETY: we now have recovered the original Weak pointer, so can create the Weak. + Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } } + } + + /// Attempts to upgrade the `Weak` pointer to an [`Rc`], delaying + /// dropping of the inner value if successful. + /// + /// Returns [`None`] if the inner value has since been dropped. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let five = Rc::new(5); + /// + /// let weak_five = Rc::downgrade(&five); + /// + /// let strong_five: Option> = weak_five.upgrade(); + /// assert!(strong_five.is_some()); + /// + /// // Destroy all strong pointers. + /// drop(strong_five); + /// drop(five); + /// + /// assert!(weak_five.upgrade().is_none()); + /// ``` + #[stable(feature = "rc_weak", since = "1.4.0")] + pub fn upgrade(&self) -> Option> { + let inner = self.inner()?; + if inner.strong() == 0 { + None + } else { + inner.inc_strong(); + Some(Rc::from_inner(self.ptr)) + } + } + + /// Gets the number of strong (`Rc`) pointers pointing to this allocation. + /// + /// If `self` was created using [`Weak::new`], this will return 0. + #[stable(feature = "weak_counts", since = "1.41.0")] + pub fn strong_count(&self) -> usize { + if let Some(inner) = self.inner() { inner.strong() } else { 0 } + } + + /// Gets the number of `Weak` pointers pointing to this allocation. + /// + /// If no strong pointers remain, this will return zero. 
+ #[stable(feature = "weak_counts", since = "1.41.0")] + pub fn weak_count(&self) -> usize { + self.inner() + .map(|inner| { + if inner.strong() > 0 { + inner.weak() - 1 // subtract the implicit weak ptr + } else { + 0 + } + }) + .unwrap_or(0) + } + + /// Returns `None` when the pointer is dangling and there is no allocated `RcBox`, + /// (i.e., when this `Weak` was created by `Weak::new`). + #[inline] + fn inner(&self) -> Option> { + if is_dangling(self.ptr.as_ptr()) { + None + } else { + // We are careful to *not* create a reference covering the "data" field, as + // the field may be mutated concurrently (for example, if the last `Rc` + // is dropped, the data field will be dropped in-place). + Some(unsafe { + let ptr = self.ptr.as_ptr(); + WeakInner { strong: &(*ptr).strong, weak: &(*ptr).weak } + }) + } + } + + /// Returns `true` if the two `Weak`s point to the same allocation (similar to + /// [`ptr::eq`]), or if both don't point to any allocation + /// (because they were created with `Weak::new()`). + /// + /// # Notes + /// + /// Since this compares pointers it means that `Weak::new()` will equal each + /// other, even though they don't point to any allocation. + /// + /// # Examples + /// + /// ``` + /// use std::rc::Rc; + /// + /// let first_rc = Rc::new(5); + /// let first = Rc::downgrade(&first_rc); + /// let second = Rc::downgrade(&first_rc); + /// + /// assert!(first.ptr_eq(&second)); + /// + /// let third_rc = Rc::new(5); + /// let third = Rc::downgrade(&third_rc); + /// + /// assert!(!first.ptr_eq(&third)); + /// ``` + /// + /// Comparing `Weak::new`. 
+ /// + /// ``` + /// use std::rc::{Rc, Weak}; + /// + /// let first = Weak::new(); + /// let second = Weak::new(); + /// assert!(first.ptr_eq(&second)); + /// + /// let third_rc = Rc::new(()); + /// let third = Rc::downgrade(&third_rc); + /// assert!(!first.ptr_eq(&third)); + /// ``` + /// + /// [`ptr::eq`]: core::ptr::eq + #[inline] + #[stable(feature = "weak_ptr_eq", since = "1.39.0")] + pub fn ptr_eq(&self, other: &Self) -> bool { + self.ptr.as_ptr() == other.ptr.as_ptr() + } +} + +#[stable(feature = "rc_weak", since = "1.4.0")] +unsafe impl<#[may_dangle] T: ?Sized> Drop for Weak { + /// Drops the `Weak` pointer. + /// + /// # Examples + /// + /// ``` + /// use std::rc::{Rc, Weak}; + /// + /// struct Foo; + /// + /// impl Drop for Foo { + /// fn drop(&mut self) { + /// println!("dropped!"); + /// } + /// } + /// + /// let foo = Rc::new(Foo); + /// let weak_foo = Rc::downgrade(&foo); + /// let other_weak_foo = Weak::clone(&weak_foo); + /// + /// drop(weak_foo); // Doesn't print anything + /// drop(foo); // Prints "dropped!" + /// + /// assert!(other_weak_foo.upgrade().is_none()); + /// ``` + fn drop(&mut self) { + let inner = if let Some(inner) = self.inner() { inner } else { return }; + + inner.dec_weak(); + // the weak count starts at 1, and will only go to zero if all + // the strong pointers have disappeared. + if inner.weak() == 0 { + unsafe { + Global.deallocate(self.ptr.cast(), Layout::for_value_raw(self.ptr.as_ptr())); + } + } + } +} + +#[stable(feature = "rc_weak", since = "1.4.0")] +impl Clone for Weak { + /// Makes a clone of the `Weak` pointer that points to the same allocation. 
+ /// + /// # Examples + /// + /// ``` + /// use std::rc::{Rc, Weak}; + /// + /// let weak_five = Rc::downgrade(&Rc::new(5)); + /// + /// let _ = Weak::clone(&weak_five); + /// ``` + #[inline] + fn clone(&self) -> Weak { + if let Some(inner) = self.inner() { + inner.inc_weak() + } + Weak { ptr: self.ptr } + } +} + +#[stable(feature = "rc_weak", since = "1.4.0")] +impl fmt::Debug for Weak { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "(Weak)") + } +} + +#[stable(feature = "downgraded_weak", since = "1.10.0")] +impl Default for Weak { + /// Constructs a new `Weak`, without allocating any memory. + /// Calling [`upgrade`] on the return value always gives [`None`]. + /// + /// [`None`]: Option + /// [`upgrade`]: Weak::upgrade + /// + /// # Examples + /// + /// ``` + /// use std::rc::Weak; + /// + /// let empty: Weak = Default::default(); + /// assert!(empty.upgrade().is_none()); + /// ``` + fn default() -> Weak { + Weak::new() + } +} + +// NOTE: We checked_add here to deal with mem::forget safely. In particular +// if you mem::forget Rcs (or Weaks), the ref-count can overflow, and then +// you can free the allocation while outstanding Rcs (or Weaks) exist. +// We abort because this is such a degenerate scenario that we don't care about +// what happens -- no real program should ever experience this. +// +// This should have negligible overhead since you don't actually need to +// clone these much in Rust thanks to ownership and move-semantics. + +#[doc(hidden)] +trait RcInnerPtr { + fn weak_ref(&self) -> &Cell; + fn strong_ref(&self) -> &Cell; + + #[inline] + fn strong(&self) -> usize { + self.strong_ref().get() + } + + #[inline] + fn inc_strong(&self) { + let strong = self.strong(); + + // We want to abort on overflow instead of dropping the value. + // The reference count will never be zero when this is called; + // nevertheless, we insert an abort here to hint LLVM at + // an otherwise missed optimization. 
+ if strong == 0 || strong == usize::MAX { + abort(); + } + self.strong_ref().set(strong + 1); + } + + #[inline] + fn dec_strong(&self) { + self.strong_ref().set(self.strong() - 1); + } + + #[inline] + fn weak(&self) -> usize { + self.weak_ref().get() + } + + #[inline] + fn inc_weak(&self) { + let weak = self.weak(); + + // We want to abort on overflow instead of dropping the value. + // The reference count will never be zero when this is called; + // nevertheless, we insert an abort here to hint LLVM at + // an otherwise missed optimization. + if weak == 0 || weak == usize::MAX { + abort(); + } + self.weak_ref().set(weak + 1); + } + + #[inline] + fn dec_weak(&self) { + self.weak_ref().set(self.weak() - 1); + } +} + +impl RcInnerPtr for RcBox { + #[inline(always)] + fn weak_ref(&self) -> &Cell { + &self.weak + } + + #[inline(always)] + fn strong_ref(&self) -> &Cell { + &self.strong + } +} + +impl<'a> RcInnerPtr for WeakInner<'a> { + #[inline(always)] + fn weak_ref(&self) -> &Cell { + self.weak + } + + #[inline(always)] + fn strong_ref(&self) -> &Cell { + self.strong + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl borrow::Borrow for Rc { + fn borrow(&self) -> &T { + &**self + } +} + +#[stable(since = "1.5.0", feature = "smart_ptr_as_ref")] +impl AsRef for Rc { + fn as_ref(&self) -> &T { + &**self + } +} + +#[stable(feature = "pin", since = "1.33.0")] +impl Unpin for Rc {} + +/// Get the offset within an `RcBox` for the payload behind a pointer. +/// +/// # Safety +/// +/// The pointer must point to (and have valid metadata for) a previously +/// valid instance of T, but the T is allowed to be dropped. +unsafe fn data_offset(ptr: *const T) -> isize { + // Align the unsized value to the end of the RcBox. + // Because RcBox is repr(C), it will always be the last field in memory. 
+ // SAFETY: since the only unsized types possible are slices, trait objects, + // and extern types, the input safety requirement is currently enough to + // satisfy the requirements of align_of_val_raw; this is an implementation + // detail of the language that may not be relied upon outside of std. + unsafe { data_offset_align(align_of_val_raw(ptr)) } +} + +#[inline] +fn data_offset_align(align: usize) -> isize { + let layout = Layout::new::>(); + (layout.size() + layout.padding_needed_for(align)) as isize +} diff --git a/rust/alloc/slice.rs b/rust/alloc/slice.rs new file mode 100644 index 0000000000000..455d1be60c132 --- /dev/null +++ b/rust/alloc/slice.rs @@ -0,0 +1,1271 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A dynamically-sized view into a contiguous sequence, `[T]`. +//! +//! *[See also the slice primitive type](slice).* +//! +//! Slices are a view into a block of memory represented as a pointer and a +//! length. +//! +//! ``` +//! // slicing a Vec +//! let vec = vec![1, 2, 3]; +//! let int_slice = &vec[..]; +//! // coercing an array to a slice +//! let str_slice: &[&str] = &["one", "two", "three"]; +//! ``` +//! +//! Slices are either mutable or shared. The shared slice type is `&[T]`, +//! while the mutable slice type is `&mut [T]`, where `T` represents the element +//! type. For example, you can mutate the block of memory that a mutable slice +//! points to: +//! +//! ``` +//! let x = &mut [1, 2, 3]; +//! x[1] = 7; +//! assert_eq!(x, &[1, 7, 3]); +//! ``` +//! +//! Here are some of the things this module contains: +//! +//! ## Structs +//! +//! There are several structs that are useful for slices, such as [`Iter`], which +//! represents iteration over a slice. +//! +//! ## Trait Implementations +//! +//! There are several implementations of common traits for slices. Some examples +//! include: +//! +//! * [`Clone`] +//! * [`Eq`], [`Ord`] - for slices whose element type are [`Eq`] or [`Ord`]. +//! 
* [`Hash`] - for slices whose element type is [`Hash`]. +//! +//! ## Iteration +//! +//! The slices implement `IntoIterator`. The iterator yields references to the +//! slice elements. +//! +//! ``` +//! let numbers = &[0, 1, 2]; +//! for n in numbers { +//! println!("{} is a number!", n); +//! } +//! ``` +//! +//! The mutable slice yields mutable references to the elements: +//! +//! ``` +//! let mut scores = [7, 8, 9]; +//! for score in &mut scores[..] { +//! *score += 1; +//! } +//! ``` +//! +//! This iterator yields mutable references to the slice's elements, so while +//! the element type of the slice is `i32`, the element type of the iterator is +//! `&mut i32`. +//! +//! * [`.iter`] and [`.iter_mut`] are the explicit methods to return the default +//! iterators. +//! * Further methods that return iterators are [`.split`], [`.splitn`], +//! [`.chunks`], [`.windows`] and more. +//! +//! [`Hash`]: core::hash::Hash +//! [`.iter`]: slice::iter +//! [`.iter_mut`]: slice::iter_mut +//! [`.split`]: slice::split +//! [`.splitn`]: slice::splitn +//! [`.chunks`]: slice::chunks +//! [`.windows`]: slice::windows +#![stable(feature = "rust1", since = "1.0.0")] +// Many of the usings in this module are only used in the test configuration. +// It's cleaner to just turn off the unused_imports warning than to fix them. 
+#![cfg_attr(test, allow(unused_imports, dead_code))] + +use core::borrow::{Borrow, BorrowMut}; +#[cfg(not(no_global_oom_handling))] +use core::cmp::Ordering::{self, Less}; +#[cfg(not(no_global_oom_handling))] +use core::mem; +#[cfg(not(no_global_oom_handling))] +use core::mem::size_of; +#[cfg(not(no_global_oom_handling))] +use core::ptr; + +use crate::alloc::Allocator; +use crate::alloc::Global; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::ToOwned; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::vec::Vec; + +#[unstable(feature = "slice_range", issue = "76393")] +pub use core::slice::range; +#[unstable(feature = "array_chunks", issue = "74985")] +pub use core::slice::ArrayChunks; +#[unstable(feature = "array_chunks", issue = "74985")] +pub use core::slice::ArrayChunksMut; +#[unstable(feature = "array_windows", issue = "75027")] +pub use core::slice::ArrayWindows; +#[stable(feature = "slice_get_slice", since = "1.28.0")] +pub use core::slice::SliceIndex; +#[stable(feature = "from_ref", since = "1.28.0")] +pub use core::slice::{from_mut, from_ref}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{from_raw_parts, from_raw_parts_mut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{Chunks, Windows}; +#[stable(feature = "chunks_exact", since = "1.31.0")] +pub use core::slice::{ChunksExact, ChunksExactMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{ChunksMut, Split, SplitMut}; +#[unstable(feature = "slice_group_by", issue = "80552")] +pub use core::slice::{GroupBy, GroupByMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{Iter, IterMut}; +#[stable(feature = "rchunks", since = "1.31.0")] +pub use core::slice::{RChunks, RChunksExact, RChunksExactMut, RChunksMut}; +#[stable(feature = "slice_rsplit", since = "1.27.0")] +pub use core::slice::{RSplit, RSplitMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{RSplitN, 
RSplitNMut, SplitN, SplitNMut}; + +//////////////////////////////////////////////////////////////////////////////// +// Basic slice extension methods +//////////////////////////////////////////////////////////////////////////////// + +// HACK(japaric) needed for the implementation of `vec!` macro during testing +// N.B., see the `hack` module in this file for more details. +#[cfg(test)] +pub use hack::into_vec; + +// HACK(japaric) needed for the implementation of `Vec::clone` during testing +// N.B., see the `hack` module in this file for more details. +#[cfg(test)] +pub use hack::to_vec; + +// HACK(japaric): With cfg(test) `impl [T]` is not available, these three +// functions are actually methods that are in `impl [T]` but not in +// `core::slice::SliceExt` - we need to supply these functions for the +// `test_permutations` test +mod hack { + use core::alloc::Allocator; + + use crate::boxed::Box; + use crate::collections::TryReserveError; + use crate::vec::Vec; + + // We shouldn't add inline attribute to this since this is used in + // `vec!` macro mostly and causes perf regression. See #71204 for + // discussion and perf results. 
+ pub fn into_vec(b: Box<[T], A>) -> Vec { + unsafe { + let len = b.len(); + let (b, alloc) = Box::into_raw_with_allocator(b); + Vec::from_raw_parts_in(b as *mut T, len, len, alloc) + } + } + + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn to_vec(s: &[T], alloc: A) -> Vec { + T::to_vec(s, alloc) + } + + #[inline] + pub fn try_to_vec(s: &[T], alloc: A) -> Result, TryReserveError> { + T::try_to_vec(s, alloc) + } + + #[cfg(not(no_global_oom_handling))] + pub trait ConvertVec { + fn to_vec(s: &[Self], alloc: A) -> Vec + where + Self: Sized; + } + + pub trait TryConvertVec { + fn try_to_vec(s: &[Self], alloc: A) -> Result, TryReserveError> + where + Self: Sized; + } + + #[cfg(not(no_global_oom_handling))] + impl ConvertVec for T { + #[inline] + default fn to_vec(s: &[Self], alloc: A) -> Vec { + struct DropGuard<'a, T, A: Allocator> { + vec: &'a mut Vec, + num_init: usize, + } + impl<'a, T, A: Allocator> Drop for DropGuard<'a, T, A> { + #[inline] + fn drop(&mut self) { + // SAFETY: + // items were marked initialized in the loop below + unsafe { + self.vec.set_len(self.num_init); + } + } + } + let mut vec = Vec::with_capacity_in(s.len(), alloc); + let mut guard = DropGuard { vec: &mut vec, num_init: 0 }; + let slots = guard.vec.spare_capacity_mut(); + // .take(slots.len()) is necessary for LLVM to remove bounds checks + // and has better codegen than zip. + for (i, b) in s.iter().enumerate().take(slots.len()) { + guard.num_init = i; + slots[i].write(b.clone()); + } + core::mem::forget(guard); + // SAFETY: + // the vec was allocated and initialized above to at least this length. + unsafe { + vec.set_len(s.len()); + } + vec + } + } + + #[cfg(not(no_global_oom_handling))] + impl ConvertVec for T { + #[inline] + fn to_vec(s: &[Self], alloc: A) -> Vec { + let mut v = Vec::with_capacity_in(s.len(), alloc); + // SAFETY: + // allocated above with the capacity of `s`, and initialize to `s.len()` in + // ptr::copy_to_non_overlapping below. 
+ unsafe { + s.as_ptr().copy_to_nonoverlapping(v.as_mut_ptr(), s.len()); + v.set_len(s.len()); + } + v + } + } + + impl TryConvertVec for T { + #[inline] + default fn try_to_vec(s: &[Self], alloc: A) -> Result, TryReserveError> { + struct DropGuard<'a, T, A: Allocator> { + vec: &'a mut Vec, + num_init: usize, + } + impl<'a, T, A: Allocator> Drop for DropGuard<'a, T, A> { + #[inline] + fn drop(&mut self) { + // SAFETY: + // items were marked initialized in the loop below + unsafe { + self.vec.set_len(self.num_init); + } + } + } + let mut vec = Vec::try_with_capacity_in(s.len(), alloc)?; + let mut guard = DropGuard { vec: &mut vec, num_init: 0 }; + let slots = guard.vec.spare_capacity_mut(); + // .take(slots.len()) is necessary for LLVM to remove bounds checks + // and has better codegen than zip. + for (i, b) in s.iter().enumerate().take(slots.len()) { + guard.num_init = i; + slots[i].write(b.clone()); + } + core::mem::forget(guard); + // SAFETY: + // the vec was allocated and initialized above to at least this length. + unsafe { + vec.set_len(s.len()); + } + Ok(vec) + } + } +} + +#[lang = "slice_alloc"] +#[cfg(not(test))] +impl [T] { + /// Sorts the slice. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case. + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable`](slice::sort_unstable). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. 
+ /// + /// # Examples + /// + /// ``` + /// let mut v = [-5, 4, 1, -3, 2]; + /// + /// v.sort(); + /// assert!(v == [-5, -3, 1, 2, 4]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn sort(&mut self) + where + T: Ord, + { + merge_sort(self, |a, b| a.lt(b)); + } + + /// Sorts the slice with a comparator function. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case. + /// + /// The comparator function must define a total ordering for the elements in the slice. If + /// the ordering is not total, the order of the elements is unspecified. An order is a + /// total order if it is (for all `a`, `b` and `c`): + /// + /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and + /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`. + /// + /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use + /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`. + /// + /// ``` + /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0]; + /// floats.sort_by(|a, b| a.partial_cmp(b).unwrap()); + /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]); + /// ``` + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable_by`](slice::sort_unstable_by). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. 
+ /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. + /// + /// # Examples + /// + /// ``` + /// let mut v = [5, 4, 1, 3, 2]; + /// v.sort_by(|a, b| a.cmp(b)); + /// assert!(v == [1, 2, 3, 4, 5]); + /// + /// // reverse sorting + /// v.sort_by(|a, b| b.cmp(a)); + /// assert!(v == [5, 4, 3, 2, 1]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn sort_by(&mut self, mut compare: F) + where + F: FnMut(&T, &T) -> Ordering, + { + merge_sort(self, |a, b| compare(a, b) == Less); + } + + /// Sorts the slice with a key extraction function. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*)) + /// worst-case, where the key function is *O*(*m*). + /// + /// For expensive key functions (e.g. functions that are not simple property accesses or + /// basic operations), [`sort_by_cached_key`](slice::sort_by_cached_key) is likely to be + /// significantly faster, as it does not recompute element keys. + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable_by_key`](slice::sort_unstable_by_key). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. 
+ /// + /// # Examples + /// + /// ``` + /// let mut v = [-5i32, 4, 1, -3, 2]; + /// + /// v.sort_by_key(|k| k.abs()); + /// assert!(v == [1, 2, -3, 4, -5]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "slice_sort_by_key", since = "1.7.0")] + #[inline] + pub fn sort_by_key(&mut self, mut f: F) + where + F: FnMut(&T) -> K, + K: Ord, + { + merge_sort(self, |a, b| f(a).lt(&f(b))); + } + + /// Sorts the slice with a key extraction function. + /// + /// During sorting, the key function is called only once per element. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*)) + /// worst-case, where the key function is *O*(*m*). + /// + /// For simple key functions (e.g., functions that are property accesses or + /// basic operations), [`sort_by_key`](slice::sort_by_key) is likely to be + /// faster. + /// + /// # Current implementation + /// + /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters, + /// which combines the fast average case of randomized quicksort with the fast worst case of + /// heapsort, while achieving linear time on slices with certain patterns. It uses some + /// randomization to avoid degenerate cases, but with a fixed seed to always provide + /// deterministic behavior. + /// + /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the + /// length of the slice. + /// + /// # Examples + /// + /// ``` + /// let mut v = [-5i32, 4, 32, -3, 2]; + /// + /// v.sort_by_cached_key(|k| k.to_string()); + /// assert!(v == [-3, -5, 2, 32, 4]); + /// ``` + /// + /// [pdqsort]: https://github.com/orlp/pdqsort + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "slice_sort_by_cached_key", since = "1.34.0")] + #[inline] + pub fn sort_by_cached_key(&mut self, f: F) + where + F: FnMut(&T) -> K, + K: Ord, + { + // Helper macro for indexing our vector by the smallest possible type, to reduce allocation. 
+ macro_rules! sort_by_key { + ($t:ty, $slice:ident, $f:ident) => {{ + let mut indices: Vec<_> = + $slice.iter().map($f).enumerate().map(|(i, k)| (k, i as $t)).collect(); + // The elements of `indices` are unique, as they are indexed, so any sort will be + // stable with respect to the original slice. We use `sort_unstable` here because + // it requires less memory allocation. + indices.sort_unstable(); + for i in 0..$slice.len() { + let mut index = indices[i].1; + while (index as usize) < i { + index = indices[index as usize].1; + } + indices[i].1 = index; + $slice.swap(i, index as usize); + } + }}; + } + + let sz_u8 = mem::size_of::<(K, u8)>(); + let sz_u16 = mem::size_of::<(K, u16)>(); + let sz_u32 = mem::size_of::<(K, u32)>(); + let sz_usize = mem::size_of::<(K, usize)>(); + + let len = self.len(); + if len < 2 { + return; + } + if sz_u8 < sz_u16 && len <= (u8::MAX as usize) { + return sort_by_key!(u8, self, f); + } + if sz_u16 < sz_u32 && len <= (u16::MAX as usize) { + return sort_by_key!(u16, self, f); + } + if sz_u32 < sz_usize && len <= (u32::MAX as usize) { + return sort_by_key!(u32, self, f); + } + sort_by_key!(usize, self, f) + } + + /// Copies `self` into a new `Vec`. + /// + /// # Examples + /// + /// ``` + /// let s = [10, 40, 30]; + /// let x = s.to_vec(); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_conversion_suggestion] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn to_vec(&self) -> Vec + where + T: Clone, + { + self.to_vec_in(Global) + } + + /// Tries to copy `self` into a new `Vec`. + /// + /// # Examples + /// + /// ``` + /// let s = [10, 40, 30]; + /// let x = s.try_to_vec().unwrap(); + /// // Here, `s` and `x` can be modified independently. 
+ /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_vec(&self) -> Result, TryReserveError> + where + T: Clone, + { + self.try_to_vec_in(Global) + } + + /// Copies `self` into a new `Vec` with an allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let s = [10, 40, 30]; + /// let x = s.to_vec_in(System); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn to_vec_in(&self, alloc: A) -> Vec + where + T: Clone, + { + // N.B., see the `hack` module in this file for more details. + hack::to_vec(self, alloc) + } + + /// Tries to copy `self` into a new `Vec` with an allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let s = [10, 40, 30]; + /// let x = s.try_to_vec_in(System).unwrap(); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_vec_in(&self, alloc: A) -> Result, TryReserveError> + where + T: Clone, + { + // N.B., see the `hack` module in this file for more details. + hack::try_to_vec(self, alloc) + } + + /// Converts `self` into a vector without clones or allocation. + /// + /// The resulting vector can be converted back into a box via + /// `Vec`'s `into_boxed_slice` method. + /// + /// # Examples + /// + /// ``` + /// let s: Box<[i32]> = Box::new([10, 40, 30]); + /// let x = s.into_vec(); + /// // `s` cannot be used anymore because it has been converted into `x`. + /// + /// assert_eq!(x, vec![10, 40, 30]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn into_vec(self: Box) -> Vec { + // N.B., see the `hack` module in this file for more details. 
+ hack::into_vec(self) + } + + /// Creates a vector by repeating a slice `n` times. + /// + /// # Panics + /// + /// This function will panic if the capacity would overflow. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// assert_eq!([1, 2].repeat(3), vec![1, 2, 1, 2, 1, 2]); + /// ``` + /// + /// A panic upon overflow: + /// + /// ```should_panic + /// // this will panic at runtime + /// b"0123456789abcdef".repeat(usize::MAX); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "repeat_generic_slice", since = "1.40.0")] + pub fn repeat(&self, n: usize) -> Vec + where + T: Copy, + { + if n == 0 { + return Vec::new(); + } + + // If `n` is larger than zero, it can be split as + // `n = 2^expn + rem (2^expn > rem, expn >= 0, rem >= 0)`. + // `2^expn` is the number represented by the leftmost '1' bit of `n`, + // and `rem` is the remaining part of `n`. + + // Using `Vec` to access `set_len()`. + let capacity = self.len().checked_mul(n).expect("capacity overflow"); + let mut buf = Vec::with_capacity(capacity); + + // `2^expn` repetition is done by doubling `buf` `expn`-times. + buf.extend(self); + { + let mut m = n >> 1; + // If `m > 0`, there are remaining bits up to the leftmost '1'. + while m > 0 { + // `buf.extend(buf)`: + unsafe { + ptr::copy_nonoverlapping( + buf.as_ptr(), + (buf.as_mut_ptr() as *mut T).add(buf.len()), + buf.len(), + ); + // `buf` has capacity of `self.len() * n`. + let buf_len = buf.len(); + buf.set_len(buf_len * 2); + } + + m >>= 1; + } + } + + // `rem` (`= n - 2^expn`) repetition is done by copying + // first `rem` repetitions from `buf` itself. + let rem_len = capacity - buf.len(); // `self.len() * rem` + if rem_len > 0 { + // `buf.extend(buf[0 .. rem_len])`: + unsafe { + // This is non-overlapping since `2^expn > rem`. + ptr::copy_nonoverlapping( + buf.as_ptr(), + (buf.as_mut_ptr() as *mut T).add(buf.len()), + rem_len, + ); + // `buf.len() + rem_len` equals to `buf.capacity()` (`= self.len() * n`). 
+ buf.set_len(capacity); + } + } + buf + } + + /// Flattens a slice of `T` into a single value `Self::Output`. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(["hello", "world"].concat(), "helloworld"); + /// assert_eq!([[1, 2], [3, 4]].concat(), [1, 2, 3, 4]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn concat(&self) -> >::Output + where + Self: Concat, + { + Concat::concat(self) + } + + /// Flattens a slice of `T` into a single value `Self::Output`, placing a + /// given separator between each. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(["hello", "world"].join(" "), "hello world"); + /// assert_eq!([[1, 2], [3, 4]].join(&0), [1, 2, 0, 3, 4]); + /// assert_eq!([[1, 2], [3, 4]].join(&[0, 0][..]), [1, 2, 0, 0, 3, 4]); + /// ``` + #[stable(feature = "rename_connect_to_join", since = "1.3.0")] + pub fn join(&self, sep: Separator) -> >::Output + where + Self: Join, + { + Join::join(self, sep) + } + + /// Flattens a slice of `T` into a single value `Self::Output`, placing a + /// given separator between each. + /// + /// # Examples + /// + /// ``` + /// # #![allow(deprecated)] + /// assert_eq!(["hello", "world"].connect(" "), "hello world"); + /// assert_eq!([[1, 2], [3, 4]].connect(&0), [1, 2, 0, 3, 4]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[rustc_deprecated(since = "1.3.0", reason = "renamed to join")] + pub fn connect(&self, sep: Separator) -> >::Output + where + Self: Join, + { + Join::join(self, sep) + } +} + +#[lang = "slice_u8_alloc"] +#[cfg(not(test))] +impl [u8] { + /// Returns a vector containing a copy of this slice where each byte + /// is mapped to its ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. 
+ /// + /// [`make_ascii_uppercase`]: slice::make_ascii_uppercase + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> Vec { + let mut me = self.to_vec(); + me.make_ascii_uppercase(); + me + } + + /// Returns a vector containing a copy of this slice where each byte + /// is mapped to its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// [`make_ascii_lowercase`]: slice::make_ascii_lowercase + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> Vec { + let mut me = self.to_vec(); + me.make_ascii_lowercase(); + me + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Extension traits for slices over specific kinds of data +//////////////////////////////////////////////////////////////////////////////// + +/// Helper trait for [`[T]::concat`](slice::concat). +/// +/// Note: the `Item` type parameter is not used in this trait, +/// but it allows impls to be more generic. 
+/// Without it, we get this error: +/// +/// ```error +/// error[E0207]: the type parameter `T` is not constrained by the impl trait, self type, or predica +/// --> src/liballoc/slice.rs:608:6 +/// | +/// 608 | impl> Concat for [V] { +/// | ^ unconstrained type parameter +/// ``` +/// +/// This is because there could exist `V` types with multiple `Borrow<[_]>` impls, +/// such that multiple `T` types would apply: +/// +/// ``` +/// # #[allow(dead_code)] +/// pub struct Foo(Vec, Vec); +/// +/// impl std::borrow::Borrow<[u32]> for Foo { +/// fn borrow(&self) -> &[u32] { &self.0 } +/// } +/// +/// impl std::borrow::Borrow<[String]> for Foo { +/// fn borrow(&self) -> &[String] { &self.1 } +/// } +/// ``` +#[unstable(feature = "slice_concat_trait", issue = "27747")] +pub trait Concat { + #[unstable(feature = "slice_concat_trait", issue = "27747")] + /// The resulting type after concatenation + type Output; + + /// Implementation of [`[T]::concat`](slice::concat) + #[unstable(feature = "slice_concat_trait", issue = "27747")] + fn concat(slice: &Self) -> Self::Output; +} + +/// Helper trait for [`[T]::join`](slice::join) +#[unstable(feature = "slice_concat_trait", issue = "27747")] +pub trait Join { + #[unstable(feature = "slice_concat_trait", issue = "27747")] + /// The resulting type after concatenation + type Output; + + /// Implementation of [`[T]::join`](slice::join) + #[unstable(feature = "slice_concat_trait", issue = "27747")] + fn join(slice: &Self, sep: Separator) -> Self::Output; +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl> Concat for [V] { + type Output = Vec; + + fn concat(slice: &Self) -> Vec { + let size = slice.iter().map(|slice| slice.borrow().len()).sum(); + let mut result = Vec::with_capacity(size); + for v in slice { + result.extend_from_slice(v.borrow()) + } + result + } +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl> Join<&T> 
for [V] { + type Output = Vec; + + fn join(slice: &Self, sep: &T) -> Vec { + let mut iter = slice.iter(); + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + let size = slice.iter().map(|v| v.borrow().len()).sum::() + slice.len() - 1; + let mut result = Vec::with_capacity(size); + result.extend_from_slice(first.borrow()); + + for v in iter { + result.push(sep.clone()); + result.extend_from_slice(v.borrow()) + } + result + } +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl> Join<&[T]> for [V] { + type Output = Vec; + + fn join(slice: &Self, sep: &[T]) -> Vec { + let mut iter = slice.iter(); + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + let size = + slice.iter().map(|v| v.borrow().len()).sum::() + sep.len() * (slice.len() - 1); + let mut result = Vec::with_capacity(size); + result.extend_from_slice(first.borrow()); + + for v in iter { + result.extend_from_slice(sep); + result.extend_from_slice(v.borrow()) + } + result + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Standard trait implementations for slices +//////////////////////////////////////////////////////////////////////////////// + +#[stable(feature = "rust1", since = "1.0.0")] +impl Borrow<[T]> for Vec { + fn borrow(&self) -> &[T] { + &self[..] + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl BorrowMut<[T]> for Vec { + fn borrow_mut(&mut self) -> &mut [T] { + &mut self[..] 
+ } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl ToOwned for [T] { + type Owned = Vec; + #[cfg(not(test))] + fn to_owned(&self) -> Vec { + self.to_vec() + } + + #[cfg(test)] + fn to_owned(&self) -> Vec { + hack::to_vec(self, Global) + } + + fn clone_into(&self, target: &mut Vec) { + // drop anything in target that will not be overwritten + target.truncate(self.len()); + + // target.len <= self.len due to the truncate above, so the + // slices here are always in-bounds. + let (init, tail) = self.split_at(target.len()); + + // reuse the contained values' allocations/resources. + target.clone_from_slice(init); + target.extend_from_slice(tail); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Sorting +//////////////////////////////////////////////////////////////////////////////// + +/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted. +/// +/// This is the integral subroutine of insertion sort. +#[cfg(not(no_global_oom_handling))] +fn insert_head(v: &mut [T], is_less: &mut F) +where + F: FnMut(&T, &T) -> bool, +{ + if v.len() >= 2 && is_less(&v[1], &v[0]) { + unsafe { + // There are three ways to implement insertion here: + // + // 1. Swap adjacent elements until the first one gets to its final destination. + // However, this way we copy data around more than is necessary. If elements are big + // structures (costly to copy), this method will be slow. + // + // 2. Iterate until the right place for the first element is found. Then shift the + // elements succeeding it to make room for it and finally place it into the + // remaining hole. This is a good method. + // + // 3. Copy the first element into a temporary variable. Iterate until the right place + // for it is found. As we go along, copy every traversed element into the slot + // preceding it. Finally, copy data from the temporary variable into the remaining + // hole. 
This method is very good. Benchmarks demonstrated slightly better + // performance than with the 2nd method. + // + // All methods were benchmarked, and the 3rd showed best results. So we chose that one. + let mut tmp = mem::ManuallyDrop::new(ptr::read(&v[0])); + + // Intermediate state of the insertion process is always tracked by `hole`, which + // serves two purposes: + // 1. Protects integrity of `v` from panics in `is_less`. + // 2. Fills the remaining hole in `v` in the end. + // + // Panic safety: + // + // If `is_less` panics at any point during the process, `hole` will get dropped and + // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it + // initially held exactly once. + let mut hole = InsertionHole { src: &mut *tmp, dest: &mut v[1] }; + ptr::copy_nonoverlapping(&v[1], &mut v[0], 1); + + for i in 2..v.len() { + if !is_less(&v[i], &*tmp) { + break; + } + ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1); + hole.dest = &mut v[i]; + } + // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`. + } + } + + // When dropped, copies from `src` into `dest`. + struct InsertionHole { + src: *mut T, + dest: *mut T, + } + + impl Drop for InsertionHole { + fn drop(&mut self) { + unsafe { + ptr::copy_nonoverlapping(self.src, self.dest, 1); + } + } + } +} + +/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and +/// stores the result into `v[..]`. +/// +/// # Safety +/// +/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough +/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type. +#[cfg(not(no_global_oom_handling))] +unsafe fn merge(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F) +where + F: FnMut(&T, &T) -> bool, +{ + let len = v.len(); + let v = v.as_mut_ptr(); + let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) }; + + // The merge process first copies the shorter run into `buf`. 
Then it traces the newly copied + // run and the longer run forwards (or backwards), comparing their next unconsumed elements and + // copying the lesser (or greater) one into `v`. + // + // As soon as the shorter run is fully consumed, the process is done. If the longer run gets + // consumed first, then we must copy whatever is left of the shorter run into the remaining + // hole in `v`. + // + // Intermediate state of the process is always tracked by `hole`, which serves two purposes: + // 1. Protects integrity of `v` from panics in `is_less`. + // 2. Fills the remaining hole in `v` if the longer run gets consumed first. + // + // Panic safety: + // + // If `is_less` panics at any point during the process, `hole` will get dropped and fill the + // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every + // object it initially held exactly once. + let mut hole; + + if mid <= len - mid { + // The left run is shorter. + unsafe { + ptr::copy_nonoverlapping(v, buf, mid); + hole = MergeHole { start: buf, end: buf.add(mid), dest: v }; + } + + // Initially, these pointers point to the beginnings of their arrays. + let left = &mut hole.start; + let mut right = v_mid; + let out = &mut hole.dest; + + while *left < hole.end && right < v_end { + // Consume the lesser side. + // If equal, prefer the left run to maintain stability. + unsafe { + let to_copy = if is_less(&*right, &**left) { + get_and_increment(&mut right) + } else { + get_and_increment(left) + }; + ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1); + } + } + } else { + // The right run is shorter. + unsafe { + ptr::copy_nonoverlapping(v_mid, buf, len - mid); + hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid }; + } + + // Initially, these pointers point past the ends of their arrays. + let left = &mut hole.dest; + let right = &mut hole.end; + let mut out = v_end; + + while v < *left && buf < *right { + // Consume the greater side. 
+ // If equal, prefer the right run to maintain stability. + unsafe { + let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) { + decrement_and_get(left) + } else { + decrement_and_get(right) + }; + ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1); + } + } + } + // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of + // it will now be copied into the hole in `v`. + + unsafe fn get_and_increment(ptr: &mut *mut T) -> *mut T { + let old = *ptr; + *ptr = unsafe { ptr.offset(1) }; + old + } + + unsafe fn decrement_and_get(ptr: &mut *mut T) -> *mut T { + *ptr = unsafe { ptr.offset(-1) }; + *ptr + } + + // When dropped, copies the range `start..end` into `dest..`. + struct MergeHole { + start: *mut T, + end: *mut T, + dest: *mut T, + } + + impl Drop for MergeHole { + fn drop(&mut self) { + // `T` is not a zero-sized type, so it's okay to divide by its size. + let len = (self.end as usize - self.start as usize) / mem::size_of::(); + unsafe { + ptr::copy_nonoverlapping(self.start, self.dest, len); + } + } + } +} + +/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail +/// [here](http://svn.python.org/projects/python/trunk/Objects/listsort.txt). +/// +/// The algorithm identifies strictly descending and non-descending subsequences, which are called +/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed +/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are +/// satisfied: +/// +/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len` +/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len` +/// +/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case. 
+#[cfg(not(no_global_oom_handling))] +fn merge_sort(v: &mut [T], mut is_less: F) +where + F: FnMut(&T, &T) -> bool, +{ + // Slices of up to this length get sorted using insertion sort. + const MAX_INSERTION: usize = 20; + // Very short runs are extended using insertion sort to span at least this many elements. + const MIN_RUN: usize = 10; + + // Sorting has no meaningful behavior on zero-sized types. + if size_of::() == 0 { + return; + } + + let len = v.len(); + + // Short arrays get sorted in-place via insertion sort to avoid allocations. + if len <= MAX_INSERTION { + if len >= 2 { + for i in (0..len - 1).rev() { + insert_head(&mut v[i..], &mut is_less); + } + } + return; + } + + // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it + // shallow copies of the contents of `v` without risking the dtors running on copies if + // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run, + // which will always have length at most `len / 2`. + let mut buf = Vec::with_capacity(len / 2); + + // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a + // strange decision, but consider the fact that merges more often go in the opposite direction + // (forwards). According to benchmarks, merging forwards is slightly faster than merging + // backwards. To conclude, identifying runs by traversing backwards improves performance. + let mut runs = vec![]; + let mut end = len; + while end > 0 { + // Find the next natural run, and reverse it if it's strictly descending. 
+ let mut start = end - 1; + if start > 0 { + start -= 1; + unsafe { + if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) { + while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) { + start -= 1; + } + v[start..end].reverse(); + } else { + while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) + { + start -= 1; + } + } + } + } + + // Insert some more elements into the run if it's too short. Insertion sort is faster than + // merge sort on short sequences, so this significantly improves performance. + while start > 0 && end - start < MIN_RUN { + start -= 1; + insert_head(&mut v[start..end], &mut is_less); + } + + // Push this run onto the stack. + runs.push(Run { start, len: end - start }); + end = start; + + // Merge some pairs of adjacent runs to satisfy the invariants. + while let Some(r) = collapse(&runs) { + let left = runs[r + 1]; + let right = runs[r]; + unsafe { + merge( + &mut v[left.start..right.start + right.len], + left.len, + buf.as_mut_ptr(), + &mut is_less, + ); + } + runs[r] = Run { start: left.start, len: left.len + right.len }; + runs.remove(r + 1); + } + } + + // Finally, exactly one run must remain in the stack. + debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len); + + // Examines the stack of runs and identifies the next pair of runs to merge. More specifically, + // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the + // algorithm should continue building a new run instead, `None` is returned. + // + // TimSort is infamous for its buggy implementations, as described here: + // http://envisage-project.eu/timsort-specification-and-verification/ + // + // The gist of the story is: we must enforce the invariants on the top four runs on the stack. + // Enforcing them on just top three is not sufficient to ensure that the invariants will still + // hold for *all* runs in the stack. 
+ // + // This function correctly checks invariants for the top four runs. Additionally, if the top + // run starts at index 0, it will always demand a merge operation until the stack is fully + // collapsed, in order to complete the sort. + #[inline] + fn collapse(runs: &[Run]) -> Option { + let n = runs.len(); + if n >= 2 + && (runs[n - 1].start == 0 + || runs[n - 2].len <= runs[n - 1].len + || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len) + || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len)) + { + if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) } + } else { + None + } + } + + #[derive(Clone, Copy)] + struct Run { + start: usize, + len: usize, + } +} diff --git a/rust/alloc/str.rs b/rust/alloc/str.rs new file mode 100644 index 0000000000000..ed31405fa1c44 --- /dev/null +++ b/rust/alloc/str.rs @@ -0,0 +1,614 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Unicode string slices. +//! +//! *[See also the `str` primitive type](str).* +//! +//! The `&str` type is one of the two main string types, the other being `String`. +//! Unlike its `String` counterpart, its contents are borrowed. +//! +//! # Basic Usage +//! +//! A basic string declaration of `&str` type: +//! +//! ``` +//! let hello_world = "Hello, World!"; +//! ``` +//! +//! Here we have declared a string literal, also known as a string slice. +//! String literals have a static lifetime, which means the string `hello_world` +//! is guaranteed to be valid for the duration of the entire program. +//! We can explicitly specify `hello_world`'s lifetime as well: +//! +//! ``` +//! let hello_world: &'static str = "Hello, world!"; +//! ``` + +#![stable(feature = "rust1", since = "1.0.0")] +// Many of the usings in this module are only used in the test configuration. +// It's cleaner to just turn off the unused_imports warning than to fix them. 
+#![allow(unused_imports)] + +use core::borrow::{Borrow, BorrowMut}; +use core::iter::FusedIterator; +use core::mem; +use core::ptr; +use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; +use core::unicode::conversions; + +use crate::borrow::ToOwned; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::slice::{Concat, Join, SliceIndex}; +use crate::string::String; +use crate::vec::Vec; + +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::pattern; +#[stable(feature = "encode_utf16", since = "1.8.0")] +pub use core::str::EncodeUtf16; +#[stable(feature = "split_ascii_whitespace", since = "1.34.0")] +pub use core::str::SplitAsciiWhitespace; +#[stable(feature = "split_inclusive", since = "1.53.0")] +pub use core::str::SplitInclusive; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::SplitWhitespace; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{from_utf8, from_utf8_mut, Bytes, CharIndices, Chars}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError}; +#[stable(feature = "str_escape", since = "1.34.0")] +pub use core::str::{EscapeDebug, EscapeDefault, EscapeUnicode}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{FromStr, Utf8Error}; +#[allow(deprecated)] +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{Lines, LinesAny}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{MatchIndices, RMatchIndices}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{Matches, RMatches}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{RSplit, Split}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{RSplitN, SplitN}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{RSplitTerminator, SplitTerminator}; + +/// Note: `str` in `Concat` is not meaningful here. 
+/// This type parameter of the trait only exists to enable another impl. +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl> Concat for [S] { + type Output = String; + + fn concat(slice: &Self) -> String { + Join::join(slice, "") + } +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl> Join<&str> for [S] { + type Output = String; + + fn join(slice: &Self, sep: &str) -> String { + unsafe { String::from_utf8_unchecked(join_generic_copy(slice, sep.as_bytes())) } + } +} + +#[cfg(not(no_global_oom_handling))] +macro_rules! specialize_for_lengths { + ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {{ + let mut target = $target; + let iter = $iter; + let sep_bytes = $separator; + match $separator.len() { + $( + // loops with hardcoded sizes run much faster + // specialize the cases with small separator lengths + $num => { + for s in iter { + copy_slice_and_advance!(target, sep_bytes); + let content_bytes = s.borrow().as_ref(); + copy_slice_and_advance!(target, content_bytes); + } + }, + )* + _ => { + // arbitrary non-zero size fallback + for s in iter { + copy_slice_and_advance!(target, sep_bytes); + let content_bytes = s.borrow().as_ref(); + copy_slice_and_advance!(target, content_bytes); + } + } + } + target + }} +} + +#[cfg(not(no_global_oom_handling))] +macro_rules! copy_slice_and_advance { + ($target:expr, $bytes:expr) => { + let len = $bytes.len(); + let (head, tail) = { $target }.split_at_mut(len); + head.copy_from_slice($bytes); + $target = tail; + }; +} + +// Optimized join implementation that works for both Vec (T: Copy) and String's inner vec +// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262) +// For this reason SliceConcat is not specialized for T: Copy and SliceConcat is the +// only user of this function. It is left in place for the time when that is fixed. 
+// +// the bounds for String-join are S: Borrow and for Vec-join Borrow<[T]> +// [T] and str both impl AsRef<[T]> for some T +// => s.borrow().as_ref() and we always have slices +#[cfg(not(no_global_oom_handling))] +fn join_generic_copy(slice: &[S], sep: &[T]) -> Vec +where + T: Copy, + B: AsRef<[T]> + ?Sized, + S: Borrow, +{ + let sep_len = sep.len(); + let mut iter = slice.iter(); + + // the first slice is the only one without a separator preceding it + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + + // compute the exact total length of the joined Vec + // if the `len` calculation overflows, we'll panic + // we would have run out of memory anyway and the rest of the function requires + // the entire Vec pre-allocated for safety + let reserved_len = sep_len + .checked_mul(iter.len()) + .and_then(|n| { + slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add) + }) + .expect("attempt to join into collection with len > usize::MAX"); + + // prepare an uninitialized buffer + let mut result = Vec::with_capacity(reserved_len); + debug_assert!(result.capacity() >= reserved_len); + + result.extend_from_slice(first.borrow().as_ref()); + + unsafe { + let pos = result.len(); + let target = result.get_unchecked_mut(pos..reserved_len); + + // copy separator and slices over without bounds checks + // generate loops with hardcoded offsets for small separators + // massive improvements possible (~ x2) + let remain = specialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4); + + // A weird borrow implementation may return different + // slices for the length calculation and the actual copy. + // Make sure we don't expose uninitialized bytes to the caller. + let result_len = reserved_len - remain.len(); + result.set_len(result_len); + } + result +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Borrow for String { + #[inline] + fn borrow(&self) -> &str { + &self[..] 
+ } +} + +#[stable(feature = "string_borrow_mut", since = "1.36.0")] +impl BorrowMut for String { + #[inline] + fn borrow_mut(&mut self) -> &mut str { + &mut self[..] + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl ToOwned for str { + type Owned = String; + #[inline] + fn to_owned(&self) -> String { + unsafe { String::from_utf8_unchecked(self.as_bytes().to_owned()) } + } + + fn clone_into(&self, target: &mut String) { + let mut b = mem::take(target).into_bytes(); + self.as_bytes().clone_into(&mut b); + *target = unsafe { String::from_utf8_unchecked(b) } + } +} + +/// Methods for string slices. +#[lang = "str_alloc"] +#[cfg(not(test))] +impl str { + /// Converts a `Box` into a `Box<[u8]>` without copying or allocating. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "this is a string"; + /// let boxed_str = s.to_owned().into_boxed_str(); + /// let boxed_bytes = boxed_str.into_boxed_bytes(); + /// assert_eq!(*boxed_bytes, *s.as_bytes()); + /// ``` + #[stable(feature = "str_box_extras", since = "1.20.0")] + #[inline] + pub fn into_boxed_bytes(self: Box) -> Box<[u8]> { + self.into() + } + + /// Replaces all matches of a pattern with another string. + /// + /// `replace` creates a new [`String`], and copies the data from this string slice into it. + /// While doing so, it attempts to find matches of a pattern. If it finds any, it + /// replaces them with the replacement string slice. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "this is old"; + /// + /// assert_eq!("this is new", s.replace("old", "new")); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// let s = "this is old"; + /// assert_eq!(s, s.replace("cookie monster", "little lamb")); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[must_use = "this returns the replaced string as a new allocation, \ + without modifying the original"] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn replace<'a, P: Pattern<'a>>(&'a self, from: P, to: &str) -> String { + let mut result = String::new(); + let mut last_end = 0; + for (start, part) in self.match_indices(from) { + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + result + } + + /// Replaces first N matches of a pattern with another string. + /// + /// `replacen` creates a new [`String`], and copies the data from this string slice into it. + /// While doing so, it attempts to find matches of a pattern. If it finds any, it + /// replaces them with the replacement string slice at most `count` times. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "foo foo 123 foo"; + /// assert_eq!("new new 123 foo", s.replacen("foo", "new", 2)); + /// assert_eq!("faa fao 123 foo", s.replacen('o', "a", 3)); + /// assert_eq!("foo foo new23 foo", s.replacen(char::is_numeric, "new", 1)); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// let s = "this is old"; + /// assert_eq!(s, s.replacen("cookie monster", "little lamb", 10)); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[must_use = "this returns the replaced string as a new allocation, \ + without modifying the original"] + #[stable(feature = "str_replacen", since = "1.16.0")] + pub fn replacen<'a, P: Pattern<'a>>(&'a self, pat: P, to: &str, count: usize) -> String { + // Hope to reduce the times of re-allocation + let mut result = String::with_capacity(32); + let mut last_end = 0; + for (start, part) in self.match_indices(pat).take(count) { + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + result + } + + /// Returns the lowercase equivalent of this string slice, as a new [`String`]. + /// + /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property + /// `Lowercase`. + /// + /// Since some characters can expand into multiple characters when changing + /// the case, this function returns a [`String`] instead of modifying the + /// parameter in-place. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "HELLO"; + /// + /// assert_eq!("hello", s.to_lowercase()); + /// ``` + /// + /// A tricky example, with sigma: + /// + /// ``` + /// let sigma = "Σ"; + /// + /// assert_eq!("σ", sigma.to_lowercase()); + /// + /// // but at the end of a word, it's ς, not σ: + /// let odysseus = "ὈΔΥΣΣΕΎΣ"; + /// + /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase()); + /// ``` + /// + /// Languages without case are not changed: + /// + /// ``` + /// let new_year = "农历新年"; + /// + /// assert_eq!(new_year, new_year.to_lowercase()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "unicode_case_mapping", since = "1.2.0")] + pub fn to_lowercase(&self) -> String { + let mut s = String::with_capacity(self.len()); + for (i, c) in self[..].char_indices() { + if c == 'Σ' { + // Σ maps to σ, except at the end of a word where it maps to ς. + // This is the only conditional (contextual) but language-independent mapping + // in `SpecialCasing.txt`, + // so hard-code it rather than have a generic "condition" mechanism. + // See https://github.com/rust-lang/rust/issues/26035 + map_uppercase_sigma(self, i, &mut s) + } else { + match conversions::to_lower(c) { + [a, '\0', _] => s.push(a), + [a, b, '\0'] => { + s.push(a); + s.push(b); + } + [a, b, c] => { + s.push(a); + s.push(b); + s.push(c); + } + } + } + } + return s; + + fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) { + // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 + // for the definition of `Final_Sigma`. 
+ debug_assert!('Σ'.len_utf8() == 2); + let is_word_final = case_ignoreable_then_cased(from[..i].chars().rev()) + && !case_ignoreable_then_cased(from[i + 2..].chars()); + to.push_str(if is_word_final { "ς" } else { "σ" }); + } + + fn case_ignoreable_then_cased>(iter: I) -> bool { + use core::unicode::{Case_Ignorable, Cased}; + match iter.skip_while(|&c| Case_Ignorable(c)).next() { + Some(c) => Cased(c), + None => false, + } + } + } + + /// Returns the uppercase equivalent of this string slice, as a new [`String`]. + /// + /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property + /// `Uppercase`. + /// + /// Since some characters can expand into multiple characters when changing + /// the case, this function returns a [`String`] instead of modifying the + /// parameter in-place. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "hello"; + /// + /// assert_eq!("HELLO", s.to_uppercase()); + /// ``` + /// + /// Scripts without case are not changed: + /// + /// ``` + /// let new_year = "农历新年"; + /// + /// assert_eq!(new_year, new_year.to_uppercase()); + /// ``` + /// + /// One character can become multiple: + /// ``` + /// let s = "tschüß"; + /// + /// assert_eq!("TSCHÜSS", s.to_uppercase()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "unicode_case_mapping", since = "1.2.0")] + pub fn to_uppercase(&self) -> String { + let mut s = String::with_capacity(self.len()); + for c in self[..].chars() { + match conversions::to_upper(c) { + [a, '\0', _] => s.push(a), + [a, b, '\0'] => { + s.push(a); + s.push(b); + } + [a, b, c] => { + s.push(a); + s.push(b); + s.push(c); + } + } + } + s + } + + /// Converts a [`Box`] into a [`String`] without copying or allocating. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let string = String::from("birthday gift"); + /// let boxed_str = string.clone().into_boxed_str(); + /// + /// assert_eq!(boxed_str.into_string(), string); + /// ``` + #[stable(feature = "box_str", since = "1.4.0")] + #[inline] + pub fn into_string(self: Box) -> String { + let slice = Box::<[u8]>::from(self); + unsafe { String::from_utf8_unchecked(slice.into_vec()) } + } + + /// Creates a new [`String`] by repeating a string `n` times. + /// + /// # Panics + /// + /// This function will panic if the capacity would overflow. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// assert_eq!("abc".repeat(4), String::from("abcabcabcabc")); + /// ``` + /// + /// A panic upon overflow: + /// + /// ```should_panic + /// // this will panic at runtime + /// "0123456789abcdef".repeat(usize::MAX); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "repeat_str", since = "1.16.0")] + pub fn repeat(&self, n: usize) -> String { + unsafe { String::from_utf8_unchecked(self.as_bytes().repeat(n)) } + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// To uppercase ASCII characters in addition to non-ASCII characters, use + /// [`to_uppercase`]. 
+ /// + /// # Examples + /// + /// ``` + /// let s = "Grüße, Jürgen ❤"; + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase()); + /// ``` + /// + /// [`make_ascii_uppercase`]: str::make_ascii_uppercase + /// [`to_uppercase`]: #method.to_uppercase + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> String { + let mut bytes = self.as_bytes().to_vec(); + bytes.make_ascii_uppercase(); + // make_ascii_uppercase() preserves the UTF-8 invariant. + unsafe { String::from_utf8_unchecked(bytes) } + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// To lowercase ASCII characters in addition to non-ASCII characters, use + /// [`to_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// let s = "Grüße, Jürgen ❤"; + /// + /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase()); + /// ``` + /// + /// [`make_ascii_lowercase`]: str::make_ascii_lowercase + /// [`to_lowercase`]: #method.to_lowercase + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> String { + let mut bytes = self.as_bytes().to_vec(); + bytes.make_ascii_lowercase(); + // make_ascii_lowercase() preserves the UTF-8 invariant. + unsafe { String::from_utf8_unchecked(bytes) } + } + + /// Tries to create a `String`. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s: &str = "a"; + /// let ss: String = s.try_to_owned().unwrap(); + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_owned(&self) -> Result { + unsafe { Ok(String::from_utf8_unchecked(self.as_bytes().try_to_vec()?)) } + } +} + +/// Converts a boxed slice of bytes to a boxed string slice without checking +/// that the string contains valid UTF-8. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// let smile_utf8 = Box::new([226, 152, 186]); +/// let smile = unsafe { std::str::from_boxed_utf8_unchecked(smile_utf8) }; +/// +/// assert_eq!("☺", &*smile); +/// ``` +#[stable(feature = "str_box_extras", since = "1.20.0")] +#[inline] +pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box { + unsafe { Box::from_raw(Box::into_raw(v) as *mut str) } +} diff --git a/rust/alloc/string.rs b/rust/alloc/string.rs new file mode 100644 index 0000000000000..55293c3041e7c --- /dev/null +++ b/rust/alloc/string.rs @@ -0,0 +1,2847 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A UTF-8–encoded, growable string. +//! +//! This module contains the [`String`] type, the [`ToString`] trait for +//! converting to strings, and several error types that may result from +//! working with [`String`]s. +//! +//! # Examples +//! +//! There are multiple ways to create a new [`String`] from a string literal: +//! +//! ``` +//! let s = "Hello".to_string(); +//! +//! let s = String::from("world"); +//! let s: String = "also this".into(); +//! ``` +//! +//! You can create a new [`String`] from an existing one by concatenating with +//! `+`: +//! +//! ``` +//! let s = "Hello".to_string(); +//! +//! let message = s + " world!"; +//! ``` +//! +//! If you have a vector of valid UTF-8 bytes, you can make a [`String`] out of +//! it. You can do the reverse too. +//! +//! ``` +//! let sparkle_heart = vec![240, 159, 146, 150]; +//! +//! 
// We know these bytes are valid, so we'll use `unwrap()`. +//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); +//! +//! assert_eq!("💖", sparkle_heart); +//! +//! let bytes = sparkle_heart.into_bytes(); +//! +//! assert_eq!(bytes, [240, 159, 146, 150]); +//! ``` + +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(no_global_oom_handling))] +use core::char::{decode_utf16, REPLACEMENT_CHARACTER}; +use core::fmt; +use core::hash; +#[cfg(not(no_global_oom_handling))] +use core::iter::{from_fn, FromIterator}; +use core::iter::FusedIterator; +#[cfg(not(no_global_oom_handling))] +use core::ops::Add; +#[cfg(not(no_global_oom_handling))] +use core::ops::AddAssign; +#[cfg(not(no_global_oom_handling))] +use core::ops::Bound::{Excluded, Included, Unbounded}; +use core::ops::{self, Index, IndexMut, Range, RangeBounds}; +use core::ptr; +use core::slice; +#[cfg(not(no_global_oom_handling))] +use core::str::lossy; +use core::str::pattern::Pattern; + +#[cfg(not(no_global_oom_handling))] +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::str::{self, Chars, Utf8Error}; +#[cfg(not(no_global_oom_handling))] +use crate::str::{from_boxed_utf8_unchecked, FromStr}; +use crate::vec::Vec; + +/// A UTF-8–encoded, growable string. +/// +/// The `String` type is the most common string type that has ownership over the +/// contents of the string. It has a close relationship with its borrowed +/// counterpart, the primitive [`str`]. 
+/// +/// # Examples +/// +/// You can create a `String` from [a literal string][`str`] with [`String::from`]: +/// +/// [`String::from`]: From::from +/// +/// ``` +/// let hello = String::from("Hello, world!"); +/// ``` +/// +/// You can append a [`char`] to a `String` with the [`push`] method, and +/// append a [`&str`] with the [`push_str`] method: +/// +/// ``` +/// let mut hello = String::from("Hello, "); +/// +/// hello.push('w'); +/// hello.push_str("orld!"); +/// ``` +/// +/// [`push`]: String::push +/// [`push_str`]: String::push_str +/// +/// If you have a vector of UTF-8 bytes, you can create a `String` from it with +/// the [`from_utf8`] method: +/// +/// ``` +/// // some bytes, in a vector +/// let sparkle_heart = vec![240, 159, 146, 150]; +/// +/// // We know these bytes are valid, so we'll use `unwrap()`. +/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); +/// +/// assert_eq!("💖", sparkle_heart); +/// ``` +/// +/// [`from_utf8`]: String::from_utf8 +/// +/// # UTF-8 +/// +/// `String`s are always valid UTF-8. This has a few implications, the first of +/// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is +/// similar, but without the UTF-8 constraint. The second implication is that +/// you cannot index into a `String`: +/// +/// ```compile_fail,E0277 +/// let s = "hello"; +/// +/// println!("The first letter of s is {}", s[0]); // ERROR!!! +/// ``` +/// +/// [`OsString`]: ../../std/ffi/struct.OsString.html +/// +/// Indexing is intended to be a constant-time operation, but UTF-8 encoding +/// does not allow us to do this. Furthermore, it's not clear what sort of +/// thing the index should return: a byte, a codepoint, or a grapheme cluster. +/// The [`bytes`] and [`chars`] methods return iterators over the first +/// two, respectively. +/// +/// [`bytes`]: str::bytes +/// [`chars`]: str::chars +/// +/// # Deref +/// +/// `String`s implement [`Deref`]``, and so inherit all of [`str`]'s +/// methods. 
In addition, this means that you can pass a `String` to a +/// function which takes a [`&str`] by using an ampersand (`&`): +/// +/// ``` +/// fn takes_str(s: &str) { } +/// +/// let s = String::from("Hello"); +/// +/// takes_str(&s); +/// ``` +/// +/// This will create a [`&str`] from the `String` and pass it in. This +/// conversion is very inexpensive, and so generally, functions will accept +/// [`&str`]s as arguments unless they need a `String` for some specific +/// reason. +/// +/// In certain cases Rust doesn't have enough information to make this +/// conversion, known as [`Deref`] coercion. In the following example a string +/// slice [`&'a str`][`&str`] implements the trait `TraitExample`, and the function +/// `example_func` takes anything that implements the trait. In this case Rust +/// would need to make two implicit conversions, which Rust doesn't have the +/// means to do. For that reason, the following example will not compile. +/// +/// ```compile_fail,E0277 +/// trait TraitExample {} +/// +/// impl<'a> TraitExample for &'a str {} +/// +/// fn example_func(example_arg: A) {} +/// +/// let example_string = String::from("example_string"); +/// example_func(&example_string); +/// ``` +/// +/// There are two options that would work instead. The first would be to +/// change the line `example_func(&example_string);` to +/// `example_func(example_string.as_str());`, using the method [`as_str()`] +/// to explicitly extract the string slice containing the string. The second +/// way changes `example_func(&example_string);` to +/// `example_func(&*example_string);`. In this case we are dereferencing a +/// `String` to a [`str`][`&str`], then referencing the [`str`][`&str`] back to +/// [`&str`]. The second way is more idiomatic, however both work to do the +/// conversion explicitly rather than relying on the implicit conversion. 
+/// +/// # Representation +/// +/// A `String` is made up of three components: a pointer to some bytes, a +/// length, and a capacity. The pointer points to an internal buffer `String` +/// uses to store its data. The length is the number of bytes currently stored +/// in the buffer, and the capacity is the size of the buffer in bytes. As such, +/// the length will always be less than or equal to the capacity. +/// +/// This buffer is always stored on the heap. +/// +/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`] +/// methods: +/// +/// ``` +/// use std::mem; +/// +/// let story = String::from("Once upon a time..."); +/// +// FIXME Update this when vec_into_raw_parts is stabilized +/// // Prevent automatically dropping the String's data +/// let mut story = mem::ManuallyDrop::new(story); +/// +/// let ptr = story.as_mut_ptr(); +/// let len = story.len(); +/// let capacity = story.capacity(); +/// +/// // story has nineteen bytes +/// assert_eq!(19, len); +/// +/// // We can re-build a String out of ptr, len, and capacity. This is all +/// // unsafe because we are responsible for making sure the components are +/// // valid: +/// let s = unsafe { String::from_raw_parts(ptr, len, capacity) } ; +/// +/// assert_eq!(String::from("Once upon a time..."), s); +/// ``` +/// +/// [`as_ptr`]: str::as_ptr +/// [`len`]: String::len +/// [`capacity`]: String::capacity +/// +/// If a `String` has enough capacity, adding elements to it will not +/// re-allocate. For example, consider this program: +/// +/// ``` +/// let mut s = String::new(); +/// +/// println!("{}", s.capacity()); +/// +/// for _ in 0..5 { +/// s.push_str("hello"); +/// println!("{}", s.capacity()); +/// } +/// ``` +/// +/// This will output the following: +/// +/// ```text +/// 0 +/// 5 +/// 10 +/// 20 +/// 20 +/// 40 +/// ``` +/// +/// At first, we have no memory allocated at all, but as we append to the +/// string, it increases its capacity appropriately. 
If we instead use the +/// [`with_capacity`] method to allocate the correct capacity initially: +/// +/// ``` +/// let mut s = String::with_capacity(25); +/// +/// println!("{}", s.capacity()); +/// +/// for _ in 0..5 { +/// s.push_str("hello"); +/// println!("{}", s.capacity()); +/// } +/// ``` +/// +/// [`with_capacity`]: String::with_capacity +/// +/// We end up with a different output: +/// +/// ```text +/// 25 +/// 25 +/// 25 +/// 25 +/// 25 +/// 25 +/// ``` +/// +/// Here, there's no need to allocate more memory inside the loop. +/// +/// [`str`]: prim@str +/// [`&str`]: prim@str +/// [`Deref`]: core::ops::Deref +/// [`as_str()`]: String::as_str +#[derive(PartialOrd, Eq, Ord)] +#[cfg_attr(not(test), rustc_diagnostic_item = "string_type")] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct String { + vec: Vec, +} + +/// A possible error value when converting a `String` from a UTF-8 byte vector. +/// +/// This type is the error type for the [`from_utf8`] method on [`String`]. It +/// is designed in such a way to carefully avoid reallocations: the +/// [`into_bytes`] method will give back the byte vector that was used in the +/// conversion attempt. +/// +/// [`from_utf8`]: String::from_utf8 +/// [`into_bytes`]: FromUtf8Error::into_bytes +/// +/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may +/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's +/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error` +/// through the [`utf8_error`] method. 
+/// +/// [`Utf8Error`]: core::str::Utf8Error +/// [`std::str`]: core::str +/// [`&str`]: prim@str +/// [`utf8_error`]: Self::utf8_error +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// // some invalid bytes, in a vector +/// let bytes = vec![0, 159]; +/// +/// let value = String::from_utf8(bytes); +/// +/// assert!(value.is_err()); +/// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes()); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(no_global_oom_handling), derive(Clone))] +#[derive(Debug, PartialEq, Eq)] +pub struct FromUtf8Error { + bytes: Vec, + error: Utf8Error, +} + +/// A possible error value when converting a `String` from a UTF-16 byte slice. +/// +/// This type is the error type for the [`from_utf16`] method on [`String`]. +/// +/// [`from_utf16`]: String::from_utf16 +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// // 𝄞muic +/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, +/// 0xD800, 0x0069, 0x0063]; +/// +/// assert!(String::from_utf16(v).is_err()); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug)] +pub struct FromUtf16Error(()); + +impl String { + /// Creates a new empty `String`. + /// + /// Given that the `String` is empty, this will not allocate any initial + /// buffer. While that means that this initial operation is very + /// inexpensive, it may cause excessive allocation later when you add + /// data. If you have an idea of how much data the `String` will hold, + /// consider the [`with_capacity`] method to prevent excessive + /// re-allocation. + /// + /// [`with_capacity`]: String::with_capacity + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::new(); + /// ``` + #[inline] + #[rustc_const_stable(feature = "const_string_new", since = "1.39.0")] + #[stable(feature = "rust1", since = "1.0.0")] + pub const fn new() -> String { + String { vec: Vec::new() } + } + + /// Creates a new empty `String` with a particular capacity. 
+ /// + /// `String`s have an internal buffer to hold their data. The capacity is + /// the length of that buffer, and can be queried with the [`capacity`] + /// method. This method creates an empty `String`, but one with an initial + /// buffer that can hold `capacity` bytes. This is useful when you may be + /// appending a bunch of data to the `String`, reducing the number of + /// reallocations it needs to do. + /// + /// [`capacity`]: String::capacity + /// + /// If the given capacity is `0`, no allocation will occur, and this method + /// is identical to the [`new`] method. + /// + /// [`new`]: String::new + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// + /// // The String contains no chars, even though it has capacity for more + /// assert_eq!(s.len(), 0); + /// + /// // These are all done without reallocating... + /// let cap = s.capacity(); + /// for _ in 0..10 { + /// s.push('a'); + /// } + /// + /// assert_eq!(s.capacity(), cap); + /// + /// // ...but this may make the string reallocate + /// s.push('a'); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[doc(alias = "alloc")] + #[doc(alias = "malloc")] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize) -> String { + String { vec: Vec::with_capacity(capacity) } + } + + // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is + // required for this method definition, is not available. Since we don't + // require this method for testing purposes, I'll just stub it + // NB see the slice::hack module in slice.rs for more information + #[inline] + #[cfg(test)] + pub fn from_str(_: &str) -> String { + panic!("not available with cfg(test)"); + } + + /// Converts a vector of bytes to a `String`. + /// + /// A string ([`String`]) is made of bytes ([`u8`]), and a vector of bytes + /// ([`Vec`]) is made of bytes, so this function converts between the + /// two. 
Not all byte slices are valid `String`s, however: `String` + /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that + /// the bytes are valid UTF-8, and then does the conversion. + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want + /// to incur the overhead of the validity check, there is an unsafe version + /// of this function, [`from_utf8_unchecked`], which has the same behavior + /// but skips the check. + /// + /// This method will take care to not copy the vector, for efficiency's + /// sake. + /// + /// If you need a [`&str`] instead of a `String`, consider + /// [`str::from_utf8`]. + /// + /// The inverse of this method is [`into_bytes`]. + /// + /// # Errors + /// + /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the + /// provided bytes are not UTF-8. The vector you moved in is also included. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// // We know these bytes are valid, so we'll use `unwrap()`. + /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let sparkle_heart = vec![0, 159, 146, 150]; + /// + /// assert!(String::from_utf8(sparkle_heart).is_err()); + /// ``` + /// + /// See the docs for [`FromUtf8Error`] for more details on what you can do + /// with this error. + /// + /// [`from_utf8_unchecked`]: String::from_utf8_unchecked + /// [`Vec`]: crate::vec::Vec + /// [`&str`]: prim@str + /// [`into_bytes`]: String::into_bytes + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf8(vec: Vec) -> Result { + match str::from_utf8(&vec) { + Ok(..) 
=> Ok(String { vec }), + Err(e) => Err(FromUtf8Error { bytes: vec, error: e }), + } + } + + /// Converts a slice of bytes to a string, including invalid characters. + /// + /// Strings are made of bytes ([`u8`]), and a slice of bytes + /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts + /// between the two. Not all byte slices are valid strings, however: strings + /// are required to be valid UTF-8. During this conversion, + /// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: � + /// + /// [byteslice]: prim@slice + /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want + /// to incur the overhead of the conversion, there is an unsafe version + /// of this function, [`from_utf8_unchecked`], which has the same behavior + /// but skips the checks. + /// + /// [`from_utf8_unchecked`]: String::from_utf8_unchecked + /// + /// This function returns a [`Cow<'a, str>`]. If our byte slice is invalid + /// UTF-8, then we need to insert the replacement characters, which will + /// change the size of the string, and hence, require a `String`. But if + /// it's already valid UTF-8, we don't need a new allocation. This return + /// type allows us to handle both cases. 
+ /// + /// [`Cow<'a, str>`]: crate::borrow::Cow + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// // some invalid bytes + /// let input = b"Hello \xF0\x90\x80World"; + /// let output = String::from_utf8_lossy(input); + /// + /// assert_eq!("Hello �World", output); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf8_lossy(v: &[u8]) -> Cow<'_, str> { + let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks(); + + let (first_valid, first_broken) = if let Some(chunk) = iter.next() { + let lossy::Utf8LossyChunk { valid, broken } = chunk; + if valid.len() == v.len() { + debug_assert!(broken.is_empty()); + return Cow::Borrowed(valid); + } + (valid, broken) + } else { + return Cow::Borrowed(""); + }; + + const REPLACEMENT: &str = "\u{FFFD}"; + + let mut res = String::with_capacity(v.len()); + res.push_str(first_valid); + if !first_broken.is_empty() { + res.push_str(REPLACEMENT); + } + + for lossy::Utf8LossyChunk { valid, broken } in iter { + res.push_str(valid); + if !broken.is_empty() { + res.push_str(REPLACEMENT); + } + } + + Cow::Owned(res) + } + + /// Decode a UTF-16–encoded vector `v` into a `String`, returning [`Err`] + /// if `v` contains any invalid data. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // 𝄞music + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0x0069, 0x0063]; + /// assert_eq!(String::from("𝄞music"), + /// String::from_utf16(v).unwrap()); + /// + /// // 𝄞muic + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0xD800, 0x0069, 0x0063]; + /// assert!(String::from_utf16(v).is_err()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf16(v: &[u16]) -> Result { + // This isn't done via collect::>() for performance reasons. + // FIXME: the function can be simplified again when #48994 is closed. + let mut ret = String::with_capacity(v.len()); + for c in decode_utf16(v.iter().cloned()) { + if let Ok(c) = c { + ret.push(c); + } else { + return Err(FromUtf16Error(())); + } + } + Ok(ret) + } + + /// Decode a UTF-16–encoded slice `v` into a `String`, replacing + /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD]. + /// + /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`], + /// `from_utf16_lossy` returns a `String` since the UTF-16 to UTF-8 + /// conversion requires a memory allocation. + /// + /// [`from_utf8_lossy`]: String::from_utf8_lossy + /// [`Cow<'a, str>`]: crate::borrow::Cow + /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // 𝄞music + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0xDD1E, 0x0069, 0x0063, + /// 0xD834]; + /// + /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"), + /// String::from_utf16_lossy(v)); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf16_lossy(v: &[u16]) -> String { + decode_utf16(v.iter().cloned()).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect() + } + + /// Decomposes a `String` into its raw components. 
+ /// + /// Returns the raw pointer to the underlying data, the length of + /// the string (in bytes), and the allocated capacity of the data + /// (in bytes). These are the same arguments in the same order as + /// the arguments to [`from_raw_parts`]. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `String`. The only way to do + /// this is to convert the raw pointer, length, and capacity back + /// into a `String` with the [`from_raw_parts`] function, allowing + /// the destructor to perform the cleanup. + /// + /// [`from_raw_parts`]: String::from_raw_parts + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_into_raw_parts)] + /// let s = String::from("hello"); + /// + /// let (ptr, len, cap) = s.into_raw_parts(); + /// + /// let rebuilt = unsafe { String::from_raw_parts(ptr, len, cap) }; + /// assert_eq!(rebuilt, "hello"); + /// ``` + #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")] + pub fn into_raw_parts(self) -> (*mut u8, usize, usize) { + self.vec.into_raw_parts() + } + + /// Creates a new `String` from a length, capacity, and pointer. + /// + /// # Safety + /// + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: + /// + /// * The memory at `buf` needs to have been previously allocated by the + /// same allocator the standard library uses, with a required alignment of exactly 1. + /// * `length` needs to be less than or equal to `capacity`. + /// * `capacity` needs to be the correct value. + /// * The first `length` bytes at `buf` need to be valid UTF-8. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal data structures. + /// + /// The ownership of `buf` is effectively transferred to the + /// `String` which may then deallocate, reallocate or change the + /// contents of memory pointed to by the pointer at will. 
Ensure + /// that nothing else uses the pointer after calling this + /// function. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::mem; + /// + /// unsafe { + /// let s = String::from("hello"); + /// + // FIXME Update this when vec_into_raw_parts is stabilized + /// // Prevent automatically dropping the String's data + /// let mut s = mem::ManuallyDrop::new(s); + /// + /// let ptr = s.as_mut_ptr(); + /// let len = s.len(); + /// let capacity = s.capacity(); + /// + /// let s = String::from_raw_parts(ptr, len, capacity); + /// + /// assert_eq!(String::from("hello"), s); + /// } + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> String { + unsafe { String { vec: Vec::from_raw_parts(buf, length, capacity) } } + } + + /// Converts a vector of bytes to a `String` without checking that the + /// string contains valid UTF-8. + /// + /// See the safe version, [`from_utf8`], for more details. + /// + /// [`from_utf8`]: String::from_utf8 + /// + /// # Safety + /// + /// This function is unsafe because it does not check that the bytes passed + /// to it are valid UTF-8. If this constraint is violated, it may cause + /// memory unsafety issues with future users of the `String`, as the rest of + /// the standard library assumes that `String`s are valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = unsafe { + /// String::from_utf8_unchecked(sparkle_heart) + /// }; + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_utf8_unchecked(bytes: Vec) -> String { + String { vec: bytes } + } + + /// Converts a `String` into a byte vector. + /// + /// This consumes the `String`, so we do not need to copy its contents. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// let bytes = s.into_bytes(); + /// + /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_bytes(self) -> Vec { + self.vec + } + + /// Extracts a string slice containing the entire `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("foo"); + /// + /// assert_eq!("foo", s.as_str()); + /// ``` + #[inline] + #[stable(feature = "string_as_str", since = "1.7.0")] + pub fn as_str(&self) -> &str { + self + } + + /// Converts a `String` into a mutable string slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foobar"); + /// let s_mut_str = s.as_mut_str(); + /// + /// s_mut_str.make_ascii_uppercase(); + /// + /// assert_eq!("FOOBAR", s_mut_str); + /// ``` + #[inline] + #[stable(feature = "string_as_str", since = "1.7.0")] + pub fn as_mut_str(&mut self) -> &mut str { + self + } + + /// Appends a given string slice onto the end of this `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.push_str("bar"); + /// + /// assert_eq!("foobar", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push_str(&mut self, string: &str) { + self.vec.extend_from_slice(string.as_bytes()) + } + + /// Copies elements from `src` range to the end of the string. + /// + /// ## Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. 
+ /// + /// ## Examples + /// + /// ``` + /// #![feature(string_extend_from_within)] + /// let mut string = String::from("abcde"); + /// + /// string.extend_from_within(2..); + /// assert_eq!(string, "abcdecde"); + /// + /// string.extend_from_within(..2); + /// assert_eq!(string, "abcdecdeab"); + /// + /// string.extend_from_within(4..8); + /// assert_eq!(string, "abcdecdeabecde"); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "string_extend_from_within", issue = "none")] + pub fn extend_from_within(&mut self, src: R) + where + R: RangeBounds, + { + let src @ Range { start, end } = slice::range(src, ..self.len()); + + assert!(self.is_char_boundary(start)); + assert!(self.is_char_boundary(end)); + + self.vec.extend_from_within(src); + } + + /// Returns this `String`'s capacity, in bytes. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::with_capacity(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn capacity(&self) -> usize { + self.vec.capacity() + } + + /// Ensures that this `String`'s capacity is at least `additional` bytes + /// larger than its length. + /// + /// The capacity may be increased by more than `additional` bytes if it + /// chooses, to prevent frequent reallocations. + /// + /// If you do not want this "at least" behavior, see the [`reserve_exact`] + /// method. + /// + /// # Panics + /// + /// Panics if the new capacity overflows [`usize`]. 
+ /// + /// [`reserve_exact`]: String::reserve_exact + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::new(); + /// + /// s.reserve(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + /// + /// This may not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve(&mut self, additional: usize) { + self.vec.reserve(additional) + } + + /// Ensures that this `String`'s capacity is `additional` bytes + /// larger than its length. + /// + /// Consider using the [`reserve`] method unless you absolutely know + /// better than the allocator. + /// + /// [`reserve`]: String::reserve + /// + /// # Panics + /// + /// Panics if the new capacity overflows `usize`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::new(); + /// + /// s.reserve_exact(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + /// + /// This may not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve_exact(8); + /// + /// // ... doesn't actually increase. 
+ /// assert_eq!(10, s.capacity()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve_exact(&mut self, additional: usize) { + self.vec.reserve_exact(additional) + } + + /// Tries to reserve capacity for at least `additional` more bytes to be inserted + /// in the given `String`. The collection may reserve more space to avoid + /// frequent reallocations. After a successful call to `try_reserve`, capacity will be + /// greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// #![feature(try_reserve)] + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &str) -> Result<String, TryReserveError> { + /// let mut output = String::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// output.try_reserve(data.len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// output.push_str(data); + /// + /// Ok(output) + /// } + /// # process_data("rust").expect("why is the test harness OOMing on 4 bytes?"); + /// ``` + #[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.vec.try_reserve(additional) + } + + /// Tries to reserve the minimum capacity for exactly `additional` more bytes to + /// be inserted in the given `String`. After a successful call to `try_reserve_exact`, + /// capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer `try_reserve` if future insertions are expected.
+ /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// #![feature(try_reserve)] + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &str) -> Result<String, TryReserveError> { + /// let mut output = String::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// output.try_reserve_exact(data.len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// output.push_str(data); + /// + /// Ok(output) + /// } + /// # process_data("rust").expect("why is the test harness OOMing on 4 bytes?"); + /// ``` + #[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.vec.try_reserve_exact(additional) + } + + /// Shrinks the capacity of this `String` to match its length. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to_fit(); + /// assert_eq!(3, s.capacity()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn shrink_to_fit(&mut self) { + self.vec.shrink_to_fit() + } + + /// Shrinks the capacity of this `String` with a lower bound. + /// + /// The capacity will remain at least as large as both the length + /// and the supplied value. + /// + /// If the current capacity is less than the lower limit, this is a no-op.
+ /// + /// # Examples + /// + /// ``` + /// #![feature(shrink_to)] + /// let mut s = String::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to(10); + /// assert!(s.capacity() >= 10); + /// s.shrink_to(0); + /// assert!(s.capacity() >= 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "shrink_to", reason = "new API", issue = "56431")] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.vec.shrink_to(min_capacity) + } + + /// Appends the given [`char`] to the end of this `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("abc"); + /// + /// s.push('1'); + /// s.push('2'); + /// s.push('3'); + /// + /// assert_eq!("abc123", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push(&mut self, ch: char) { + match ch.len_utf8() { + 1 => self.vec.push(ch as u8), + _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), + } + } + + /// Returns a byte slice of this `String`'s contents. + /// + /// The inverse of this method is [`from_utf8`]. + /// + /// [`from_utf8`]: String::from_utf8 + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// + /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn as_bytes(&self) -> &[u8] { + &self.vec + } + + /// Shortens this `String` to the specified length. + /// + /// If `new_len` is greater than the string's current length, this has no + /// effect. + /// + /// Note that this method has no effect on the allocated capacity + /// of the string + /// + /// # Panics + /// + /// Panics if `new_len` does not lie on a [`char`] boundary. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("hello"); + /// + /// s.truncate(2); + /// + /// assert_eq!("he", s); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn truncate(&mut self, new_len: usize) { + if new_len <= self.len() { + assert!(self.is_char_boundary(new_len)); + self.vec.truncate(new_len) + } + } + + /// Removes the last character from the string buffer and returns it. + /// + /// Returns [`None`] if this `String` is empty. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// assert_eq!(s.pop(), Some('o')); + /// assert_eq!(s.pop(), Some('o')); + /// assert_eq!(s.pop(), Some('f')); + /// + /// assert_eq!(s.pop(), None); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn pop(&mut self) -> Option { + let ch = self.chars().rev().next()?; + let newlen = self.len() - ch.len_utf8(); + unsafe { + self.vec.set_len(newlen); + } + Some(ch) + } + + /// Removes a [`char`] from this `String` at a byte position and returns it. + /// + /// This is an *O*(*n*) operation, as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than or equal to the `String`'s length, + /// or if it does not lie on a [`char`] boundary. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// assert_eq!(s.remove(0), 'f'); + /// assert_eq!(s.remove(1), 'o'); + /// assert_eq!(s.remove(0), 'o'); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn remove(&mut self, idx: usize) -> char { + let ch = match self[idx..].chars().next() { + Some(ch) => ch, + None => panic!("cannot remove a char from the end of a string"), + }; + + let next = idx + ch.len_utf8(); + let len = self.len(); + unsafe { + ptr::copy(self.vec.as_ptr().add(next), self.vec.as_mut_ptr().add(idx), len - next); + self.vec.set_len(len - (next - idx)); + } + ch + } + + /// Remove all matches of pattern `pat` in the `String`. + /// + /// # Examples + /// + /// ``` + /// #![feature(string_remove_matches)] + /// let mut s = String::from("Trees are not green, the sky is not blue."); + /// s.remove_matches("not "); + /// assert_eq!("Trees are green, the sky is blue.", s); + /// ``` + /// + /// Matches will be detected and removed iteratively, so in cases where + /// patterns overlap, only the first pattern will be removed: + /// + /// ``` + /// #![feature(string_remove_matches)] + /// let mut s = String::from("banana"); + /// s.remove_matches("ana"); + /// assert_eq!("bna", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "string_remove_matches", reason = "new API", issue = "72826")] + pub fn remove_matches<'a, P>(&'a mut self, pat: P) + where + P: for<'x> Pattern<'x>, + { + use core::str::pattern::Searcher; + + let rejections = { + let mut searcher = pat.into_searcher(self); + // Per Searcher::next: + // + // A Match result needs to contain the whole matched pattern, + // however Reject results may be split up into arbitrary many + // adjacent fragments. Both ranges may have zero length. 
+ // + // In practice the implementation of Searcher::next_match tends to + // be more efficient, so we use it here and do some work to invert + // matches into rejections since that's what we want to copy below. + let mut front = 0; + let rejections: Vec<_> = from_fn(|| { + let (start, end) = searcher.next_match()?; + let prev_front = front; + front = end; + Some((prev_front, start)) + }) + .collect(); + rejections.into_iter().chain(core::iter::once((front, self.len()))) + }; + + let mut len = 0; + let ptr = self.vec.as_mut_ptr(); + + for (start, end) in rejections { + let count = end - start; + if start != len { + // SAFETY: per Searcher::next: + // + // The stream of Match and Reject values up to a Done will + // contain index ranges that are adjacent, non-overlapping, + // covering the whole haystack, and laying on utf8 + // boundaries. + unsafe { + ptr::copy(ptr.add(start), ptr.add(len), count); + } + } + len += count; + } + + unsafe { + self.vec.set_len(len); + } + } + + /// Retains only the characters specified by the predicate. + /// + /// In other words, remove all characters `c` such that `f(c)` returns `false`. + /// This method operates in place, visiting each character exactly once in the + /// original order, and preserves the order of the retained characters. + /// + /// # Examples + /// + /// ``` + /// let mut s = String::from("f_o_ob_ar"); + /// + /// s.retain(|c| c != '_'); + /// + /// assert_eq!(s, "foobar"); + /// ``` + /// + /// The exact order may be useful for tracking external state, like an index. 
+ /// + /// ``` + /// let mut s = String::from("abcde"); + /// let keep = [false, true, true, false, true]; + /// let mut i = 0; + /// s.retain(|_| (keep[i], i += 1).0); + /// assert_eq!(s, "bce"); + /// ``` + #[inline] + #[stable(feature = "string_retain", since = "1.26.0")] + pub fn retain(&mut self, mut f: F) + where + F: FnMut(char) -> bool, + { + struct SetLenOnDrop<'a> { + s: &'a mut String, + idx: usize, + del_bytes: usize, + } + + impl<'a> Drop for SetLenOnDrop<'a> { + fn drop(&mut self) { + let new_len = self.idx - self.del_bytes; + debug_assert!(new_len <= self.s.len()); + unsafe { self.s.vec.set_len(new_len) }; + } + } + + let len = self.len(); + let mut guard = SetLenOnDrop { s: self, idx: 0, del_bytes: 0 }; + + while guard.idx < len { + let ch = unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap() }; + let ch_len = ch.len_utf8(); + + if !f(ch) { + guard.del_bytes += ch_len; + } else if guard.del_bytes > 0 { + unsafe { + ptr::copy( + guard.s.vec.as_ptr().add(guard.idx), + guard.s.vec.as_mut_ptr().add(guard.idx - guard.del_bytes), + ch_len, + ); + } + } + + // Point idx to the next char + guard.idx += ch_len; + } + + drop(guard); + } + + /// Inserts a character into this `String` at a byte position. + /// + /// This is an *O*(*n*) operation as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than the `String`'s length, or if it does not + /// lie on a [`char`] boundary. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::with_capacity(3); + /// + /// s.insert(0, 'f'); + /// s.insert(1, 'o'); + /// s.insert(2, 'o'); + /// + /// assert_eq!("foo", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, idx: usize, ch: char) { + assert!(self.is_char_boundary(idx)); + let mut bits = [0; 4]; + let bits = ch.encode_utf8(&mut bits).as_bytes(); + + unsafe { + self.insert_bytes(idx, bits); + } + } + + #[cfg(not(no_global_oom_handling))] + unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) { + let len = self.len(); + let amt = bytes.len(); + self.vec.reserve(amt); + + unsafe { + ptr::copy(self.vec.as_ptr().add(idx), self.vec.as_mut_ptr().add(idx + amt), len - idx); + ptr::copy(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt); + self.vec.set_len(len + amt); + } + } + + /// Inserts a string slice into this `String` at a byte position. + /// + /// This is an *O*(*n*) operation as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than the `String`'s length, or if it does not + /// lie on a [`char`] boundary. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("bar"); + /// + /// s.insert_str(0, "foo"); + /// + /// assert_eq!("foobar", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "insert_str", since = "1.16.0")] + pub fn insert_str(&mut self, idx: usize, string: &str) { + assert!(self.is_char_boundary(idx)); + + unsafe { + self.insert_bytes(idx, string.as_bytes()); + } + } + + /// Returns a mutable reference to the contents of this `String`. + /// + /// # Safety + /// + /// This function is unsafe because the returned `&mut Vec` allows writing + /// bytes which are not valid UTF-8.
If this constraint is violated, it may cause + /// memory unsafety issues with future users of the `String`, as the rest of + /// the standard library assumes that `String`s are valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("hello"); + /// + /// unsafe { + /// let vec = s.as_mut_vec(); + /// assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]); + /// + /// vec.reverse(); + /// } + /// assert_eq!(s, "olleh"); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn as_mut_vec(&mut self) -> &mut Vec { + &mut self.vec + } + + /// Returns the length of this `String`, in bytes, not [`char`]s or + /// graphemes. In other words, it may not be what a human considers the + /// length of the string. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let a = String::from("foo"); + /// assert_eq!(a.len(), 3); + /// + /// let fancy_f = String::from("ƒoo"); + /// assert_eq!(fancy_f.len(), 4); + /// assert_eq!(fancy_f.chars().count(), 3); + /// ``` + #[doc(alias = "length")] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn len(&self) -> usize { + self.vec.len() + } + + /// Returns `true` if this `String` has a length of zero, and `false` otherwise. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut v = String::new(); + /// assert!(v.is_empty()); + /// + /// v.push('a'); + /// assert!(!v.is_empty()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Splits the string into two at the given byte index. + /// + /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and + /// the returned `String` contains bytes `[at, len)`. `at` must be on the + /// boundary of a UTF-8 code point. + /// + /// Note that the capacity of `self` does not change. 
+ /// + /// # Panics + /// + /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last + /// code point of the string. + /// + /// # Examples + /// + /// ``` + /// # fn main() { + /// let mut hello = String::from("Hello, World!"); + /// let world = hello.split_off(7); + /// assert_eq!(hello, "Hello, "); + /// assert_eq!(world, "World!"); + /// # } + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "string_split_off", since = "1.16.0")] + #[must_use = "use `.truncate()` if you don't need the other half"] + pub fn split_off(&mut self, at: usize) -> String { + assert!(self.is_char_boundary(at)); + let other = self.vec.split_off(at); + unsafe { String::from_utf8_unchecked(other) } + } + + /// Truncates this `String`, removing all contents. + /// + /// While this means the `String` will have a length of zero, it does not + /// touch its capacity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.clear(); + /// + /// assert!(s.is_empty()); + /// assert_eq!(0, s.len()); + /// assert_eq!(3, s.capacity()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn clear(&mut self) { + self.vec.clear() + } + + /// Creates a draining iterator that removes the specified range in the `String` + /// and yields the removed `chars`. + /// + /// Note: The element range is removed even if the iterator is not + /// consumed until the end. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Remove the range up until the β from the string + /// let t: String = s.drain(..beta_offset).collect(); + /// assert_eq!(t, "α is alpha, "); + /// assert_eq!(s, "β is beta"); + /// + /// // A full range clears the string + /// s.drain(..); + /// assert_eq!(s, ""); + /// ``` + #[stable(feature = "drain", since = "1.6.0")] + pub fn drain(&mut self, range: R) -> Drain<'_> + where + R: RangeBounds, + { + // Memory safety + // + // The String version of Drain does not have the memory safety issues + // of the vector version. The data is just plain bytes. + // Because the range removal happens in Drop, if the Drain iterator is leaked, + // the removal will not happen. + let Range { start, end } = slice::range(range, ..self.len()); + assert!(self.is_char_boundary(start)); + assert!(self.is_char_boundary(end)); + + // Take out two simultaneous borrows. The &mut String won't be accessed + // until iteration is over, in Drop. + let self_ptr = self as *mut _; + // SAFETY: `slice::range` and `is_char_boundary` do the appropriate bounds checks. + let chars_iter = unsafe { self.get_unchecked(start..end) }.chars(); + + Drain { start, end, iter: chars_iter, string: self_ptr } + } + + /// Removes the specified range in the string, + /// and replaces it with the given string. + /// The given string doesn't need to be the same length as the range. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Replace the range up until the β from the string + /// s.replace_range(..beta_offset, "Α is capital alpha; "); + /// assert_eq!(s, "Α is capital alpha; β is beta"); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "splice", since = "1.27.0")] + pub fn replace_range(&mut self, range: R, replace_with: &str) + where + R: RangeBounds, + { + // Memory safety + // + // `replace_range` does not have the memory safety issues of the vector + // version (`Vec::splice`). The data is just plain bytes. + + // WARNING: Inlining this variable would be unsound (#81138) + let start = range.start_bound(); + match start { + Included(&n) => assert!(self.is_char_boundary(n)), + Excluded(&n) => assert!(self.is_char_boundary(n + 1)), + Unbounded => {} + }; + // WARNING: Inlining this variable would be unsound (#81138) + let end = range.end_bound(); + match end { + Included(&n) => assert!(self.is_char_boundary(n + 1)), + Excluded(&n) => assert!(self.is_char_boundary(n)), + Unbounded => {} + }; + + // Using `range` again would be unsound (#81138) + // We assume the bounds reported by `range` remain the same, but + // an adversarial implementation could change between calls + unsafe { self.as_mut_vec() }.splice((start, end), replace_with.bytes()); + } + + /// Converts this `String` into a [`Box`]`<`[`str`]`>`. + /// + /// This will drop any excess capacity.
+ /// + /// [`str`]: prim@str + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// + /// let b = s.into_boxed_str(); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "box_str", since = "1.4.0")] + #[inline] + pub fn into_boxed_str(self) -> Box { + let slice = self.vec.into_boxed_slice(); + unsafe { from_boxed_utf8_unchecked(slice) } + } +} + +impl FromUtf8Error { + /// Returns a slice of [`u8`]s bytes that were attempted to convert to a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let value = String::from_utf8(bytes); + /// + /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes()); + /// ``` + #[stable(feature = "from_utf8_error_as_bytes", since = "1.26.0")] + pub fn as_bytes(&self) -> &[u8] { + &self.bytes[..] + } + + /// Returns the bytes that were attempted to convert to a `String`. + /// + /// This method is carefully constructed to avoid allocation. It will + /// consume the error, moving out the bytes, so that a copy of the bytes + /// does not need to be made. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let value = String::from_utf8(bytes); + /// + /// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_bytes(self) -> Vec { + self.bytes + } + + /// Fetch a `Utf8Error` to get more details about the conversion failure. + /// + /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may + /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's + /// an analogue to `FromUtf8Error`. See its documentation for more details + /// on using it. 
+ /// + /// [`std::str`]: core::str + /// [`&str`]: prim@str + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let error = String::from_utf8(bytes).unwrap_err().utf8_error(); + /// + /// // the first byte is invalid here + /// assert_eq!(1, error.valid_up_to()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn utf8_error(&self) -> Utf8Error { + self.error + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for FromUtf8Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.error, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for FromUtf16Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt("invalid utf-16: lone surrogate found", f) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for String { + fn clone(&self) -> Self { + String { vec: self.vec.clone() } + } + + fn clone_from(&mut self, source: &Self) { + self.vec.clone_from(&source.vec); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl FromIterator for String { + fn from_iter>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_from_iter_by_ref", since = "1.17.0")] +impl<'a> FromIterator<&'a char> for String { + fn from_iter>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> FromIterator<&'a str> for String { + fn from_iter>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_string", since = "1.4.0")] +impl FromIterator for String { + fn 
from_iter>(iter: I) -> String { + let mut iterator = iter.into_iter(); + + // Because we're iterating over `String`s, we can avoid at least + // one allocation by getting the first string from the iterator + // and appending to it all the subsequent strings. + match iterator.next() { + None => String::new(), + Some(mut buf) => { + buf.extend(iterator); + buf + } + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_str2", since = "1.45.0")] +impl FromIterator> for String { + fn from_iter>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "herd_cows", since = "1.19.0")] +impl<'a> FromIterator> for String { + fn from_iter>>(iter: I) -> String { + let mut iterator = iter.into_iter(); + + // Because we're iterating over CoWs, we can (potentially) avoid at least + // one allocation by getting the first item and appending to it all the + // subsequent items. + match iterator.next() { + None => String::new(), + Some(cow) => { + let mut buf = cow.into_owned(); + buf.extend(iterator); + buf + } + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Extend for String { + fn extend>(&mut self, iter: I) { + let iterator = iter.into_iter(); + let (lower_bound, _) = iterator.size_hint(); + self.reserve(lower_bound); + iterator.for_each(move |c| self.push(c)); + } + + #[inline] + fn extend_one(&mut self, c: char) { + self.push(c); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_ref", since = "1.2.0")] +impl<'a> Extend<&'a char> for String { + fn extend>(&mut self, iter: I) { + self.extend(iter.into_iter().cloned()); + } + + #[inline] + fn extend_one(&mut self, &c: &'a char) { + self.push(c); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + 
+#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Extend<&'a str> for String { + fn extend>(&mut self, iter: I) { + iter.into_iter().for_each(move |s| self.push_str(s)); + } + + #[inline] + fn extend_one(&mut self, s: &'a str) { + self.push_str(s); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_str2", since = "1.45.0")] +impl Extend> for String { + fn extend>>(&mut self, iter: I) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_string", since = "1.4.0")] +impl Extend for String { + fn extend>(&mut self, iter: I) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } + + #[inline] + fn extend_one(&mut self, s: String) { + self.push_str(&s); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "herd_cows", since = "1.19.0")] +impl<'a> Extend> for String { + fn extend>>(&mut self, iter: I) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } + + #[inline] + fn extend_one(&mut self, s: Cow<'a, str>) { + self.push_str(&s); + } +} + +/// A convenience impl that delegates to the impl for `&str`. 
+/// +/// # Examples +/// +/// ``` +/// assert_eq!(String::from("Hello world").find("world"), Some(6)); +/// ``` +#[unstable( + feature = "pattern", + reason = "API not fully fleshed out and ready to be stabilized", + issue = "27721" +)] +impl<'a, 'b> Pattern<'a> for &'b String { + type Searcher = <&'b str as Pattern<'a>>::Searcher; + + fn into_searcher(self, haystack: &'a str) -> <&'b str as Pattern<'a>>::Searcher { + self[..].into_searcher(haystack) + } + + #[inline] + fn is_contained_in(self, haystack: &'a str) -> bool { + self[..].is_contained_in(haystack) + } + + #[inline] + fn is_prefix_of(self, haystack: &'a str) -> bool { + self[..].is_prefix_of(haystack) + } + + #[inline] + fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { + self[..].strip_prefix_of(haystack) + } + + #[inline] + fn is_suffix_of(self, haystack: &'a str) -> bool { + self[..].is_suffix_of(haystack) + } + + #[inline] + fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> { + self[..].strip_suffix_of(haystack) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for String { + #[inline] + fn eq(&self, other: &String) -> bool { + PartialEq::eq(&self[..], &other[..]) + } + #[inline] + fn ne(&self, other: &String) -> bool { + PartialEq::ne(&self[..], &other[..]) + } +} + +macro_rules! impl_eq { + ($lhs:ty, $rhs: ty) => { + #[stable(feature = "rust1", since = "1.0.0")] + #[allow(unused_lifetimes)] + impl<'a, 'b> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + PartialEq::eq(&self[..], &other[..]) + } + #[inline] + fn ne(&self, other: &$rhs) -> bool { + PartialEq::ne(&self[..], &other[..]) + } + } + + #[stable(feature = "rust1", since = "1.0.0")] + #[allow(unused_lifetimes)] + impl<'a, 'b> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + PartialEq::eq(&self[..], &other[..]) + } + #[inline] + fn ne(&self, other: &$lhs) -> bool { + PartialEq::ne(&self[..], &other[..]) + } + } + }; +} + +impl_eq! 
{ String, str } +impl_eq! { String, &'a str } +#[cfg(not(no_global_oom_handling))] +impl_eq! { Cow<'a, str>, str } +#[cfg(not(no_global_oom_handling))] +impl_eq! { Cow<'a, str>, &'b str } +#[cfg(not(no_global_oom_handling))] +impl_eq! { Cow<'a, str>, String } + +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for String { + /// Creates an empty `String`. + #[inline] + fn default() -> String { + String::new() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for String { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for String { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl hash::Hash for String { + #[inline] + fn hash(&self, hasher: &mut H) { + (**self).hash(hasher) + } +} + +/// Implements the `+` operator for concatenating two strings. +/// +/// This consumes the `String` on the left-hand side and re-uses its buffer (growing it if +/// necessary). This is done to avoid allocating a new `String` and copying the entire contents on +/// every operation, which would lead to *O*(*n*^2) running time when building an *n*-byte string by +/// repeated concatenation. +/// +/// The string on the right-hand side is only borrowed; its contents are copied into the returned +/// `String`. +/// +/// # Examples +/// +/// Concatenating two `String`s takes the first by value and borrows the second: +/// +/// ``` +/// let a = String::from("hello"); +/// let b = String::from(" world"); +/// let c = a + &b; +/// // `a` is moved and can no longer be used here. 
+/// ``` +/// +/// If you want to keep using the first `String`, you can clone it and append to the clone instead: +/// +/// ``` +/// let a = String::from("hello"); +/// let b = String::from(" world"); +/// let c = a.clone() + &b; +/// // `a` is still valid here. +/// ``` +/// +/// Concatenating `&str` slices can be done by converting the first to a `String`: +/// +/// ``` +/// let a = "hello"; +/// let b = " world"; +/// let c = a.to_string() + b; +/// ``` +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Add<&str> for String { + type Output = String; + + #[inline] + fn add(mut self, other: &str) -> String { + self.push_str(other); + self + } +} + +/// Implements the `+=` operator for appending to a `String`. +/// +/// This has the same behavior as the [`push_str`][String::push_str] method. +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "stringaddassign", since = "1.12.0")] +impl AddAssign<&str> for String { + #[inline] + fn add_assign(&mut self, other: &str) { + self.push_str(other); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::Range) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeTo) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeFrom) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index for String { + type Output = str; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &str { + unsafe { str::from_utf8_unchecked(&self.vec) } + } +} +#[stable(feature = "inclusive_range", since = "1.26.0")] +impl ops::Index> for String { + type Output = str; + + #[inline] + fn index(&self, index: 
ops::RangeInclusive) -> &str { + Index::index(&**self, index) + } +} +#[stable(feature = "inclusive_range", since = "1.26.0")] +impl ops::Index> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeToInclusive) -> &str { + Index::index(&**self, index) + } +} + +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut> for String { + #[inline] + fn index_mut(&mut self, index: ops::Range) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeTo) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeFrom) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut for String { + #[inline] + fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str { + unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) } + } +} +#[stable(feature = "inclusive_range", since = "1.26.0")] +impl ops::IndexMut> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeInclusive) -> &mut str { + IndexMut::index_mut(&mut **self, index) + } +} +#[stable(feature = "inclusive_range", since = "1.26.0")] +impl ops::IndexMut> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeToInclusive) -> &mut str { + IndexMut::index_mut(&mut **self, index) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Deref for String { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + unsafe { str::from_utf8_unchecked(&self.vec) } + } +} + +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::DerefMut for String { + #[inline] + fn deref_mut(&mut self) -> &mut str { + unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) } + } +} + +/// A type alias for 
[`Infallible`]. +/// +/// This alias exists for backwards compatibility, and may be eventually deprecated. +/// +/// [`Infallible`]: core::convert::Infallible +#[stable(feature = "str_parse_error", since = "1.5.0")] +pub type ParseError = core::convert::Infallible; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl FromStr for String { + type Err = core::convert::Infallible; + #[inline] + fn from_str(s: &str) -> Result { + Ok(String::from(s)) + } +} + +/// A trait for converting a value to a `String`. +/// +/// This trait is automatically implemented for any type which implements the +/// [`Display`] trait. As such, `ToString` shouldn't be implemented directly: +/// [`Display`] should be implemented instead, and you get the `ToString` +/// implementation for free. +/// +/// [`Display`]: fmt::Display +#[cfg_attr(not(test), rustc_diagnostic_item = "ToString")] +#[stable(feature = "rust1", since = "1.0.0")] +pub trait ToString { + /// Converts the given value to a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let i = 5; + /// let five = String::from("5"); + /// + /// assert_eq!(five, i.to_string()); + /// ``` + #[rustc_conversion_suggestion] + #[stable(feature = "rust1", since = "1.0.0")] + fn to_string(&self) -> String; +} + +/// # Panics +/// +/// In this implementation, the `to_string` method panics +/// if the `Display` implementation returns an error. +/// This indicates an incorrect `Display` implementation +/// since `fmt::Write for String` never returns an error itself. +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl ToString for T { + // A common guideline is to not inline generic functions. However, + // removing `#[inline]` from this method causes non-negligible regressions. + // See , the last attempt + // to try to remove it. 
+ #[inline] + default fn to_string(&self) -> String { + let mut buf = String::new(); + let mut formatter = core::fmt::Formatter::new(&mut buf); + // Bypass format_args!() to avoid write_str with zero-length strs + fmt::Display::fmt(self, &mut formatter) + .expect("a Display implementation returned an error unexpectedly"); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "char_to_string_specialization", since = "1.46.0")] +impl ToString for char { + #[inline] + fn to_string(&self) -> String { + String::from(self.encode_utf8(&mut [0; 4])) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "u8_to_string_specialization", since = "1.54.0")] +impl ToString for u8 { + #[inline] + fn to_string(&self) -> String { + let mut buf = String::with_capacity(3); + let mut n = *self; + if n >= 10 { + if n >= 100 { + buf.push((b'0' + n / 100) as char); + n %= 100; + } + buf.push((b'0' + n / 10) as char); + n %= 10; + } + buf.push((b'0' + n) as char); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "i8_to_string_specialization", since = "1.54.0")] +impl ToString for i8 { + #[inline] + fn to_string(&self) -> String { + let mut buf = String::with_capacity(4); + if self.is_negative() { + buf.push('-'); + } + let mut n = self.unsigned_abs(); + if n >= 10 { + if n >= 100 { + buf.push('1'); + n -= 100; + } + buf.push((b'0' + n / 10) as char); + n %= 10; + } + buf.push((b'0' + n) as char); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "str_to_string_specialization", since = "1.9.0")] +impl ToString for str { + #[inline] + fn to_string(&self) -> String { + String::from(self) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_str_to_string_specialization", since = "1.17.0")] +impl ToString for Cow<'_, str> { + #[inline] + fn to_string(&self) -> String { + self[..].to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_to_string_specialization", since = 
"1.17.0")] +impl ToString for String { + #[inline] + fn to_string(&self) -> String { + self.to_owned() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for String { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +#[stable(feature = "string_as_mut", since = "1.43.0")] +impl AsMut for String { + #[inline] + fn as_mut(&mut self) -> &mut str { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef<[u8]> for String { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<&str> for String { + /// Converts a `&str` into a [`String`]. + /// + /// The result is allocated on the heap. + #[inline] + fn from(s: &str) -> String { + s.to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_mut_str_for_string", since = "1.44.0")] +impl From<&mut str> for String { + /// Converts a `&mut str` into a [`String`]. + /// + /// The result is allocated on the heap. + #[inline] + fn from(s: &mut str) -> String { + s.to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_ref_string", since = "1.35.0")] +impl From<&String> for String { + /// Converts a `&String` into a [`String`]. + /// + /// This clones `s` and returns the clone. + #[inline] + fn from(s: &String) -> String { + s.clone() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(test))] +#[stable(feature = "string_from_box", since = "1.18.0")] +impl From> for String { + /// Converts the given boxed `str` slice to a [`String`]. + /// It is notable that the `str` slice is owned. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s1: String = String::from("hello world"); + /// let s2: Box = s1.into_boxed_str(); + /// let s3: String = String::from(s2); + /// + /// assert_eq!("hello world", s3) + /// ``` + fn from(s: Box) -> String { + s.into_string() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_str", since = "1.20.0")] +impl From for Box { + /// Converts the given [`String`] to a boxed `str` slice that is owned. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s1: String = String::from("hello world"); + /// let s2: Box = Box::from(s1); + /// let s3: String = String::from(s2); + /// + /// assert_eq!("hello world", s3) + /// ``` + fn from(s: String) -> Box { + s.into_boxed_str() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_from_cow_str", since = "1.14.0")] +impl<'a> From> for String { + /// Converts a clone-on-write string to an owned + /// instance of [`String`]. + /// + /// This extracts the owned string, + /// clones the string if it is not already owned. + /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// // If the string is not owned... + /// let cow: Cow = Cow::Borrowed("eggplant"); + /// // It will allocate on the heap and copy the string. + /// let owned: String = String::from(cow); + /// assert_eq!(&owned[..], "eggplant"); + /// ``` + fn from(s: Cow<'a, str>) -> String { + s.into_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> From<&'a str> for Cow<'a, str> { + /// Converts a string slice into a [`Borrowed`] variant. + /// No heap allocation is performed, and the string + /// is not copied. 
+ /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// assert_eq!(Cow::from("eggplant"), Cow::Borrowed("eggplant")); + /// ``` + /// + /// [`Borrowed`]: crate::borrow::Cow::Borrowed + #[inline] + fn from(s: &'a str) -> Cow<'a, str> { + Cow::Borrowed(s) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> From for Cow<'a, str> { + /// Converts a [`String`] into an [`Owned`] variant. + /// No heap allocation is performed, and the string + /// is not copied. + /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// let s = "eggplant".to_string(); + /// let s2 = "eggplant".to_string(); + /// assert_eq!(Cow::from(s), Cow::<'static, str>::Owned(s2)); + /// ``` + /// + /// [`Owned`]: crate::borrow::Cow::Owned + #[inline] + fn from(s: String) -> Cow<'a, str> { + Cow::Owned(s) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_from_string_ref", since = "1.28.0")] +impl<'a> From<&'a String> for Cow<'a, str> { + /// Converts a [`String`] reference into a [`Borrowed`] variant. + /// No heap allocation is performed, and the string + /// is not copied. 
+ /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// let s = "eggplant".to_string(); + /// assert_eq!(Cow::from(&s), Cow::Borrowed("eggplant")); + /// ``` + /// + /// [`Borrowed`]: crate::borrow::Cow::Borrowed + #[inline] + fn from(s: &'a String) -> Cow<'a, str> { + Cow::Borrowed(s.as_str()) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a> FromIterator for Cow<'a, str> { + fn from_iter>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a, 'b> FromIterator<&'b str> for Cow<'a, str> { + fn from_iter>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a> FromIterator for Cow<'a, str> { + fn from_iter>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[stable(feature = "from_string_for_vec_u8", since = "1.14.0")] +impl From for Vec { + /// Converts the given [`String`] to a vector [`Vec`] that holds values of type [`u8`]. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s1 = String::from("hello world"); + /// let v1 = Vec::from(s1); + /// + /// for b in v1 { + /// println!("{}", b); + /// } + /// ``` + fn from(string: String) -> Vec { + string.into_bytes() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Write for String { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push_str(s); + Ok(()) + } + + #[inline] + fn write_char(&mut self, c: char) -> fmt::Result { + self.push(c); + Ok(()) + } +} + +/// A draining iterator for `String`. +/// +/// This struct is created by the [`drain`] method on [`String`]. See its +/// documentation for more. 
+/// +/// [`drain`]: String::drain +#[stable(feature = "drain", since = "1.6.0")] +pub struct Drain<'a> { + /// Will be used as &'a mut String in the destructor + string: *mut String, + /// Start of part to remove + start: usize, + /// End of part to remove + end: usize, + /// Current remaining range to remove + iter: Chars<'a>, +} + +#[stable(feature = "collection_debug", since = "1.17.0")] +impl fmt::Debug for Drain<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Drain").field(&self.as_str()).finish() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl Sync for Drain<'_> {} +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl Send for Drain<'_> {} + +#[stable(feature = "drain", since = "1.6.0")] +impl Drop for Drain<'_> { + fn drop(&mut self) { + unsafe { + // Use Vec::drain. "Reaffirm" the bounds checks to avoid + // panic code being inserted again. + let self_vec = (*self.string).as_mut_vec(); + if self.start <= self.end && self.end <= self_vec.len() { + self_vec.drain(self.start..self.end); + } + } + } +} + +impl<'a> Drain<'a> { + /// Returns the remaining (sub)string of this iterator as a slice. + /// + /// # Examples + /// + /// ``` + /// #![feature(string_drain_as_str)] + /// let mut s = String::from("abc"); + /// let mut drain = s.drain(..); + /// assert_eq!(drain.as_str(), "abc"); + /// let _ = drain.next().unwrap(); + /// assert_eq!(drain.as_str(), "bc"); + /// ``` + #[unstable(feature = "string_drain_as_str", issue = "76905")] // Note: uncomment AsRef impls below when stabilizing. + pub fn as_str(&self) -> &str { + self.iter.as_str() + } +} + +// Uncomment when stabilizing `string_drain_as_str`. 
+// #[unstable(feature = "string_drain_as_str", issue = "76905")] +// impl<'a> AsRef for Drain<'a> { +// fn as_ref(&self) -> &str { +// self.as_str() +// } +// } +// +// #[unstable(feature = "string_drain_as_str", issue = "76905")] +// impl<'a> AsRef<[u8]> for Drain<'a> { +// fn as_ref(&self) -> &[u8] { +// self.as_str().as_bytes() +// } +// } + +#[stable(feature = "drain", since = "1.6.0")] +impl Iterator for Drain<'_> { + type Item = char; + + #[inline] + fn next(&mut self) -> Option { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + + #[inline] + fn last(mut self) -> Option { + self.next_back() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl DoubleEndedIterator for Drain<'_> { + #[inline] + fn next_back(&mut self) -> Option { + self.iter.next_back() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for Drain<'_> {} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_char_for_string", since = "1.46.0")] +impl From for String { + /// Allocates an owned [`String`] from a single character. + /// + /// # Example + /// ```rust + /// let c: char = 'a'; + /// let s: String = String::from(c); + /// assert_eq!("a", &s[..]); + /// ``` + #[inline] + fn from(c: char) -> Self { + c.to_string() + } +} diff --git a/rust/alloc/sync.rs b/rust/alloc/sync.rs new file mode 100644 index 0000000000000..1f4e446806cf0 --- /dev/null +++ b/rust/alloc/sync.rs @@ -0,0 +1,2631 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#![stable(feature = "rust1", since = "1.0.0")] + +//! Thread-safe reference-counting pointers. +//! +//! See the [`Arc`][Arc] documentation for more details. 
+ +use core::any::Any; +use core::borrow; +use core::cmp::Ordering; +use core::convert::{From, TryFrom}; +use core::fmt; +use core::hash::{Hash, Hasher}; +use core::hint; +use core::intrinsics::abort; +#[cfg(not(no_global_oom_handling))] +use core::iter; +use core::marker::{PhantomData, Unpin, Unsize}; +#[cfg(not(no_global_oom_handling))] +use core::mem::size_of_val; +use core::mem::{self, align_of_val_raw}; +use core::ops::{CoerceUnsized, Deref, DispatchFromDyn, Receiver}; +#[cfg(not(no_global_oom_handling))] +use core::pin::Pin; +use core::ptr::{self, NonNull}; +#[cfg(not(no_global_oom_handling))] +use core::slice::from_raw_parts_mut; +use core::sync::atomic; +use core::sync::atomic::Ordering::{Acquire, Relaxed, Release, SeqCst}; + +#[cfg(not(no_global_oom_handling))] +use crate::alloc::handle_alloc_error; +#[cfg(not(no_global_oom_handling))] +use crate::alloc::{box_free, WriteCloneIntoRaw}; +use crate::alloc::{AllocError, Allocator, Global, Layout}; +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::rc::is_dangling; +#[cfg(not(no_global_oom_handling))] +use crate::string::String; +use crate::vec::Vec; + +#[cfg(test)] +mod tests; + +/// A soft limit on the amount of references that may be made to an `Arc`. +/// +/// Going above this limit will abort your program (although not +/// necessarily) at _exactly_ `MAX_REFCOUNT + 1` references. +const MAX_REFCOUNT: usize = (isize::MAX) as usize; + +#[cfg(not(sanitize = "thread"))] +macro_rules! acquire { + ($x:expr) => { + atomic::fence(Acquire) + }; +} + +// ThreadSanitizer does not support memory fences. To avoid false positive +// reports in Arc / Weak implementation use atomic loads for synchronization +// instead. +#[cfg(sanitize = "thread")] +macro_rules! acquire { + ($x:expr) => { + $x.load(Acquire) + }; +} + +/// A thread-safe reference-counting pointer. 'Arc' stands for 'Atomically +/// Reference Counted'. 
+/// +/// The type `Arc` provides shared ownership of a value of type `T`, +/// allocated in the heap. Invoking [`clone`][clone] on `Arc` produces +/// a new `Arc` instance, which points to the same allocation on the heap as the +/// source `Arc`, while increasing a reference count. When the last `Arc` +/// pointer to a given allocation is destroyed, the value stored in that allocation (often +/// referred to as "inner value") is also dropped. +/// +/// Shared references in Rust disallow mutation by default, and `Arc` is no +/// exception: you cannot generally obtain a mutable reference to something +/// inside an `Arc`. If you need to mutate through an `Arc`, use +/// [`Mutex`][mutex], [`RwLock`][rwlock], or one of the [`Atomic`][atomic] +/// types. +/// +/// ## Thread Safety +/// +/// Unlike [`Rc`], `Arc` uses atomic operations for its reference +/// counting. This means that it is thread-safe. The disadvantage is that +/// atomic operations are more expensive than ordinary memory accesses. If you +/// are not sharing reference-counted allocations between threads, consider using +/// [`Rc`] for lower overhead. [`Rc`] is a safe default, because the +/// compiler will catch any attempt to send an [`Rc`] between threads. +/// However, a library might choose `Arc` in order to give library consumers +/// more flexibility. +/// +/// `Arc` will implement [`Send`] and [`Sync`] as long as the `T` implements +/// [`Send`] and [`Sync`]. Why can't you put a non-thread-safe type `T` in an +/// `Arc` to make it thread-safe? This may be a bit counter-intuitive at +/// first: after all, isn't the point of `Arc` thread safety? The key is +/// this: `Arc` makes it thread safe to have multiple ownership of the same +/// data, but it doesn't add thread safety to its data. Consider +/// `Arc<`[`RefCell`]`>`. [`RefCell`] isn't [`Sync`], and if `Arc` was always +/// [`Send`], `Arc<`[`RefCell`]`>` would be as well. 
But then we'd have a problem: +/// [`RefCell`] is not thread safe; it keeps track of the borrowing count using +/// non-atomic operations. +/// +/// In the end, this means that you may need to pair `Arc` with some sort of +/// [`std::sync`] type, usually [`Mutex`][mutex]. +/// +/// ## Breaking cycles with `Weak` +/// +/// The [`downgrade`][downgrade] method can be used to create a non-owning +/// [`Weak`] pointer. A [`Weak`] pointer can be [`upgrade`][upgrade]d +/// to an `Arc`, but this will return [`None`] if the value stored in the allocation has +/// already been dropped. In other words, `Weak` pointers do not keep the value +/// inside the allocation alive; however, they *do* keep the allocation +/// (the backing store for the value) alive. +/// +/// A cycle between `Arc` pointers will never be deallocated. For this reason, +/// [`Weak`] is used to break cycles. For example, a tree could have +/// strong `Arc` pointers from parent nodes to children, and [`Weak`] +/// pointers from children back to their parents. +/// +/// # Cloning references +/// +/// Creating a new reference from an existing reference-counted pointer is done using the +/// `Clone` trait implemented for [`Arc`][Arc] and [`Weak`][Weak]. +/// +/// ``` +/// use std::sync::Arc; +/// let foo = Arc::new(vec![1.0, 2.0, 3.0]); +/// // The two syntaxes below are equivalent. +/// let a = foo.clone(); +/// let b = Arc::clone(&foo); +/// // a, b, and foo are all Arcs that point to the same memory location +/// ``` +/// +/// ## `Deref` behavior +/// +/// `Arc` automatically dereferences to `T` (via the [`Deref`][deref] trait), +/// so you can call `T`'s methods on a value of type `Arc`. 
To avoid name +/// clashes with `T`'s methods, the methods of `Arc` itself are associated +/// functions, called using [fully qualified syntax]: +/// +/// ``` +/// use std::sync::Arc; +/// +/// let my_arc = Arc::new(()); +/// Arc::downgrade(&my_arc); +/// ``` +/// +/// `Arc`'s implementations of traits like `Clone` may also be called using +/// fully qualified syntax. Some people prefer to use fully qualified syntax, +/// while others prefer using method-call syntax. +/// +/// ``` +/// use std::sync::Arc; +/// +/// let arc = Arc::new(()); +/// // Method-call syntax +/// let arc2 = arc.clone(); +/// // Fully qualified syntax +/// let arc3 = Arc::clone(&arc); +/// ``` +/// +/// [`Weak`][Weak] does not auto-dereference to `T`, because the inner value may have +/// already been dropped. +/// +/// [`Rc`]: crate::rc::Rc +/// [clone]: Clone::clone +/// [mutex]: ../../std/sync/struct.Mutex.html +/// [rwlock]: ../../std/sync/struct.RwLock.html +/// [atomic]: core::sync::atomic +/// [`Send`]: core::marker::Send +/// [`Sync`]: core::marker::Sync +/// [deref]: core::ops::Deref +/// [downgrade]: Arc::downgrade +/// [upgrade]: Weak::upgrade +/// [`RefCell`]: core::cell::RefCell +/// [`std::sync`]: ../../std/sync/index.html +/// [`Arc::clone(&from)`]: Arc::clone +/// [fully qualified syntax]: https://doc.rust-lang.org/book/ch19-03-advanced-traits.html#fully-qualified-syntax-for-disambiguation-calling-methods-with-the-same-name +/// +/// # Examples +/// +/// Sharing some immutable data between threads: +/// +// Note that we **do not** run these tests here. The windows builders get super +// unhappy if a thread outlives the main thread and then exits at the same time +// (something deadlocks) so we just avoid this entirely by not running these +// tests. 
+/// ```no_run +/// use std::sync::Arc; +/// use std::thread; +/// +/// let five = Arc::new(5); +/// +/// for _ in 0..10 { +/// let five = Arc::clone(&five); +/// +/// thread::spawn(move || { +/// println!("{:?}", five); +/// }); +/// } +/// ``` +/// +/// Sharing a mutable [`AtomicUsize`]: +/// +/// [`AtomicUsize`]: core::sync::atomic::AtomicUsize +/// +/// ```no_run +/// use std::sync::Arc; +/// use std::sync::atomic::{AtomicUsize, Ordering}; +/// use std::thread; +/// +/// let val = Arc::new(AtomicUsize::new(5)); +/// +/// for _ in 0..10 { +/// let val = Arc::clone(&val); +/// +/// thread::spawn(move || { +/// let v = val.fetch_add(1, Ordering::SeqCst); +/// println!("{:?}", v); +/// }); +/// } +/// ``` +/// +/// See the [`rc` documentation][rc_examples] for more examples of reference +/// counting in general. +/// +/// [rc_examples]: crate::rc#examples +#[cfg_attr(not(test), rustc_diagnostic_item = "Arc")] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Arc { + ptr: NonNull>, + phantom: PhantomData>, +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl Send for Arc {} +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl Sync for Arc {} + +#[unstable(feature = "coerce_unsized", issue = "27732")] +impl, U: ?Sized> CoerceUnsized> for Arc {} + +#[unstable(feature = "dispatch_from_dyn", issue = "none")] +impl, U: ?Sized> DispatchFromDyn> for Arc {} + +impl Arc { + fn from_inner(ptr: NonNull>) -> Self { + Self { ptr, phantom: PhantomData } + } + + unsafe fn from_ptr(ptr: *mut ArcInner) -> Self { + unsafe { Self::from_inner(NonNull::new_unchecked(ptr)) } + } +} + +/// `Weak` is a version of [`Arc`] that holds a non-owning reference to the +/// managed allocation. The allocation is accessed by calling [`upgrade`] on the `Weak` +/// pointer, which returns an [`Option`]`<`[`Arc`]`>`. 
+/// +/// Since a `Weak` reference does not count towards ownership, it will not +/// prevent the value stored in the allocation from being dropped, and `Weak` itself makes no +/// guarantees about the value still being present. Thus it may return [`None`] +/// when [`upgrade`]d. Note however that a `Weak` reference *does* prevent the allocation +/// itself (the backing store) from being deallocated. +/// +/// A `Weak` pointer is useful for keeping a temporary reference to the allocation +/// managed by [`Arc`] without preventing its inner value from being dropped. It is also used to +/// prevent circular references between [`Arc`] pointers, since mutual owning references +/// would never allow either [`Arc`] to be dropped. For example, a tree could +/// have strong [`Arc`] pointers from parent nodes to children, and `Weak` +/// pointers from children back to their parents. +/// +/// The typical way to obtain a `Weak` pointer is to call [`Arc::downgrade`]. +/// +/// [`upgrade`]: Weak::upgrade +#[stable(feature = "arc_weak", since = "1.4.0")] +pub struct Weak { + // This is a `NonNull` to allow optimizing the size of this type in enums, + // but it is not necessarily a valid pointer. + // `Weak::new` sets this to `usize::MAX` so that it doesn’t need + // to allocate space on the heap. That's not a value a real pointer + // will ever have because RcBox has alignment at least 2. + // This is only possible when `T: Sized`; unsized `T` never dangle. 
+ ptr: NonNull>, +} + +#[stable(feature = "arc_weak", since = "1.4.0")] +unsafe impl Send for Weak {} +#[stable(feature = "arc_weak", since = "1.4.0")] +unsafe impl Sync for Weak {} + +#[unstable(feature = "coerce_unsized", issue = "27732")] +impl, U: ?Sized> CoerceUnsized> for Weak {} +#[unstable(feature = "dispatch_from_dyn", issue = "none")] +impl, U: ?Sized> DispatchFromDyn> for Weak {} + +#[stable(feature = "arc_weak", since = "1.4.0")] +impl fmt::Debug for Weak { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "(Weak)") + } +} + +// This is repr(C) to future-proof against possible field-reordering, which +// would interfere with otherwise safe [into|from]_raw() of transmutable +// inner types. +#[repr(C)] +struct ArcInner { + strong: atomic::AtomicUsize, + + // the value usize::MAX acts as a sentinel for temporarily "locking" the + // ability to upgrade weak pointers or downgrade strong ones; this is used + // to avoid races in `make_mut` and `get_mut`. + weak: atomic::AtomicUsize, + + data: T, +} + +unsafe impl Send for ArcInner {} +unsafe impl Sync for ArcInner {} + +impl Arc { + /// Constructs a new `Arc`. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new(data: T) -> Arc { + // Start the weak pointer count as 1 which is the weak pointer that's + // held by all the strong pointers (kinda), see std/rc.rs for more info + let x: Box<_> = box ArcInner { + strong: atomic::AtomicUsize::new(1), + weak: atomic::AtomicUsize::new(1), + data, + }; + Self::from_inner(Box::leak(x).into()) + } + + /// Constructs a new `Arc` using a weak reference to itself. Attempting + /// to upgrade the weak reference before this function returns will result + /// in a `None` value. However, the weak reference may be cloned freely and + /// stored for use at a later time. 
+ /// + /// # Examples + /// ``` + /// #![feature(arc_new_cyclic)] + /// #![allow(dead_code)] + /// + /// use std::sync::{Arc, Weak}; + /// + /// struct Foo { + /// me: Weak, + /// } + /// + /// let foo = Arc::new_cyclic(|me| Foo { + /// me: me.clone(), + /// }); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "arc_new_cyclic", issue = "75861")] + pub fn new_cyclic(data_fn: impl FnOnce(&Weak) -> T) -> Arc { + // Construct the inner in the "uninitialized" state with a single + // weak reference. + let uninit_ptr: NonNull<_> = Box::leak(box ArcInner { + strong: atomic::AtomicUsize::new(0), + weak: atomic::AtomicUsize::new(1), + data: mem::MaybeUninit::::uninit(), + }) + .into(); + let init_ptr: NonNull> = uninit_ptr.cast(); + + let weak = Weak { ptr: init_ptr }; + + // It's important we don't give up ownership of the weak pointer, or + // else the memory might be freed by the time `data_fn` returns. If + // we really wanted to pass ownership, we could create an additional + // weak pointer for ourselves, but this would result in additional + // updates to the weak reference count which might not be necessary + // otherwise. + let data = data_fn(&weak); + + // Now we can properly initialize the inner value and turn our weak + // reference into a strong reference. + unsafe { + let inner = init_ptr.as_ptr(); + ptr::write(ptr::addr_of_mut!((*inner).data), data); + + // The above write to the data field must be visible to any threads which + // observe a non-zero strong count. Therefore we need at least "Release" ordering + // in order to synchronize with the `compare_exchange_weak` in `Weak::upgrade`. + // + // "Acquire" ordering is not required. When considering the possible behaviours + // of `data_fn` we only need to look at what it could do with a reference to a + // non-upgradeable `Weak`: + // - It can *clone* the `Weak`, increasing the weak reference count. 
+ // - It can drop those clones, decreasing the weak reference count (but never to zero). + // + // These side effects do not impact us in any way, and no other side effects are + // possible with safe code alone. + let prev_value = (*inner).strong.fetch_add(1, Release); + debug_assert_eq!(prev_value, 0, "No prior strong references should exist"); + } + + let strong = Arc::from_inner(init_ptr); + + // Strong references should collectively own a shared weak reference, + // so don't run the destructor for our old weak reference. + mem::forget(weak); + strong + } + + /// Constructs a new `Arc` with uninitialized contents. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::sync::Arc; + /// + /// let mut five = Arc::::new_uninit(); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// Arc::get_mut_unchecked(&mut five).as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit() -> Arc> { + unsafe { + Arc::from_ptr(Arc::allocate_for_layout( + Layout::new::(), + |layout| Global.allocate(layout), + |mem| mem as *mut ArcInner>, + )) + } + } + + /// Constructs a new `Arc` with uninitialized contents, with the memory + /// being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// use std::sync::Arc; + /// + /// let zero = Arc::::new_zeroed(); + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0) + /// ``` + /// + /// [zeroed]: ../../std/mem/union.MaybeUninit.html#method.zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed() -> Arc> { + unsafe { + Arc::from_ptr(Arc::allocate_for_layout( + Layout::new::(), + |layout| Global.allocate_zeroed(layout), + |mem| mem as *mut ArcInner>, + )) + } + } + + /// Constructs a new `Pin>`. If `T` does not implement `Unpin`, then + /// `data` will be pinned in memory and unable to be moved. + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "pin", since = "1.33.0")] + pub fn pin(data: T) -> Pin> { + unsafe { Pin::new_unchecked(Arc::new(data)) } + } + + /// Constructs a new `Arc`, returning an error if allocation fails. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// use std::sync::Arc; + /// + /// let five = Arc::try_new(5)?; + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn try_new(data: T) -> Result, AllocError> { + // Start the weak pointer count as 1 which is the weak pointer that's + // held by all the strong pointers (kinda), see std/rc.rs for more info + let x: Box<_> = Box::try_new(ArcInner { + strong: atomic::AtomicUsize::new(1), + weak: atomic::AtomicUsize::new(1), + data, + })?; + Ok(Self::from_inner(Box::leak(x).into())) + } + + /// Constructs a new `Arc` with uninitialized contents, returning an error + /// if allocation fails. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit, allocator_api)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::sync::Arc; + /// + /// let mut five = Arc::::try_new_uninit()?; + /// + /// let five = unsafe { + /// // Deferred initialization: + /// Arc::get_mut_unchecked(&mut five).as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn try_new_uninit() -> Result>, AllocError> { + unsafe { + Ok(Arc::from_ptr(Arc::try_allocate_for_layout( + Layout::new::(), + |layout| Global.allocate(layout), + |mem| mem as *mut ArcInner>, + )?)) + } + } + + /// Constructs a new `Arc` with uninitialized contents, with the memory + /// being filled with `0` bytes, returning an error if allocation fails. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit, allocator_api)] + /// + /// use std::sync::Arc; + /// + /// let zero = Arc::::try_new_zeroed()?; + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn try_new_zeroed() -> Result>, AllocError> { + unsafe { + Ok(Arc::from_ptr(Arc::try_allocate_for_layout( + Layout::new::(), + |layout| Global.allocate_zeroed(layout), + |mem| mem as *mut ArcInner>, + )?)) + } + } + /// Returns the inner value, if the `Arc` has exactly one strong reference. + /// + /// Otherwise, an [`Err`] is returned with the same `Arc` that was + /// passed in. + /// + /// This will succeed even if there are outstanding weak references. 
+ /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let x = Arc::new(3); + /// assert_eq!(Arc::try_unwrap(x), Ok(3)); + /// + /// let x = Arc::new(4); + /// let _y = Arc::clone(&x); + /// assert_eq!(*Arc::try_unwrap(x).unwrap_err(), 4); + /// ``` + #[inline] + #[stable(feature = "arc_unique", since = "1.4.0")] + pub fn try_unwrap(this: Self) -> Result { + if this.inner().strong.compare_exchange(1, 0, Relaxed, Relaxed).is_err() { + return Err(this); + } + + acquire!(this.inner().strong); + + unsafe { + let elem = ptr::read(&this.ptr.as_ref().data); + + // Make a weak pointer to clean up the implicit strong-weak reference + let _weak = Weak { ptr: this.ptr }; + mem::forget(this); + + Ok(elem) + } + } +} + +impl Arc<[T]> { + /// Constructs a new atomically reference-counted slice with uninitialized contents. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::sync::Arc; + /// + /// let mut values = Arc::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// Arc::get_mut_unchecked(&mut values)[0].as_mut_ptr().write(1); + /// Arc::get_mut_unchecked(&mut values)[1].as_mut_ptr().write(2); + /// Arc::get_mut_unchecked(&mut values)[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit_slice(len: usize) -> Arc<[mem::MaybeUninit]> { + unsafe { Arc::from_ptr(Arc::allocate_for_slice(len)) } + } + + /// Constructs a new atomically reference-counted slice with uninitialized contents, with the memory being + /// filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and + /// incorrect usage of this method. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// use std::sync::Arc; + /// + /// let values = Arc::<[u32]>::new_zeroed_slice(3); + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]) + /// ``` + /// + /// [zeroed]: ../../std/mem/union.MaybeUninit.html#method.zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed_slice(len: usize) -> Arc<[mem::MaybeUninit]> { + unsafe { + Arc::from_ptr(Arc::allocate_for_layout( + Layout::array::(len).unwrap(), + |layout| Global.allocate_zeroed(layout), + |mem| { + ptr::slice_from_raw_parts_mut(mem as *mut T, len) + as *mut ArcInner<[mem::MaybeUninit]> + }, + )) + } + } +} + +impl Arc> { + /// Converts to `Arc`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the inner value + /// really is in an initialized state. + /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. + /// + /// [`MaybeUninit::assume_init`]: ../../std/mem/union.MaybeUninit.html#method.assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::sync::Arc; + /// + /// let mut five = Arc::::new_uninit(); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// Arc::get_mut_unchecked(&mut five).as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub unsafe fn assume_init(self) -> Arc { + Arc::from_inner(mem::ManuallyDrop::new(self).ptr.cast()) + } +} + +impl Arc<[mem::MaybeUninit]> { + /// Converts to `Arc<[T]>`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the inner value + /// really is in an initialized state. 
+ /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. + /// + /// [`MaybeUninit::assume_init`]: ../../std/mem/union.MaybeUninit.html#method.assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// #![feature(get_mut_unchecked)] + /// + /// use std::sync::Arc; + /// + /// let mut values = Arc::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// Arc::get_mut_unchecked(&mut values)[0].as_mut_ptr().write(1); + /// Arc::get_mut_unchecked(&mut values)[1].as_mut_ptr().write(2); + /// Arc::get_mut_unchecked(&mut values)[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub unsafe fn assume_init(self) -> Arc<[T]> { + unsafe { Arc::from_ptr(mem::ManuallyDrop::new(self).ptr.as_ptr() as _) } + } +} + +impl Arc { + /// Consumes the `Arc`, returning the wrapped pointer. + /// + /// To avoid a memory leak the pointer must be converted back to an `Arc` using + /// [`Arc::from_raw`]. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let x = Arc::new("hello".to_owned()); + /// let x_ptr = Arc::into_raw(x); + /// assert_eq!(unsafe { &*x_ptr }, "hello"); + /// ``` + #[stable(feature = "rc_raw", since = "1.17.0")] + pub fn into_raw(this: Self) -> *const T { + let ptr = Self::as_ptr(&this); + mem::forget(this); + ptr + } + + /// Provides a raw pointer to the data. + /// + /// The counts are not affected in any way and the `Arc` is not consumed. The pointer is valid for + /// as long as there are strong counts in the `Arc`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let x = Arc::new("hello".to_owned()); + /// let y = Arc::clone(&x); + /// let x_ptr = Arc::as_ptr(&x); + /// assert_eq!(x_ptr, Arc::as_ptr(&y)); + /// assert_eq!(unsafe { &*x_ptr }, "hello"); + /// ``` + #[stable(feature = "rc_as_ptr", since = "1.45.0")] + pub fn as_ptr(this: &Self) -> *const T { + let ptr: *mut ArcInner = NonNull::as_ptr(this.ptr); + + // SAFETY: This cannot go through Deref::deref or RcBoxPtr::inner because + // this is required to retain raw/mut provenance such that e.g. `get_mut` can + // write through the pointer after the Rc is recovered through `from_raw`. + unsafe { ptr::addr_of_mut!((*ptr).data) } + } + + /// Constructs an `Arc` from a raw pointer. + /// + /// The raw pointer must have been previously returned by a call to + /// [`Arc::into_raw`][into_raw] where `U` must have the same size and + /// alignment as `T`. This is trivially true if `U` is `T`. + /// Note that if `U` is not `T` but has the same size and alignment, this is + /// basically like transmuting references of different types. See + /// [`mem::transmute`][transmute] for more information on what + /// restrictions apply in this case. + /// + /// The user of `from_raw` has to make sure a specific value of `T` is only + /// dropped once. + /// + /// This function is unsafe because improper use may lead to memory unsafety, + /// even if the returned `Arc` is never accessed. + /// + /// [into_raw]: Arc::into_raw + /// [transmute]: core::mem::transmute + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let x = Arc::new("hello".to_owned()); + /// let x_ptr = Arc::into_raw(x); + /// + /// unsafe { + /// // Convert back to an `Arc` to prevent leak. + /// let x = Arc::from_raw(x_ptr); + /// assert_eq!(&*x, "hello"); + /// + /// // Further calls to `Arc::from_raw(x_ptr)` would be memory-unsafe. 
+ /// } + /// + /// // The memory was freed when `x` went out of scope above, so `x_ptr` is now dangling! + /// ``` + #[stable(feature = "rc_raw", since = "1.17.0")] + pub unsafe fn from_raw(ptr: *const T) -> Self { + unsafe { + let offset = data_offset(ptr); + + // Reverse the offset to find the original ArcInner. + let arc_ptr = (ptr as *mut ArcInner).set_ptr_value((ptr as *mut u8).offset(-offset)); + + Self::from_ptr(arc_ptr) + } + } + + /// Creates a new [`Weak`] pointer to this allocation. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// let weak_five = Arc::downgrade(&five); + /// ``` + #[stable(feature = "arc_weak", since = "1.4.0")] + pub fn downgrade(this: &Self) -> Weak { + // This Relaxed is OK because we're checking the value in the CAS + // below. + let mut cur = this.inner().weak.load(Relaxed); + + loop { + // check if the weak counter is currently "locked"; if so, spin. + if cur == usize::MAX { + hint::spin_loop(); + cur = this.inner().weak.load(Relaxed); + continue; + } + + // NOTE: this code currently ignores the possibility of overflow + // into usize::MAX; in general both Rc and Arc need to be adjusted + // to deal with overflow. + + // Unlike with Clone(), we need this to be an Acquire read to + // synchronize with the write coming from `is_unique`, so that the + // events prior to that write happen before this read. + match this.inner().weak.compare_exchange_weak(cur, cur + 1, Acquire, Relaxed) { + Ok(_) => { + // Make sure we do not create a dangling Weak + debug_assert!(!is_dangling(this.ptr.as_ptr())); + return Weak { ptr: this.ptr }; + } + Err(old) => cur = old, + } + } + } + + /// Gets the number of [`Weak`] pointers to this allocation. + /// + /// # Safety + /// + /// This method by itself is safe, but using it correctly requires extra care. 
+ /// Another thread can change the weak count at any time, + /// including potentially between calling this method and acting on the result. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// let _weak_five = Arc::downgrade(&five); + /// + /// // This assertion is deterministic because we haven't shared + /// // the `Arc` or `Weak` between threads. + /// assert_eq!(1, Arc::weak_count(&five)); + /// ``` + #[inline] + #[stable(feature = "arc_counts", since = "1.15.0")] + pub fn weak_count(this: &Self) -> usize { + let cnt = this.inner().weak.load(SeqCst); + // If the weak count is currently locked, the value of the + // count was 0 just before taking the lock. + if cnt == usize::MAX { 0 } else { cnt - 1 } + } + + /// Gets the number of strong (`Arc`) pointers to this allocation. + /// + /// # Safety + /// + /// This method by itself is safe, but using it correctly requires extra care. + /// Another thread can change the strong count at any time, + /// including potentially between calling this method and acting on the result. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// let _also_five = Arc::clone(&five); + /// + /// // This assertion is deterministic because we haven't shared + /// // the `Arc` between threads. + /// assert_eq!(2, Arc::strong_count(&five)); + /// ``` + #[inline] + #[stable(feature = "arc_counts", since = "1.15.0")] + pub fn strong_count(this: &Self) -> usize { + this.inner().strong.load(SeqCst) + } + + /// Increments the strong reference count on the `Arc` associated with the + /// provided pointer by one. + /// + /// # Safety + /// + /// The pointer must have been obtained through `Arc::into_raw`, and the + /// associated `Arc` instance must be valid (i.e. the strong count must be at + /// least 1) for the duration of this method. 
+ /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// unsafe { + /// let ptr = Arc::into_raw(five); + /// Arc::increment_strong_count(ptr); + /// + /// // This assertion is deterministic because we haven't shared + /// // the `Arc` between threads. + /// let five = Arc::from_raw(ptr); + /// assert_eq!(2, Arc::strong_count(&five)); + /// } + /// ``` + #[inline] + #[stable(feature = "arc_mutate_strong_count", since = "1.51.0")] + pub unsafe fn increment_strong_count(ptr: *const T) { + // Retain Arc, but don't touch refcount by wrapping in ManuallyDrop + let arc = unsafe { mem::ManuallyDrop::new(Arc::::from_raw(ptr)) }; + // Now increase refcount, but don't drop new refcount either + let _arc_clone: mem::ManuallyDrop<_> = arc.clone(); + } + + /// Decrements the strong reference count on the `Arc` associated with the + /// provided pointer by one. + /// + /// # Safety + /// + /// The pointer must have been obtained through `Arc::into_raw`, and the + /// associated `Arc` instance must be valid (i.e. the strong count must be at + /// least 1) when invoking this method. This method can be used to release the final + /// `Arc` and backing storage, but **should not** be called after the final `Arc` has been + /// released. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// unsafe { + /// let ptr = Arc::into_raw(five); + /// Arc::increment_strong_count(ptr); + /// + /// // Those assertions are deterministic because we haven't shared + /// // the `Arc` between threads. 
+ /// let five = Arc::from_raw(ptr); + /// assert_eq!(2, Arc::strong_count(&five)); + /// Arc::decrement_strong_count(ptr); + /// assert_eq!(1, Arc::strong_count(&five)); + /// } + /// ``` + #[inline] + #[stable(feature = "arc_mutate_strong_count", since = "1.51.0")] + pub unsafe fn decrement_strong_count(ptr: *const T) { + unsafe { mem::drop(Arc::from_raw(ptr)) }; + } + + #[inline] + fn inner(&self) -> &ArcInner { + // This unsafety is ok because while this arc is alive we're guaranteed + // that the inner pointer is valid. Furthermore, we know that the + // `ArcInner` structure itself is `Sync` because the inner data is + // `Sync` as well, so we're ok loaning out an immutable pointer to these + // contents. + unsafe { self.ptr.as_ref() } + } + + // Non-inlined part of `drop`. + #[inline(never)] + unsafe fn drop_slow(&mut self) { + // Destroy the data at this time, even though we may not free the box + // allocation itself (there may still be weak pointers lying around). + unsafe { ptr::drop_in_place(Self::get_mut_unchecked(self)) }; + + // Drop the weak ref collectively held by all strong references + drop(Weak { ptr: self.ptr }); + } + + #[inline] + #[stable(feature = "ptr_eq", since = "1.17.0")] + /// Returns `true` if the two `Arc`s point to the same allocation + /// (in a vein similar to [`ptr::eq`]). + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// let same_five = Arc::clone(&five); + /// let other_five = Arc::new(5); + /// + /// assert!(Arc::ptr_eq(&five, &same_five)); + /// assert!(!Arc::ptr_eq(&five, &other_five)); + /// ``` + /// + /// [`ptr::eq`]: core::ptr::eq + pub fn ptr_eq(this: &Self, other: &Self) -> bool { + this.ptr.as_ptr() == other.ptr.as_ptr() + } +} + +impl Arc { + /// Allocates an `ArcInner` with sufficient space for + /// a possibly-unsized inner value where the value has the layout provided. 
+ /// + /// The function `mem_to_arcinner` is called with the data pointer + /// and must return back a (potentially fat)-pointer for the `ArcInner`. + #[cfg(not(no_global_oom_handling))] + unsafe fn allocate_for_layout( + value_layout: Layout, + allocate: impl FnOnce(Layout) -> Result, AllocError>, + mem_to_arcinner: impl FnOnce(*mut u8) -> *mut ArcInner, + ) -> *mut ArcInner { + // Calculate layout using the given value layout. + // Previously, layout was calculated on the expression + // `&*(ptr as *const ArcInner)`, but this created a misaligned + // reference (see #54908). + let layout = Layout::new::>().extend(value_layout).unwrap().0.pad_to_align(); + unsafe { + Arc::try_allocate_for_layout(value_layout, allocate, mem_to_arcinner) + .unwrap_or_else(|_| handle_alloc_error(layout)) + } + } + + /// Allocates an `ArcInner` with sufficient space for + /// a possibly-unsized inner value where the value has the layout provided, + /// returning an error if allocation fails. + /// + /// The function `mem_to_arcinner` is called with the data pointer + /// and must return back a (potentially fat)-pointer for the `ArcInner`. + unsafe fn try_allocate_for_layout( + value_layout: Layout, + allocate: impl FnOnce(Layout) -> Result, AllocError>, + mem_to_arcinner: impl FnOnce(*mut u8) -> *mut ArcInner, + ) -> Result<*mut ArcInner, AllocError> { + // Calculate layout using the given value layout. + // Previously, layout was calculated on the expression + // `&*(ptr as *const ArcInner)`, but this created a misaligned + // reference (see #54908). 
+ let layout = Layout::new::>().extend(value_layout).unwrap().0.pad_to_align(); + + let ptr = allocate(layout)?; + + // Initialize the ArcInner + let inner = mem_to_arcinner(ptr.as_non_null_ptr().as_ptr()); + debug_assert_eq!(unsafe { Layout::for_value(&*inner) }, layout); + + unsafe { + ptr::write(&mut (*inner).strong, atomic::AtomicUsize::new(1)); + ptr::write(&mut (*inner).weak, atomic::AtomicUsize::new(1)); + } + + Ok(inner) + } + + /// Allocates an `ArcInner` with sufficient space for an unsized inner value. + #[cfg(not(no_global_oom_handling))] + unsafe fn allocate_for_ptr(ptr: *const T) -> *mut ArcInner { + // Allocate for the `ArcInner` using the given value. + unsafe { + Self::allocate_for_layout( + Layout::for_value(&*ptr), + |layout| Global.allocate(layout), + |mem| (ptr as *mut ArcInner).set_ptr_value(mem) as *mut ArcInner, + ) + } + } + + #[cfg(not(no_global_oom_handling))] + fn from_box(v: Box) -> Arc { + unsafe { + let (box_unique, alloc) = Box::into_unique(v); + let bptr = box_unique.as_ptr(); + + let value_size = size_of_val(&*bptr); + let ptr = Self::allocate_for_ptr(bptr); + + // Copy value as bytes + ptr::copy_nonoverlapping( + bptr as *const T as *const u8, + &mut (*ptr).data as *mut _ as *mut u8, + value_size, + ); + + // Free the allocation without dropping its contents + box_free(box_unique, alloc); + + Self::from_ptr(ptr) + } + } +} + +impl Arc<[T]> { + /// Allocates an `ArcInner<[T]>` with the given length. + #[cfg(not(no_global_oom_handling))] + unsafe fn allocate_for_slice(len: usize) -> *mut ArcInner<[T]> { + unsafe { + Self::allocate_for_layout( + Layout::array::(len).unwrap(), + |layout| Global.allocate(layout), + |mem| ptr::slice_from_raw_parts_mut(mem as *mut T, len) as *mut ArcInner<[T]>, + ) + } + } + + /// Tries to allocate an `ArcInner<[T]>` with the given length. 
+ unsafe fn try_allocate_for_slice(len: usize) -> Result<*mut ArcInner<[T]>, TryReserveError> { + unsafe { + let layout = Layout::array::(len)?; + Self::try_allocate_for_layout( + layout, + |l| Global.allocate(l), + |mem| ptr::slice_from_raw_parts_mut(mem as *mut T, len) as *mut ArcInner<[T]>, + ).map_err(|_| TryReserveError::AllocError { layout, non_exhaustive: () }) + } + } + + /// Copy elements from slice into newly allocated Arc<\[T\]> + /// + /// Unsafe because the caller must either take ownership or bind `T: Copy`. + #[cfg(not(no_global_oom_handling))] + unsafe fn copy_from_slice(v: &[T]) -> Arc<[T]> { + unsafe { + let ptr = Self::allocate_for_slice(v.len()); + + ptr::copy_nonoverlapping(v.as_ptr(), &mut (*ptr).data as *mut [T] as *mut T, v.len()); + + Self::from_ptr(ptr) + } + } + + /// Tries to copy elements from slice into newly allocated Arc<\[T\]> + /// + /// Unsafe because the caller must either take ownership or bind `T: Copy`. + unsafe fn try_copy_from_slice(v: &[T]) -> Result, TryReserveError> { + unsafe { + let ptr = Self::try_allocate_for_slice(v.len())?; + + ptr::copy_nonoverlapping(v.as_ptr(), &mut (*ptr).data as *mut [T] as *mut T, v.len()); + + Ok(Self::from_ptr(ptr)) + } + } + + /// Constructs an `Arc<[T]>` from an iterator known to be of a certain size. + /// + /// Behavior is undefined should the size be wrong. + #[cfg(not(no_global_oom_handling))] + unsafe fn from_iter_exact(iter: impl iter::Iterator, len: usize) -> Arc<[T]> { + // Panic guard while cloning T elements. + // In the event of a panic, elements that have been written + // into the new ArcInner will be dropped, then the memory freed. 
+ struct Guard { + mem: NonNull, + elems: *mut T, + layout: Layout, + n_elems: usize, + } + + impl Drop for Guard { + fn drop(&mut self) { + unsafe { + let slice = from_raw_parts_mut(self.elems, self.n_elems); + ptr::drop_in_place(slice); + + Global.deallocate(self.mem, self.layout); + } + } + } + + unsafe { + let ptr = Self::allocate_for_slice(len); + + let mem = ptr as *mut _ as *mut u8; + let layout = Layout::for_value(&*ptr); + + // Pointer to first element + let elems = &mut (*ptr).data as *mut [T] as *mut T; + + let mut guard = Guard { mem: NonNull::new_unchecked(mem), elems, layout, n_elems: 0 }; + + for (i, item) in iter.enumerate() { + ptr::write(elems.add(i), item); + guard.n_elems += 1; + } + + // All clear. Forget the guard so it doesn't free the new ArcInner. + mem::forget(guard); + + Self::from_ptr(ptr) + } + } +} + +/// Specialization trait used for `From<&[T]>`. +#[cfg(not(no_global_oom_handling))] +trait ArcFromSlice { + fn from_slice(slice: &[T]) -> Self; +} + +#[cfg(not(no_global_oom_handling))] +impl ArcFromSlice for Arc<[T]> { + #[inline] + default fn from_slice(v: &[T]) -> Self { + unsafe { Self::from_iter_exact(v.iter().cloned(), v.len()) } + } +} + +#[cfg(not(no_global_oom_handling))] +impl ArcFromSlice for Arc<[T]> { + #[inline] + fn from_slice(v: &[T]) -> Self { + unsafe { Arc::copy_from_slice(v) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for Arc { + /// Makes a clone of the `Arc` pointer. + /// + /// This creates another pointer to the same allocation, increasing the + /// strong reference count. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// let _ = Arc::clone(&five); + /// ``` + #[inline] + fn clone(&self) -> Arc { + // Using a relaxed ordering is alright here, as knowledge of the + // original reference prevents other threads from erroneously deleting + // the object. 
+ // + // As explained in the [Boost documentation][1], Increasing the + // reference counter can always be done with memory_order_relaxed: New + // references to an object can only be formed from an existing + // reference, and passing an existing reference from one thread to + // another must already provide any required synchronization. + // + // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html) + let old_size = self.inner().strong.fetch_add(1, Relaxed); + + // However we need to guard against massive refcounts in case someone + // is `mem::forget`ing Arcs. If we don't do this the count can overflow + // and users will use-after free. We racily saturate to `isize::MAX` on + // the assumption that there aren't ~2 billion threads incrementing + // the reference count at once. This branch will never be taken in + // any realistic program. + // + // We abort because such a program is incredibly degenerate, and we + // don't care to support it. + if old_size > MAX_REFCOUNT { + abort(); + } + + Self::from_inner(self.ptr) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Deref for Arc { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + &self.inner().data + } +} + +#[unstable(feature = "receiver_trait", issue = "none")] +impl Receiver for Arc {} + +impl Arc { + /// Makes a mutable reference into the given `Arc`. + /// + /// If there are other `Arc` or [`Weak`] pointers to the same allocation, + /// then `make_mut` will create a new allocation and invoke [`clone`][clone] on the inner value + /// to ensure unique ownership. This is also referred to as clone-on-write. + /// + /// Note that this differs from the behavior of [`Rc::make_mut`] which disassociates + /// any remaining `Weak` pointers. + /// + /// See also [`get_mut`][get_mut], which will fail rather than cloning. 
+ /// + /// [clone]: Clone::clone + /// [get_mut]: Arc::get_mut + /// [`Rc::make_mut`]: super::rc::Rc::make_mut + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let mut data = Arc::new(5); + /// + /// *Arc::make_mut(&mut data) += 1; // Won't clone anything + /// let mut other_data = Arc::clone(&data); // Won't clone inner data + /// *Arc::make_mut(&mut data) += 1; // Clones inner data + /// *Arc::make_mut(&mut data) += 1; // Won't clone anything + /// *Arc::make_mut(&mut other_data) *= 2; // Won't clone anything + /// + /// // Now `data` and `other_data` point to different allocations. + /// assert_eq!(*data, 8); + /// assert_eq!(*other_data, 12); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "arc_unique", since = "1.4.0")] + pub fn make_mut(this: &mut Self) -> &mut T { + // Note that we hold both a strong reference and a weak reference. + // Thus, releasing our strong reference only will not, by itself, cause + // the memory to be deallocated. + // + // Use Acquire to ensure that we see any writes to `weak` that happen + // before release writes (i.e., decrements) to `strong`. Since we hold a + // weak count, there's no chance the ArcInner itself could be + // deallocated. + if this.inner().strong.compare_exchange(1, 0, Acquire, Relaxed).is_err() { + // Another strong pointer exists, so we must clone. + // Pre-allocate memory to allow writing the cloned value directly. + let mut arc = Self::new_uninit(); + unsafe { + let data = Arc::get_mut_unchecked(&mut arc); + (**this).write_clone_into_raw(data.as_mut_ptr()); + *this = arc.assume_init(); + } + } else if this.inner().weak.load(Relaxed) != 1 { + // Relaxed suffices in the above because this is fundamentally an + // optimization: we are always racing with weak pointers being + // dropped. Worst case, we end up allocated a new Arc unnecessarily. + + // We removed the last strong ref, but there are additional weak + // refs remaining. 
We'll move the contents to a new Arc, and + // invalidate the other weak refs. + + // Note that it is not possible for the read of `weak` to yield + // usize::MAX (i.e., locked), since the weak count can only be + // locked by a thread with a strong reference. + + // Materialize our own implicit weak pointer, so that it can clean + // up the ArcInner as needed. + let _weak = Weak { ptr: this.ptr }; + + // Can just steal the data, all that's left is Weaks + let mut arc = Self::new_uninit(); + unsafe { + let data = Arc::get_mut_unchecked(&mut arc); + data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1); + ptr::write(this, arc.assume_init()); + } + } else { + // We were the sole reference of either kind; bump back up the + // strong ref count. + this.inner().strong.store(1, Release); + } + + // As with `get_mut()`, the unsafety is ok because our reference was + // either unique to begin with, or became one upon cloning the contents. + unsafe { Self::get_mut_unchecked(this) } + } +} + +impl Arc { + /// Returns a mutable reference into the given `Arc`, if there are + /// no other `Arc` or [`Weak`] pointers to the same allocation. + /// + /// Returns [`None`] otherwise, because it is not safe to + /// mutate a shared value. + /// + /// See also [`make_mut`][make_mut], which will [`clone`][clone] + /// the inner value when there are other pointers. + /// + /// [make_mut]: Arc::make_mut + /// [clone]: Clone::clone + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let mut x = Arc::new(3); + /// *Arc::get_mut(&mut x).unwrap() = 4; + /// assert_eq!(*x, 4); + /// + /// let _y = Arc::clone(&x); + /// assert!(Arc::get_mut(&mut x).is_none()); + /// ``` + #[inline] + #[stable(feature = "arc_unique", since = "1.4.0")] + pub fn get_mut(this: &mut Self) -> Option<&mut T> { + if this.is_unique() { + // This unsafety is ok because we're guaranteed that the pointer + // returned is the *only* pointer that will ever be returned to T. 
Our + // reference count is guaranteed to be 1 at this point, and we required + // the Arc itself to be `mut`, so we're returning the only possible + // reference to the inner data. + unsafe { Some(Arc::get_mut_unchecked(this)) } + } else { + None + } + } + + /// Returns a mutable reference into the given `Arc`, + /// without any check. + /// + /// See also [`get_mut`], which is safe and does appropriate checks. + /// + /// [`get_mut`]: Arc::get_mut + /// + /// # Safety + /// + /// Any other `Arc` or [`Weak`] pointers to the same allocation must not be dereferenced + /// for the duration of the returned borrow. + /// This is trivially the case if no such pointers exist, + /// for example immediately after `Arc::new`. + /// + /// # Examples + /// + /// ``` + /// #![feature(get_mut_unchecked)] + /// + /// use std::sync::Arc; + /// + /// let mut x = Arc::new(String::new()); + /// unsafe { + /// Arc::get_mut_unchecked(&mut x).push_str("foo") + /// } + /// assert_eq!(*x, "foo"); + /// ``` + #[inline] + #[unstable(feature = "get_mut_unchecked", issue = "63292")] + pub unsafe fn get_mut_unchecked(this: &mut Self) -> &mut T { + // We are careful to *not* create a reference covering the "count" fields, as + // this would alias with concurrent access to the reference counts (e.g. by `Weak`). + unsafe { &mut (*this.ptr.as_ptr()).data } + } + + /// Determine whether this is the unique reference (including weak refs) to + /// the underlying data. + /// + /// Note that this requires locking the weak ref count. + fn is_unique(&mut self) -> bool { + // lock the weak pointer count if we appear to be the sole weak pointer + // holder. + // + // The acquire label here ensures a happens-before relationship with any + // writes to `strong` (in particular in `Weak::upgrade`) prior to decrements + // of the `weak` count (via `Weak::drop`, which uses release). If the upgraded + // weak ref was never dropped, the CAS here will fail so we do not care to synchronize. 
+ if self.inner().weak.compare_exchange(1, usize::MAX, Acquire, Relaxed).is_ok() { + // This needs to be an `Acquire` to synchronize with the decrement of the `strong` + // counter in `drop` -- the only access that happens when any but the last reference + // is being dropped. + let unique = self.inner().strong.load(Acquire) == 1; + + // The release write here synchronizes with a read in `downgrade`, + // effectively preventing the above read of `strong` from happening + // after the write. + self.inner().weak.store(1, Release); // release the lock + unique + } else { + false + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T: ?Sized> Drop for Arc { + /// Drops the `Arc`. + /// + /// This will decrement the strong reference count. If the strong reference + /// count reaches zero then the only other references (if any) are + /// [`Weak`], so we `drop` the inner value. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// struct Foo; + /// + /// impl Drop for Foo { + /// fn drop(&mut self) { + /// println!("dropped!"); + /// } + /// } + /// + /// let foo = Arc::new(Foo); + /// let foo2 = Arc::clone(&foo); + /// + /// drop(foo); // Doesn't print anything + /// drop(foo2); // Prints "dropped!" + /// ``` + #[inline] + fn drop(&mut self) { + // Because `fetch_sub` is already atomic, we do not need to synchronize + // with other threads unless we are going to delete the object. This + // same logic applies to the below `fetch_sub` to the `weak` count. + if self.inner().strong.fetch_sub(1, Release) != 1 { + return; + } + + // This fence is needed to prevent reordering of use of the data and + // deletion of the data. Because it is marked `Release`, the decreasing + // of the reference count synchronizes with this `Acquire` fence. This + // means that use of the data happens before decreasing the reference + // count, which happens before this fence, which happens before the + // deletion of the data. 
+ // + // As explained in the [Boost documentation][1], + // + // > It is important to enforce any possible access to the object in one + // > thread (through an existing reference) to *happen before* deleting + // > the object in a different thread. This is achieved by a "release" + // > operation after dropping a reference (any access to the object + // > through this reference must obviously happened before), and an + // > "acquire" operation before deleting the object. + // + // In particular, while the contents of an Arc are usually immutable, it's + // possible to have interior writes to something like a Mutex. Since a + // Mutex is not acquired when it is deleted, we can't rely on its + // synchronization logic to make writes in thread A visible to a destructor + // running in thread B. + // + // Also note that the Acquire fence here could probably be replaced with an + // Acquire load, which could improve performance in highly-contended + // situations. See [2]. + // + // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html) + // [2]: (https://github.com/rust-lang/rust/pull/41714) + acquire!(self.inner().strong); + + unsafe { + self.drop_slow(); + } + } +} + +impl Arc { + #[inline] + #[stable(feature = "rc_downcast", since = "1.29.0")] + /// Attempt to downcast the `Arc` to a concrete type. 
+ /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// use std::sync::Arc; + /// + /// fn print_if_string(value: Arc) { + /// if let Ok(string) = value.downcast::() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Arc::new(my_string)); + /// print_if_string(Arc::new(0i8)); + /// ``` + pub fn downcast(self) -> Result, Self> + where + T: Any + Send + Sync + 'static, + { + if (*self).is::() { + let ptr = self.ptr.cast::>(); + mem::forget(self); + Ok(Arc::from_inner(ptr)) + } else { + Err(self) + } + } +} + +impl Weak { + /// Constructs a new `Weak`, without allocating any memory. + /// Calling [`upgrade`] on the return value always gives [`None`]. + /// + /// [`upgrade`]: Weak::upgrade + /// + /// # Examples + /// + /// ``` + /// use std::sync::Weak; + /// + /// let empty: Weak = Weak::new(); + /// assert!(empty.upgrade().is_none()); + /// ``` + #[stable(feature = "downgraded_weak", since = "1.10.0")] + pub fn new() -> Weak { + Weak { ptr: NonNull::new(usize::MAX as *mut ArcInner).expect("MAX is not 0") } + } +} + +/// Helper type to allow accessing the reference counts without +/// making any assertions about the data field. +struct WeakInner<'a> { + weak: &'a atomic::AtomicUsize, + strong: &'a atomic::AtomicUsize, +} + +impl Weak { + /// Returns a raw pointer to the object `T` pointed to by this `Weak`. + /// + /// The pointer is valid only if there are some strong references. The pointer may be dangling, + /// unaligned or even [`null`] otherwise. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use std::ptr; + /// + /// let strong = Arc::new("hello".to_owned()); + /// let weak = Arc::downgrade(&strong); + /// // Both point to the same object + /// assert!(ptr::eq(&*strong, weak.as_ptr())); + /// // The strong here keeps it alive, so we can still access the object. 
+ /// assert_eq!("hello", unsafe { &*weak.as_ptr() }); + /// + /// drop(strong); + /// // But not any more. We can do weak.as_ptr(), but accessing the pointer would lead to + /// // undefined behaviour. + /// // assert_eq!("hello", unsafe { &*weak.as_ptr() }); + /// ``` + /// + /// [`null`]: core::ptr::null + #[stable(feature = "weak_into_raw", since = "1.45.0")] + pub fn as_ptr(&self) -> *const T { + let ptr: *mut ArcInner = NonNull::as_ptr(self.ptr); + + if is_dangling(ptr) { + // If the pointer is dangling, we return the sentinel directly. This cannot be + // a valid payload address, as the payload is at least as aligned as ArcInner (usize). + ptr as *const T + } else { + // SAFETY: if is_dangling returns false, then the pointer is dereferencable. + // The payload may be dropped at this point, and we have to maintain provenance, + // so use raw pointer manipulation. + unsafe { ptr::addr_of_mut!((*ptr).data) } + } + } + + /// Consumes the `Weak` and turns it into a raw pointer. + /// + /// This converts the weak pointer into a raw pointer, while still preserving the ownership of + /// one weak reference (the weak count is not modified by this operation). It can be turned + /// back into the `Weak` with [`from_raw`]. + /// + /// The same restrictions of accessing the target of the pointer as with + /// [`as_ptr`] apply. 
+ /// + /// # Examples + /// + /// ``` + /// use std::sync::{Arc, Weak}; + /// + /// let strong = Arc::new("hello".to_owned()); + /// let weak = Arc::downgrade(&strong); + /// let raw = weak.into_raw(); + /// + /// assert_eq!(1, Arc::weak_count(&strong)); + /// assert_eq!("hello", unsafe { &*raw }); + /// + /// drop(unsafe { Weak::from_raw(raw) }); + /// assert_eq!(0, Arc::weak_count(&strong)); + /// ``` + /// + /// [`from_raw`]: Weak::from_raw + /// [`as_ptr`]: Weak::as_ptr + #[stable(feature = "weak_into_raw", since = "1.45.0")] + pub fn into_raw(self) -> *const T { + let result = self.as_ptr(); + mem::forget(self); + result + } + + /// Converts a raw pointer previously created by [`into_raw`] back into `Weak`. + /// + /// This can be used to safely get a strong reference (by calling [`upgrade`] + /// later) or to deallocate the weak count by dropping the `Weak`. + /// + /// It takes ownership of one weak reference (with the exception of pointers created by [`new`], + /// as these don't own anything; the method still works on them). + /// + /// # Safety + /// + /// The pointer must have originated from the [`into_raw`] and must still own its potential + /// weak reference. + /// + /// It is allowed for the strong count to be 0 at the time of calling this. Nevertheless, this + /// takes ownership of one weak reference currently represented as a raw pointer (the weak + /// count is not modified by this operation) and therefore it must be paired with a previous + /// call to [`into_raw`]. 
+ /// # Examples + /// + /// ``` + /// use std::sync::{Arc, Weak}; + /// + /// let strong = Arc::new("hello".to_owned()); + /// + /// let raw_1 = Arc::downgrade(&strong).into_raw(); + /// let raw_2 = Arc::downgrade(&strong).into_raw(); + /// + /// assert_eq!(2, Arc::weak_count(&strong)); + /// + /// assert_eq!("hello", &*unsafe { Weak::from_raw(raw_1) }.upgrade().unwrap()); + /// assert_eq!(1, Arc::weak_count(&strong)); + /// + /// drop(strong); + /// + /// // Decrement the last weak count. + /// assert!(unsafe { Weak::from_raw(raw_2) }.upgrade().is_none()); + /// ``` + /// + /// [`new`]: Weak::new + /// [`into_raw`]: Weak::into_raw + /// [`upgrade`]: Weak::upgrade + /// [`forget`]: std::mem::forget + #[stable(feature = "weak_into_raw", since = "1.45.0")] + pub unsafe fn from_raw(ptr: *const T) -> Self { + // See Weak::as_ptr for context on how the input pointer is derived. + + let ptr = if is_dangling(ptr as *mut T) { + // This is a dangling Weak. + ptr as *mut ArcInner + } else { + // Otherwise, we're guaranteed the pointer came from a nondangling Weak. + // SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T. + let offset = unsafe { data_offset(ptr) }; + // Thus, we reverse the offset to get the whole RcBox. + // SAFETY: the pointer originated from a Weak, so this offset is safe. + unsafe { (ptr as *mut ArcInner).set_ptr_value((ptr as *mut u8).offset(-offset)) } + }; + + // SAFETY: we now have recovered the original Weak pointer, so can create the Weak. + Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } } + } +} + +impl Weak { + /// Attempts to upgrade the `Weak` pointer to an [`Arc`], delaying + /// dropping of the inner value if successful. + /// + /// Returns [`None`] if the inner value has since been dropped. 
+ /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// let weak_five = Arc::downgrade(&five); + /// + /// let strong_five: Option> = weak_five.upgrade(); + /// assert!(strong_five.is_some()); + /// + /// // Destroy all strong pointers. + /// drop(strong_five); + /// drop(five); + /// + /// assert!(weak_five.upgrade().is_none()); + /// ``` + #[stable(feature = "arc_weak", since = "1.4.0")] + pub fn upgrade(&self) -> Option> { + // We use a CAS loop to increment the strong count instead of a + // fetch_add as this function should never take the reference count + // from zero to one. + let inner = self.inner()?; + + // Relaxed load because any write of 0 that we can observe + // leaves the field in a permanently zero state (so a + // "stale" read of 0 is fine), and any other value is + // confirmed via the CAS below. + let mut n = inner.strong.load(Relaxed); + + loop { + if n == 0 { + return None; + } + + // See comments in `Arc::clone` for why we do this (for `mem::forget`). + if n > MAX_REFCOUNT { + abort(); + } + + // Relaxed is fine for the failure case because we don't have any expectations about the new state. + // Acquire is necessary for the success case to synchronise with `Arc::new_cyclic`, when the inner + // value can be initialized after `Weak` references have already been created. In that case, we + // expect to observe the fully initialized value. + match inner.strong.compare_exchange_weak(n, n + 1, Acquire, Relaxed) { + Ok(_) => return Some(Arc::from_inner(self.ptr)), // null checked above + Err(old) => n = old, + } + } + } + + /// Gets the number of strong (`Arc`) pointers pointing to this allocation. + /// + /// If `self` was created using [`Weak::new`], this will return 0. 
+ #[stable(feature = "weak_counts", since = "1.41.0")] + pub fn strong_count(&self) -> usize { + if let Some(inner) = self.inner() { inner.strong.load(SeqCst) } else { 0 } + } + + /// Gets an approximation of the number of `Weak` pointers pointing to this + /// allocation. + /// + /// If `self` was created using [`Weak::new`], or if there are no remaining + /// strong pointers, this will return 0. + /// + /// # Accuracy + /// + /// Due to implementation details, the returned value can be off by 1 in + /// either direction when other threads are manipulating any `Arc`s or + /// `Weak`s pointing to the same allocation. + #[stable(feature = "weak_counts", since = "1.41.0")] + pub fn weak_count(&self) -> usize { + self.inner() + .map(|inner| { + let weak = inner.weak.load(SeqCst); + let strong = inner.strong.load(SeqCst); + if strong == 0 { + 0 + } else { + // Since we observed that there was at least one strong pointer + // after reading the weak count, we know that the implicit weak + // reference (present whenever any strong references are alive) + // was still around when we observed the weak count, and can + // therefore safely subtract it. + weak - 1 + } + }) + .unwrap_or(0) + } + + /// Returns `None` when the pointer is dangling and there is no allocated `ArcInner`, + /// (i.e., when this `Weak` was created by `Weak::new`). + #[inline] + fn inner(&self) -> Option> { + if is_dangling(self.ptr.as_ptr()) { + None + } else { + // We are careful to *not* create a reference covering the "data" field, as + // the field may be mutated concurrently (for example, if the last `Arc` + // is dropped, the data field will be dropped in-place). + Some(unsafe { + let ptr = self.ptr.as_ptr(); + WeakInner { strong: &(*ptr).strong, weak: &(*ptr).weak } + }) + } + } + + /// Returns `true` if the two `Weak`s point to the same allocation (similar to + /// [`ptr::eq`]), or if both don't point to any allocation + /// (because they were created with `Weak::new()`). 
+ /// + /// # Notes + /// + /// Since this compares pointers it means that `Weak::new()` will equal each + /// other, even though they don't point to any allocation. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let first_rc = Arc::new(5); + /// let first = Arc::downgrade(&first_rc); + /// let second = Arc::downgrade(&first_rc); + /// + /// assert!(first.ptr_eq(&second)); + /// + /// let third_rc = Arc::new(5); + /// let third = Arc::downgrade(&third_rc); + /// + /// assert!(!first.ptr_eq(&third)); + /// ``` + /// + /// Comparing `Weak::new`. + /// + /// ``` + /// use std::sync::{Arc, Weak}; + /// + /// let first = Weak::new(); + /// let second = Weak::new(); + /// assert!(first.ptr_eq(&second)); + /// + /// let third_rc = Arc::new(()); + /// let third = Arc::downgrade(&third_rc); + /// assert!(!first.ptr_eq(&third)); + /// ``` + /// + /// [`ptr::eq`]: core::ptr::eq + #[inline] + #[stable(feature = "weak_ptr_eq", since = "1.39.0")] + pub fn ptr_eq(&self, other: &Self) -> bool { + self.ptr.as_ptr() == other.ptr.as_ptr() + } +} + +#[stable(feature = "arc_weak", since = "1.4.0")] +impl Clone for Weak { + /// Makes a clone of the `Weak` pointer that points to the same allocation. + /// + /// # Examples + /// + /// ``` + /// use std::sync::{Arc, Weak}; + /// + /// let weak_five = Arc::downgrade(&Arc::new(5)); + /// + /// let _ = Weak::clone(&weak_five); + /// ``` + #[inline] + fn clone(&self) -> Weak { + let inner = if let Some(inner) = self.inner() { + inner + } else { + return Weak { ptr: self.ptr }; + }; + // See comments in Arc::clone() for why this is relaxed. This can use a + // fetch_add (ignoring the lock) because the weak count is only locked + // where are *no other* weak pointers in existence. (So we can't be + // running this code in that case). + let old_size = inner.weak.fetch_add(1, Relaxed); + + // See comments in Arc::clone() for why we do this (for mem::forget). 
+ if old_size > MAX_REFCOUNT { + abort(); + } + + Weak { ptr: self.ptr } + } +} + +#[stable(feature = "downgraded_weak", since = "1.10.0")] +impl Default for Weak { + /// Constructs a new `Weak`, without allocating memory. + /// Calling [`upgrade`] on the return value always + /// gives [`None`]. + /// + /// [`upgrade`]: Weak::upgrade + /// + /// # Examples + /// + /// ``` + /// use std::sync::Weak; + /// + /// let empty: Weak = Default::default(); + /// assert!(empty.upgrade().is_none()); + /// ``` + fn default() -> Weak { + Weak::new() + } +} + +#[stable(feature = "arc_weak", since = "1.4.0")] +unsafe impl<#[may_dangle] T: ?Sized> Drop for Weak { + /// Drops the `Weak` pointer. + /// + /// # Examples + /// + /// ``` + /// use std::sync::{Arc, Weak}; + /// + /// struct Foo; + /// + /// impl Drop for Foo { + /// fn drop(&mut self) { + /// println!("dropped!"); + /// } + /// } + /// + /// let foo = Arc::new(Foo); + /// let weak_foo = Arc::downgrade(&foo); + /// let other_weak_foo = Weak::clone(&weak_foo); + /// + /// drop(weak_foo); // Doesn't print anything + /// drop(foo); // Prints "dropped!" + /// + /// assert!(other_weak_foo.upgrade().is_none()); + /// ``` + fn drop(&mut self) { + // If we find out that we were the last weak pointer, then its time to + // deallocate the data entirely. See the discussion in Arc::drop() about + // the memory orderings + // + // It's not necessary to check for the locked state here, because the + // weak count can only be locked if there was precisely one weak ref, + // meaning that drop could only subsequently run ON that remaining weak + // ref, which can only happen after the lock is released. 
+ let inner = if let Some(inner) = self.inner() { inner } else { return }; + + if inner.weak.fetch_sub(1, Release) == 1 { + acquire!(inner.weak); + unsafe { Global.deallocate(self.ptr.cast(), Layout::for_value_raw(self.ptr.as_ptr())) } + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +trait ArcEqIdent { + fn eq(&self, other: &Arc) -> bool; + fn ne(&self, other: &Arc) -> bool; +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ArcEqIdent for Arc { + #[inline] + default fn eq(&self, other: &Arc) -> bool { + **self == **other + } + #[inline] + default fn ne(&self, other: &Arc) -> bool { + **self != **other + } +} + +/// We're doing this specialization here, and not as a more general optimization on `&T`, because it +/// would otherwise add a cost to all equality checks on refs. We assume that `Arc`s are used to +/// store large values, that are slow to clone, but also heavy to check for equality, causing this +/// cost to pay off more easily. It's also more likely to have two `Arc` clones, that point to +/// the same value, than two `&T`s. +/// +/// We can only do this when `T: Eq` as a `PartialEq` might be deliberately irreflexive. +#[stable(feature = "rust1", since = "1.0.0")] +impl ArcEqIdent for Arc { + #[inline] + fn eq(&self, other: &Arc) -> bool { + Arc::ptr_eq(self, other) || **self == **other + } + + #[inline] + fn ne(&self, other: &Arc) -> bool { + !Arc::ptr_eq(self, other) && **self != **other + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for Arc { + /// Equality for two `Arc`s. + /// + /// Two `Arc`s are equal if their inner values are equal, even if they are + /// stored in different allocation. + /// + /// If `T` also implements `Eq` (implying reflexivity of equality), + /// two `Arc`s that point to the same allocation are always equal. 
+ /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// assert!(five == Arc::new(5)); + /// ``` + #[inline] + fn eq(&self, other: &Arc) -> bool { + ArcEqIdent::eq(self, other) + } + + /// Inequality for two `Arc`s. + /// + /// Two `Arc`s are unequal if their inner values are unequal. + /// + /// If `T` also implements `Eq` (implying reflexivity of equality), + /// two `Arc`s that point to the same value are never unequal. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// assert!(five != Arc::new(6)); + /// ``` + #[inline] + fn ne(&self, other: &Arc) -> bool { + ArcEqIdent::ne(self, other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for Arc { + /// Partial comparison for two `Arc`s. + /// + /// The two are compared by calling `partial_cmp()` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use std::cmp::Ordering; + /// + /// let five = Arc::new(5); + /// + /// assert_eq!(Some(Ordering::Less), five.partial_cmp(&Arc::new(6))); + /// ``` + fn partial_cmp(&self, other: &Arc) -> Option { + (**self).partial_cmp(&**other) + } + + /// Less-than comparison for two `Arc`s. + /// + /// The two are compared by calling `<` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// assert!(five < Arc::new(6)); + /// ``` + fn lt(&self, other: &Arc) -> bool { + *(*self) < *(*other) + } + + /// 'Less than or equal to' comparison for two `Arc`s. + /// + /// The two are compared by calling `<=` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// assert!(five <= Arc::new(5)); + /// ``` + fn le(&self, other: &Arc) -> bool { + *(*self) <= *(*other) + } + + /// Greater-than comparison for two `Arc`s. 
+ /// + /// The two are compared by calling `>` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// assert!(five > Arc::new(4)); + /// ``` + fn gt(&self, other: &Arc) -> bool { + *(*self) > *(*other) + } + + /// 'Greater than or equal to' comparison for two `Arc`s. + /// + /// The two are compared by calling `>=` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let five = Arc::new(5); + /// + /// assert!(five >= Arc::new(5)); + /// ``` + fn ge(&self, other: &Arc) -> bool { + *(*self) >= *(*other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Arc { + /// Comparison for two `Arc`s. + /// + /// The two are compared by calling `cmp()` on their inner values. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use std::cmp::Ordering; + /// + /// let five = Arc::new(5); + /// + /// assert_eq!(Ordering::Less, five.cmp(&Arc::new(6))); + /// ``` + fn cmp(&self, other: &Arc) -> Ordering { + (**self).cmp(&**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Arc {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for Arc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Arc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Pointer for Arc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Pointer::fmt(&(&**self as *const T), f) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for Arc { + /// Creates a new `Arc`, with the `Default` value for `T`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// let x: Arc = Default::default(); + /// assert_eq!(*x, 0); + /// ``` + fn default() -> Arc { + Arc::new(Default::default()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Arc { + fn hash(&self, state: &mut H) { + (**self).hash(state) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_for_ptrs", since = "1.6.0")] +impl From for Arc { + fn from(t: T) -> Self { + Arc::new(t) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From<&[T]> for Arc<[T]> { + /// Allocate a reference-counted slice and fill it by cloning `v`'s items. + /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// let original: &[i32] = &[1, 2, 3]; + /// let shared: Arc<[i32]> = Arc::from(original); + /// assert_eq!(&[1, 2, 3], &shared[..]); + /// ``` + #[inline] + fn from(v: &[T]) -> Arc<[T]> { + >::from_slice(v) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From<&str> for Arc { + /// Allocate a reference-counted `str` and copy `v` into it. + /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// let shared: Arc = Arc::from("eggplant"); + /// assert_eq!("eggplant", &shared[..]); + /// ``` + #[inline] + fn from(v: &str) -> Arc { + let arc = Arc::<[u8]>::from(v.as_bytes()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const str) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From for Arc { + /// Allocate a reference-counted `str` and copy `v` into it. 
+ /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// let unique: String = "eggplant".to_owned(); + /// let shared: Arc = Arc::from(unique); + /// assert_eq!("eggplant", &shared[..]); + /// ``` + #[inline] + fn from(v: String) -> Arc { + Arc::from(&v[..]) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From> for Arc { + /// Move a boxed object to a new, reference-counted allocation. + /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// let unique: Box = Box::from("eggplant"); + /// let shared: Arc = Arc::from(unique); + /// assert_eq!("eggplant", &shared[..]); + /// ``` + #[inline] + fn from(v: Box) -> Arc { + Arc::from_box(v) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_slice", since = "1.21.0")] +impl From> for Arc<[T]> { + /// Allocate a reference-counted slice and move `v`'s items into it. + /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// let unique: Vec = vec![1, 2, 3]; + /// let shared: Arc<[i32]> = Arc::from(unique); + /// assert_eq!(&[1, 2, 3], &shared[..]); + /// ``` + #[inline] + fn from(mut v: Vec) -> Arc<[T]> { + unsafe { + let arc = Arc::copy_from_slice(&v); + + // Allow the Vec to free its memory, but not destroy its contents + v.set_len(0); + + arc + } + } +} + +// Avoid `error: specializing impl repeats parameter` implementing `TryFrom`. +impl Arc<[T]> { + /// Tries to allocate a reference-counted slice and move `v`'s items into it. 
+ /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// let unique: Vec = vec![1, 2, 3]; + /// let shared: Arc<[i32]> = Arc::try_from(unique).unwrap(); + /// assert_eq!(&[1, 2, 3], &shared[..]); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + #[inline] + pub fn try_from_vec(mut v: Vec) -> Result { + unsafe { + let arc = Arc::try_copy_from_slice(&v)?; + + // Allow the Vec to free its memory, but not destroy its contents + v.set_len(0); + + Ok(arc) + } + } +} + +#[stable(feature = "shared_from_cow", since = "1.45.0")] +impl<'a, B> From> for Arc +where + B: ToOwned + ?Sized, + Arc: From<&'a B> + From, +{ + /// Create an atomically reference-counted pointer from + /// a clone-on-write pointer by copying its content. + /// + /// # Example + /// + /// ```rust + /// # use std::sync::Arc; + /// # use std::borrow::Cow; + /// let cow: Cow = Cow::Borrowed("eggplant"); + /// let shared: Arc = Arc::from(cow); + /// assert_eq!("eggplant", &shared[..]); + /// ``` + #[inline] + fn from(cow: Cow<'a, B>) -> Arc { + match cow { + Cow::Borrowed(s) => Arc::from(s), + Cow::Owned(s) => Arc::from(s), + } + } +} + +#[stable(feature = "boxed_slice_try_from", since = "1.43.0")] +impl TryFrom> for Arc<[T; N]> { + type Error = Arc<[T]>; + + fn try_from(boxed_slice: Arc<[T]>) -> Result { + if boxed_slice.len() == N { + Ok(unsafe { Arc::from_raw(Arc::into_raw(boxed_slice) as *mut [T; N]) }) + } else { + Err(boxed_slice) + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "shared_from_iter", since = "1.37.0")] +impl iter::FromIterator for Arc<[T]> { + /// Takes each element in the `Iterator` and collects it into an `Arc<[T]>`. + /// + /// # Performance characteristics + /// + /// ## The general case + /// + /// In the general case, collecting into `Arc<[T]>` is done by first + /// collecting into a `Vec`. 
That is, when writing the following: + /// + /// ```rust + /// # use std::sync::Arc; + /// let evens: Arc<[u8]> = (0..10).filter(|&x| x % 2 == 0).collect(); + /// # assert_eq!(&*evens, &[0, 2, 4, 6, 8]); + /// ``` + /// + /// this behaves as if we wrote: + /// + /// ```rust + /// # use std::sync::Arc; + /// let evens: Arc<[u8]> = (0..10).filter(|&x| x % 2 == 0) + /// .collect::>() // The first set of allocations happens here. + /// .into(); // A second allocation for `Arc<[T]>` happens here. + /// # assert_eq!(&*evens, &[0, 2, 4, 6, 8]); + /// ``` + /// + /// This will allocate as many times as needed for constructing the `Vec` + /// and then it will allocate once for turning the `Vec` into the `Arc<[T]>`. + /// + /// ## Iterators of known length + /// + /// When your `Iterator` implements `TrustedLen` and is of an exact size, + /// a single allocation will be made for the `Arc<[T]>`. For example: + /// + /// ```rust + /// # use std::sync::Arc; + /// let evens: Arc<[u8]> = (0..10).collect(); // Just a single allocation happens here. + /// # assert_eq!(&*evens, &*(0..10).collect::>()); + /// ``` + fn from_iter>(iter: I) -> Self { + ToArcSlice::to_arc_slice(iter.into_iter()) + } +} + +/// Specialization trait used for collecting into `Arc<[T]>`. +trait ToArcSlice: Iterator + Sized { + fn to_arc_slice(self) -> Arc<[T]>; +} + +#[cfg(not(no_global_oom_handling))] +impl> ToArcSlice for I { + default fn to_arc_slice(self) -> Arc<[T]> { + self.collect::>().into() + } +} + +#[cfg(not(no_global_oom_handling))] +impl> ToArcSlice for I { + fn to_arc_slice(self) -> Arc<[T]> { + // This is the case for a `TrustedLen` iterator. + let (low, high) = self.size_hint(); + if let Some(high) = high { + debug_assert_eq!( + low, + high, + "TrustedLen iterator's size hint is not exact: {:?}", + (low, high) + ); + + unsafe { + // SAFETY: We need to ensure that the iterator has an exact length and we have. 
+ Arc::from_iter_exact(self, low) + } + } else { + // TrustedLen contract guarantees that `upper_bound == None` implies an iterator + // length exceeding `usize::MAX`. + // The default implementation would collect into a vec which would panic. + // Thus we panic here immediately without invoking `Vec` code. + panic!("capacity overflow"); + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl borrow::Borrow for Arc { + fn borrow(&self) -> &T { + &**self + } +} + +#[stable(since = "1.5.0", feature = "smart_ptr_as_ref")] +impl AsRef for Arc { + fn as_ref(&self) -> &T { + &**self + } +} + +#[stable(feature = "pin", since = "1.33.0")] +impl Unpin for Arc {} + +/// Get the offset within an `ArcInner` for the payload behind a pointer. +/// +/// # Safety +/// +/// The pointer must point to (and have valid metadata for) a previously +/// valid instance of T, but the T is allowed to be dropped. +unsafe fn data_offset(ptr: *const T) -> isize { + // Align the unsized value to the end of the ArcInner. + // Because ArcInner is repr(C), it will always be the last field in memory. + // SAFETY: since the only unsized types possible are slices, trait objects, + // and extern types, the input safety requirement is currently enough to + // satisfy the requirements of align_of_val_raw; this is an implementation + // detail of the language that may not be relied upon outside of std.
+ unsafe { data_offset_align(align_of_val_raw(ptr)) } +} + +#[inline] +fn data_offset_align(align: usize) -> isize { + let layout = Layout::new::>(); + (layout.size() + layout.padding_needed_for(align)) as isize +} diff --git a/rust/alloc/vec/drain.rs b/rust/alloc/vec/drain.rs new file mode 100644 index 0000000000000..65f7e32e95fd7 --- /dev/null +++ b/rust/alloc/vec/drain.rs @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::{Allocator, Global}; +use core::fmt; +use core::iter::{FusedIterator, TrustedLen}; +use core::mem::{self}; +use core::ptr::{self, NonNull}; +use core::slice::{self}; + +use super::Vec; + +/// A draining iterator for `Vec`. +/// +/// This `struct` is created by [`Vec::drain`]. +/// See its documentation for more. +/// +/// # Example +/// +/// ``` +/// let mut v = vec![0, 1, 2]; +/// let iter: std::vec::Drain<_> = v.drain(..); +/// ``` +#[stable(feature = "drain", since = "1.6.0")] +pub struct Drain< + 'a, + T: 'a, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator + 'a = Global, +> { + /// Index of tail to preserve + pub(super) tail_start: usize, + /// Length of tail + pub(super) tail_len: usize, + /// Current remaining range to remove + pub(super) iter: slice::Iter<'a, T>, + pub(super) vec: NonNull>, +} + +#[stable(feature = "collection_debug", since = "1.17.0")] +impl fmt::Debug for Drain<'_, T, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Drain").field(&self.iter.as_slice()).finish() + } +} + +impl<'a, T, A: Allocator> Drain<'a, T, A> { + /// Returns the remaining items of this iterator as a slice. 
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!['a', 'b', 'c']; + /// let mut drain = vec.drain(..); + /// assert_eq!(drain.as_slice(), &['a', 'b', 'c']); + /// let _ = drain.next().unwrap(); + /// assert_eq!(drain.as_slice(), &['b', 'c']); + /// ``` + #[stable(feature = "vec_drain_as_slice", since = "1.46.0")] + pub fn as_slice(&self) -> &[T] { + self.iter.as_slice() + } + + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + unsafe { self.vec.as_ref().allocator() } + } +} + +#[stable(feature = "vec_drain_as_slice", since = "1.46.0")] +impl<'a, T, A: Allocator> AsRef<[T]> for Drain<'a, T, A> { + fn as_ref(&self) -> &[T] { + self.as_slice() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl Sync for Drain<'_, T, A> {} +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl Send for Drain<'_, T, A> {} + +#[stable(feature = "drain", since = "1.6.0")] +impl Iterator for Drain<'_, T, A> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + self.iter.next().map(|elt| unsafe { ptr::read(elt as *const _) }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl DoubleEndedIterator for Drain<'_, T, A> { + #[inline] + fn next_back(&mut self) -> Option { + self.iter.next_back().map(|elt| unsafe { ptr::read(elt as *const _) }) + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl Drop for Drain<'_, T, A> { + fn drop(&mut self) { + /// Continues dropping the remaining elements in the `Drain`, then moves back the + /// un-`Drain`ed elements to restore the original `Vec`. + struct DropGuard<'r, 'a, T, A: Allocator>(&'r mut Drain<'a, T, A>); + + impl<'r, 'a, T, A: Allocator> Drop for DropGuard<'r, 'a, T, A> { + fn drop(&mut self) { + // Continue the same loop we have below. If the loop already finished, this does + // nothing. 
+ self.0.for_each(drop); + + if self.0.tail_len > 0 { + unsafe { + let source_vec = self.0.vec.as_mut(); + // memmove back untouched tail, update to new length + let start = source_vec.len(); + let tail = self.0.tail_start; + if tail != start { + let src = source_vec.as_ptr().add(tail); + let dst = source_vec.as_mut_ptr().add(start); + ptr::copy(src, dst, self.0.tail_len); + } + source_vec.set_len(start + self.0.tail_len); + } + } + } + } + + // exhaust self first + while let Some(item) = self.next() { + let guard = DropGuard(self); + drop(item); + mem::forget(guard); + } + + // Drop a `DropGuard` to move back the non-drained tail of `self`. + DropGuard(self); + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl ExactSizeIterator for Drain<'_, T, A> { + fn is_empty(&self) -> bool { + self.iter.is_empty() + } +} + +#[unstable(feature = "trusted_len", issue = "37572")] +unsafe impl TrustedLen for Drain<'_, T, A> {} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for Drain<'_, T, A> {} diff --git a/rust/alloc/vec/drain_filter.rs b/rust/alloc/vec/drain_filter.rs new file mode 100644 index 0000000000000..b04fce041622f --- /dev/null +++ b/rust/alloc/vec/drain_filter.rs @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::{Allocator, Global}; +use core::ptr::{self}; +use core::slice::{self}; + +use super::Vec; + +/// An iterator which uses a closure to determine if an element should be removed. +/// +/// This struct is created by [`Vec::drain_filter`]. +/// See its documentation for more. 
+/// +/// # Example +/// +/// ``` +/// #![feature(drain_filter)] +/// +/// let mut v = vec![0, 1, 2]; +/// let iter: std::vec::DrainFilter<_, _> = v.drain_filter(|x| *x % 2 == 0); +/// ``` +#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] +#[derive(Debug)] +pub struct DrainFilter< + 'a, + T, + F, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +> where + F: FnMut(&mut T) -> bool, +{ + pub(super) vec: &'a mut Vec, + /// The index of the item that will be inspected by the next call to `next`. + pub(super) idx: usize, + /// The number of items that have been drained (removed) thus far. + pub(super) del: usize, + /// The original length of `vec` prior to draining. + pub(super) old_len: usize, + /// The filter test predicate. + pub(super) pred: F, + /// A flag that indicates a panic has occurred in the filter test predicate. + /// This is used as a hint in the drop implementation to prevent consumption + /// of the remainder of the `DrainFilter`. Any unprocessed items will be + /// backshifted in the `vec`, but no further items will be dropped or + /// tested by the filter predicate. + pub(super) panic_flag: bool, +} + +impl DrainFilter<'_, T, F, A> +where + F: FnMut(&mut T) -> bool, +{ + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + self.vec.allocator() + } +} + +#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] +impl Iterator for DrainFilter<'_, T, F, A> +where + F: FnMut(&mut T) -> bool, +{ + type Item = T; + + fn next(&mut self) -> Option { + unsafe { + while self.idx < self.old_len { + let i = self.idx; + let v = slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len); + self.panic_flag = true; + let drained = (self.pred)(&mut v[i]); + self.panic_flag = false; + // Update the index *after* the predicate is called. 
If the index + // is updated prior and the predicate panics, the element at this + // index would be leaked. + self.idx += 1; + if drained { + self.del += 1; + return Some(ptr::read(&v[i])); + } else if self.del > 0 { + let del = self.del; + let src: *const T = &v[i]; + let dst: *mut T = &mut v[i - del]; + ptr::copy_nonoverlapping(src, dst, 1); + } + } + None + } + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.old_len - self.idx)) + } +} + +#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] +impl Drop for DrainFilter<'_, T, F, A> +where + F: FnMut(&mut T) -> bool, +{ + fn drop(&mut self) { + struct BackshiftOnDrop<'a, 'b, T, F, A: Allocator> + where + F: FnMut(&mut T) -> bool, + { + drain: &'b mut DrainFilter<'a, T, F, A>, + } + + impl<'a, 'b, T, F, A: Allocator> Drop for BackshiftOnDrop<'a, 'b, T, F, A> + where + F: FnMut(&mut T) -> bool, + { + fn drop(&mut self) { + unsafe { + if self.drain.idx < self.drain.old_len && self.drain.del > 0 { + // This is a pretty messed up state, and there isn't really an + // obviously right thing to do. We don't want to keep trying + // to execute `pred`, so we just backshift all the unprocessed + // elements and tell the vec that they still exist. The backshift + // is required to prevent a double-drop of the last successfully + // drained item prior to a panic in the predicate. + let ptr = self.drain.vec.as_mut_ptr(); + let src = ptr.add(self.drain.idx); + let dst = src.sub(self.drain.del); + let tail_len = self.drain.old_len - self.drain.idx; + src.copy_to(dst, tail_len); + } + self.drain.vec.set_len(self.drain.old_len - self.drain.del); + } + } + } + + let backshift = BackshiftOnDrop { drain: self }; + + // Attempt to consume any remaining elements if the filter predicate + // has not yet panicked. We'll backshift any remaining elements + // whether we've already panicked or if the consumption here panics. 
+ if !backshift.drain.panic_flag { + backshift.drain.for_each(drop); + } + } +} diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs new file mode 100644 index 0000000000000..86a167aad103c --- /dev/null +++ b/rust/alloc/vec/into_iter.rs @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::{Allocator, Global}; +use crate::raw_vec::RawVec; +use core::fmt; +use core::intrinsics::arith_offset; +use core::iter::{FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccess}; +use core::marker::PhantomData; +use core::mem::{self}; +use core::ptr::{self, NonNull}; +use core::slice::{self}; + +/// An iterator that moves out of a vector. +/// +/// This `struct` is created by the `into_iter` method on [`Vec`](super::Vec) +/// (provided by the [`IntoIterator`] trait). +/// +/// # Example +/// +/// ``` +/// let v = vec![0, 1, 2]; +/// let iter: std::vec::IntoIter<_> = v.into_iter(); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +pub struct IntoIter< + T, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +> { + pub(super) buf: NonNull, + pub(super) phantom: PhantomData, + pub(super) cap: usize, + pub(super) alloc: A, + pub(super) ptr: *const T, + pub(super) end: *const T, +} + +#[stable(feature = "vec_intoiter_debug", since = "1.13.0")] +impl fmt::Debug for IntoIter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("IntoIter").field(&self.as_slice()).finish() + } +} + +impl IntoIter { + /// Returns the remaining items of this iterator as a slice. 
+ /// + /// # Examples + /// + /// ``` + /// let vec = vec!['a', 'b', 'c']; + /// let mut into_iter = vec.into_iter(); + /// assert_eq!(into_iter.as_slice(), &['a', 'b', 'c']); + /// let _ = into_iter.next().unwrap(); + /// assert_eq!(into_iter.as_slice(), &['b', 'c']); + /// ``` + #[stable(feature = "vec_into_iter_as_slice", since = "1.15.0")] + pub fn as_slice(&self) -> &[T] { + unsafe { slice::from_raw_parts(self.ptr, self.len()) } + } + + /// Returns the remaining items of this iterator as a mutable slice. + /// + /// # Examples + /// + /// ``` + /// let vec = vec!['a', 'b', 'c']; + /// let mut into_iter = vec.into_iter(); + /// assert_eq!(into_iter.as_slice(), &['a', 'b', 'c']); + /// into_iter.as_mut_slice()[2] = 'z'; + /// assert_eq!(into_iter.next().unwrap(), 'a'); + /// assert_eq!(into_iter.next().unwrap(), 'b'); + /// assert_eq!(into_iter.next().unwrap(), 'z'); + /// ``` + #[stable(feature = "vec_into_iter_as_slice", since = "1.15.0")] + pub fn as_mut_slice(&mut self) -> &mut [T] { + unsafe { &mut *self.as_raw_mut_slice() } + } + + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + &self.alloc + } + + fn as_raw_mut_slice(&mut self) -> *mut [T] { + ptr::slice_from_raw_parts_mut(self.ptr as *mut T, self.len()) + } + + /// Drops remaining elements and relinquishes the backing allocation. + /// + /// This is roughly equivalent to the following, but more efficient + /// + /// ``` + /// # let mut into_iter = Vec::::with_capacity(10).into_iter(); + /// (&mut into_iter).for_each(core::mem::drop); + /// unsafe { core::ptr::write(&mut into_iter, Vec::new().into_iter()); } + /// ``` + #[cfg(not(no_global_oom_handling))] + pub(super) fn forget_allocation_drop_remaining(&mut self) { + let remaining = self.as_raw_mut_slice(); + + // overwrite the individual fields instead of creating a new + // struct and then overwriting &mut self. 
+ // this creates less assembly + self.cap = 0; + self.buf = unsafe { NonNull::new_unchecked(RawVec::NEW.ptr()) }; + self.ptr = self.buf.as_ptr(); + self.end = self.buf.as_ptr(); + + unsafe { + ptr::drop_in_place(remaining); + } + } +} + +#[stable(feature = "vec_intoiter_as_ref", since = "1.46.0")] +impl AsRef<[T]> for IntoIter { + fn as_ref(&self) -> &[T] { + self.as_slice() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl Send for IntoIter {} +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl Sync for IntoIter {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for IntoIter { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + if self.ptr as *const _ == self.end { + None + } else if mem::size_of::() == 0 { + // purposefully don't use 'ptr.offset' because for + // vectors with 0-size elements this would return the + // same pointer. + self.ptr = unsafe { arith_offset(self.ptr as *const i8, 1) as *mut T }; + + // Make up a value of this ZST. + Some(unsafe { mem::zeroed() }) + } else { + let old = self.ptr; + self.ptr = unsafe { self.ptr.offset(1) }; + + Some(unsafe { ptr::read(old) }) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let exact = if mem::size_of::() == 0 { + (self.end as usize).wrapping_sub(self.ptr as usize) + } else { + unsafe { self.end.offset_from(self.ptr) as usize } + }; + (exact, Some(exact)) + } + + #[inline] + fn count(self) -> usize { + self.len() + } + + unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item + where + Self: TrustedRandomAccess, + { + // SAFETY: the caller must guarantee that `i` is in bounds of the + // `Vec`, so `i` cannot overflow an `isize`, and the `self.ptr.add(i)` + // is guaranteed to point to an element of the `Vec` and + // thus guaranteed to be valid to dereference.
+ // + // Also note the implementation of `Self: TrustedRandomAccess` requires + // that `T: Copy` so reading elements from the buffer doesn't invalidate + // them for `Drop`. + unsafe { + if mem::size_of::() == 0 { mem::zeroed() } else { ptr::read(self.ptr.add(i)) } + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl DoubleEndedIterator for IntoIter { + #[inline] + fn next_back(&mut self) -> Option { + if self.end == self.ptr { + None + } else if mem::size_of::() == 0 { + // See above for why 'ptr.offset' isn't used + self.end = unsafe { arith_offset(self.end as *const i8, -1) as *mut T }; + + // Make up a value of this ZST. + Some(unsafe { mem::zeroed() }) + } else { + self.end = unsafe { self.end.offset(-1) }; + + Some(unsafe { ptr::read(self.end) }) + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ExactSizeIterator for IntoIter { + fn is_empty(&self) -> bool { + self.ptr == self.end + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for IntoIter {} + +#[unstable(feature = "trusted_len", issue = "37572")] +unsafe impl TrustedLen for IntoIter {} + +#[doc(hidden)] +#[unstable(issue = "none", feature = "std_internals")] +// T: Copy as approximation for !Drop since get_unchecked does not advance self.ptr +// and thus we can't implement drop-handling +unsafe impl TrustedRandomAccess for IntoIter +where + T: Copy, +{ + const MAY_HAVE_SIDE_EFFECT: bool = false; +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "vec_into_iter_clone", since = "1.8.0")] +impl Clone for IntoIter { + #[cfg(not(test))] + fn clone(&self) -> Self { + self.as_slice().to_vec_in(self.alloc.clone()).into_iter() + } + #[cfg(test)] + fn clone(&self) -> Self { + crate::slice::to_vec(self.as_slice(), self.alloc.clone()).into_iter() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T, A: Allocator> Drop for IntoIter { + fn drop(&mut self) { + struct DropGuard<'a, T, A: Allocator>(&'a mut IntoIter); + + 
impl Drop for DropGuard<'_, T, A> { + fn drop(&mut self) { + unsafe { + // `IntoIter::alloc` is not used anymore after this + let alloc = ptr::read(&self.0.alloc); + // RawVec handles deallocation + let _ = RawVec::from_raw_parts_in(self.0.buf.as_ptr(), self.0.cap, alloc); + } + } + } + + let guard = DropGuard(self); + // destroy the remaining elements + unsafe { + ptr::drop_in_place(guard.0.as_raw_mut_slice()); + } + // now `guard` will be dropped and do the rest + } +} + +#[unstable(issue = "none", feature = "inplace_iteration")] +#[doc(hidden)] +unsafe impl InPlaceIterable for IntoIter {} + +#[unstable(issue = "none", feature = "inplace_iteration")] +#[doc(hidden)] +unsafe impl SourceIter for IntoIter { + type Source = Self; + + #[inline] + unsafe fn as_inner(&mut self) -> &mut Self::Source { + self + } +} + +// internal helper trait for in-place iteration specialization. +#[rustc_specialization_trait] +pub(crate) trait AsIntoIter { + type Item; + fn as_into_iter(&mut self) -> &mut IntoIter; +} + +impl AsIntoIter for IntoIter { + type Item = T; + + fn as_into_iter(&mut self) -> &mut IntoIter { + self + } +} diff --git a/rust/alloc/vec/is_zero.rs b/rust/alloc/vec/is_zero.rs new file mode 100644 index 0000000000000..40e1e667c9fb3 --- /dev/null +++ b/rust/alloc/vec/is_zero.rs @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::boxed::Box; + +#[rustc_specialization_trait] +pub(super) unsafe trait IsZero { + /// Whether this value is zero + fn is_zero(&self) -> bool; +} + +macro_rules! 
impl_is_zero { + ($t:ty, $is_zero:expr) => { + unsafe impl IsZero for $t { + #[inline] + fn is_zero(&self) -> bool { + $is_zero(*self) + } + } + }; +} + +impl_is_zero!(i16, |x| x == 0); +impl_is_zero!(i32, |x| x == 0); +impl_is_zero!(i64, |x| x == 0); +impl_is_zero!(i128, |x| x == 0); +impl_is_zero!(isize, |x| x == 0); + +impl_is_zero!(u16, |x| x == 0); +impl_is_zero!(u32, |x| x == 0); +impl_is_zero!(u64, |x| x == 0); +impl_is_zero!(u128, |x| x == 0); +impl_is_zero!(usize, |x| x == 0); + +impl_is_zero!(bool, |x| x == false); +impl_is_zero!(char, |x| x == '\0'); + +impl_is_zero!(f32, |x: f32| x.to_bits() == 0); +impl_is_zero!(f64, |x: f64| x.to_bits() == 0); + +unsafe impl IsZero for *const T { + #[inline] + fn is_zero(&self) -> bool { + (*self).is_null() + } +} + +unsafe impl IsZero for *mut T { + #[inline] + fn is_zero(&self) -> bool { + (*self).is_null() + } +} + +// `Option<&T>` and `Option>` are guaranteed to represent `None` as null. +// For fat pointers, the bytes that would be the pointer metadata in the `Some` +// variant are padding in the `None` variant, so ignoring them and +// zero-initializing instead is ok. +// `Option<&mut T>` never implements `Clone`, so there's no need for an impl of +// `SpecFromElem`. + +unsafe impl IsZero for Option<&T> { + #[inline] + fn is_zero(&self) -> bool { + self.is_none() + } +} + +unsafe impl IsZero for Option> { + #[inline] + fn is_zero(&self) -> bool { + self.is_none() + } +} + +// `Option` and similar have a representation guarantee that +// they're the same size as the corresponding `u32` type, as well as a guarantee +// that transmuting between `NonZeroU32` and `Option` works. +// While the documentation officially makes it UB to transmute from `None`, +// we're the standard library so we can make extra inferences, and we know that +// the only niche available to represent `None` is the one that's all zeros. + +macro_rules! 
impl_is_zero_option_of_nonzero { + ($($t:ident,)+) => {$( + unsafe impl IsZero for Option { + #[inline] + fn is_zero(&self) -> bool { + self.is_none() + } + } + )+}; +} + +impl_is_zero_option_of_nonzero!( + NonZeroU8, + NonZeroU16, + NonZeroU32, + NonZeroU64, + NonZeroU128, + NonZeroI8, + NonZeroI16, + NonZeroI32, + NonZeroI64, + NonZeroI128, + NonZeroUsize, + NonZeroIsize, +); diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs new file mode 100644 index 0000000000000..2abffb93e498d --- /dev/null +++ b/rust/alloc/vec/mod.rs @@ -0,0 +1,3255 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A contiguous growable array type with heap-allocated contents, written +//! `Vec`. +//! +//! Vectors have `O(1)` indexing, amortized `O(1)` push (to the end) and +//! `O(1)` pop (from the end). +//! +//! Vectors ensure they never allocate more than `isize::MAX` bytes. +//! +//! # Examples +//! +//! You can explicitly create a [`Vec`] with [`Vec::new`]: +//! +//! ``` +//! let v: Vec = Vec::new(); +//! ``` +//! +//! ...or by using the [`vec!`] macro: +//! +//! ``` +//! let v: Vec = vec![]; +//! +//! let v = vec![1, 2, 3, 4, 5]; +//! +//! let v = vec![0; 10]; // ten zeroes +//! ``` +//! +//! You can [`push`] values onto the end of a vector (which will grow the vector +//! as needed): +//! +//! ``` +//! let mut v = vec![1, 2]; +//! +//! v.push(3); +//! ``` +//! +//! Popping values works in much the same way: +//! +//! ``` +//! let mut v = vec![1, 2]; +//! +//! let two = v.pop(); +//! ``` +//! +//! Vectors also support indexing (through the [`Index`] and [`IndexMut`] traits): +//! +//! ``` +//! let mut v = vec![1, 2, 3]; +//! let three = v[2]; +//! v[1] = v[1] + 5; +//! ``` +//! +//! 
[`push`]: Vec::push + +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(no_global_oom_handling))] +use core::cmp; +use core::cmp::Ordering; +use core::convert::TryFrom; +use core::fmt; +use core::hash::{Hash, Hasher}; +use core::intrinsics::{arith_offset, assume}; +use core::iter; +#[cfg(not(no_global_oom_handling))] +use core::iter::FromIterator; +use core::marker::PhantomData; +use core::mem::{self, ManuallyDrop, MaybeUninit}; +use core::ops::{self, Index, IndexMut, Range, RangeBounds}; +use core::ptr::{self, NonNull}; +use core::slice::{self, SliceIndex}; + +use crate::alloc::{Allocator, Global}; +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::raw_vec::RawVec; + +#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] +pub use self::drain_filter::DrainFilter; + +mod drain_filter; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "vec_splice", since = "1.21.0")] +pub use self::splice::Splice; + +#[cfg(not(no_global_oom_handling))] +mod splice; + +#[stable(feature = "drain", since = "1.6.0")] +pub use self::drain::Drain; + +mod drain; + +#[cfg(not(no_global_oom_handling))] +mod cow; + +#[cfg(not(no_global_oom_handling))] +pub(crate) use self::into_iter::AsIntoIter; +#[stable(feature = "rust1", since = "1.0.0")] +pub use self::into_iter::IntoIter; + +mod into_iter; + +#[cfg(not(no_global_oom_handling))] +use self::is_zero::IsZero; + +mod is_zero; + +#[cfg(not(no_global_oom_handling))] +mod source_iter_marker; + +mod partial_eq; + +#[cfg(not(no_global_oom_handling))] +use self::spec_from_elem::SpecFromElem; + +#[cfg(not(no_global_oom_handling))] +mod spec_from_elem; + +use self::set_len_on_drop::SetLenOnDrop; + +mod set_len_on_drop; + +#[cfg(not(no_global_oom_handling))] +use self::in_place_drop::InPlaceDrop; + +#[cfg(not(no_global_oom_handling))] +mod in_place_drop; + +#[cfg(not(no_global_oom_handling))] +use 
self::spec_from_iter_nested::SpecFromIterNested; + +#[cfg(not(no_global_oom_handling))] +mod spec_from_iter_nested; + +#[cfg(not(no_global_oom_handling))] +use self::spec_from_iter::SpecFromIter; + +#[cfg(not(no_global_oom_handling))] +mod spec_from_iter; + +#[cfg(not(no_global_oom_handling))] +use self::spec_extend::SpecExtend; + +use self::spec_extend::TrySpecExtend; + +mod spec_extend; + +/// A contiguous growable array type, written as `Vec` and pronounced 'vector'. +/// +/// # Examples +/// +/// ``` +/// let mut vec = Vec::new(); +/// vec.push(1); +/// vec.push(2); +/// +/// assert_eq!(vec.len(), 2); +/// assert_eq!(vec[0], 1); +/// +/// assert_eq!(vec.pop(), Some(2)); +/// assert_eq!(vec.len(), 1); +/// +/// vec[0] = 7; +/// assert_eq!(vec[0], 7); +/// +/// vec.extend([1, 2, 3].iter().copied()); +/// +/// for x in &vec { +/// println!("{}", x); +/// } +/// assert_eq!(vec, [7, 1, 2, 3]); +/// ``` +/// +/// The [`vec!`] macro is provided to make initialization more convenient: +/// +/// ``` +/// let mut vec = vec![1, 2, 3]; +/// vec.push(4); +/// assert_eq!(vec, [1, 2, 3, 4]); +/// ``` +/// +/// It can also initialize each element of a `Vec` with a given value. +/// This may be more efficient than performing allocation and initialization +/// in separate steps, especially when initializing a vector of zeros: +/// +/// ``` +/// let vec = vec![0; 5]; +/// assert_eq!(vec, [0, 0, 0, 0, 0]); +/// +/// // The following is equivalent, but potentially slower: +/// let mut vec = Vec::with_capacity(5); +/// vec.resize(5, 0); +/// assert_eq!(vec, [0, 0, 0, 0, 0]); +/// ``` +/// +/// For more information, see +/// [Capacity and Reallocation](#capacity-and-reallocation). 
+/// +/// Use a `Vec` as an efficient stack: +/// +/// ``` +/// let mut stack = Vec::new(); +/// +/// stack.push(1); +/// stack.push(2); +/// stack.push(3); +/// +/// while let Some(top) = stack.pop() { +/// // Prints 3, 2, 1 +/// println!("{}", top); +/// } +/// ``` +/// +/// # Indexing +/// +/// The `Vec` type allows access to values by index, because it implements the +/// [`Index`] trait. An example will be more explicit: +/// +/// ``` +/// let v = vec![0, 2, 4, 6]; +/// println!("{}", v[1]); // it will display '2' +/// ``` +/// +/// However, be careful: if you try to access an index which isn't in the `Vec`, +/// your software will panic! You cannot do this: +/// +/// ```should_panic +/// let v = vec![0, 2, 4, 6]; +/// println!("{}", v[6]); // it will panic! +/// ``` +/// +/// Use [`get`] and [`get_mut`] if you want to check whether the index is in +/// the `Vec`. +/// +/// # Slicing +/// +/// A `Vec` can be mutable. On the other hand, slices are read-only objects. +/// To get a [slice][prim@slice], use [`&`]. Example: +/// +/// ``` +/// fn read_slice(slice: &[usize]) { +/// // ... +/// } +/// +/// let v = vec![0, 1]; +/// read_slice(&v); +/// +/// // ... and that's all! +/// // you can also do it like this: +/// let u: &[usize] = &v; +/// // or like this: +/// let u: &[_] = &v; +/// ``` +/// +/// In Rust, it's more common to pass slices as arguments rather than vectors +/// when you just want to provide read access. The same goes for [`String`] and +/// [`&str`]. +/// +/// # Capacity and reallocation +/// +/// The capacity of a vector is the amount of space allocated for any future +/// elements that will be added onto the vector. This is not to be confused with +/// the *length* of a vector, which specifies the number of actual elements +/// within the vector. If a vector's length exceeds its capacity, its capacity +/// will automatically be increased, but its elements will have to be +/// reallocated.
+/// +/// For example, a vector with capacity 10 and length 0 would be an empty vector +/// with space for 10 more elements. Pushing 10 or fewer elements onto the +/// vector will not change its capacity or cause reallocation to occur. However, +/// if the vector's length is increased to 11, it will have to reallocate, which +/// can be slow. For this reason, it is recommended to use [`Vec::with_capacity`] +/// whenever possible to specify how big the vector is expected to get. +/// +/// # Guarantees +/// +/// Due to its incredibly fundamental nature, `Vec` makes a lot of guarantees +/// about its design. This ensures that it's as low-overhead as possible in +/// the general case, and can be correctly manipulated in primitive ways +/// by unsafe code. Note that these guarantees refer to an unqualified `Vec`. +/// If additional type parameters are added (e.g., to support custom allocators), +/// overriding their defaults may change the behavior. +/// +/// Most fundamentally, `Vec` is and always will be a (pointer, capacity, length) +/// triplet. No more, no less. The order of these fields is completely +/// unspecified, and you should use the appropriate methods to modify these. +/// The pointer will never be null, so this type is null-pointer-optimized. +/// +/// However, the pointer might not actually point to allocated memory. In particular, +/// if you construct a `Vec` with capacity 0 via [`Vec::new`], [`vec![]`][`vec!`], +/// [`Vec::with_capacity(0)`][`Vec::with_capacity`], or by calling [`shrink_to_fit`] +/// on an empty Vec, it will not allocate memory. Similarly, if you store zero-sized +/// types inside a `Vec`, it will not allocate space for them. *Note that in this case +/// the `Vec` might not report a [`capacity`] of 0*. `Vec` will allocate if and only +/// if [`mem::size_of::`]`() * capacity() > 0`. 
In general, `Vec`'s allocation +/// details are very subtle — if you intend to allocate memory using a `Vec` +/// and use it for something else (either to pass to unsafe code, or to build your +/// own memory-backed collection), be sure to deallocate this memory by using +/// `from_raw_parts` to recover the `Vec` and then dropping it. +/// +/// If a `Vec` *has* allocated memory, then the memory it points to is on the heap +/// (as defined by the allocator Rust is configured to use by default), and its +/// pointer points to [`len`] initialized, contiguous elements in order (what +/// you would see if you coerced it to a slice), followed by [`capacity`]` - +/// `[`len`] logically uninitialized, contiguous elements. +/// +/// A vector containing the elements `'a'` and `'b'` with capacity 4 can be +/// visualized as below. The top part is the `Vec` struct, it contains a +/// pointer to the head of the allocation in the heap, length and capacity. +/// The bottom part is the allocation on the heap, a contiguous memory block. +/// +/// ```text +/// ptr len capacity +/// +--------+--------+--------+ +/// | 0x0123 | 2 | 4 | +/// +--------+--------+--------+ +/// | +/// v +/// Heap +--------+--------+--------+--------+ +/// | 'a' | 'b' | uninit | uninit | +/// +--------+--------+--------+--------+ +/// ``` +/// +/// - **uninit** represents memory that is not initialized, see [`MaybeUninit`]. +/// - Note: the ABI is not stable and `Vec` makes no guarantees about its memory +/// layout (including the order of fields). +/// +/// `Vec` will never perform a "small optimization" where elements are actually +/// stored on the stack for two reasons: +/// +/// * It would make it more difficult for unsafe code to correctly manipulate +/// a `Vec`. The contents of a `Vec` wouldn't have a stable address if it were +/// only moved, and it would be more difficult to determine if a `Vec` had +/// actually allocated memory. 
+/// +/// * It would penalize the general case, incurring an additional branch +/// on every access. +/// +/// `Vec` will never automatically shrink itself, even if completely empty. This +/// ensures no unnecessary allocations or deallocations occur. Emptying a `Vec` +/// and then filling it back up to the same [`len`] should incur no calls to +/// the allocator. If you wish to free up unused memory, use +/// [`shrink_to_fit`] or [`shrink_to`]. +/// +/// [`push`] and [`insert`] will never (re)allocate if the reported capacity is +/// sufficient. [`push`] and [`insert`] *will* (re)allocate if +/// [`len`]` == `[`capacity`]. That is, the reported capacity is completely +/// accurate, and can be relied on. It can even be used to manually free the memory +/// allocated by a `Vec` if desired. Bulk insertion methods *may* reallocate, even +/// when not necessary. +/// +/// `Vec` does not guarantee any particular growth strategy when reallocating +/// when full, nor when [`reserve`] is called. The current strategy is basic +/// and it may prove desirable to use a non-constant growth factor. Whatever +/// strategy is used will of course guarantee *O*(1) amortized [`push`]. +/// +/// `vec![x; n]`, `vec![a, b, c, d]`, and +/// [`Vec::with_capacity(n)`][`Vec::with_capacity`], will all produce a `Vec` +/// with exactly the requested capacity. If [`len`]` == `[`capacity`], +/// (as is the case for the [`vec!`] macro), then a `Vec` can be converted to +/// and from a [`Box<[T]>`][owned slice] without reallocating or moving the elements. +/// +/// `Vec` will not specifically overwrite any data that is removed from it, +/// but also won't specifically preserve it. Its uninitialized memory is +/// scratch space that it may use however it wants. It will generally just do +/// whatever is most efficient or otherwise easy to implement. Do not rely on +/// removed data to be erased for security purposes. Even if you drop a `Vec`, its +/// buffer may simply be reused by another `Vec`. 
Even if you zero a `Vec`'s memory +/// first, that might not actually happen because the optimizer does not consider +/// this a side-effect that must be preserved. There is one case which we will +/// not break, however: using `unsafe` code to write to the excess capacity, +/// and then increasing the length to match, is always valid. +/// +/// Currently, `Vec` does not guarantee the order in which elements are dropped. +/// The order has changed in the past and may change again. +/// +/// [`get`]: ../../std/vec/struct.Vec.html#method.get +/// [`get_mut`]: ../../std/vec/struct.Vec.html#method.get_mut +/// [`String`]: crate::string::String +/// [`&str`]: type@str +/// [`shrink_to_fit`]: Vec::shrink_to_fit +/// [`shrink_to`]: Vec::shrink_to +/// [`capacity`]: Vec::capacity +/// [`mem::size_of::`]: core::mem::size_of +/// [`len`]: Vec::len +/// [`push`]: Vec::push +/// [`insert`]: Vec::insert +/// [`reserve`]: Vec::reserve +/// [`MaybeUninit`]: core::mem::MaybeUninit +/// [owned slice]: Box +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "vec_type")] +pub struct Vec { + buf: RawVec, + len: usize, +} + +//////////////////////////////////////////////////////////////////////////////// +// Inherent methods +//////////////////////////////////////////////////////////////////////////////// + +impl Vec { + /// Constructs a new, empty `Vec`. + /// + /// The vector will not allocate until elements are pushed onto it. + /// + /// # Examples + /// + /// ``` + /// # #![allow(unused_mut)] + /// let mut vec: Vec = Vec::new(); + /// ``` + #[inline] + #[rustc_const_stable(feature = "const_vec_new", since = "1.39.0")] + #[stable(feature = "rust1", since = "1.0.0")] + pub const fn new() -> Self { + Vec { buf: RawVec::NEW, len: 0 } + } + + /// Constructs a new, empty `Vec` with the specified capacity. + /// + /// The vector will be able to hold exactly `capacity` elements without + /// reallocating. 
If `capacity` is 0, the vector will not allocate. + /// + /// It is important to note that although the returned vector has the + /// *capacity* specified, the vector will have a zero *length*. For an + /// explanation of the difference between length and capacity, see + /// *[Capacity and reallocation]*. + /// + /// [Capacity and reallocation]: #capacity-and-reallocation + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// + /// // The vector contains no items, even though it has capacity for more + /// assert_eq!(vec.len(), 0); + /// assert_eq!(vec.capacity(), 10); + /// + /// // These are all done without reallocating... + /// for i in 0..10 { + /// vec.push(i); + /// } + /// assert_eq!(vec.len(), 10); + /// assert_eq!(vec.capacity(), 10); + /// + /// // ...but this may make the vector reallocate + /// vec.push(11); + /// assert_eq!(vec.len(), 11); + /// assert!(vec.capacity() >= 11); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[doc(alias = "malloc")] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_in(capacity, Global) + } + + /// Tries to construct a new, empty `Vec` with the specified capacity. + /// + /// The vector will be able to hold exactly `capacity` elements without + /// reallocating. If `capacity` is 0, the vector will not allocate. + /// + /// It is important to note that although the returned vector has the + /// *capacity* specified, the vector will have a zero *length*. For an + /// explanation of the difference between length and capacity, see + /// *[Capacity and reallocation]*. 
+ /// + /// [Capacity and reallocation]: #capacity-and-reallocation + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::try_with_capacity(10).unwrap(); + /// + /// // The vector contains no items, even though it has capacity for more + /// assert_eq!(vec.len(), 0); + /// assert_eq!(vec.capacity(), 10); + /// + /// // These are all done without reallocating... + /// for i in 0..10 { + /// vec.push(i); + /// } + /// assert_eq!(vec.len(), 10); + /// assert_eq!(vec.capacity(), 10); + /// + /// // ...but this may make the vector reallocate + /// vec.push(11); + /// assert_eq!(vec.len(), 11); + /// assert!(vec.capacity() >= 11); + /// + /// let mut result = Vec::try_with_capacity(usize::MAX); + /// assert!(result.is_err()); + /// ``` + #[inline] + #[doc(alias = "malloc")] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_with_capacity(capacity: usize) -> Result { + Self::try_with_capacity_in(capacity, Global) + } + + /// Creates a `Vec` directly from the raw components of another vector. + /// + /// # Safety + /// + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: + /// + /// * `ptr` needs to have been previously allocated via [`String`]/`Vec` + /// (at least, it's highly likely to be incorrect if it wasn't). + /// * `T` needs to have the same size and alignment as what `ptr` was allocated with. + /// (`T` having a less strict alignment is not sufficient, the alignment really + /// needs to be equal to satisfy the [`dealloc`] requirement that memory must be + /// allocated and deallocated with the same layout.) + /// * `length` needs to be less than or equal to `capacity`. + /// * `capacity` needs to be the capacity that the pointer was allocated with. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal data structures. For example it is **not** safe + /// to build a `Vec` from a pointer to a C `char` array with length `size_t`. 
+ /// It's also not safe to build one from a `Vec` and its length, because + /// the allocator cares about the alignment, and these two types have different + /// alignments. The buffer was allocated with alignment 2 (for `u16`), but after + /// turning it into a `Vec` it'll be deallocated with alignment 1. + /// + /// The ownership of `ptr` is effectively transferred to the + /// `Vec` which may then deallocate, reallocate or change the + /// contents of memory pointed to by the pointer at will. Ensure + /// that nothing else uses the pointer after calling this + /// function. + /// + /// [`String`]: crate::string::String + /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc + /// + /// # Examples + /// + /// ``` + /// use std::ptr; + /// use std::mem; + /// + /// let v = vec![1, 2, 3]; + /// + // FIXME Update this when vec_into_raw_parts is stabilized + /// // Prevent running `v`'s destructor so we are in complete control + /// // of the allocation. + /// let mut v = mem::ManuallyDrop::new(v); + /// + /// // Pull out the various important pieces of information about `v` + /// let p = v.as_mut_ptr(); + /// let len = v.len(); + /// let cap = v.capacity(); + /// + /// unsafe { + /// // Overwrite memory with 4, 5, 6 + /// for i in 0..len as isize { + /// ptr::write(p.offset(i), 4 + i); + /// } + /// + /// // Put everything back together into a Vec + /// let rebuilt = Vec::from_raw_parts(p, len, cap); + /// assert_eq!(rebuilt, [4, 5, 6]); + /// } + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_raw_parts(ptr: *mut T, length: usize, capacity: usize) -> Self { + unsafe { Self::from_raw_parts_in(ptr, length, capacity, Global) } + } +} + +impl Vec { + /// Constructs a new, empty `Vec`. + /// + /// The vector will not allocate until elements are pushed onto it. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// # #[allow(unused_mut)] + /// let mut vec: Vec = Vec::new_in(System); + /// ``` + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub const fn new_in(alloc: A) -> Self { + Vec { buf: RawVec::new_in(alloc), len: 0 } + } + + /// Constructs a new, empty `Vec` with the specified capacity with the provided + /// allocator. + /// + /// The vector will be able to hold exactly `capacity` elements without + /// reallocating. If `capacity` is 0, the vector will not allocate. + /// + /// It is important to note that although the returned vector has the + /// *capacity* specified, the vector will have a zero *length*. For an + /// explanation of the difference between length and capacity, see + /// *[Capacity and reallocation]*. + /// + /// [Capacity and reallocation]: #capacity-and-reallocation + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let mut vec = Vec::with_capacity_in(10, System); + /// + /// // The vector contains no items, even though it has capacity for more + /// assert_eq!(vec.len(), 0); + /// assert_eq!(vec.capacity(), 10); + /// + /// // These are all done without reallocating... 
+ /// for i in 0..10 { + /// vec.push(i); + /// } + /// assert_eq!(vec.len(), 10); + /// assert_eq!(vec.capacity(), 10); + /// + /// // ...but this may make the vector reallocate + /// vec.push(11); + /// assert_eq!(vec.len(), 11); + /// assert!(vec.capacity() >= 11); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { + Vec { buf: RawVec::with_capacity_in(capacity, alloc), len: 0 } + } + + /// Tries to construct a new, empty `Vec` with the specified capacity + /// with the provided allocator. + /// + /// The vector will be able to hold exactly `capacity` elements without + /// reallocating. If `capacity` is 0, the vector will not allocate. + /// + /// It is important to note that although the returned vector has the + /// *capacity* specified, the vector will have a zero *length*. For an + /// explanation of the difference between length and capacity, see + /// *[Capacity and reallocation]*. + /// + /// [Capacity and reallocation]: #capacity-and-reallocation + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let mut vec = Vec::try_with_capacity_in(10, System).unwrap(); + /// + /// // The vector contains no items, even though it has capacity for more + /// assert_eq!(vec.len(), 0); + /// assert_eq!(vec.capacity(), 10); + /// + /// // These are all done without reallocating... 
+ /// for i in 0..10 { + /// vec.push(i); + /// } + /// assert_eq!(vec.len(), 10); + /// assert_eq!(vec.capacity(), 10); + /// + /// // ...but this may make the vector reallocate + /// vec.push(11); + /// assert_eq!(vec.len(), 11); + /// assert!(vec.capacity() >= 11); + /// + /// let mut result = Vec::try_with_capacity_in(usize::MAX, System); + /// assert!(result.is_err()); + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result { + Ok(Vec { buf: RawVec::try_with_capacity_in(capacity, alloc)?, len: 0 }) + } + + /// Creates a `Vec` directly from the raw components of another vector. + /// + /// # Safety + /// + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: + /// + /// * `ptr` needs to have been previously allocated via [`String`]/`Vec` + /// (at least, it's highly likely to be incorrect if it wasn't). + /// * `T` needs to have the same size and alignment as what `ptr` was allocated with. + /// (`T` having a less strict alignment is not sufficient, the alignment really + /// needs to be equal to satisfy the [`dealloc`] requirement that memory must be + /// allocated and deallocated with the same layout.) + /// * `length` needs to be less than or equal to `capacity`. + /// * `capacity` needs to be the capacity that the pointer was allocated with. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal data structures. For example it is **not** safe + /// to build a `Vec` from a pointer to a C `char` array with length `size_t`. + /// It's also not safe to build one from a `Vec` and its length, because + /// the allocator cares about the alignment, and these two types have different + /// alignments. The buffer was allocated with alignment 2 (for `u16`), but after + /// turning it into a `Vec` it'll be deallocated with alignment 1. 
+ /// + /// The ownership of `ptr` is effectively transferred to the + /// `Vec` which may then deallocate, reallocate or change the + /// contents of memory pointed to by the pointer at will. Ensure + /// that nothing else uses the pointer after calling this + /// function. + /// + /// [`String`]: crate::string::String + /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// use std::ptr; + /// use std::mem; + /// + /// let mut v = Vec::with_capacity_in(3, System); + /// v.push(1); + /// v.push(2); + /// v.push(3); + /// + // FIXME Update this when vec_into_raw_parts is stabilized + /// // Prevent running `v`'s destructor so we are in complete control + /// // of the allocation. + /// let mut v = mem::ManuallyDrop::new(v); + /// + /// // Pull out the various important pieces of information about `v` + /// let p = v.as_mut_ptr(); + /// let len = v.len(); + /// let cap = v.capacity(); + /// let alloc = v.allocator(); + /// + /// unsafe { + /// // Overwrite memory with 4, 5, 6 + /// for i in 0..len as isize { + /// ptr::write(p.offset(i), 4 + i); + /// } + /// + /// // Put everything back together into a Vec + /// let rebuilt = Vec::from_raw_parts_in(p, len, cap, alloc.clone()); + /// assert_eq!(rebuilt, [4, 5, 6]); + /// } + /// ``` + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub unsafe fn from_raw_parts_in(ptr: *mut T, length: usize, capacity: usize, alloc: A) -> Self { + unsafe { Vec { buf: RawVec::from_raw_parts_in(ptr, capacity, alloc), len: length } } + } + + /// Decomposes a `Vec` into its raw components. + /// + /// Returns the raw pointer to the underlying data, the length of + /// the vector (in elements), and the allocated capacity of the + /// data (in elements). These are the same arguments in the same + /// order as the arguments to [`from_raw_parts`]. 
+ /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Vec`. The only way to do + /// this is to convert the raw pointer, length, and capacity back + /// into a `Vec` with the [`from_raw_parts`] function, allowing + /// the destructor to perform the cleanup. + /// + /// [`from_raw_parts`]: Vec::from_raw_parts + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_into_raw_parts)] + /// let v: Vec = vec![-1, 0, 1]; + /// + /// let (ptr, len, cap) = v.into_raw_parts(); + /// + /// let rebuilt = unsafe { + /// // We can now make changes to the components, such as + /// // transmuting the raw pointer to a compatible type. + /// let ptr = ptr as *mut u32; + /// + /// Vec::from_raw_parts(ptr, len, cap) + /// }; + /// assert_eq!(rebuilt, [4294967295, 0, 1]); + /// ``` + #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")] + pub fn into_raw_parts(self) -> (*mut T, usize, usize) { + let mut me = ManuallyDrop::new(self); + (me.as_mut_ptr(), me.len(), me.capacity()) + } + + /// Decomposes a `Vec` into its raw components. + /// + /// Returns the raw pointer to the underlying data, the length of the vector (in elements), + /// the allocated capacity of the data (in elements), and the allocator. These are the same + /// arguments in the same order as the arguments to [`from_raw_parts_in`]. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Vec`. The only way to do + /// this is to convert the raw pointer, length, and capacity back + /// into a `Vec` with the [`from_raw_parts_in`] function, allowing + /// the destructor to perform the cleanup. 
+ /// + /// [`from_raw_parts_in`]: Vec::from_raw_parts_in + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, vec_into_raw_parts)] + /// + /// use std::alloc::System; + /// + /// let mut v: Vec = Vec::new_in(System); + /// v.push(-1); + /// v.push(0); + /// v.push(1); + /// + /// let (ptr, len, cap, alloc) = v.into_raw_parts_with_alloc(); + /// + /// let rebuilt = unsafe { + /// // We can now make changes to the components, such as + /// // transmuting the raw pointer to a compatible type. + /// let ptr = ptr as *mut u32; + /// + /// Vec::from_raw_parts_in(ptr, len, cap, alloc) + /// }; + /// assert_eq!(rebuilt, [4294967295, 0, 1]); + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")] + pub fn into_raw_parts_with_alloc(self) -> (*mut T, usize, usize, A) { + let mut me = ManuallyDrop::new(self); + let len = me.len(); + let capacity = me.capacity(); + let ptr = me.as_mut_ptr(); + let alloc = unsafe { ptr::read(me.allocator()) }; + (ptr, len, capacity, alloc) + } + + /// Returns the number of elements the vector can hold without + /// reallocating. + /// + /// # Examples + /// + /// ``` + /// let vec: Vec = Vec::with_capacity(10); + /// assert_eq!(vec.capacity(), 10); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn capacity(&self) -> usize { + self.buf.capacity() + } + + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the given `Vec`. The collection may reserve more space to avoid + /// frequent reallocations. After calling `reserve`, capacity will be + /// greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. 
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.reserve(10); + /// assert!(vec.capacity() >= 11); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[doc(alias = "realloc")] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve(&mut self, additional: usize) { + self.buf.reserve(self.len, additional); + } + + /// Reserves the minimum capacity for exactly `additional` more elements to + /// be inserted in the given `Vec`. After calling `reserve_exact`, + /// capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer `reserve` if future insertions are expected. + /// + /// # Panics + /// + /// Panics if the new capacity overflows `usize`. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.reserve_exact(10); + /// assert!(vec.capacity() >= 11); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[doc(alias = "realloc")] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve_exact(&mut self, additional: usize) { + self.buf.reserve_exact(self.len, additional); + } + + /// Tries to reserve capacity for at least `additional` more elements to be inserted + /// in the given `Vec`. The collection may reserve more space to avoid + /// frequent reallocations. After calling `try_reserve`, capacity will be + /// greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(try_reserve)] + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &[u32]) -> Result, TryReserveError> { + /// let mut output = Vec::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// output.try_reserve(data.len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// output.extend(data.iter().map(|&val| { + /// val * 2 + 5 // very complicated + /// })); + /// + /// Ok(output) + /// } + /// # process_data(&[1, 2, 3]).expect("why is the test harness OOMing on 12 bytes?"); + /// ``` + #[doc(alias = "realloc")] + #[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.buf.try_reserve(self.len, additional) + } + + /// Tries to reserve the minimum capacity for exactly `additional` + /// elements to be inserted in the given `Vec`. After calling + /// `try_reserve_exact`, capacity will be greater than or equal to + /// `self.len() + additional` if it returns `Ok(())`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer `reserve` if future insertions are expected. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(try_reserve)] + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &[u32]) -> Result, TryReserveError> { + /// let mut output = Vec::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// output.try_reserve_exact(data.len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// output.extend(data.iter().map(|&val| { + /// val * 2 + 5 // very complicated + /// })); + /// + /// Ok(output) + /// } + /// # process_data(&[1, 2, 3]).expect("why is the test harness OOMing on 12 bytes?"); + /// ``` + #[doc(alias = "realloc")] + #[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.buf.try_reserve_exact(self.len, additional) + } + + /// Shrinks the capacity of the vector as much as possible. + /// + /// It will drop down as close as possible to the length but the allocator + /// may still inform the vector that there is space for a few more elements. + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.shrink_to_fit(); + /// assert!(vec.capacity() >= 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[doc(alias = "realloc")] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn shrink_to_fit(&mut self) { + // The capacity is never less than the length, and there's nothing to do when + // they are equal, so we can avoid the panic case in `RawVec::shrink_to_fit` + // by only calling it with a greater capacity. + if self.capacity() > self.len { + self.buf.shrink_to_fit(self.len); + } + } + + /// Tries to shrink the capacity of the vector as much as possible. + /// + /// It will drop down as close as possible to the length but the allocator + /// may still inform the vector that there is space for a few more elements. 
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.try_shrink_to_fit().unwrap(); + /// assert!(vec.capacity() >= 3); + /// ``` + #[doc(alias = "realloc")] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_shrink_to_fit(&mut self) -> Result<(), TryReserveError> { + // The capacity is never less than the length, and there's nothing to do when + // they are equal, so we can avoid the panic case in `RawVec::try_shrink_to_fit` + // by only calling it with a greater capacity. + if self.capacity() <= self.len { + return Ok(()); + } + + self.buf.try_shrink_to_fit(self.len) + } + + /// Shrinks the capacity of the vector with a lower bound. + /// + /// The capacity will remain at least as large as both the length + /// and the supplied value. + /// + /// If the current capacity is less than the lower limit, this is a no-op. + /// + /// # Examples + /// + /// ``` + /// #![feature(shrink_to)] + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.shrink_to(4); + /// assert!(vec.capacity() >= 4); + /// vec.shrink_to(0); + /// assert!(vec.capacity() >= 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[doc(alias = "realloc")] + #[unstable(feature = "shrink_to", reason = "new API", issue = "56431")] + pub fn shrink_to(&mut self, min_capacity: usize) { + if self.capacity() > min_capacity { + self.buf.shrink_to_fit(cmp::max(self.len, min_capacity)); + } + } + + /// Converts the vector into [`Box<[T]>`][owned slice]. + /// + /// Note that this will drop any excess capacity. 
+ /// + /// [owned slice]: Box + /// + /// # Examples + /// + /// ``` + /// let v = vec![1, 2, 3]; + /// + /// let slice = v.into_boxed_slice(); + /// ``` + /// + /// Any excess capacity is removed: + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// + /// assert_eq!(vec.capacity(), 10); + /// let slice = vec.into_boxed_slice(); + /// assert_eq!(slice.into_vec().capacity(), 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_boxed_slice(mut self) -> Box<[T], A> { + unsafe { + self.shrink_to_fit(); + let me = ManuallyDrop::new(self); + let buf = ptr::read(&me.buf); + let len = me.len(); + buf.into_box(len).assume_init() + } + } + + /// Tries to convert the vector into [`Box<[T]>`][owned slice]. + /// + /// Note that this will drop any excess capacity. + /// + /// [owned slice]: Box + /// + /// # Examples + /// + /// ``` + /// let v = vec![1, 2, 3]; + /// + /// let slice = v.try_into_boxed_slice().unwrap(); + /// ``` + /// + /// Any excess capacity is removed: + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// + /// assert_eq!(vec.capacity(), 10); + /// let slice = vec.try_into_boxed_slice().unwrap(); + /// assert_eq!(slice.into_vec().capacity(), 3); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_into_boxed_slice(mut self) -> Result, TryReserveError> { + unsafe { + self.try_shrink_to_fit()?; + let me = ManuallyDrop::new(self); + let buf = ptr::read(&me.buf); + let len = me.len(); + Ok(buf.into_box(len).assume_init()) + } + } + + /// Shortens the vector, keeping the first `len` elements and dropping + /// the rest. + /// + /// If `len` is greater than the vector's current length, this has no + /// effect. + /// + /// The [`drain`] method can emulate `truncate`, but causes the excess + /// elements to be returned instead of dropped. 
+ /// + /// Note that this method has no effect on the allocated capacity + /// of the vector. + /// + /// # Examples + /// + /// Truncating a five element vector to two elements: + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4, 5]; + /// vec.truncate(2); + /// assert_eq!(vec, [1, 2]); + /// ``` + /// + /// No truncation occurs when `len` is greater than the vector's current + /// length: + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.truncate(8); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + /// + /// Truncating when `len == 0` is equivalent to calling the [`clear`] + /// method. + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.truncate(0); + /// assert_eq!(vec, []); + /// ``` + /// + /// [`clear`]: Vec::clear + /// [`drain`]: Vec::drain + #[stable(feature = "rust1", since = "1.0.0")] + pub fn truncate(&mut self, len: usize) { + // This is safe because: + // + // * the slice passed to `drop_in_place` is valid; the `len > self.len` + // case avoids creating an invalid slice, and + // * the `len` of the vector is shrunk before calling `drop_in_place`, + // such that no value will be dropped twice in case `drop_in_place` + // were to panic once (if it panics twice, the program aborts). + unsafe { + // Note: It's intentional that this is `>` and not `>=`. + // Changing it to `>=` has negative performance + // implications in some cases. See #78884 for more. + if len > self.len { + return; + } + let remaining_len = self.len - len; + let s = ptr::slice_from_raw_parts_mut(self.as_mut_ptr().add(len), remaining_len); + self.len = len; + ptr::drop_in_place(s); + } + } + + /// Extracts a slice containing the entire vector. + /// + /// Equivalent to `&s[..]`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::io::{self, Write}; + /// let buffer = vec![1, 2, 3, 5, 8]; + /// io::sink().write(buffer.as_slice()).unwrap(); + /// ``` + #[inline] + #[stable(feature = "vec_as_slice", since = "1.7.0")] + pub fn as_slice(&self) -> &[T] { + self + } + + /// Extracts a mutable slice of the entire vector. + /// + /// Equivalent to `&mut s[..]`. + /// + /// # Examples + /// + /// ``` + /// use std::io::{self, Read}; + /// let mut buffer = vec![0; 3]; + /// io::repeat(0b101).read_exact(buffer.as_mut_slice()).unwrap(); + /// ``` + #[inline] + #[stable(feature = "vec_as_slice", since = "1.7.0")] + pub fn as_mut_slice(&mut self) -> &mut [T] { + self + } + + /// Returns a raw pointer to the vector's buffer. + /// + /// The caller must ensure that the vector outlives the pointer this + /// function returns, or else it will end up pointing to garbage. + /// Modifying the vector may cause its buffer to be reallocated, + /// which would also make any pointers to it invalid. + /// + /// The caller must also ensure that the memory the pointer (non-transitively) points to + /// is never written to (except inside an `UnsafeCell`) using this pointer or any pointer + /// derived from it. If you need to mutate the contents of the slice, use [`as_mut_ptr`]. + /// + /// # Examples + /// + /// ``` + /// let x = vec![1, 2, 4]; + /// let x_ptr = x.as_ptr(); + /// + /// unsafe { + /// for i in 0..x.len() { + /// assert_eq!(*x_ptr.add(i), 1 << i); + /// } + /// } + /// ``` + /// + /// [`as_mut_ptr`]: Vec::as_mut_ptr + #[stable(feature = "vec_as_ptr", since = "1.37.0")] + #[inline] + pub fn as_ptr(&self) -> *const T { + // We shadow the slice method of the same name to avoid going through + // `deref`, which creates an intermediate reference. + let ptr = self.buf.ptr(); + unsafe { + assume(!ptr.is_null()); + } + ptr + } + + /// Returns an unsafe mutable pointer to the vector's buffer. 
+ /// + /// The caller must ensure that the vector outlives the pointer this + /// function returns, or else it will end up pointing to garbage. + /// Modifying the vector may cause its buffer to be reallocated, + /// which would also make any pointers to it invalid. + /// + /// # Examples + /// + /// ``` + /// // Allocate vector big enough for 4 elements. + /// let size = 4; + /// let mut x: Vec = Vec::with_capacity(size); + /// let x_ptr = x.as_mut_ptr(); + /// + /// // Initialize elements via raw pointer writes, then set length. + /// unsafe { + /// for i in 0..size { + /// *x_ptr.add(i) = i as i32; + /// } + /// x.set_len(size); + /// } + /// assert_eq!(&*x, &[0, 1, 2, 3]); + /// ``` + #[stable(feature = "vec_as_ptr", since = "1.37.0")] + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + // We shadow the slice method of the same name to avoid going through + // `deref_mut`, which creates an intermediate reference. + let ptr = self.buf.ptr(); + unsafe { + assume(!ptr.is_null()); + } + ptr + } + + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + self.buf.allocator() + } + + /// Forces the length of the vector to `new_len`. + /// + /// This is a low-level operation that maintains none of the normal + /// invariants of the type. Normally changing the length of a vector + /// is done using one of the safe operations instead, such as + /// [`truncate`], [`resize`], [`extend`], or [`clear`]. + /// + /// [`truncate`]: Vec::truncate + /// [`resize`]: Vec::resize + /// [`extend`]: Extend::extend + /// [`clear`]: Vec::clear + /// + /// # Safety + /// + /// - `new_len` must be less than or equal to [`capacity()`]. + /// - The elements at `old_len..new_len` must be initialized. 
+ /// + /// [`capacity()`]: Vec::capacity + /// + /// # Examples + /// + /// This method can be useful for situations in which the vector + /// is serving as a buffer for other code, particularly over FFI: + /// + /// ```no_run + /// # #![allow(dead_code)] + /// # // This is just a minimal skeleton for the doc example; + /// # // don't use this as a starting point for a real library. + /// # pub struct StreamWrapper { strm: *mut std::ffi::c_void } + /// # const Z_OK: i32 = 0; + /// # extern "C" { + /// # fn deflateGetDictionary( + /// # strm: *mut std::ffi::c_void, + /// # dictionary: *mut u8, + /// # dictLength: *mut usize, + /// # ) -> i32; + /// # } + /// # impl StreamWrapper { + /// pub fn get_dictionary(&self) -> Option> { + /// // Per the FFI method's docs, "32768 bytes is always enough". + /// let mut dict = Vec::with_capacity(32_768); + /// let mut dict_length = 0; + /// // SAFETY: When `deflateGetDictionary` returns `Z_OK`, it holds that: + /// // 1. `dict_length` elements were initialized. + /// // 2. `dict_length` <= the capacity (32_768) + /// // which makes `set_len` safe to call. + /// unsafe { + /// // Make the FFI call... + /// let r = deflateGetDictionary(self.strm, dict.as_mut_ptr(), &mut dict_length); + /// if r == Z_OK { + /// // ...and update the length to what was initialized. + /// dict.set_len(dict_length); + /// Some(dict) + /// } else { + /// None + /// } + /// } + /// } + /// # } + /// ``` + /// + /// While the following example is sound, there is a memory leak since + /// the inner vectors were not freed prior to the `set_len` call: + /// + /// ``` + /// let mut vec = vec![vec![1, 0, 0], + /// vec![0, 1, 0], + /// vec![0, 0, 1]]; + /// // SAFETY: + /// // 1. `old_len..0` is empty so no elements need to be initialized. + /// // 2. `0 <= capacity` always holds whatever `capacity` is. 
+ /// unsafe { + /// vec.set_len(0); + /// } + /// ``` + /// + /// Normally, here, one would use [`clear`] instead to correctly drop + /// the contents and thus not leak memory. + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn set_len(&mut self, new_len: usize) { + debug_assert!(new_len <= self.capacity()); + + self.len = new_len; + } + + /// Removes an element from the vector and returns it. + /// + /// The removed element is replaced by the last element of the vector. + /// + /// This does not preserve ordering, but is O(1). + /// + /// # Panics + /// + /// Panics if `index` is out of bounds. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec!["foo", "bar", "baz", "qux"]; + /// + /// assert_eq!(v.swap_remove(1), "bar"); + /// assert_eq!(v, ["foo", "qux", "baz"]); + /// + /// assert_eq!(v.swap_remove(0), "foo"); + /// assert_eq!(v, ["baz", "qux"]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn swap_remove(&mut self, index: usize) -> T { + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("swap_remove index (is {}) should be < len (is {})", index, len); + } + + let len = self.len(); + if index >= len { + assert_failed(index, len); + } + unsafe { + // We replace self[index] with the last element. Note that if the + // bounds check above succeeds there must be a last element (which + // can be self[index] itself). + let last = ptr::read(self.as_ptr().add(len - 1)); + let hole = self.as_mut_ptr().add(index); + self.set_len(len - 1); + ptr::replace(hole, last) + } + } + + /// Inserts an element at position `index` within the vector, shifting all + /// elements after it to the right. + /// + /// # Panics + /// + /// Panics if `index > len`. 
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.insert(1, 4); + /// assert_eq!(vec, [1, 4, 2, 3]); + /// vec.insert(4, 5); + /// assert_eq!(vec, [1, 4, 2, 3, 5]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, index: usize, element: T) { + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("insertion index (is {}) should be <= len (is {})", index, len); + } + + let len = self.len(); + if index > len { + assert_failed(index, len); + } + + // space for the new element + if len == self.buf.capacity() { + self.reserve(1); + } + + unsafe { + // infallible + // The spot to put the new value + { + let p = self.as_mut_ptr().add(index); + // Shift everything over to make space. (Duplicating the + // `index`th element into two consecutive places.) + ptr::copy(p, p.offset(1), len - index); + // Write it in, overwriting the first copy of the `index`th + // element. + ptr::write(p, element); + } + self.set_len(len + 1); + } + } + + /// Removes and returns the element at position `index` within the vector, + /// shifting all elements after it to the left. + /// + /// # Panics + /// + /// Panics if `index` is out of bounds. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// assert_eq!(v.remove(1), 2); + /// assert_eq!(v, [1, 3]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn remove(&mut self, index: usize) -> T { + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("removal index (is {}) should be < len (is {})", index, len); + } + + let len = self.len(); + if index >= len { + assert_failed(index, len); + } + unsafe { + // infallible + let ret; + { + // the place we are taking from. + let ptr = self.as_mut_ptr().add(index); + // copy it out, unsafely having a copy of the value on + // the stack and in the vector at the same time. 
+ ret = ptr::read(ptr); + + // Shift everything down to fill in that spot. + ptr::copy(ptr.offset(1), ptr, len - index - 1); + } + self.set_len(len - 1); + ret + } + } + + /// Retains only the elements specified by the predicate. + /// + /// In other words, remove all elements `e` such that `f(&e)` returns `false`. + /// This method operates in place, visiting each element exactly once in the + /// original order, and preserves the order of the retained elements. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.retain(|&x| x % 2 == 0); + /// assert_eq!(vec, [2, 4]); + /// ``` + /// + /// Because the elements are visited exactly once in the original order, + /// external state may be used to decide which elements to keep. + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4, 5]; + /// let keep = [false, true, true, false, true]; + /// let mut iter = keep.iter(); + /// vec.retain(|_| *iter.next().unwrap()); + /// assert_eq!(vec, [2, 3, 5]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn retain(&mut self, mut f: F) + where + F: FnMut(&T) -> bool, + { + let original_len = self.len(); + // Avoid double drop if the drop guard is not executed, + // since we may make some holes during the process. + unsafe { self.set_len(0) }; + + // Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked] + // |<- processed len ->| ^- next to check + // |<- deleted cnt ->| + // |<- original_len ->| + // Kept: Elements which predicate returns true on. + // Hole: Moved or dropped element slot. + // Unchecked: Unchecked valid elements. + // + // This drop guard will be invoked when predicate or `drop` of element panicked. + // It shifts unchecked elements to cover holes and `set_len` to the correct length. + // In cases when predicate and `drop` never panick, it will be optimized out. 
+ struct BackshiftOnDrop<'a, T, A: Allocator> { + v: &'a mut Vec, + processed_len: usize, + deleted_cnt: usize, + original_len: usize, + } + + impl Drop for BackshiftOnDrop<'_, T, A> { + fn drop(&mut self) { + if self.deleted_cnt > 0 { + // SAFETY: Trailing unchecked items must be valid since we never touch them. + unsafe { + ptr::copy( + self.v.as_ptr().add(self.processed_len), + self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt), + self.original_len - self.processed_len, + ); + } + } + // SAFETY: After filling holes, all items are in contiguous memory. + unsafe { + self.v.set_len(self.original_len - self.deleted_cnt); + } + } + } + + let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len }; + + while g.processed_len < original_len { + // SAFETY: Unchecked element must be valid. + let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; + if !f(cur) { + // Advance early to avoid double drop if `drop_in_place` panicked. + g.processed_len += 1; + g.deleted_cnt += 1; + // SAFETY: We never touch this element again after dropped. + unsafe { ptr::drop_in_place(cur) }; + // We already advanced the counter. + continue; + } + if g.deleted_cnt > 0 { + // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. + // We use copy for move, and never touch this element again. + unsafe { + let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); + ptr::copy_nonoverlapping(cur, hole_slot, 1); + } + } + g.processed_len += 1; + } + + // All item are processed. This can be optimized to `set_len` by LLVM. + drop(g); + } + + /// Removes all but the first of consecutive elements in the vector that resolve to the same + /// key. + /// + /// If the vector is sorted, this removes all duplicates. 
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![10, 20, 21, 30, 20]; + /// + /// vec.dedup_by_key(|i| *i / 10); + /// + /// assert_eq!(vec, [10, 20, 30, 20]); + /// ``` + #[stable(feature = "dedup_by", since = "1.16.0")] + #[inline] + pub fn dedup_by_key(&mut self, mut key: F) + where + F: FnMut(&mut T) -> K, + K: PartialEq, + { + self.dedup_by(|a, b| key(a) == key(b)) + } + + /// Removes all but the first of consecutive elements in the vector satisfying a given equality + /// relation. + /// + /// The `same_bucket` function is passed references to two elements from the vector and + /// must determine if the elements compare equal. The elements are passed in opposite order + /// from their order in the slice, so if `same_bucket(a, b)` returns `true`, `a` is removed. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; + /// + /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); + /// + /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]); + /// ``` + #[stable(feature = "dedup_by", since = "1.16.0")] + pub fn dedup_by(&mut self, mut same_bucket: F) + where + F: FnMut(&mut T, &mut T) -> bool, + { + let len = self.len(); + if len <= 1 { + return; + } + + /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */ + struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> { + /* Offset of the element we want to check if it is duplicate */ + read: usize, + + /* Offset of the place where we want to place the non-duplicate + * when we find it. */ + write: usize, + + /* The Vec that would need correction if `same_bucket` panicked */ + vec: &'a mut Vec, + } + + impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> { + fn drop(&mut self) { + /* This code gets executed when `same_bucket` panics */ + + /* SAFETY: invariant guarantees that `read - write` + * and `len - read` never overflow and that the copy is always + * in-bounds. 
*/ + unsafe { + let ptr = self.vec.as_mut_ptr(); + let len = self.vec.len(); + + /* How many items were left when `same_bucket` paniced. + * Basically vec[read..].len() */ + let items_left = len.wrapping_sub(self.read); + + /* Pointer to first item in vec[write..write+items_left] slice */ + let dropped_ptr = ptr.add(self.write); + /* Pointer to first item in vec[read..] slice */ + let valid_ptr = ptr.add(self.read); + + /* Copy `vec[read..]` to `vec[write..write+items_left]`. + * The slices can overlap, so `copy_nonoverlapping` cannot be used */ + ptr::copy(valid_ptr, dropped_ptr, items_left); + + /* How many items have been already dropped + * Basically vec[read..write].len() */ + let dropped = self.read.wrapping_sub(self.write); + + self.vec.set_len(len - dropped); + } + } + } + + let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self }; + let ptr = gap.vec.as_mut_ptr(); + + /* Drop items while going through Vec, it should be more efficient than + * doing slice partition_dedup + truncate */ + + /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr + * are always in-bounds and read_ptr never aliases prev_ptr */ + unsafe { + while gap.read < len { + let read_ptr = ptr.add(gap.read); + let prev_ptr = ptr.add(gap.write.wrapping_sub(1)); + + if same_bucket(&mut *read_ptr, &mut *prev_ptr) { + // Increase `gap.read` now since the drop may panic. + gap.read += 1; + /* We have found duplicate, drop it in-place */ + ptr::drop_in_place(read_ptr); + } else { + let write_ptr = ptr.add(gap.write); + + /* Because `read_ptr` can be equal to `write_ptr`, we either + * have to use `copy` or conditional `copy_nonoverlapping`. + * Looks like the first option is faster. 
*/ + ptr::copy(read_ptr, write_ptr, 1); + + /* We have filled that place, so go further */ + gap.write += 1; + gap.read += 1; + } + } + + /* Technically we could let `gap` clean up with its Drop, but + * when `same_bucket` is guaranteed to not panic, this bloats a little + * the codegen, so we just do it manually */ + gap.vec.set_len(gap.write); + mem::forget(gap); + } + } + + /// Appends an element to the back of a collection. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2]; + /// vec.push(3); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push(&mut self, value: T) { + // This will panic or abort if we would allocate > isize::MAX bytes + // or if the length increment would overflow for zero-sized types. + if self.len == self.buf.capacity() { + self.reserve(1); + } + unsafe { + let end = self.as_mut_ptr().add(self.len); + ptr::write(end, value); + self.len += 1; + } + } + + /// Tries to append an element to the back of a collection. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2]; + /// vec.try_push(3).unwrap(); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_push(&mut self, value: T) -> Result<(), TryReserveError> { + if self.len == self.buf.capacity() { + self.try_reserve(1)?; + } + unsafe { + let end = self.as_mut_ptr().add(self.len); + ptr::write(end, value); + self.len += 1; + } + Ok(()) + } + + /// Removes the last element from a vector and returns it, or [`None`] if it + /// is empty. 
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// assert_eq!(vec.pop(), Some(3)); + /// assert_eq!(vec, [1, 2]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + unsafe { + self.len -= 1; + Some(ptr::read(self.as_ptr().add(self.len()))) + } + } + } + + /// Moves all the elements of `other` into `Self`, leaving `other` empty. + /// + /// # Panics + /// + /// Panics if the number of elements in the vector overflows a `usize`. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// let mut vec2 = vec![4, 5, 6]; + /// vec.append(&mut vec2); + /// assert_eq!(vec, [1, 2, 3, 4, 5, 6]); + /// assert_eq!(vec2, []); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "append", since = "1.4.0")] + pub fn append(&mut self, other: &mut Self) { + unsafe { + self.append_elements(other.as_slice() as _); + other.set_len(0); + } + } + + /// Appends elements to `Self` from other buffer. + #[cfg(not(no_global_oom_handling))] + #[inline] + unsafe fn append_elements(&mut self, other: *const [T]) { + let count = unsafe { (*other).len() }; + self.reserve(count); + let len = self.len(); + unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) }; + self.len += count; + } + + /// Tries to append elements to `Self` from other buffer. + #[inline] + unsafe fn try_append_elements(&mut self, other: *const [T]) -> Result<(), TryReserveError> { + let count = unsafe { (*other).len() }; + self.try_reserve(count)?; + let len = self.len(); + unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) }; + self.len += count; + Ok(()) + } + + /// Creates a draining iterator that removes the specified range in the vector + /// and yields the removed items. 
+ /// + /// When the iterator **is** dropped, all elements in the range are removed + /// from the vector, even if the iterator was not fully consumed. If the + /// iterator **is not** dropped (with [`mem::forget`] for example), it is + /// unspecified how many elements are removed. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// let u: Vec<_> = v.drain(1..).collect(); + /// assert_eq!(v, &[1]); + /// assert_eq!(u, &[2, 3]); + /// + /// // A full range clears the vector + /// v.drain(..); + /// assert_eq!(v, &[]); + /// ``` + #[stable(feature = "drain", since = "1.6.0")] + pub fn drain(&mut self, range: R) -> Drain<'_, T, A> + where + R: RangeBounds, + { + // Memory safety + // + // When the Drain is first created, it shortens the length of + // the source vector to make sure no uninitialized or moved-from elements + // are accessible at all if the Drain's destructor never gets to run. + // + // Drain will ptr::read out the values to remove. + // When finished, remaining tail of the vec is copied back to cover + // the hole, and the vector length is restored to the new length. + // + let len = self.len(); + let Range { start, end } = slice::range(range, ..len); + + unsafe { + // set self.vec length's to start, to be safe in case Drain is leaked + self.set_len(start); + // Use the borrow in the IterMut to indicate borrowing behavior of the + // whole Drain iterator (like &mut T). + let range_slice = slice::from_raw_parts_mut(self.as_mut_ptr().add(start), end - start); + Drain { + tail_start: end, + tail_len: len - end, + iter: range_slice.iter(), + vec: NonNull::from(self), + } + } + } + + /// Clears the vector, removing all values. + /// + /// Note that this method has no effect on the allocated capacity + /// of the vector. 
+ /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// + /// v.clear(); + /// + /// assert!(v.is_empty()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn clear(&mut self) { + self.truncate(0) + } + + /// Returns the number of elements in the vector, also referred to + /// as its 'length'. + /// + /// # Examples + /// + /// ``` + /// let a = vec![1, 2, 3]; + /// assert_eq!(a.len(), 3); + /// ``` + #[doc(alias = "length")] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn len(&self) -> usize { + self.len + } + + /// Returns `true` if the vector contains no elements. + /// + /// # Examples + /// + /// ``` + /// let mut v = Vec::new(); + /// assert!(v.is_empty()); + /// + /// v.push(1); + /// assert!(!v.is_empty()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Splits the collection into two at the given index. + /// + /// Returns a newly allocated vector containing the elements in the range + /// `[at, len)`. After the call, the original vector will be left containing + /// the elements `[0, at)` with its previous capacity unchanged. + /// + /// # Panics + /// + /// Panics if `at > len`. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// let vec2 = vec.split_off(1); + /// assert_eq!(vec, [1]); + /// assert_eq!(vec2, [2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[must_use = "use `.truncate()` if you don't need the other half"] + #[stable(feature = "split_off", since = "1.4.0")] + pub fn split_off(&mut self, at: usize) -> Self + where + A: Clone, + { + #[cold] + #[inline(never)] + fn assert_failed(at: usize, len: usize) -> ! 
{ + panic!("`at` split index (is {}) should be <= len (is {})", at, len); + } + + if at > self.len() { + assert_failed(at, self.len()); + } + + if at == 0 { + // the new vector can take over the original buffer and avoid the copy + return mem::replace( + self, + Vec::with_capacity_in(self.capacity(), self.allocator().clone()), + ); + } + + let other_len = self.len - at; + let mut other = Vec::with_capacity_in(other_len, self.allocator().clone()); + + // Unsafely `set_len` and copy items to `other`. + unsafe { + self.set_len(at); + other.set_len(other_len); + + ptr::copy_nonoverlapping(self.as_ptr().add(at), other.as_mut_ptr(), other.len()); + } + other + } + + /// Resizes the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with the result of + /// calling the closure `f`. The return values from `f` will end up + /// in the `Vec` in the order they have been generated. + /// + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method uses a closure to create new values on every push. If + /// you'd rather [`Clone`] a given value, use [`Vec::resize`]. If you + /// want to use the [`Default`] trait to generate values, you can + /// pass [`Default::default`] as the second argument. 
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.resize_with(5, Default::default); + /// assert_eq!(vec, [1, 2, 3, 0, 0]); + /// + /// let mut vec = vec![]; + /// let mut p = 1; + /// vec.resize_with(4, || { p *= 2; p }); + /// assert_eq!(vec, [2, 4, 8, 16]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_resize_with", since = "1.33.0")] + pub fn resize_with(&mut self, new_len: usize, f: F) + where + F: FnMut() -> T, + { + let len = self.len(); + if new_len > len { + self.extend_with(new_len - len, ExtendFunc(f)); + } else { + self.truncate(new_len); + } + } + + /// Consumes and leaks the `Vec`, returning a mutable reference to the contents, + /// `&'a mut [T]`. Note that the type `T` must outlive the chosen lifetime + /// `'a`. If the type has only static references, or none at all, then this + /// may be chosen to be `'static`. + /// + /// This function is similar to the [`leak`][Box::leak] function on [`Box`] + /// except that there is no way to recover the leaked memory. + /// + /// This function is mainly useful for data that lives for the remainder of + /// the program's life. Dropping the returned reference will cause a memory + /// leak. + /// + /// # Examples + /// + /// Simple usage: + /// + /// ``` + /// let x = vec![1, 2, 3]; + /// let static_ref: &'static mut [usize] = x.leak(); + /// static_ref[0] += 1; + /// assert_eq!(static_ref, &[2, 2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_leak", since = "1.47.0")] + #[inline] + pub fn leak<'a>(self) -> &'a mut [T] + where + A: 'a, + { + Box::leak(self.into_boxed_slice()) + } + + /// Returns the remaining spare capacity of the vector as a slice of + /// `MaybeUninit`. + /// + /// The returned slice can be used to fill the vector with data (e.g. by + /// reading from a file) before marking the data as initialized using the + /// [`set_len`] method. 
+ /// + /// [`set_len`]: Vec::set_len + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_spare_capacity, maybe_uninit_extra)] + /// + /// // Allocate vector big enough for 10 elements. + /// let mut v = Vec::with_capacity(10); + /// + /// // Fill in the first 3 elements. + /// let uninit = v.spare_capacity_mut(); + /// uninit[0].write(0); + /// uninit[1].write(1); + /// uninit[2].write(2); + /// + /// // Mark the first 3 elements of the vector as being initialized. + /// unsafe { + /// v.set_len(3); + /// } + /// + /// assert_eq!(&v, &[0, 1, 2]); + /// ``` + #[unstable(feature = "vec_spare_capacity", issue = "75017")] + #[inline] + pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit] { + // Note: + // This method is not implemented in terms of `split_at_spare_mut`, + // to prevent invalidation of pointers to the buffer. + unsafe { + slice::from_raw_parts_mut( + self.as_mut_ptr().add(self.len) as *mut MaybeUninit, + self.buf.capacity() - self.len, + ) + } + } + + /// Returns vector content as a slice of `T`, along with the remaining spare + /// capacity of the vector as a slice of `MaybeUninit`. + /// + /// The returned spare capacity slice can be used to fill the vector with data + /// (e.g. by reading from a file) before marking the data as initialized using + /// the [`set_len`] method. + /// + /// [`set_len`]: Vec::set_len + /// + /// Note that this is a low-level API, which should be used with care for + /// optimization purposes. If you need to append data to a `Vec` + /// you can use [`push`], [`extend`], [`extend_from_slice`], + /// [`extend_from_within`], [`insert`], [`append`], [`resize`] or + /// [`resize_with`], depending on your exact needs. 
+ /// + /// [`push`]: Vec::push + /// [`extend`]: Vec::extend + /// [`extend_from_slice`]: Vec::extend_from_slice + /// [`extend_from_within`]: Vec::extend_from_within + /// [`insert`]: Vec::insert + /// [`append`]: Vec::append + /// [`resize`]: Vec::resize + /// [`resize_with`]: Vec::resize_with + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_split_at_spare, maybe_uninit_extra)] + /// + /// let mut v = vec![1, 1, 2]; + /// + /// // Reserve additional space big enough for 10 elements. + /// v.reserve(10); + /// + /// let (init, uninit) = v.split_at_spare_mut(); + /// let sum = init.iter().copied().sum::(); + /// + /// // Fill in the next 4 elements. + /// uninit[0].write(sum); + /// uninit[1].write(sum * 2); + /// uninit[2].write(sum * 3); + /// uninit[3].write(sum * 4); + /// + /// // Mark the 4 elements of the vector as being initialized. + /// unsafe { + /// let len = v.len(); + /// v.set_len(len + 4); + /// } + /// + /// assert_eq!(&v, &[1, 1, 2, 4, 8, 12, 16]); + /// ``` + #[unstable(feature = "vec_split_at_spare", issue = "81944")] + #[inline] + pub fn split_at_spare_mut(&mut self) -> (&mut [T], &mut [MaybeUninit]) { + // SAFETY: + // - len is ignored and so never changed + let (init, spare, _) = unsafe { self.split_at_spare_mut_with_len() }; + (init, spare) + } + + /// Safety: changing returned .2 (&mut usize) is considered the same as calling `.set_len(_)`. + /// + /// This method provides unique access to all vec parts at once in `extend_from_within`. 
+ unsafe fn split_at_spare_mut_with_len( + &mut self, + ) -> (&mut [T], &mut [MaybeUninit], &mut usize) { + let Range { start: ptr, end: spare_ptr } = self.as_mut_ptr_range(); + let spare_ptr = spare_ptr.cast::>(); + let spare_len = self.buf.capacity() - self.len; + + // SAFETY: + // - `ptr` is guaranteed to be valid for `len` elements + // - `spare_ptr` points one past the last initialized element, so `spare` doesn't overlap with `initialized` + unsafe { + let initialized = slice::from_raw_parts_mut(ptr, self.len); + let spare = slice::from_raw_parts_mut(spare_ptr, spare_len); + + (initialized, spare, &mut self.len) + } + } +} + +impl Vec { + /// Resizes the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with `value`. + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method requires `T` to implement [`Clone`], + /// in order to be able to clone the passed value. + /// If you need more flexibility (or want to rely on [`Default`] instead of + /// [`Clone`]), use [`Vec::resize_with`]. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["hello"]; + /// vec.resize(3, "world"); + /// assert_eq!(vec, ["hello", "world", "world"]); + /// + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.resize(2, 0); + /// assert_eq!(vec, [1, 2]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_resize", since = "1.5.0")] + pub fn resize(&mut self, new_len: usize, value: T) { + let len = self.len(); + + if new_len > len { + self.extend_with(new_len - len, ExtendElement(value)) + } else { + self.truncate(new_len); + } + } + + /// Tries to resize the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with `value`. 
+ /// If `new_len` is less than `len`, the `Vec` is simply truncated. If the additional capacity cannot be reserved, an error is returned. + /// + /// This method requires `T` to implement [`Clone`], + /// in order to be able to clone the passed value. + /// If you need more flexibility (or want to rely on [`Default`] instead of + /// [`Clone`]), use [`Vec::resize_with`]. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["hello"]; + /// vec.try_resize(3, "world").unwrap(); + /// assert_eq!(vec, ["hello", "world", "world"]); + /// + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.try_resize(2, 0).unwrap(); + /// assert_eq!(vec, [1, 2]); + /// + /// let mut vec = vec![42]; + /// let result = vec.try_resize(usize::MAX, 0); + /// assert!(result.is_err()); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_resize(&mut self, new_len: usize, value: T) -> Result<(), TryReserveError> { + let len = self.len(); + + if new_len > len { + self.try_extend_with(new_len - len, ExtendElement(value)) + } else { + self.truncate(new_len); + Ok(()) + } + } + + /// Clones and appends all elements in a slice to the `Vec`. + /// + /// Iterates over the slice `other`, clones each element, and then appends + /// it to this `Vec`. The `other` slice is traversed in-order. + /// + /// Note that this function is the same as [`extend`] except that it is + /// specialized to work with slices instead. If and when Rust gets + /// specialization this function will likely be deprecated (but still + /// available). + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.extend_from_slice(&[2, 3, 4]); + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// [`extend`]: Vec::extend + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_extend_from_slice", since = "1.6.0")] + pub fn extend_from_slice(&mut self, other: &[T]) { + self.spec_extend(other.iter()) + } + + /// Tries to clone and append all elements in a slice to the `Vec`. 
+ /// + /// Iterates over the slice `other`, clones each element, and then appends + /// it to this `Vec`. The `other` slice is traversed in-order. + /// + /// Note that this function is the same as [`extend`] except that it is + /// specialized to work with slices instead. If and when Rust gets + /// specialization this function will likely be deprecated (but still + /// available). + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.try_extend_from_slice(&[2, 3, 4]).unwrap(); + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// [`extend`]: Vec::extend + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_extend_from_slice(&mut self, other: &[T]) -> Result<(), TryReserveError> { + self.try_spec_extend(other.iter()) + } + + /// Copies elements from `src` range to the end of the vector. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![0, 1, 2, 3, 4]; + /// + /// vec.extend_from_within(2..); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4]); + /// + /// vec.extend_from_within(..2); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4, 0, 1]); + /// + /// vec.extend_from_within(4..8); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4, 0, 1, 4, 2, 3, 4]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_extend_from_within", since = "1.53.0")] + pub fn extend_from_within(&mut self, src: R) + where + R: RangeBounds, + { + let range = slice::range(src, ..self.len()); + self.reserve(range.len()); + + // SAFETY: + // - `slice::range` guarantees that the given range is valid for indexing self; `reserve(range.len())` above guarantees enough spare capacity + unsafe { + self.spec_extend_from_within(range); + } + } +} + +// This code generalizes `extend_with_{element,default}`. 
+trait ExtendWith { + fn next(&mut self) -> T; + fn last(self) -> T; +} + +struct ExtendElement(T); +impl ExtendWith for ExtendElement { + fn next(&mut self) -> T { + self.0.clone() + } + fn last(self) -> T { + self.0 + } +} + +struct ExtendDefault; +impl ExtendWith for ExtendDefault { + fn next(&mut self) -> T { + Default::default() + } + fn last(self) -> T { + Default::default() + } +} + +struct ExtendFunc(F); +impl T> ExtendWith for ExtendFunc { + fn next(&mut self) -> T { + (self.0)() + } + fn last(mut self) -> T { + (self.0)() + } +} + +impl Vec { + #[cfg(not(no_global_oom_handling))] + /// Extend the vector by `n` values, using the given generator. + fn extend_with>(&mut self, n: usize, mut value: E) { + self.reserve(n); + + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + // Use SetLenOnDrop to work around a bug where the compiler + // may not realize that the stores through `ptr` and through self.set_len() + // don't alias. + let mut local_len = SetLenOnDrop::new(&mut self.len); + + // Write all elements except the last one + for _ in 1..n { + ptr::write(ptr, value.next()); + ptr = ptr.offset(1); + // Increment the length in every step in case next() panics + local_len.increment_len(1); + } + + if n > 0 { + // We can write the last element directly without cloning needlessly + ptr::write(ptr, value.last()); + local_len.increment_len(1); + } + + // len set by scope guard + } + } + + /// Try to extend the vector by `n` values, using the given generator; fails if `try_reserve(n)` fails. + fn try_extend_with>(&mut self, n: usize, mut value: E) -> Result<(), TryReserveError> { + self.try_reserve(n)?; + + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + // Use SetLenOnDrop to work around a bug where the compiler + // may not realize that the stores through `ptr` and through self.set_len() + // don't alias. 
+ let mut local_len = SetLenOnDrop::new(&mut self.len); + + // Write all elements except the last one + for _ in 1..n { + ptr::write(ptr, value.next()); + ptr = ptr.offset(1); + // Increment the length in every step in case next() panics + local_len.increment_len(1); + } + + if n > 0 { + // We can write the last element directly without cloning needlessly + ptr::write(ptr, value.last()); + local_len.increment_len(1); + } + + // len set by scope guard + Ok(()) + } + } +} + +impl Vec { + /// Removes consecutive repeated elements in the vector according to the + /// [`PartialEq`] trait implementation. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 2, 3, 2]; + /// + /// vec.dedup(); + /// + /// assert_eq!(vec, [1, 2, 3, 2]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn dedup(&mut self) { + self.dedup_by(|a, b| a == b) + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Internal methods and functions +//////////////////////////////////////////////////////////////////////////////// + +#[doc(hidden)] +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn from_elem(elem: T, n: usize) -> Vec { + ::from_elem(elem, n, Global) +} + +#[doc(hidden)] +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "allocator_api", issue = "32838")] +pub fn from_elem_in(elem: T, n: usize, alloc: A) -> Vec { + ::from_elem(elem, n, alloc) +} + +trait ExtendFromWithinSpec { + /// # Safety + /// + /// - `src` needs to be a valid index + /// - `self.capacity() - self.len()` must be `>= src.len()` + unsafe fn spec_extend_from_within(&mut self, src: Range); +} + +impl ExtendFromWithinSpec for Vec { + default unsafe fn spec_extend_from_within(&mut self, src: Range) { + // SAFETY: + // - len is increased only after initializing elements + let (this, spare, len) = unsafe { 
self.split_at_spare_mut_with_len() }; + + // SAFETY: + // - caller guarantees that src is a valid index + let to_clone = unsafe { this.get_unchecked(src) }; + + iter::zip(to_clone, spare) + .map(|(src, dst)| dst.write(src.clone())) + // Note: + // - Element was just initialized with `MaybeUninit::write`, so it's ok to increase len + // - len is increased after each element to prevent leaks (see issue #82533) + .for_each(|_| *len += 1); + } +} + +impl ExtendFromWithinSpec for Vec { + unsafe fn spec_extend_from_within(&mut self, src: Range) { + let count = src.len(); + { + let (init, spare) = self.split_at_spare_mut(); + + // SAFETY: + // - caller guarantees that `src` is a valid index + let source = unsafe { init.get_unchecked(src) }; + + // SAFETY: + // - Both pointers are created from unique slice references (`&mut [_]`) + // so they are valid and do not overlap. + // - Elements are `Copy`, so it's OK to copy them, without doing + // anything with the original values + // - `count` is equal to the len of `source`, so source is valid for + // `count` reads + // - `.reserve(count)` guarantees that `spare.len() >= count` so spare + // is valid for `count` writes + unsafe { ptr::copy_nonoverlapping(source.as_ptr(), spare.as_mut_ptr() as _, count) }; + } + + // SAFETY: + // - The elements were just initialized by `copy_nonoverlapping` + self.len += count; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Common trait implementations for Vec +//////////////////////////////////////////////////////////////////////////////// + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Deref for Vec { + type Target = [T]; + + fn deref(&self) -> &[T] { + unsafe { slice::from_raw_parts(self.as_ptr(), self.len) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::DerefMut for Vec { + fn deref_mut(&mut self) -> &mut [T] { + unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) } + } +} + 
+#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for Vec { + #[cfg(not(test))] + fn clone(&self) -> Self { + let alloc = self.allocator().clone(); + <[T]>::to_vec_in(&**self, alloc) + } + + // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is + // required for this method definition, is not available. Instead use the + // `slice::to_vec` function which is only available with cfg(test) + // NB see the slice::hack module in slice.rs for more information + #[cfg(test)] + fn clone(&self) -> Self { + let alloc = self.allocator().clone(); + crate::slice::to_vec(&**self, alloc) + } + + fn clone_from(&mut self, other: &Self) { + // Drop the trailing elements of `self` beyond `other.len()`; they would not be overwritten. + self.truncate(other.len()); + + // self.len <= other.len due to the truncate above, so the + // slices here are always in-bounds. + let (init, tail) = other.split_at(self.len()); + + // Overwrite the kept prefix in place, reusing the contained values' allocations/resources, then append the rest. + self.clone_from_slice(init); + self.extend_from_slice(tail); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Vec { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(&**self, state) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_on_unimplemented( + message = "vector indices are of type `usize` or ranges of `usize`", + label = "vector indices are of type `usize` or ranges of `usize`" +)] +impl, A: Allocator> Index for Vec { + type Output = I::Output; + + #[inline] + fn index(&self, index: I) -> &Self::Output { + Index::index(&**self, index) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_on_unimplemented( + message = "vector indices are of type `usize` or ranges of `usize`", + label = "vector indices are of type `usize` or ranges of `usize`" +)] +impl, A: Allocator> IndexMut for Vec { + #[inline] + fn index_mut(&mut self, index: I) -> &mut Self::Output { + IndexMut::index_mut(&mut **self, index) + } +} + 
+#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl FromIterator for Vec { + #[inline] + fn from_iter>(iter: I) -> Vec { + >::from_iter(iter.into_iter()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl IntoIterator for Vec { + type Item = T; + type IntoIter = IntoIter; + + /// Creates a consuming iterator, that is, one that moves each value out of + /// the vector (from start to end). The vector cannot be used after calling + /// this. + /// + /// # Examples + /// + /// ``` + /// let v = vec!["a".to_string(), "b".to_string()]; + /// for s in v.into_iter() { + /// // s has type String, not &String + /// println!("{}", s); + /// } + /// ``` + #[inline] + fn into_iter(self) -> IntoIter { + unsafe { + let mut me = ManuallyDrop::new(self); + let alloc = ptr::read(me.allocator()); + let begin = me.as_mut_ptr(); + let end = if mem::size_of::() == 0 { + arith_offset(begin as *const i8, me.len() as isize) as *const T + } else { + begin.add(me.len()) as *const T + }; + let cap = me.buf.capacity(); + IntoIter { + buf: NonNull::new_unchecked(begin), + phantom: PhantomData, + cap, + alloc, + ptr: begin, + end, + } + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, A: Allocator> IntoIterator for &'a Vec { + type Item = &'a T; + type IntoIter = slice::Iter<'a, T>; + + fn into_iter(self) -> slice::Iter<'a, T> { + self.iter() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, A: Allocator> IntoIterator for &'a mut Vec { + type Item = &'a mut T; + type IntoIter = slice::IterMut<'a, T>; + + fn into_iter(self) -> slice::IterMut<'a, T> { + self.iter_mut() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Extend for Vec { + #[inline] + fn extend>(&mut self, iter: I) { + >::spec_extend(self, iter.into_iter()) + } + + #[inline] + fn extend_one(&mut self, item: T) { + self.push(item); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) 
{ + self.reserve(additional); + } +} + +impl Vec { + // leaf method to which various SpecFrom/SpecExtend implementations delegate when + // they have no further optimizations to apply + #[cfg(not(no_global_oom_handling))] + fn extend_desugared>(&mut self, mut iterator: I) { + // This is the case for a general iterator. + // + // This function should be the moral equivalent of: + // + // for item in iterator { + // self.push(item); + // } + while let Some(element) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.reserve(lower.saturating_add(1)); + } + unsafe { + ptr::write(self.as_mut_ptr().add(len), element); + // NB can't overflow since we would have had to alloc the address space + self.set_len(len + 1); + } + } + } + + // Fallible counterpart of `extend_desugared`: the leaf method to which `TrySpecExtend` implementations delegate when + // they have no further optimizations to apply + fn try_extend_desugared>(&mut self, mut iterator: I) -> Result<(), TryReserveError> { + // This is the case for a general iterator. + // + // This function should be the moral equivalent of the loop below, except that a failed `try_reserve` is returned as `Err`: + // + // for item in iterator { + // self.push(item); + // } + while let Some(element) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.try_reserve(lower.saturating_add(1))?; + } + unsafe { + ptr::write(self.as_mut_ptr().add(len), element); + // NB can't overflow since we would have had to alloc the address space + self.set_len(len + 1); + } + } + + Ok(()) + } + + /// Creates a splicing iterator that replaces the specified range in the vector + /// with the given `replace_with` iterator and yields the removed items. + /// `replace_with` does not need to be the same length as `range`. + /// + /// `range` is removed even if the iterator is not consumed until the end. 
+ /// + /// It is unspecified how many elements are removed from the vector + /// if the `Splice` value is leaked. 
+ /// + /// The input iterator `replace_with` is only consumed when the `Splice` value is dropped. + /// + /// This is optimal if: + /// + /// * The tail (elements in the vector after `range`) is empty, + /// * or `replace_with` yields fewer or equal elements than `range`’s length + /// * or the lower bound of its `size_hint()` is exact. + /// + /// Otherwise, a temporary vector is allocated and the tail is moved twice. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// let new = [7, 8]; + /// let u: Vec<_> = v.splice(..2, new).collect(); + /// assert_eq!(v, &[7, 8, 3]); + /// assert_eq!(u, &[1, 2]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "vec_splice", since = "1.21.0")] + pub fn splice(&mut self, range: R, replace_with: I) -> Splice<'_, I::IntoIter, A> + where + R: RangeBounds, + I: IntoIterator, + { + Splice { drain: self.drain(range), replace_with: replace_with.into_iter() } + } + + /// Creates an iterator which uses a closure to determine if an element should be removed. + /// + /// If the closure returns true, then the element is removed and yielded. + /// If the closure returns false, the element will remain in the vector and will not be yielded + /// by the iterator. + /// + /// Using this method is equivalent to the following code: + /// + /// ``` + /// # let some_predicate = |x: &mut i32| { *x == 2 || *x == 3 || *x == 6 }; + /// # let mut vec = vec![1, 2, 3, 4, 5, 6]; + /// let mut i = 0; + /// while i < vec.len() { + /// if some_predicate(&mut vec[i]) { + /// let val = vec.remove(i); + /// // your code here + /// } else { + /// i += 1; + /// } + /// } + /// + /// # assert_eq!(vec, vec![1, 4, 5]); + /// ``` + /// + /// But `drain_filter` is easier to use. 
`drain_filter` is also more efficient, + /// because it can backshift the elements of the array in bulk. + /// + /// Note that `drain_filter` also lets you mutate every element in the filter closure, + /// regardless of whether you choose to keep or remove it. + /// + /// # Examples + /// + /// Splitting an array into evens and odds, reusing the original allocation: + /// + /// ``` + /// #![feature(drain_filter)] + /// let mut numbers = vec![1, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]; + /// + /// let evens = numbers.drain_filter(|x| *x % 2 == 0).collect::>(); + /// let odds = numbers; + /// + /// assert_eq!(evens, vec![2, 4, 6, 8, 14]); + /// assert_eq!(odds, vec![1, 3, 5, 9, 11, 13, 15]); + /// ``` + #[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] + pub fn drain_filter(&mut self, filter: F) -> DrainFilter<'_, T, F, A> + where + F: FnMut(&mut T) -> bool, + { + let old_len = self.len(); + + // Guard against us getting leaked (leak amplification) + unsafe { + self.set_len(0); + } + + DrainFilter { vec: self, idx: 0, del: 0, old_len, pred: filter, panic_flag: false } + } +} + +/// Extend implementation that copies elements out of references before pushing them onto the Vec. +/// +/// This implementation is specialized for slice iterators, where it uses [`copy_from_slice`] to +/// append the entire slice at once. +/// +/// [`copy_from_slice`]: slice::copy_from_slice +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_ref", since = "1.2.0")] +impl<'a, T: Copy + 'a, A: Allocator + 'a> Extend<&'a T> for Vec { + fn extend>(&mut self, iter: I) { + self.spec_extend(iter.into_iter()) + } + + #[inline] + fn extend_one(&mut self, &item: &'a T) { + self.push(item); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + +/// Implements comparison of vectors, [lexicographically](core::cmp::Ord#lexicographical-comparison). 
+#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for Vec { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Vec {} + +/// Implements ordering of vectors, [lexicographically](core::cmp::Ord#lexicographical-comparison). +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Vec { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T, A: Allocator> Drop for Vec { + fn drop(&mut self) { + unsafe { + // use drop for [T] + // use a raw slice to refer to the elements of the vector as weakest necessary type; + // could avoid questions of validity in certain cases + ptr::drop_in_place(ptr::slice_from_raw_parts_mut(self.as_mut_ptr(), self.len)) + } + // RawVec handles deallocation + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for Vec { + /// Creates an empty `Vec`. + fn default() -> Vec { + Vec::new() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Vec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef> for Vec { + fn as_ref(&self) -> &Vec { + self + } +} + +#[stable(feature = "vec_as_mut", since = "1.5.0")] +impl AsMut> for Vec { + fn as_mut(&mut self) -> &mut Vec { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef<[T]> for Vec { + fn as_ref(&self) -> &[T] { + self + } +} + +#[stable(feature = "vec_as_mut", since = "1.5.0")] +impl AsMut<[T]> for Vec { + fn as_mut(&mut self) -> &mut [T] { + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<&[T]> for Vec { + /// Allocate a `Vec` and fill it by cloning `s`'s items. 
+ /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from(&[1, 2, 3][..]), vec![1, 2, 3]); + /// ``` + #[cfg(not(test))] + fn from(s: &[T]) -> Vec { + s.to_vec() + } + #[cfg(test)] + fn from(s: &[T]) -> Vec { + crate::slice::to_vec(s, Global) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "vec_from_mut", since = "1.19.0")] +impl From<&mut [T]> for Vec { + /// Allocate a `Vec` and fill it by cloning `s`'s items. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from(&mut [1, 2, 3][..]), vec![1, 2, 3]); + /// ``` + #[cfg(not(test))] + fn from(s: &mut [T]) -> Vec { + s.to_vec() + } + #[cfg(test)] + fn from(s: &mut [T]) -> Vec { + crate::slice::to_vec(s, Global) + } +} + +#[stable(feature = "vec_from_array", since = "1.44.0")] +impl From<[T; N]> for Vec { + #[cfg(not(test))] + fn from(s: [T; N]) -> Vec { + <[T]>::into_vec(box s) + } + /// Allocate a `Vec` and move `s`'s items into it. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from([1, 2, 3]), vec![1, 2, 3]); + /// ``` + #[cfg(test)] + fn from(s: [T; N]) -> Vec { + crate::slice::into_vec(box s) + } +} + +#[stable(feature = "vec_from_cow_slice", since = "1.14.0")] +impl<'a, T> From> for Vec +where + [T]: ToOwned>, +{ + /// Convert a clone-on-write slice into a vector. + /// + /// If `s` already owns a `Vec`, it will be returned directly. + /// If `s` is borrowing a slice, a new `Vec` will be allocated and + /// filled by cloning `s`'s items into it. 
+ /// + /// # Examples + /// + /// ``` + /// # use std::borrow::Cow; + /// let o: Cow<[i32]> = Cow::Owned(vec![1, 2, 3]); + /// let b: Cow<[i32]> = Cow::Borrowed(&[1, 2, 3]); + /// assert_eq!(Vec::from(o), Vec::from(b)); + /// ``` + fn from(s: Cow<'a, [T]>) -> Vec { + s.into_owned() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(test))] +#[stable(feature = "vec_from_box", since = "1.18.0")] +impl From> for Vec { + /// Convert a boxed slice into a vector by transferring ownership of + /// the existing heap allocation. + /// + /// # Examples + /// + /// ``` + /// let b: Box<[i32]> = vec![1, 2, 3].into_boxed_slice(); + /// assert_eq!(Vec::from(b), vec![1, 2, 3]); + /// ``` + fn from(s: Box<[T], A>) -> Self { + s.into_vec() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(no_global_oom_handling))] +#[cfg(not(test))] +#[stable(feature = "box_from_vec", since = "1.20.0")] +impl From> for Box<[T], A> { + /// Convert a vector into a boxed slice. + /// + /// If `v` has excess capacity, its items will be moved into a + /// newly-allocated buffer with exactly the right capacity. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Box::from(vec![1, 2, 3]), vec![1, 2, 3].into_boxed_slice()); + /// ``` + fn from(v: Vec) -> Self { + v.into_boxed_slice() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<&str> for Vec { + /// Allocate a `Vec` and fill it with a UTF-8 string. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from("123"), vec![b'1', b'2', b'3']); + /// ``` + fn from(s: &str) -> Vec { + From::from(s.as_bytes()) + } +} + +#[stable(feature = "array_try_from_vec", since = "1.48.0")] +impl TryFrom> for [T; N] { + type Error = Vec; + + /// Gets the entire contents of the `Vec` as an array, + /// if its size exactly matches that of the requested array. 
+ /// + /// # Examples + /// + /// ``` + /// use std::convert::TryInto; + /// assert_eq!(vec![1, 2, 3].try_into(), Ok([1, 2, 3])); + /// assert_eq!(>::new().try_into(), Ok([])); + /// ``` + /// + /// If the length doesn't match, the input comes back in `Err`: + /// ``` + /// use std::convert::TryInto; + /// let r: Result<[i32; 4], _> = (0..10).collect::>().try_into(); + /// assert_eq!(r, Err(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9])); + /// ``` + /// + /// If you're fine with just getting a prefix of the `Vec`, + /// you can call [`.truncate(N)`](Vec::truncate) first. + /// ``` + /// use std::convert::TryInto; + /// let mut v = String::from("hello world").into_bytes(); + /// v.sort(); + /// v.truncate(2); + /// let [a, b]: [_; 2] = v.try_into().unwrap(); + /// assert_eq!(a, b' '); + /// assert_eq!(b, b'd'); + /// ``` + fn try_from(mut vec: Vec) -> Result<[T; N], Vec> { + if vec.len() != N { + return Err(vec); + } + + // SAFETY: `.set_len(0)` is always sound. + unsafe { vec.set_len(0) }; + + // SAFETY: A `Vec`'s pointer is always aligned properly, and + // the alignment the array needs is the same as the items. + // We checked earlier that we have sufficient items. + // The items will not double-drop as the `set_len` + // tells the `Vec` not to also drop them. + let array = unsafe { ptr::read(vec.as_ptr() as *const [T; N]) }; + Ok(array) + } +} diff --git a/rust/alloc/vec/partial_eq.rs b/rust/alloc/vec/partial_eq.rs new file mode 100644 index 0000000000000..273e99bed4888 --- /dev/null +++ b/rust/alloc/vec/partial_eq.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::Allocator; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::Cow; + +use super::Vec; + +macro_rules! __impl_slice_eq1 { + ([$($vars:tt)*] $lhs:ty, $rhs:ty $(where $ty:ty: $bound:ident)?, #[$stability:meta]) => { + #[$stability] + impl PartialEq<$rhs> for $lhs + where + T: PartialEq, + $($ty: $bound)? + { + #[inline] + fn eq(&self, other: &$rhs) -> bool { self[..] 
== other[..] } + #[inline] + fn ne(&self, other: &$rhs) -> bool { self[..] != other[..] } + } + } +} + +__impl_slice_eq1! { [A: Allocator] Vec, Vec, #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] Vec, &[U], #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] Vec, &mut [U], #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] &[T], Vec, #[stable(feature = "partialeq_vec_for_ref_slice", since = "1.46.0")] } +__impl_slice_eq1! { [A: Allocator] &mut [T], Vec, #[stable(feature = "partialeq_vec_for_ref_slice", since = "1.46.0")] } +__impl_slice_eq1! { [A: Allocator] Vec, [U], #[stable(feature = "partialeq_vec_for_slice", since = "1.48.0")] } +__impl_slice_eq1! { [A: Allocator] [T], Vec, #[stable(feature = "partialeq_vec_for_slice", since = "1.48.0")] } +#[cfg(not(no_global_oom_handling))] +__impl_slice_eq1! { [A: Allocator] Cow<'_, [T]>, Vec where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] } +#[cfg(not(no_global_oom_handling))] +__impl_slice_eq1! { [] Cow<'_, [T]>, &[U] where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] } +#[cfg(not(no_global_oom_handling))] +__impl_slice_eq1! { [] Cow<'_, [T]>, &mut [U] where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator, const N: usize] Vec, [U; N], #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator, const N: usize] Vec, &[U; N], #[stable(feature = "rust1", since = "1.0.0")] } + +// NOTE: some less important impls are omitted to reduce code bloat +// FIXME(Centril): Reconsider this? +//__impl_slice_eq1! { [const N: usize] Vec, &mut [B; N], } +//__impl_slice_eq1! { [const N: usize] [A; N], Vec, } +//__impl_slice_eq1! { [const N: usize] &[A; N], Vec, } +//__impl_slice_eq1! { [const N: usize] &mut [A; N], Vec, } +//__impl_slice_eq1! { [const N: usize] Cow<'a, [A]>, [B; N], } +//__impl_slice_eq1! 
{ [const N: usize] Cow<'a, [A]>, &[B; N], } +//__impl_slice_eq1! { [const N: usize] Cow<'a, [A]>, &mut [B; N], } diff --git a/rust/alloc/vec/set_len_on_drop.rs b/rust/alloc/vec/set_len_on_drop.rs new file mode 100644 index 0000000000000..448bf5076a0bf --- /dev/null +++ b/rust/alloc/vec/set_len_on_drop.rs @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Set the length of the vec when the `SetLenOnDrop` value goes out of scope. +// +// The idea is: The length field in SetLenOnDrop is a local variable +// that the optimizer will see does not alias with any stores through the Vec's data +// pointer. This is a workaround for alias analysis issue #32155 +pub(super) struct SetLenOnDrop<'a> { + len: &'a mut usize, + local_len: usize, +} + +impl<'a> SetLenOnDrop<'a> { + #[inline] + pub(super) fn new(len: &'a mut usize) -> Self { + SetLenOnDrop { local_len: *len, len } + } + + #[inline] + pub(super) fn increment_len(&mut self, increment: usize) { + self.local_len += increment; + } +} + +impl Drop for SetLenOnDrop<'_> { + #[inline] + fn drop(&mut self) { + *self.len = self.local_len; + } +} diff --git a/rust/alloc/vec/spec_extend.rs b/rust/alloc/vec/spec_extend.rs new file mode 100644 index 0000000000000..5a64c7ce2393e --- /dev/null +++ b/rust/alloc/vec/spec_extend.rs @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::Allocator; +use crate::vec::TryReserveError; +use core::iter::TrustedLen; +use core::ptr::{self}; +use core::slice::{self}; + +use super::{IntoIter, SetLenOnDrop, Vec}; + +// Specialization trait used for Vec::extend +#[cfg(not(no_global_oom_handling))] +pub(super) trait SpecExtend { + fn spec_extend(&mut self, iter: I); +} + +// Specialization trait used for Vec::try_extend +pub(super) trait TrySpecExtend { + fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError>; +} + +#[cfg(not(no_global_oom_handling))] +impl SpecExtend for Vec +where + I: Iterator, +{ + default fn spec_extend(&mut self, 
iter: I) { + self.extend_desugared(iter) + } +} + +impl TrySpecExtend for Vec +where + I: Iterator, +{ + default fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError> { + self.try_extend_desugared(iter) + } +} + +#[cfg(not(no_global_oom_handling))] +impl SpecExtend for Vec +where + I: TrustedLen, +{ + default fn spec_extend(&mut self, iterator: I) { + // This is the case for a TrustedLen iterator. + let (low, high) = iterator.size_hint(); + if let Some(additional) = high { + debug_assert_eq!( + low, + additional, + "TrustedLen iterator's size hint is not exact: {:?}", + (low, high) + ); + self.reserve(additional); + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + let mut local_len = SetLenOnDrop::new(&mut self.len); + iterator.for_each(move |element| { + ptr::write(ptr, element); + ptr = ptr.offset(1); + // NB can't overflow since we would have had to alloc the address space + local_len.increment_len(1); + }); + } + } else { + // Per TrustedLen contract a `None` upper bound means that the iterator length + // truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway. + // Since the other branch already panics eagerly (via `reserve()`) we do the same here. + // This avoids additional codegen for a fallback code path which would eventually + // panic anyway. + panic!("capacity overflow"); + } + } +} + +impl TrySpecExtend for Vec +where + I: TrustedLen, +{ + default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> { + // This is the case for a TrustedLen iterator. 
+ let (low, high) = iterator.size_hint(); + if let Some(additional) = high { + debug_assert_eq!( + low, + additional, + "TrustedLen iterator's size hint is not exact: {:?}", + (low, high) + ); + self.try_reserve(additional)?; + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + let mut local_len = SetLenOnDrop::new(&mut self.len); + iterator.for_each(move |element| { + ptr::write(ptr, element); + ptr = ptr.offset(1); + // NB can't overflow since we would have had to alloc the address space + local_len.increment_len(1); + }); + } + Ok(()) + } else { + Err(TryReserveError::CapacityOverflow) + } + } +} + +#[cfg(not(no_global_oom_handling))] +impl SpecExtend> for Vec { + fn spec_extend(&mut self, mut iterator: IntoIter) { + unsafe { + self.append_elements(iterator.as_slice() as _); + } + iterator.ptr = iterator.end; + } +} + +impl TrySpecExtend> for Vec { + fn try_spec_extend(&mut self, mut iterator: IntoIter) -> Result<(), TryReserveError> { + unsafe { + self.try_append_elements(iterator.as_slice() as _)?; + } + iterator.ptr = iterator.end; + Ok(()) + } +} + +#[cfg(not(no_global_oom_handling))] +impl<'a, T: 'a, I, A: Allocator + 'a> SpecExtend<&'a T, I> for Vec +where + I: Iterator, + T: Clone, +{ + default fn spec_extend(&mut self, iterator: I) { + self.spec_extend(iterator.cloned()) + } +} + +impl<'a, T: 'a, I, A: Allocator + 'a> TrySpecExtend<&'a T, I> for Vec +where + I: Iterator, + T: Clone, +{ + default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> { + self.try_spec_extend(iterator.cloned()) + } +} + +#[cfg(not(no_global_oom_handling))] +impl<'a, T: 'a, A: Allocator + 'a> SpecExtend<&'a T, slice::Iter<'a, T>> for Vec +where + T: Copy, +{ + fn spec_extend(&mut self, iterator: slice::Iter<'a, T>) { + let slice = iterator.as_slice(); + unsafe { self.append_elements(slice) }; + } +} + +impl<'a, T: 'a, A: Allocator + 'a> TrySpecExtend<&'a T, slice::Iter<'a, T>> for Vec +where + T: Copy, +{ + fn try_spec_extend(&mut self, 
iterator: slice::Iter<'a, T>) -> Result<(), TryReserveError> { + let slice = iterator.as_slice(); + unsafe { self.try_append_elements(slice) } + } +} From 8444e3570a0fc1713a164bcd54df3f0db3bb344a Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:30:16 +0200 Subject: [PATCH 066/851] rust: add `build_error` crate The `build_error` crate provides the `build_error` function which is then used to provide the `build_error!` and the `build_assert!` macros. `build_assert!` is intended to be used when `static_assert!` cannot be used, e.g. when the condition refers to generic parameters or parameters of an inline function. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/build_error.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 rust/build_error.rs diff --git a/rust/build_error.rs b/rust/build_error.rs new file mode 100644 index 0000000000000..d47fa8393cbc8 --- /dev/null +++ b/rust/build_error.rs @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Build-time error. +//! +//! 
This crate provides a function `build_error`, which will panic at +//! compile time if executed in const context, and will cause a build error +//! if not executed at compile time and the optimizer does not optimise away the +//! call. +//! +//! It is used by `build_assert!` in the kernel crate, allowing checking of +//! conditions that could be checked statically, but could not be enforced in +//! Rust yet (e.g. perform some checks in const functions, but those +//! functions could still be called at runtime). + +#![no_std] +#![feature(const_panic, core_panic)] + +/// Panics if executed in const context, or triggers a build error if not. +#[inline(never)] +#[cold] +#[no_mangle] +#[track_caller] +pub const fn build_error(msg: &'static str) -> ! { + // Could also be `panic!(msg)` to avoid using unstable feature `core_panic`, + // but it is not allowed in Rust 2021, while `panic!("{}", msg)` could not + // yet be used in const context. + core::panicking::panic(msg); +} + +#[cfg(CONFIG_RUST_BUILD_ASSERT_WARN)] +#[link_section = ".gnu.warning.build_error"] +#[used] +static BUILD_ERROR_WARNING: [u8; 45] = *b"call to build_error present after compilation"; From dc8500d5b795aec876566d1cfe08ffac6f8900ab Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 16:56:17 +0200 Subject: [PATCH 067/851] rust: add `macros` crate This crate contains all the procedural macros ("proc macros") shared by all the kernel. Procedural macros allow creating syntax extensions. They run at compile-time and can consume as well as produce Rust syntax. For instance, the `module!` macro that is used by Rust modules is implemented here. It makes it easy to declare the equivalent information to the `MODULE_*` macros in C modules, e.g.: module! 
{ type: RustMinimal, name: b"rust_minimal", author: b"Rust for Linux Contributors", description: b"Rust minimal sample", license: b"GPL v2", } Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/macros/lib.rs | 127 +++++++ rust/macros/module.rs | 754 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 881 insertions(+) create mode 100644 rust/macros/lib.rs create mode 100644 rust/macros/module.rs diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs new file mode 100644 index 0000000000000..cb7a4f12f3b4d --- /dev/null +++ b/rust/macros/lib.rs @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Crate for all kernel procedural macros. + +mod module; + +use proc_macro::TokenStream; + +/// Declares a kernel module. +/// +/// The `type` argument should be a type which implements the [`KernelModule`] +/// trait. Also accepts various forms of kernel metadata. 
+/// +/// C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h) +/// +/// [`KernelModule`]: ../kernel/trait.KernelModule.html +/// +/// # Examples +/// +/// ```ignore +/// use kernel::prelude::*; +/// +/// module!{ +/// type: MyKernelModule, +/// name: b"my_kernel_module", +/// author: b"Rust for Linux Contributors", +/// description: b"My very own kernel module!", +/// license: b"GPL v2", +/// params: { +/// my_i32: i32 { +/// default: 42, +/// permissions: 0o000, +/// description: b"Example of i32", +/// }, +/// writeable_i32: i32 { +/// default: 42, +/// permissions: 0o644, +/// description: b"Example of i32", +/// }, +/// }, +/// } +/// +/// struct MyKernelModule; +/// +/// impl KernelModule for MyKernelModule { +/// fn init() -> Result { +/// // If the parameter is writeable, then the kparam lock must be +/// // taken to read the parameter: +/// { +/// let lock = THIS_MODULE.kernel_param_lock(); +/// pr_info!("i32 param is: {}\n", writeable_i32.read(&lock)); +/// } +/// // If the parameter is read only, it can be read without locking +/// // the kernel parameters: +/// pr_info!("i32 param is: {}\n", my_i32.read()); +/// Ok(MyKernelModule) +/// } +/// } +/// ``` +/// +/// # Supported argument types +/// - `type`: type which implements the [`KernelModule`] trait (required). +/// - `name`: byte array of the name of the kernel module (required). +/// - `author`: byte array of the author of the kernel module. +/// - `description`: byte array of the description of the kernel module. +/// - `license`: byte array of the license of the kernel module (required). +/// - `alias`: byte array of alias name of the kernel module. +/// - `alias_rtnl_link`: byte array of the `rtnl_link_alias` of the kernel module (mutually exclusive with `alias`). +/// - `params`: parameters for the kernel module, as described below. +/// +/// # Supported parameter types +/// +/// - `bool`: Corresponds to C `bool` param type. +/// - `i8`: No equivalent C param type. 
+/// - `u8`: Corresponds to C `char` param type. +/// - `i16`: Corresponds to C `short` param type. +/// - `u16`: Corresponds to C `ushort` param type. +/// - `i32`: Corresponds to C `int` param type. +/// - `u32`: Corresponds to C `uint` param type. +/// - `i64`: No equivalent C param type. +/// - `u64`: Corresponds to C `ullong` param type. +/// - `isize`: No equivalent C param type. +/// - `usize`: No equivalent C param type. +/// - `str`: Corresponds to C `charp` param type. Reading returns a byte slice. +/// - `ArrayParam`: Corresponds to C parameters created using `module_param_array`. An array +/// of `T`'s of length at **most** `N`. +/// +/// `invbool` is unsupported: it was only ever used in a few modules. +/// Consider using a `bool` and inverting the logic instead. +#[proc_macro] +pub fn module(ts: TokenStream) -> TokenStream { + module::module(ts) +} + +/// Declares a kernel module that exposes a single misc device. +/// +/// The `type` argument should be a type which implements the [`FileOpener`] trait. Also accepts +/// various forms of kernel metadata. +/// +/// C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h) +/// +/// [`FileOpener`]: ../kernel/file_operations/trait.FileOpener.html +/// +/// # Examples +/// +/// ```ignore +/// use kernel::prelude::*; +/// +/// module_misc_device! 
{ +/// type: MyFile, +/// name: b"my_miscdev_kernel_module", +/// author: b"Rust for Linux Contributors", +/// description: b"My very own misc device kernel module!", +/// license: b"GPL v2", +/// } +/// +/// #[derive(Default)] +/// struct MyFile; +/// +/// impl kernel::file_operations::FileOperations for MyFile { +/// kernel::declare_file_operations!(); +/// } +/// ``` +#[proc_macro] +pub fn module_misc_device(ts: TokenStream) -> TokenStream { + module::module_misc_device(ts) +} diff --git a/rust/macros/module.rs b/rust/macros/module.rs new file mode 100644 index 0000000000000..1389c53aa228f --- /dev/null +++ b/rust/macros/module.rs @@ -0,0 +1,754 @@ +// SPDX-License-Identifier: GPL-2.0 + +use proc_macro::{token_stream, Delimiter, Group, Literal, TokenStream, TokenTree}; + +fn try_ident(it: &mut token_stream::IntoIter) -> Option<String> { + if let Some(TokenTree::Ident(ident)) = it.next() { + Some(ident.to_string()) + } else { + None + } +} + +fn try_literal(it: &mut token_stream::IntoIter) -> Option<String> { + if let Some(TokenTree::Literal(literal)) = it.next() { + Some(literal.to_string()) + } else { + None + } +} + +fn try_byte_string(it: &mut token_stream::IntoIter) -> Option<String> { + try_literal(it).and_then(|byte_string| { + if byte_string.starts_with("b\"") && byte_string.ends_with('\"') { + Some(byte_string[2..byte_string.len() - 1].to_string()) + } else { + None + } + }) +} + +fn expect_ident(it: &mut token_stream::IntoIter) -> String { + try_ident(it).expect("Expected Ident") +} + +fn expect_punct(it: &mut token_stream::IntoIter) -> char { + if let TokenTree::Punct(punct) = it.next().expect("Reached end of token stream for Punct") { + punct.as_char() + } else { + panic!("Expected Punct"); + } +} + +fn expect_literal(it: &mut token_stream::IntoIter) -> String { + try_literal(it).expect("Expected Literal") +} + +fn expect_group(it: &mut token_stream::IntoIter) -> Group { + if let TokenTree::Group(group) = it.next().expect("Reached end of token stream for Group") { + group + 
} else { + panic!("Expected Group"); + } +} + +fn expect_byte_string(it: &mut token_stream::IntoIter) -> String { + try_byte_string(it).expect("Expected byte string") +} + +#[derive(Clone, PartialEq)] +enum ParamType { + Ident(String), + Array { vals: String, max_length: usize }, +} + +fn expect_array_fields(it: &mut token_stream::IntoIter) -> ParamType { + assert_eq!(expect_punct(it), '<'); + let vals = expect_ident(it); + assert_eq!(expect_punct(it), ','); + let max_length_str = expect_literal(it); + let max_length = max_length_str + .parse::<usize>() + .expect("Expected usize length"); + assert_eq!(expect_punct(it), '>'); + ParamType::Array { vals, max_length } +} + +fn expect_type(it: &mut token_stream::IntoIter) -> ParamType { + if let TokenTree::Ident(ident) = it + .next() + .expect("Reached end of token stream for param type") + { + match ident.to_string().as_ref() { + "ArrayParam" => expect_array_fields(it), + _ => ParamType::Ident(ident.to_string()), + } + } else { + panic!("Expected Param Type") + } +} + +fn expect_end(it: &mut token_stream::IntoIter) { + if it.next().is_some() { + panic!("Expected end"); + } +} + +fn get_literal(it: &mut token_stream::IntoIter, expected_name: &str) -> String { + assert_eq!(expect_ident(it), expected_name); + assert_eq!(expect_punct(it), ':'); + let literal = expect_literal(it); + assert_eq!(expect_punct(it), ','); + literal +} + +fn get_byte_string(it: &mut token_stream::IntoIter, expected_name: &str) -> String { + assert_eq!(expect_ident(it), expected_name); + assert_eq!(expect_punct(it), ':'); + let byte_string = expect_byte_string(it); + assert_eq!(expect_punct(it), ','); + byte_string +} + +struct ModInfoBuilder<'a> { + module: &'a str, + counter: usize, + buffer: String, +} + +impl<'a> ModInfoBuilder<'a> { + fn new(module: &'a str) -> Self { + ModInfoBuilder { + module, + counter: 0, + buffer: String::new(), + } + } + + fn emit_base(&mut self, field: &str, content: &str, builtin: bool) { + use std::fmt::Write; + + let 
string = if builtin { + // Built-in modules prefix their modinfo strings by `module.`. + format!( + "{module}.{field}={content}\0", + module = self.module, + field = field, + content = content + ) + } else { + // Loadable modules' modinfo strings go as-is. + format!("{field}={content}\0", field = field, content = content) + }; + + write!( + &mut self.buffer, + " + {cfg} + #[doc(hidden)] + #[link_section = \".modinfo\"] + #[used] + pub static __{module}_{counter}: [u8; {length}] = *{string}; + ", + cfg = if builtin { + "#[cfg(not(MODULE))]" + } else { + "#[cfg(MODULE)]" + }, + module = self.module, + counter = self.counter, + length = string.len(), + string = Literal::byte_string(string.as_bytes()), + ) + .unwrap(); + + self.counter += 1; + } + + fn emit_only_builtin(&mut self, field: &str, content: &str) { + self.emit_base(field, content, true) + } + + fn emit_only_loadable(&mut self, field: &str, content: &str) { + self.emit_base(field, content, false) + } + + fn emit(&mut self, field: &str, content: &str) { + self.emit_only_builtin(field, content); + self.emit_only_loadable(field, content); + } + + fn emit_param(&mut self, field: &str, param: &str, content: &str) { + let content = format!("{param}:{content}", param = param, content = content); + self.emit(field, &content); + } +} + +fn permissions_are_readonly(perms: &str) -> bool { + let (radix, digits) = if let Some(n) = perms.strip_prefix("0x") { + (16, n) + } else if let Some(n) = perms.strip_prefix("0o") { + (8, n) + } else if let Some(n) = perms.strip_prefix("0b") { + (2, n) + } else { + (10, perms) + }; + match u32::from_str_radix(digits, radix) { + Ok(perms) => perms & 0o222 == 0, + Err(_) => false, + } +} + +fn param_ops_path(param_type: &str) -> &'static str { + match param_type { + "bool" => "kernel::module_param::PARAM_OPS_BOOL", + "i8" => "kernel::module_param::PARAM_OPS_I8", + "u8" => "kernel::module_param::PARAM_OPS_U8", + "i16" => "kernel::module_param::PARAM_OPS_I16", + "u16" => 
"kernel::module_param::PARAM_OPS_U16", + "i32" => "kernel::module_param::PARAM_OPS_I32", + "u32" => "kernel::module_param::PARAM_OPS_U32", + "i64" => "kernel::module_param::PARAM_OPS_I64", + "u64" => "kernel::module_param::PARAM_OPS_U64", + "isize" => "kernel::module_param::PARAM_OPS_ISIZE", + "usize" => "kernel::module_param::PARAM_OPS_USIZE", + "str" => "kernel::module_param::PARAM_OPS_STR", + t => panic!("Unrecognized type {}", t), + } +} + +fn try_simple_param_val( + param_type: &str, +) -> Box Option> { + match param_type { + "bool" => Box::new(|param_it| try_ident(param_it)), + "str" => Box::new(|param_it| { + try_byte_string(param_it) + .map(|s| format!("kernel::module_param::StringParam::Ref(b\"{}\")", s)) + }), + _ => Box::new(|param_it| try_literal(param_it)), + } +} + +fn get_default(param_type: &ParamType, param_it: &mut token_stream::IntoIter) -> String { + let try_param_val = match param_type { + ParamType::Ident(ref param_type) + | ParamType::Array { + vals: ref param_type, + max_length: _, + } => try_simple_param_val(param_type), + }; + assert_eq!(expect_ident(param_it), "default"); + assert_eq!(expect_punct(param_it), ':'); + let default = match param_type { + ParamType::Ident(_) => try_param_val(param_it).expect("Expected default param value"), + ParamType::Array { + vals: _, + max_length: _, + } => { + let group = expect_group(param_it); + assert_eq!(group.delimiter(), Delimiter::Bracket); + let mut default_vals = Vec::new(); + let mut it = group.stream().into_iter(); + + while let Some(default_val) = try_param_val(&mut it) { + default_vals.push(default_val); + match it.next() { + Some(TokenTree::Punct(punct)) => assert_eq!(punct.as_char(), ','), + None => break, + _ => panic!("Expected ',' or end of array default values"), + } + } + + let mut default_array = "kernel::module_param::ArrayParam::create(&[".to_string(); + default_array.push_str( + &default_vals + .iter() + .map(|val| val.to_string()) + .collect::>() + .join(","), + ); + 
default_array.push_str("])"); + default_array + } + }; + assert_eq!(expect_punct(param_it), ','); + default +} + +fn generated_array_ops_name(vals: &str, max_length: usize) -> String { + format!( + "__generated_array_ops_{vals}_{max_length}", + vals = vals, + max_length = max_length + ) +} + +#[derive(Debug, Default)] +struct ModuleInfo { + type_: String, + license: String, + name: String, + author: Option<String>, + description: Option<String>, + alias: Option<String>, + params: Option<Group>, +} + +impl ModuleInfo { + fn parse(it: &mut token_stream::IntoIter) -> Self { + let mut info = ModuleInfo::default(); + + const EXPECTED_KEYS: &[&str] = &[ + "type", + "name", + "author", + "description", + "license", + "alias", + "alias_rtnl_link", + "params", + ]; + const REQUIRED_KEYS: &[&str] = &["type", "name", "license"]; + let mut seen_keys = Vec::new(); + + loop { + let key = match it.next() { + Some(TokenTree::Ident(ident)) => ident.to_string(), + Some(_) => panic!("Expected Ident or end"), + None => break, + }; + + if seen_keys.contains(&key) { + panic!( + "Duplicated key \"{}\". Keys can only be specified once.", + key + ); + } + + assert_eq!(expect_punct(it), ':'); + + match key.as_str() { + "type" => info.type_ = expect_ident(it), + "name" => info.name = expect_byte_string(it), + "author" => info.author = Some(expect_byte_string(it)), + "description" => info.description = Some(expect_byte_string(it)), + "license" => info.license = expect_byte_string(it), + "alias" => info.alias = Some(expect_byte_string(it)), + "alias_rtnl_link" => { + info.alias = Some(format!("rtnl-link-{}", expect_byte_string(it))) + } + "params" => info.params = Some(expect_group(it)), + _ => panic!( + "Unknown key \"{}\". 
Valid keys are: {:?}.", + key, EXPECTED_KEYS + ), + } + + assert_eq!(expect_punct(it), ','); + + seen_keys.push(key); + } + + expect_end(it); + + for key in REQUIRED_KEYS { + if !seen_keys.iter().any(|e| e == key) { + panic!("Missing required key \"{}\".", key); + } + } + + let mut ordered_keys: Vec<&str> = Vec::new(); + for key in EXPECTED_KEYS { + if seen_keys.iter().any(|e| e == key) { + ordered_keys.push(key); + } + } + + if seen_keys != ordered_keys { + panic!( + "Keys are not ordered as expected. Order them like: {:?}.", + ordered_keys + ); + } + + info + } +} + +pub fn module(ts: TokenStream) -> TokenStream { + let mut it = ts.into_iter(); + + let info = ModuleInfo::parse(&mut it); + + let name = info.name.clone(); + + let mut modinfo = ModInfoBuilder::new(&name); + if let Some(author) = info.author { + modinfo.emit("author", &author); + } + if let Some(description) = info.description { + modinfo.emit("description", &description); + } + modinfo.emit("license", &info.license); + if let Some(alias) = info.alias { + modinfo.emit("alias", &alias); + } + + // Built-in modules also export the `file` modinfo string + let file = + std::env::var("RUST_MODFILE").expect("Unable to fetch RUST_MODFILE environmental variable"); + modinfo.emit_only_builtin("file", &file); + + let mut array_types_to_generate = Vec::new(); + if let Some(params) = info.params { + assert_eq!(params.delimiter(), Delimiter::Brace); + + let mut it = params.stream().into_iter(); + + loop { + let param_name = match it.next() { + Some(TokenTree::Ident(ident)) => ident.to_string(), + Some(_) => panic!("Expected Ident or end"), + None => break, + }; + + assert_eq!(expect_punct(&mut it), ':'); + let param_type = expect_type(&mut it); + let group = expect_group(&mut it); + assert_eq!(expect_punct(&mut it), ','); + + assert_eq!(group.delimiter(), Delimiter::Brace); + + let mut param_it = group.stream().into_iter(); + let param_default = get_default(&param_type, &mut param_it); + let param_permissions = 
get_literal(&mut param_it, "permissions"); + let param_description = get_byte_string(&mut param_it, "description"); + expect_end(&mut param_it); + + // TODO: more primitive types + // TODO: other kinds: unsafes, etc. + let (param_kernel_type, ops): (String, _) = match param_type { + ParamType::Ident(ref param_type) => ( + param_type.to_string(), + param_ops_path(param_type).to_string(), + ), + ParamType::Array { + ref vals, + max_length, + } => { + array_types_to_generate.push((vals.clone(), max_length)); + ( + format!("__rust_array_param_{}_{}", vals, max_length), + generated_array_ops_name(vals, max_length), + ) + } + }; + + modinfo.emit_param("parmtype", &param_name, &param_kernel_type); + modinfo.emit_param("parm", &param_name, &param_description); + let param_type_internal = match param_type { + ParamType::Ident(ref param_type) => match param_type.as_ref() { + "str" => "kernel::module_param::StringParam".to_string(), + other => other.to_string(), + }, + ParamType::Array { + ref vals, + max_length, + } => format!( + "kernel::module_param::ArrayParam<{vals}, {max_length}>", + vals = vals, + max_length = max_length + ), + }; + let read_func = if permissions_are_readonly(&param_permissions) { + format!( + " + fn read(&self) -> &<{param_type_internal} as kernel::module_param::ModuleParam>::Value {{ + // SAFETY: Parameters do not need to be locked because they are read only or sysfs is not enabled. + unsafe {{ <{param_type_internal} as kernel::module_param::ModuleParam>::value(&__{name}_{param_name}_value) }} + }} + ", + name = name, + param_name = param_name, + param_type_internal = param_type_internal, + ) + } else { + format!( + " + fn read<'lck>(&self, lock: &'lck kernel::KParamGuard) -> &'lck <{param_type_internal} as kernel::module_param::ModuleParam>::Value {{ + // SAFETY: Parameters are locked by `KParamGuard`. 
+ unsafe {{ <{param_type_internal} as kernel::module_param::ModuleParam>::value(&__{name}_{param_name}_value) }} + }} + ", + name = name, + param_name = param_name, + param_type_internal = param_type_internal, + ) + }; + let kparam = format!( + " + kernel::bindings::kernel_param__bindgen_ty_1 {{ + arg: unsafe {{ &__{name}_{param_name}_value }} as *const _ as *mut kernel::c_types::c_void, + }}, + ", + name = name, + param_name = param_name, + ); + modinfo.buffer.push_str( + &format!( + " + static mut __{name}_{param_name}_value: {param_type_internal} = {param_default}; + + struct __{name}_{param_name}; + + impl __{name}_{param_name} {{ {read_func} }} + + const {param_name}: __{name}_{param_name} = __{name}_{param_name}; + + // Note: the C macro that generates the static structs for the `__param` section + // asks for them to be `aligned(sizeof(void *))`. However, that was put in place + // in 2003 in commit 38d5b085d2 (\"[PATCH] Fix over-alignment problem on x86-64\") + // to undo GCC over-alignment of static structs of >32 bytes. It seems that is + // not the case anymore, so we simplify to a transparent representation here + // in the expectation that it is not needed anymore. 
+ // TODO: revisit this to confirm the above comment and remove it if it happened + #[repr(transparent)] + struct __{name}_{param_name}_RacyKernelParam(kernel::bindings::kernel_param); + + unsafe impl Sync for __{name}_{param_name}_RacyKernelParam {{ + }} + + #[cfg(not(MODULE))] + const __{name}_{param_name}_name: *const kernel::c_types::c_char = b\"{name}.{param_name}\\0\" as *const _ as *const kernel::c_types::c_char; + + #[cfg(MODULE)] + const __{name}_{param_name}_name: *const kernel::c_types::c_char = b\"{param_name}\\0\" as *const _ as *const kernel::c_types::c_char; + + #[link_section = \"__param\"] + #[used] + static __{name}_{param_name}_struct: __{name}_{param_name}_RacyKernelParam = __{name}_{param_name}_RacyKernelParam(kernel::bindings::kernel_param {{ + name: __{name}_{param_name}_name, + // SAFETY: `__this_module` is constructed by the kernel at load time and will not be freed until the module is unloaded. + #[cfg(MODULE)] + mod_: unsafe {{ &kernel::bindings::__this_module as *const _ as *mut _ }}, + #[cfg(not(MODULE))] + mod_: core::ptr::null_mut(), + ops: unsafe {{ &{ops} }} as *const kernel::bindings::kernel_param_ops, + perm: {permissions}, + level: -1, + flags: 0, + __bindgen_anon_1: {kparam} + }}); + ", + name = name, + param_type_internal = param_type_internal, + read_func = read_func, + param_default = param_default, + param_name = param_name, + ops = ops, + permissions = param_permissions, + kparam = kparam, + ) + ); + } + } + + let mut generated_array_types = String::new(); + + for (vals, max_length) in array_types_to_generate { + let ops_name = generated_array_ops_name(&vals, max_length); + generated_array_types.push_str(&format!( + " + kernel::make_param_ops!( + {ops_name}, + kernel::module_param::ArrayParam<{vals}, {{ {max_length} }}> + ); + ", + ops_name = ops_name, + vals = vals, + max_length = max_length, + )); + } + + format!( + " + /// The module name. + /// + /// Used by the printing macros, e.g. [`info!`]. 
+ const __LOG_PREFIX: &[u8] = b\"{name}\\0\"; + + static mut __MOD: Option<{type_}> = None; + + // SAFETY: `__this_module` is constructed by the kernel at load time and will not be freed until the module is unloaded. + #[cfg(MODULE)] + static THIS_MODULE: kernel::ThisModule = unsafe {{ kernel::ThisModule::from_ptr(&kernel::bindings::__this_module as *const _ as *mut _) }}; + #[cfg(not(MODULE))] + static THIS_MODULE: kernel::ThisModule = unsafe {{ kernel::ThisModule::from_ptr(core::ptr::null_mut()) }}; + + // Loadable modules need to export the `{{init,cleanup}}_module` identifiers + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn init_module() -> kernel::c_types::c_int {{ + __init() + }} + + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn cleanup_module() {{ + __exit() + }} + + // Built-in modules are initialized through an initcall pointer + // and the identifiers need to be unique + #[cfg(not(MODULE))] + #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] + #[doc(hidden)] + #[link_section = \"{initcall_section}\"] + #[used] + pub static __{name}_initcall: extern \"C\" fn() -> kernel::c_types::c_int = __{name}_init; + + #[cfg(not(MODULE))] + #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] + global_asm!( + r#\".section \"{initcall_section}\", \"a\" + __{name}_initcall: + .long __{name}_init - . + .previous + \"# + ); + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_init() -> kernel::c_types::c_int {{ + __init() + }} + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_exit() {{ + __exit() + }} + + fn __init() -> kernel::c_types::c_int {{ + match <{type_} as kernel::KernelModule>::init() {{ + Ok(m) => {{ + unsafe {{ + __MOD = Some(m); + }} + return 0; + }} + Err(e) => {{ + return e.to_kernel_errno(); + }} + }} + }} + + fn __exit() {{ + unsafe {{ + // Invokes `drop()` on `__MOD`, which should be used for cleanup. 
+ __MOD = None; + }} + }} + + {modinfo} + + {generated_array_types} + ", + type_ = info.type_, + name = info.name, + modinfo = modinfo.buffer, + generated_array_types = generated_array_types, + initcall_section = ".initcall6.init" + ).parse().expect("Error parsing formatted string into token stream.") +} + +pub fn module_misc_device(ts: TokenStream) -> TokenStream { + let mut it = ts.into_iter(); + + let info = ModuleInfo::parse(&mut it); + + let module = format!("__internal_ModuleFor{}", info.type_); + + format!( + " + #[doc(hidden)] + struct {module} {{ + _dev: core::pin::Pin>, + }} + + impl kernel::KernelModule for {module} {{ + fn init() -> kernel::Result {{ + Ok(Self {{ + _dev: kernel::miscdev::Registration::new_pinned::<{type_}>( + kernel::c_str!(\"{name}\"), + None, + (), + )?, + }}) + }} + }} + + kernel::prelude::module! {{ + type: {module}, + name: b\"{name}\", + {author} + {description} + license: b\"{license}\", + {alias} + }} + ", + module = module, + type_ = info.type_, + name = info.name, + author = info + .author + .map(|v| format!("author: b\"{}\",", v)) + .unwrap_or_else(|| "".to_string()), + description = info + .description + .map(|v| format!("description: b\"{}\",", v)) + .unwrap_or_else(|| "".to_string()), + alias = info + .alias + .map(|v| format!("alias: b\"{}\",", v)) + .unwrap_or_else(|| "".to_string()), + license = info.license + ) + .parse() + .expect("Error parsing formatted string into token stream.") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_permissions_are_readonly() { + assert!(permissions_are_readonly("0b000000000")); + assert!(permissions_are_readonly("0o000")); + assert!(permissions_are_readonly("000")); + assert!(permissions_are_readonly("0x000")); + + assert!(!permissions_are_readonly("0b111111111")); + assert!(!permissions_are_readonly("0o777")); + assert!(!permissions_are_readonly("511")); + assert!(!permissions_are_readonly("0x1ff")); + + assert!(permissions_are_readonly("0o014")); + 
assert!(permissions_are_readonly("0o015")); + + assert!(!permissions_are_readonly("0o214")); + assert!(!permissions_are_readonly("0o024")); + assert!(!permissions_are_readonly("0o012")); + + assert!(!permissions_are_readonly("0o315")); + assert!(!permissions_are_readonly("0o065")); + assert!(!permissions_are_readonly("0o017")); + } +} From 6e81e20413f29d44fc0ffd5eb0a8bb125423c429 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:16:07 +0200 Subject: [PATCH 068/851] rust: add `kernel` crate The `kernel` crate currently includes all the abstractions that wrap kernel features written in C. These abstractions call the C side of the kernel via the generated bindings with the `bindgen` tool. Modules developed in Rust should never call the bindings themselves. In the future, as the abstractions grow in number, we may need to split this crate into several, possibly following a similar subdivision in subsystems as the kernel itself. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- include/linux/spinlock.h | 17 +- kernel/printk/printk.c | 5 +- rust/kernel/allocator.rs 
| 63 +++ rust/kernel/bindings.rs | 28 ++ rust/kernel/bindings_helper.h | 24 ++ rust/kernel/buffer.rs | 39 ++ rust/kernel/build_assert.rs | 80 ++++ rust/kernel/c_types.rs | 119 ++++++ rust/kernel/chrdev.rs | 212 ++++++++++ rust/kernel/error.rs | 272 +++++++++++++ rust/kernel/file.rs | 130 ++++++ rust/kernel/file_operations.rs | 698 +++++++++++++++++++++++++++++++++ rust/kernel/io_buffer.rs | 153 ++++++++ rust/kernel/iov_iter.rs | 95 +++++ rust/kernel/lib.rs | 220 +++++++++++ rust/kernel/linked_list.rs | 245 ++++++++++++ rust/kernel/miscdev.rs | 113 ++++++ rust/kernel/module_param.rs | 497 +++++++++++++++++++++++ rust/kernel/of.rs | 101 +++++ rust/kernel/pages.rs | 176 +++++++++ rust/kernel/platdev.rs | 166 ++++++++ rust/kernel/prelude.rs | 28 ++ rust/kernel/print.rs | 412 +++++++++++++++++++ rust/kernel/random.rs | 50 +++ rust/kernel/raw_list.rs | 361 +++++++++++++++++ rust/kernel/rbtree.rs | 570 +++++++++++++++++++++++++++ rust/kernel/security.rs | 79 ++++ rust/kernel/static_assert.rs | 39 ++ rust/kernel/str.rs | 259 ++++++++++++ rust/kernel/sync/arc.rs | 227 +++++++++++ rust/kernel/sync/condvar.rs | 136 +++++++ rust/kernel/sync/guard.rs | 82 ++++ rust/kernel/sync/locked_by.rs | 112 ++++++ rust/kernel/sync/mod.rs | 84 ++++ rust/kernel/sync/mutex.rs | 101 +++++ rust/kernel/sync/spinlock.rs | 109 +++++ rust/kernel/sysctl.rs | 198 ++++++++++ rust/kernel/task.rs | 193 +++++++++ rust/kernel/traits.rs | 26 ++ rust/kernel/types.rs | 249 ++++++++++++ rust/kernel/user_ptr.rs | 191 +++++++++ 41 files changed, 6952 insertions(+), 7 deletions(-) create mode 100644 rust/kernel/allocator.rs create mode 100644 rust/kernel/bindings.rs create mode 100644 rust/kernel/bindings_helper.h create mode 100644 rust/kernel/buffer.rs create mode 100644 rust/kernel/build_assert.rs create mode 100644 rust/kernel/c_types.rs create mode 100644 rust/kernel/chrdev.rs create mode 100644 rust/kernel/error.rs create mode 100644 rust/kernel/file.rs create mode 100644 rust/kernel/file_operations.rs 
create mode 100644 rust/kernel/io_buffer.rs create mode 100644 rust/kernel/iov_iter.rs create mode 100644 rust/kernel/lib.rs create mode 100644 rust/kernel/linked_list.rs create mode 100644 rust/kernel/miscdev.rs create mode 100644 rust/kernel/module_param.rs create mode 100644 rust/kernel/of.rs create mode 100644 rust/kernel/pages.rs create mode 100644 rust/kernel/platdev.rs create mode 100644 rust/kernel/prelude.rs create mode 100644 rust/kernel/print.rs create mode 100644 rust/kernel/random.rs create mode 100644 rust/kernel/raw_list.rs create mode 100644 rust/kernel/rbtree.rs create mode 100644 rust/kernel/security.rs create mode 100644 rust/kernel/static_assert.rs create mode 100644 rust/kernel/str.rs create mode 100644 rust/kernel/sync/arc.rs create mode 100644 rust/kernel/sync/condvar.rs create mode 100644 rust/kernel/sync/guard.rs create mode 100644 rust/kernel/sync/locked_by.rs create mode 100644 rust/kernel/sync/mod.rs create mode 100644 rust/kernel/sync/mutex.rs create mode 100644 rust/kernel/sync/spinlock.rs create mode 100644 rust/kernel/sysctl.rs create mode 100644 rust/kernel/task.rs create mode 100644 rust/kernel/traits.rs create mode 100644 rust/kernel/types.rs create mode 100644 rust/kernel/user_ptr.rs diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 79897841a2cc8..a022992725be3 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -331,12 +331,17 @@ static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock) #ifdef CONFIG_DEBUG_SPINLOCK -# define spin_lock_init(lock) \ -do { \ - static struct lock_class_key __key; \ - \ - __raw_spin_lock_init(spinlock_check(lock), \ - #lock, &__key, LD_WAIT_CONFIG); \ +static inline void __spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key) +{ + __raw_spin_lock_init(spinlock_check(lock), name, key, LD_WAIT_CONFIG); +} + +# define spin_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __spin_lock_init(lock, #lock, 
&__key); \ } while (0) #else diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 421c35571797e..f7f6d13476dae 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -397,7 +397,10 @@ static struct latched_seq clear_seq = { /* the maximum size of a formatted record (i.e. with prefix added per line) */ #define CONSOLE_LOG_MAX 1024 -/* the maximum size allowed to be reserved for a record */ +/* + * The maximum size allowed to be reserved for a record. + * Keep in sync with rust/kernel/print.rs. + */ #define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) #define LOG_LEVEL(v) ((v) & 0x07) diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs new file mode 100644 index 0000000000000..759cec47de2b1 --- /dev/null +++ b/rust/kernel/allocator.rs @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Allocator support. + +use core::alloc::{GlobalAlloc, Layout}; +use core::ptr; + +use crate::bindings; +use crate::c_types; + +pub struct KernelAllocator; + +unsafe impl GlobalAlloc for KernelAllocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // `krealloc()` is used instead of `kmalloc()` because the latter is + // an inline function and cannot be bound to as a result. + unsafe { bindings::krealloc(ptr::null(), layout.size(), bindings::GFP_KERNEL) as *mut u8 } + } + + unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { + unsafe { + bindings::kfree(ptr as *const c_types::c_void); + } + } +} + +#[global_allocator] +static ALLOCATOR: KernelAllocator = KernelAllocator; + +// `rustc` only generates these for some crate types. Even then, we would need +// to extract the object file that has them from the archive. For the moment, +// let's generate them ourselves instead. 
+#[no_mangle] +pub fn __rust_alloc(size: usize, _align: usize) -> *mut u8 { + unsafe { bindings::krealloc(core::ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 } +} + +#[no_mangle] +pub fn __rust_dealloc(ptr: *mut u8, _size: usize, _align: usize) { + unsafe { bindings::kfree(ptr as *const c_types::c_void) }; +} + +#[no_mangle] +pub fn __rust_realloc(ptr: *mut u8, _old_size: usize, _align: usize, new_size: usize) -> *mut u8 { + unsafe { + bindings::krealloc( + ptr as *const c_types::c_void, + new_size, + bindings::GFP_KERNEL, + ) as *mut u8 + } +} + +#[no_mangle] +pub fn __rust_alloc_zeroed(size: usize, _align: usize) -> *mut u8 { + unsafe { + bindings::krealloc( + core::ptr::null(), + size, + bindings::GFP_KERNEL | bindings::__GFP_ZERO, + ) as *mut u8 + } +} diff --git a/rust/kernel/bindings.rs b/rust/kernel/bindings.rs new file mode 100644 index 0000000000000..93290926ceca1 --- /dev/null +++ b/rust/kernel/bindings.rs @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bindings +//! +//! Imports the generated bindings by `bindgen`. + +// See https://github.com/rust-lang/rust-bindgen/issues/1651. 
+#![cfg_attr(test, allow(deref_nullptr))] +#![cfg_attr(test, allow(unaligned_references))] +#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] + +#[allow( + clippy::all, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unsafe_op_in_unsafe_fn +)] +mod bindings_raw { + use crate::c_types; + include!(env!("RUST_BINDINGS_FILE")); +} +pub use bindings_raw::*; + +pub const GFP_KERNEL: gfp_t = BINDINGS_GFP_KERNEL; +pub const __GFP_ZERO: gfp_t = BINDINGS___GFP_ZERO; +pub const __GFP_HIGHMEM: gfp_t = ___GFP_HIGHMEM; diff --git a/rust/kernel/bindings_helper.h b/rust/kernel/bindings_helper.h new file mode 100644 index 0000000000000..c64a6307da37e --- /dev/null +++ b/rust/kernel/bindings_helper.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// `bindgen` gets confused at certain things +const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL; +const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; diff --git a/rust/kernel/buffer.rs b/rust/kernel/buffer.rs new file mode 100644 index 0000000000000..b2502fa968fe9 --- /dev/null +++ b/rust/kernel/buffer.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Struct for writing to a pre-allocated buffer with the [`write!`] macro. + +use core::fmt; + +/// A pre-allocated buffer that implements [`core::fmt::Write`]. +/// +/// Consecutive writes will append to what has already been written. +/// Writes that don't fit in the buffer will fail. +pub struct Buffer<'a> { + slice: &'a mut [u8], + pos: usize, +} + +impl<'a> Buffer<'a> { + /// Create a new buffer from an existing array. + pub fn new(slice: &'a mut [u8]) -> Self { + Buffer { slice, pos: 0 } + } + + /// Number of bytes that have already been written to the buffer. + /// This will never exceed the length of the original array. 
+ pub fn bytes_written(&self) -> usize { + self.pos + } +} + +impl<'a> fmt::Write for Buffer<'a> { + fn write_str(&mut self, s: &str) -> fmt::Result { + if s.len() > self.slice.len() - self.pos { + Err(fmt::Error) + } else { + self.slice[self.pos..self.pos + s.len()].copy_from_slice(s.as_bytes()); + self.pos += s.len(); + Ok(()) + } + } +} diff --git a/rust/kernel/build_assert.rs b/rust/kernel/build_assert.rs new file mode 100644 index 0000000000000..f726927185c0d --- /dev/null +++ b/rust/kernel/build_assert.rs @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Build-time assert. + +/// Fails the build if the code path calling `build_error!` can possibly be executed. +/// +/// If the macro is executed in const context, `build_error!` will panic. +/// If the compiler or optimizer cannot guarantee that `build_error!` can never +/// be called, a build error will be triggered. +/// +/// # Examples +/// ``` +/// # use kernel::build_error; +/// #[inline] +/// fn foo(a: usize) -> usize { +/// a.checked_add(1).unwrap_or_else(|| build_error!("overflow")) +/// } +/// ``` +#[macro_export] +macro_rules! build_error { + () => {{ + $crate::build_error("") + }}; + ($msg:expr) => {{ + $crate::build_error($msg) + }}; +} + +/// Asserts that a boolean expression is `true` at compile time. +/// +/// If the condition is evaluated to `false` in const context, `build_assert!` +/// will panic. If the compiler or optimizer cannot guarantee the condition will +/// be evaluated to `true`, a build error will be triggered. +/// +/// [`static_assert!`] should be preferred to `build_assert!` whenever possible. +/// +/// # Examples +/// +/// These examples show that different types of [`assert!`] will trigger errors +/// at different stage of compilation. It is preferred to err as early as +/// possible, so [`static_assert!`] should be used whenever possible. 
+/// ```compile_fail +/// # use kernel::prelude::*; +/// fn foo() { +/// static_assert!(1 > 1); // Compile-time error +/// build_assert!(1 > 1); // Build-time error +/// assert!(1 > 1); // Run-time error +/// } +/// ``` +/// +/// When the condition refers to generic parameters or parameters of an inline function, +/// [`static_assert!`] cannot be used. Use `build_assert!` in this scenario. +/// ```no_run +/// # use kernel::prelude::*; +/// fn foo<const N: usize>() { +/// // `static_assert!(N > 1);` is not allowed +/// build_assert!(N > 1); // Build-time check +/// assert!(N > 1); // Run-time check +/// } +/// +/// #[inline] +/// fn bar(n: usize) { +/// // `static_assert!(n > 1);` is not allowed +/// build_assert!(n > 1); // Build-time check +/// assert!(n > 1); // Run-time check +/// } +/// ``` +#[macro_export] +macro_rules! build_assert { + ($cond:expr $(,)?) => {{ + if !$cond { + $crate::build_error(concat!("assertion failed: ", stringify!($cond))); + } + }}; + ($cond:expr, $msg:expr) => {{ + if !$cond { + $crate::build_error($msg); + } + }}; +} diff --git a/rust/kernel/c_types.rs b/rust/kernel/c_types.rs new file mode 100644 index 0000000000000..07593a3ba8bed --- /dev/null +++ b/rust/kernel/c_types.rs @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! C types for the bindings. +//! +//! The bindings generated by `bindgen` use these types to map to the C ones. +//! +//! C's standard integer types may differ in width depending on +//! the architecture, thus we need to conditionally compile those. + +#![allow(non_camel_case_types)] + +#[cfg(any(target_arch = "arm", target_arch = "x86", target_arch = "riscv32",))] +mod c { + /// C `void` type. + pub type c_void = core::ffi::c_void; + + /// C `char` type. + pub type c_char = i8; + + /// C `signed char` type. + pub type c_schar = i8; + + /// C `unsigned char` type. + pub type c_uchar = u8; + + /// C `short` type. + pub type c_short = i16; + + /// C `unsigned short` type. + pub type c_ushort = u16; + + /// C `int` type. 
+ pub type c_int = i32; + + /// C `unsigned int` type. + pub type c_uint = u32; + + /// C `long` type. + pub type c_long = i32; + + /// C `unsigned long` type. + pub type c_ulong = u32; + + /// C `long long` type. + pub type c_longlong = i64; + + /// C `unsigned long long` type. + pub type c_ulonglong = u64; + + /// C `ssize_t` type (typically defined in `<sys/types.h>` by POSIX). + /// + /// For some 32-bit architectures like this one, the kernel defines it as + /// `int`, i.e. it is an [`i32`]. + pub type c_ssize_t = isize; + + /// C `size_t` type (typically defined in `<stddef.h>`). + /// + /// For some 32-bit architectures like this one, the kernel defines it as + /// `unsigned int`, i.e. it is an [`u32`]. + pub type c_size_t = usize; +} + +#[cfg(any( + target_arch = "aarch64", + target_arch = "x86_64", + target_arch = "powerpc64", + target_arch = "riscv64", +))] +mod c { + /// C `void` type. + pub type c_void = core::ffi::c_void; + + /// C `char` type. + pub type c_char = i8; + + /// C `signed char` type. + pub type c_schar = i8; + + /// C `unsigned char` type. + pub type c_uchar = u8; + + /// C `short` type. + pub type c_short = i16; + + /// C `unsigned short` type. + pub type c_ushort = u16; + + /// C `int` type. + pub type c_int = i32; + + /// C `unsigned int` type. + pub type c_uint = u32; + + /// C `long` type. + pub type c_long = i64; + + /// C `unsigned long` type. + pub type c_ulong = u64; + + /// C `long long` type. + pub type c_longlong = i64; + + /// C `unsigned long long` type. + pub type c_ulonglong = u64; + + /// C `ssize_t` type (typically defined in `<sys/types.h>` by POSIX). + /// + /// For 64-bit architectures like this one, the kernel defines it as + /// `long`, i.e. it is an [`i64`]. + pub type c_ssize_t = isize; + + /// C `size_t` type (typically defined in `<stddef.h>`). + /// + /// For 64-bit architectures like this one, the kernel defines it as + /// `unsigned long`, i.e. it is an [`u64`]. 
+ pub type c_size_t = usize; +} + +pub use c::*; diff --git a/rust/kernel/chrdev.rs b/rust/kernel/chrdev.rs new file mode 100644 index 0000000000000..20e93ec05def3 --- /dev/null +++ b/rust/kernel/chrdev.rs @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Character devices. +//! +//! Also called "char devices", `chrdev`, `cdev`. +//! +//! C header: [`include/linux/cdev.h`](../../../../include/linux/cdev.h) +//! +//! Reference: + +use alloc::boxed::Box; +use core::convert::TryInto; +use core::marker::PhantomPinned; +use core::pin::Pin; + +use crate::bindings; +use crate::c_types; +use crate::error::{Error, Result}; +use crate::file_operations; +use crate::str::CStr; + +/// Character device. +/// +/// # Invariants +/// +/// - [`self.0`] is valid and non-null. +/// - [`(*self.0).ops`] is valid, non-null and has static lifetime. +/// - [`(*self.0).owner`] is valid and, if non-null, has module lifetime. +struct Cdev(*mut bindings::cdev); + +impl Cdev { + fn alloc( + fops: &'static bindings::file_operations, + module: &'static crate::ThisModule, + ) -> Result { + // SAFETY: FFI call. + let cdev = unsafe { bindings::cdev_alloc() }; + if cdev.is_null() { + return Err(Error::ENOMEM); + } + // SAFETY: `cdev` is valid and non-null since `cdev_alloc()` + // returned a valid pointer which was null-checked. + unsafe { + (*cdev).ops = fops; + (*cdev).owner = module.0; + } + // INVARIANTS: + // - [`self.0`] is valid and non-null. + // - [`(*self.0).ops`] is valid, non-null and has static lifetime, + // because it was coerced from a reference with static lifetime. + // - [`(*self.0).owner`] is valid and, if non-null, has module lifetime, + // guaranteed by the [`ThisModule`] invariant. + Ok(Self(cdev)) + } + + fn add(&mut self, dev: bindings::dev_t, count: c_types::c_uint) -> Result { + // SAFETY: according to the type invariants: + // - [`self.0`] can be safely passed to [`bindings::cdev_add`]. + // - [`(*self.0).ops`] will live at least as long as [`self.0`]. 
+ // - [`(*self.0).owner`] will live at least as long as the + // module, which is an implicit requirement. + let rc = unsafe { bindings::cdev_add(self.0, dev, count) }; + if rc != 0 { + return Err(Error::from_kernel_errno(rc)); + } + Ok(()) + } +} + +impl Drop for Cdev { + fn drop(&mut self) { + // SAFETY: [`self.0`] is valid and non-null by the type invariants. + unsafe { + bindings::cdev_del(self.0); + } + } +} + +struct RegistrationInner { + dev: bindings::dev_t, + used: usize, + cdevs: [Option; N], + _pin: PhantomPinned, +} + +/// Character device registration. +/// +/// May contain up to a fixed number (`N`) of devices. Must be pinned. +pub struct Registration { + name: &'static CStr, + minors_start: u16, + this_module: &'static crate::ThisModule, + inner: Option>, +} + +impl Registration<{ N }> { + /// Creates a [`Registration`] object for a character device. + /// + /// This does *not* register the device: see [`Self::register()`]. + /// + /// This associated function is intended to be used when you need to avoid + /// a memory allocation, e.g. when the [`Registration`] is a member of + /// a bigger structure inside your [`crate::KernelModule`] instance. If you + /// are going to pin the registration right away, call + /// [`Self::new_pinned()`] instead. + pub fn new( + name: &'static CStr, + minors_start: u16, + this_module: &'static crate::ThisModule, + ) -> Self { + Registration { + name, + minors_start, + this_module, + inner: None, + } + } + + /// Creates a pinned [`Registration`] object for a character device. + /// + /// This does *not* register the device: see [`Self::register()`]. + pub fn new_pinned( + name: &'static CStr, + minors_start: u16, + this_module: &'static crate::ThisModule, + ) -> Result>> { + Ok(Pin::from(Box::try_new(Self::new( + name, + minors_start, + this_module, + ))?)) + } + + /// Registers a character device. + /// + /// You may call this once per device type, up to `N` times. 
+ pub fn register>(self: Pin<&mut Self>) -> Result { + // SAFETY: We must ensure that we never move out of `this`. + let this = unsafe { self.get_unchecked_mut() }; + if this.inner.is_none() { + let mut dev: bindings::dev_t = 0; + // SAFETY: Calling unsafe function. `this.name` has `'static` + // lifetime. + let res = unsafe { + bindings::alloc_chrdev_region( + &mut dev, + this.minors_start.into(), + N.try_into()?, + this.name.as_char_ptr(), + ) + }; + if res != 0 { + return Err(Error::from_kernel_errno(res)); + } + const NONE: Option = None; + this.inner = Some(RegistrationInner { + dev, + used: 0, + cdevs: [NONE; N], + _pin: PhantomPinned, + }); + } + + let mut inner = this.inner.as_mut().unwrap(); + if inner.used == N { + return Err(Error::EINVAL); + } + + // SAFETY: The adapter doesn't retrieve any state yet, so it's compatible with any + // registration. + let fops = unsafe { file_operations::FileOperationsVtable::::build() }; + let mut cdev = Cdev::alloc(fops, this.this_module)?; + cdev.add(inner.dev + inner.used as bindings::dev_t, 1)?; + inner.cdevs[inner.used].replace(cdev); + inner.used += 1; + Ok(()) + } +} + +impl file_operations::FileOpenAdapter for Registration<{ N }> { + type Arg = (); + + unsafe fn convert( + _inode: *mut bindings::inode, + _file: *mut bindings::file, + ) -> *const Self::Arg { + // TODO: Update the SAFETY comment on the call to `FileOperationsVTable::build` above once + // this is updated to retrieve state. + &() + } +} + +// SAFETY: `Registration` does not expose any of its state across threads +// (it is fine for multiple threads to have a shared reference to it). +unsafe impl Sync for Registration<{ N }> {} + +impl Drop for Registration<{ N }> { + fn drop(&mut self) { + if let Some(inner) = self.inner.as_mut() { + // Replicate kernel C behaviour: drop [`Cdev`]s before calling + // [`bindings::unregister_chrdev_region`]. 
+ for i in 0..inner.used { + inner.cdevs[i].take(); + } + // SAFETY: [`self.inner`] is Some, so [`inner.dev`] was previously + // created using [`bindings::alloc_chrdev_region`]. + unsafe { + bindings::unregister_chrdev_region(inner.dev, N.try_into().unwrap()); + } + } + } +} diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs new file mode 100644 index 0000000000000..df7ba6a36610c --- /dev/null +++ b/rust/kernel/error.rs @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel errors. +//! +//! C header: [`include/uapi/asm-generic/errno-base.h`](../../../include/uapi/asm-generic/errno-base.h) + +use crate::str::CStr; +use crate::{bindings, c_types}; +use alloc::{alloc::AllocError, collections::TryReserveError}; +use core::convert::From; +use core::fmt; +use core::num::TryFromIntError; +use core::str::{self, Utf8Error}; + +/// Generic integer kernel error. +/// +/// The kernel defines a set of integer generic error codes based on C and +/// POSIX ones. These codes may have a more specific meaning in some contexts. +/// +/// # Invariants +/// +/// The value is a valid `errno` (i.e. `>= -MAX_ERRNO && < 0`). +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Error(c_types::c_int); + +impl Error { + /// Invalid argument. + pub const EINVAL: Self = Error(-(bindings::EINVAL as i32)); + + /// Out of memory. + pub const ENOMEM: Self = Error(-(bindings::ENOMEM as i32)); + + /// Bad address. + pub const EFAULT: Self = Error(-(bindings::EFAULT as i32)); + + /// Illegal seek. + pub const ESPIPE: Self = Error(-(bindings::ESPIPE as i32)); + + /// Try again. + pub const EAGAIN: Self = Error(-(bindings::EAGAIN as i32)); + + /// Device or resource busy. + pub const EBUSY: Self = Error(-(bindings::EBUSY as i32)); + + /// Restart the system call. + pub const ERESTARTSYS: Self = Error(-(bindings::ERESTARTSYS as i32)); + + /// Operation not permitted. + pub const EPERM: Self = Error(-(bindings::EPERM as i32)); + + /// No such process. 
+ pub const ESRCH: Self = Error(-(bindings::ESRCH as i32)); + + /// No such file or directory. + pub const ENOENT: Self = Error(-(bindings::ENOENT as i32)); + + /// Interrupted system call. + pub const EINTR: Self = Error(-(bindings::EINTR as i32)); + + /// Bad file number. + pub const EBADF: Self = Error(-(bindings::EBADF as i32)); + + /// Creates an [`Error`] from a kernel error code. + /// + /// It is a bug to pass an out-of-range `errno`. `EINVAL` would + /// be returned in such a case. + pub(crate) fn from_kernel_errno(errno: c_types::c_int) -> Error { + if errno < -(bindings::MAX_ERRNO as i32) || errno >= 0 { + // TODO: make it a `WARN_ONCE` once available. + crate::pr_warn!( + "attempted to create `Error` with out of range `errno`: {}", + errno + ); + return Error::EINVAL; + } + + // INVARIANT: the check above ensures the type invariant + // will hold. + Error(errno) + } + + /// Creates an [`Error`] from a kernel error code. + /// + /// # Safety + /// + /// `errno` must be within error code range (i.e. `>= -MAX_ERRNO && < 0`). + pub(crate) unsafe fn from_kernel_errno_unchecked(errno: c_types::c_int) -> Error { + // INVARIANT: the contract ensures the type invariant + // will hold. + Error(errno) + } + + /// Returns the kernel error code. + pub fn to_kernel_errno(self) -> c_types::c_int { + self.0 + } +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + extern "C" { + fn rust_helper_errname(err: c_types::c_int) -> *const c_types::c_char; + } + // SAFETY: FFI call. + let name = unsafe { rust_helper_errname(-self.0) }; + + if name.is_null() { + // Print out number if no name can be found. + return f.debug_tuple("Error").field(&-self.0).finish(); + } + + // SAFETY: `'static` string from C, and is not NULL. + let cstr = unsafe { CStr::from_char_ptr(name) }; + // SAFETY: These strings are ASCII-only. 
+ let str = unsafe { str::from_utf8_unchecked(cstr) }; + f.debug_tuple(str).finish() + } +} + +impl From for Error { + fn from(_: TryFromIntError) -> Error { + Error::EINVAL + } +} + +impl From for Error { + fn from(_: Utf8Error) -> Error { + Error::EINVAL + } +} + +impl From for Error { + fn from(_: TryReserveError) -> Error { + Error::ENOMEM + } +} + +/// A [`Result`] with an [`Error`] error type. +/// +/// To be used as the return type for functions that may fail. +/// +/// # Error codes in C and Rust +/// +/// In C, it is common that functions indicate success or failure through +/// their return value; modifying or returning extra data through non-`const` +/// pointer parameters. In particular, in the kernel, functions that may fail +/// typically return an `int` that represents a generic error code. We model +/// those as [`Error`]. +/// +/// In Rust, it is idiomatic to model functions that may fail as returning +/// a [`Result`]. Since in the kernel many functions return an error code, +/// [`Result`] is a type alias for a [`core::result::Result`] that uses +/// [`Error`] as its error type. +/// +/// Note that even if a function does not return anything when it succeeds, +/// it should still be modeled as returning a `Result` rather than +/// just an [`Error`]. +pub type Result = core::result::Result; + +impl From for Error { + fn from(_: AllocError) -> Error { + Error::ENOMEM + } +} + +// # Invariant: `-bindings::MAX_ERRNO` fits in an `i16`. +crate::static_assert!(bindings::MAX_ERRNO <= -(i16::MIN as i32) as u32); + +#[doc(hidden)] +pub fn from_kernel_result_helper(r: Result) -> T +where + T: From, +{ + match r { + Ok(v) => v, + // NO-OVERFLOW: negative `errno`s are no smaller than `-bindings::MAX_ERRNO`, + // `-bindings::MAX_ERRNO` fits in an `i16` as per invariant above, + // therefore a negative `errno` always fits in an `i16` and will not overflow. 
+ Err(e) => T::from(e.to_kernel_errno() as i16), + } +} + +/// Transforms a [`crate::error::Result`] to a kernel C integer result. +/// +/// This is useful when calling Rust functions that return [`crate::error::Result`] +/// from inside `extern "C"` functions that need to return an integer +/// error result. +/// +/// `T` should be constructible from an `i16` via `From<i16>`. +/// +/// # Examples +/// +/// ```ignore +/// # use kernel::from_kernel_result; +/// # use kernel::c_types; +/// # use kernel::bindings; +/// unsafe extern "C" fn probe_callback( +/// pdev: *mut bindings::platform_device, +/// ) -> c_types::c_int { +/// from_kernel_result! { +/// let ptr = devm_alloc(pdev)?; +/// rust_helper_platform_set_drvdata(pdev, ptr); +/// Ok(0) +/// } +/// } +/// ``` +#[macro_export] +macro_rules! from_kernel_result { + ($($tt:tt)*) => {{ + $crate::error::from_kernel_result_helper((|| { + $($tt)* + })()) + }}; +} + +/// Transform a kernel "error pointer" to a normal pointer. +/// +/// Some kernel C API functions return an "error pointer" which optionally +/// embeds an `errno`. Callers are supposed to check the returned pointer +/// for errors. This function performs the check and converts the "error pointer" +/// to a normal pointer in an idiomatic fashion. +/// +/// # Examples +/// +/// ```ignore +/// # use kernel::prelude::*; +/// # use kernel::from_kernel_err_ptr; +/// # use kernel::c_types; +/// # use kernel::bindings; +/// fn devm_platform_ioremap_resource( +/// pdev: &mut PlatformDevice, +/// index: u32, +/// ) -> Result<*mut c_types::c_void> { +/// // SAFETY: FFI call. +/// unsafe { +/// from_kernel_err_ptr(bindings::devm_platform_ioremap_resource( +/// pdev.to_ptr(), +/// index, +/// )) +/// } +/// } +/// ``` +// TODO: remove `dead_code` marker once an in-kernel client is available. 
+#[allow(dead_code)] +pub(crate) fn from_kernel_err_ptr(ptr: *mut T) -> Result<*mut T> { + extern "C" { + #[allow(improper_ctypes)] + fn rust_helper_is_err(ptr: *const c_types::c_void) -> bool; + + #[allow(improper_ctypes)] + fn rust_helper_ptr_err(ptr: *const c_types::c_void) -> c_types::c_long; + } + + // CAST: casting a pointer to `*const c_types::c_void` is always valid. + let const_ptr: *const c_types::c_void = ptr.cast(); + // SAFETY: the FFI function does not deref the pointer. + if unsafe { rust_helper_is_err(const_ptr) } { + // SAFETY: the FFI function does not deref the pointer. + let err = unsafe { rust_helper_ptr_err(const_ptr) }; + // CAST: if `rust_helper_is_err()` returns `true`, + // then `rust_helper_ptr_err()` is guaranteed to return a + // negative value greater-or-equal to `-bindings::MAX_ERRNO`, + // which always fits in an `i16`, as per the invariant above. + // And an `i16` always fits in an `i32`. So casting `err` to + // an `i32` can never overflow, and is always valid. + // + // SAFETY: `rust_helper_is_err()` ensures `err` is a + // negative value greater-or-equal to `-bindings::MAX_ERRNO` + return Err(unsafe { Error::from_kernel_errno_unchecked(err as i32) }); + } + Ok(ptr) +} diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs new file mode 100644 index 0000000000000..091b3a4306c50 --- /dev/null +++ b/rust/kernel/file.rs @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Files and file descriptors. +//! +//! C headers: [`include/linux/fs.h`](../../../../include/linux/fs.h) and +//! [`include/linux/file.h`](../../../../include/linux/file.h) + +use crate::{bindings, error::Error, Result}; +use core::{mem::ManuallyDrop, ops::Deref}; + +/// Wraps the kernel's `struct file`. +/// +/// # Invariants +/// +/// The pointer `File::ptr` is non-null and valid. Its reference count is also non-zero. 
+pub struct File { + pub(crate) ptr: *mut bindings::file, +} + +impl File { + /// Constructs a new [`struct file`] wrapper from a file descriptor. + /// + /// The file descriptor belongs to the current process. + pub fn from_fd(fd: u32) -> Result { + // SAFETY: FFI call, there are no requirements on `fd`. + let ptr = unsafe { bindings::fget(fd) }; + if ptr.is_null() { + return Err(Error::EBADF); + } + + // INVARIANTS: We checked that `ptr` is non-null, so it is valid. `fget` increments the ref + // count before returning. + Ok(Self { ptr }) + } + + /// Returns the current seek/cursor/pointer position (`struct file::f_pos`). + pub fn pos(&self) -> u64 { + // SAFETY: `File::ptr` is guaranteed to be valid by the type invariants. + unsafe { (*self.ptr).f_pos as u64 } + } + + /// Returns whether the file is in blocking mode. + pub fn is_blocking(&self) -> bool { + // SAFETY: `File::ptr` is guaranteed to be valid by the type invariants. + unsafe { (*self.ptr).f_flags & bindings::O_NONBLOCK == 0 } + } +} + +impl Drop for File { + fn drop(&mut self) { + // SAFETY: The type invariants guarantee that `File::ptr` has a non-zero reference count. + unsafe { bindings::fput(self.ptr) }; + } +} + +/// A wrapper for [`File`] that doesn't automatically decrement the refcount when dropped. +/// +/// We need the wrapper because [`ManuallyDrop`] alone would allow callers to call +/// [`ManuallyDrop::into_inner`]. This would allow an unsafe sequence to be triggered without +/// `unsafe` blocks because it would trigger an unbalanced call to `fput`. +/// +/// # Invariants +/// +/// The wrapped [`File`] remains valid for the lifetime of the object. +pub(crate) struct FileRef(ManuallyDrop); + +impl FileRef { + /// Constructs a new [`struct file`] wrapper that doesn't change its reference count. + /// + /// # Safety + /// + /// The pointer `ptr` must be non-null and valid for the lifetime of the object. 
+ pub(crate) unsafe fn from_ptr(ptr: *mut bindings::file) -> Self { + Self(ManuallyDrop::new(File { ptr })) + } +} + +impl Deref for FileRef { + type Target = File; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +/// A file descriptor reservation. +/// +/// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it, +/// then we commit or drop the reservation. The first step may fail (e.g., the current process ran +/// out of available slots), but commit and drop never fail (and are mutually exclusive). +pub struct FileDescriptorReservation { + fd: u32, +} + +impl FileDescriptorReservation { + /// Creates a new file descriptor reservation. + pub fn new(flags: u32) -> Result { + let fd = unsafe { bindings::get_unused_fd_flags(flags) }; + if fd < 0 { + return Err(Error::from_kernel_errno(fd)); + } + Ok(Self { fd: fd as _ }) + } + + /// Returns the file descriptor number that was reserved. + pub fn reserved_fd(&self) -> u32 { + self.fd + } + + /// Commits the reservation. + /// + /// The previously reserved file descriptor is bound to `file`. + pub fn commit(self, file: File) { + // SAFETY: `self.fd` was previously returned by `get_unused_fd_flags`, and `file.ptr` is + // guaranteed to have an owned ref count by its type invariants. + unsafe { bindings::fd_install(self.fd, file.ptr) }; + + // `fd_install` consumes both the file descriptor and the file reference, so we cannot run + // the destructors. + core::mem::forget(self); + core::mem::forget(file); + } +} + +impl Drop for FileDescriptorReservation { + fn drop(&mut self) { + // SAFETY: `self.fd` was returned by a previous call to `get_unused_fd_flags`. + unsafe { bindings::put_unused_fd(self.fd) }; + } +} diff --git a/rust/kernel/file_operations.rs b/rust/kernel/file_operations.rs new file mode 100644 index 0000000000000..b866b6668561c --- /dev/null +++ b/rust/kernel/file_operations.rs @@ -0,0 +1,698 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! File operations. 
+//! +//! C header: [`include/linux/fs.h`](../../../../include/linux/fs.h) + +use core::convert::{TryFrom, TryInto}; +use core::{marker, mem, ops::Deref, ptr}; + +use alloc::boxed::Box; + +use crate::{ + bindings, c_types, + error::{Error, Result}, + file::{File, FileRef}, + from_kernel_result, + io_buffer::{IoBufferReader, IoBufferWriter}, + iov_iter::IovIter, + sync::CondVar, + types::PointerWrapper, + user_ptr::{UserSlicePtr, UserSlicePtrReader, UserSlicePtrWriter}, +}; + +/// Wraps the kernel's `struct poll_table_struct`. +/// +/// # Invariants +/// +/// The pointer `PollTable::ptr` is null or valid. +pub struct PollTable { + ptr: *mut bindings::poll_table_struct, +} + +impl PollTable { + /// Constructs a new `struct poll_table_struct` wrapper. + /// + /// # Safety + /// + /// The pointer `ptr` must be either null or a valid pointer for the lifetime of the object. + unsafe fn from_ptr(ptr: *mut bindings::poll_table_struct) -> Self { + Self { ptr } + } + + /// Associates the given file and condition variable to this poll table. It means notifying the + /// condition variable will notify the poll table as well; additionally, the association + /// between the condition variable and the file will automatically be undone by the kernel when + /// the file is destructed. To unilaterally remove the association before then, one can call + /// [`CondVar::free_waiters`]. + /// + /// # Safety + /// + /// If the condition variable is destroyed before the file, then [`CondVar::free_waiters`] must + /// be called to ensure that all waiters are flushed out. + pub unsafe fn register_wait<'a>(&self, file: &'a File, cv: &'a CondVar) { + if self.ptr.is_null() { + return; + } + + // SAFETY: `PollTable::ptr` is guaranteed to be valid by the type invariants and the null + // check above. + let table = unsafe { &*self.ptr }; + if let Some(proc) = table._qproc { + // SAFETY: All pointers are known to be valid. 
+ unsafe { proc(file.ptr as _, cv.wait_list.get(), self.ptr) } + } + } +} + +/// Equivalent to [`std::io::SeekFrom`]. +/// +/// [`std::io::SeekFrom`]: https://doc.rust-lang.org/std/io/enum.SeekFrom.html +pub enum SeekFrom { + /// Equivalent to C's `SEEK_SET`. + Start(u64), + + /// Equivalent to C's `SEEK_END`. + End(i64), + + /// Equivalent to C's `SEEK_CUR`. + Current(i64), +} + +unsafe extern "C" fn open_callback>( + inode: *mut bindings::inode, + file: *mut bindings::file, +) -> c_types::c_int { + from_kernel_result! { + let arg = unsafe { A::convert(inode, file) }; + let ptr = T::open(unsafe { &*arg })?.into_pointer(); + unsafe { (*file).private_data = ptr as *mut c_types::c_void }; + Ok(0) + } +} + +unsafe extern "C" fn read_callback( + file: *mut bindings::file, + buf: *mut c_types::c_char, + len: c_types::c_size_t, + offset: *mut bindings::loff_t, +) -> c_types::c_ssize_t { + from_kernel_result! { + let mut data = unsafe { UserSlicePtr::new(buf as *mut c_types::c_void, len).writer() }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + // No `FMODE_UNSIGNED_OFFSET` support, so `offset` must be in [0, 2^63). + // See discussion in https://github.com/fishinabarrel/linux-kernel-module-rust/pull/113 + let read = T::read(&f, unsafe { &FileRef::from_ptr(file) }, &mut data, unsafe { *offset }.try_into()?)?; + unsafe { (*offset) += bindings::loff_t::try_from(read).unwrap() }; + Ok(read as _) + } +} + +unsafe extern "C" fn read_iter_callback( + iocb: *mut bindings::kiocb, + raw_iter: *mut bindings::iov_iter, +) -> isize { + from_kernel_result! 
{ + let mut iter = unsafe { IovIter::from_ptr(raw_iter) }; + let file = unsafe { (*iocb).ki_filp }; + let offset = unsafe { (*iocb).ki_pos }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let read = T::read(&f, unsafe { &FileRef::from_ptr(file) }, &mut iter, offset.try_into()?)?; + unsafe { (*iocb).ki_pos += bindings::loff_t::try_from(read).unwrap() }; + Ok(read as _) + } +} + +unsafe extern "C" fn write_callback( + file: *mut bindings::file, + buf: *const c_types::c_char, + len: c_types::c_size_t, + offset: *mut bindings::loff_t, +) -> c_types::c_ssize_t { + from_kernel_result! { + let mut data = unsafe { UserSlicePtr::new(buf as *mut c_types::c_void, len).reader() }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + // No `FMODE_UNSIGNED_OFFSET` support, so `offset` must be in [0, 2^63). 
+ // See discussion in https://github.com/fishinabarrel/linux-kernel-module-rust/pull/113 + let written = T::write(&f, unsafe { &FileRef::from_ptr(file) }, &mut data, unsafe { *offset }.try_into()?)?; + unsafe { (*offset) += bindings::loff_t::try_from(written).unwrap() }; + Ok(written as _) + } +} + +unsafe extern "C" fn write_iter_callback( + iocb: *mut bindings::kiocb, + raw_iter: *mut bindings::iov_iter, +) -> isize { + from_kernel_result! { + let mut iter = unsafe { IovIter::from_ptr(raw_iter) }; + let file = unsafe { (*iocb).ki_filp }; + let offset = unsafe { (*iocb).ki_pos }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let written = T::write(&f, unsafe { &FileRef::from_ptr(file) }, &mut iter, offset.try_into()?)?; + unsafe { (*iocb).ki_pos += bindings::loff_t::try_from(written).unwrap() }; + Ok(written as _) + } +} + +unsafe extern "C" fn release_callback( + _inode: *mut bindings::inode, + file: *mut bindings::file, +) -> c_types::c_int { + let ptr = mem::replace(unsafe { &mut (*file).private_data }, ptr::null_mut()); + T::release(unsafe { T::Wrapper::from_pointer(ptr as _) }, unsafe { + &FileRef::from_ptr(file) + }); + 0 +} + +unsafe extern "C" fn llseek_callback( + file: *mut bindings::file, + offset: bindings::loff_t, + whence: c_types::c_int, +) -> bindings::loff_t { + from_kernel_result! 
{ + let off = match whence as u32 { + bindings::SEEK_SET => SeekFrom::Start(offset.try_into()?), + bindings::SEEK_CUR => SeekFrom::Current(offset), + bindings::SEEK_END => SeekFrom::End(offset), + _ => return Err(Error::EINVAL), + }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let off = T::seek(&f, unsafe { &FileRef::from_ptr(file) }, off)?; + Ok(off as bindings::loff_t) + } +} + +unsafe extern "C" fn unlocked_ioctl_callback( + file: *mut bindings::file, + cmd: c_types::c_uint, + arg: c_types::c_ulong, +) -> c_types::c_long { + from_kernel_result! { + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let mut cmd = IoctlCommand::new(cmd as _, arg as _); + let ret = T::ioctl(&f, unsafe { &FileRef::from_ptr(file) }, &mut cmd)?; + Ok(ret as _) + } +} + +unsafe extern "C" fn compat_ioctl_callback( + file: *mut bindings::file, + cmd: c_types::c_uint, + arg: c_types::c_ulong, +) -> c_types::c_long { + from_kernel_result! { + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. 
`T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let mut cmd = IoctlCommand::new(cmd as _, arg as _); + let ret = T::compat_ioctl(&f, unsafe { &FileRef::from_ptr(file) }, &mut cmd)?; + Ok(ret as _) + } +} + +unsafe extern "C" fn mmap_callback( + file: *mut bindings::file, + vma: *mut bindings::vm_area_struct, +) -> c_types::c_int { + from_kernel_result! { + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + T::mmap(&f, unsafe { &FileRef::from_ptr(file) }, unsafe { &mut *vma })?; + Ok(0) + } +} + +unsafe extern "C" fn fsync_callback( + file: *mut bindings::file, + start: bindings::loff_t, + end: bindings::loff_t, + datasync: c_types::c_int, +) -> c_types::c_int { + from_kernel_result! { + let start = start.try_into()?; + let end = end.try_into()?; + let datasync = datasync != 0; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. 
+ let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let res = T::fsync(&f, unsafe { &FileRef::from_ptr(file) }, start, end, datasync)?; + Ok(res.try_into().unwrap()) + } +} + +unsafe extern "C" fn poll_callback( + file: *mut bindings::file, + wait: *mut bindings::poll_table_struct, +) -> bindings::__poll_t { + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to `file` + // have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + match T::poll(&f, unsafe { &FileRef::from_ptr(file) }, unsafe { + &PollTable::from_ptr(wait) + }) { + Ok(v) => v, + Err(_) => bindings::POLLERR, + } +} + +pub(crate) struct FileOperationsVtable(marker::PhantomData, marker::PhantomData); + +impl> FileOperationsVtable { + const VTABLE: bindings::file_operations = bindings::file_operations { + open: Some(open_callback::), + release: Some(release_callback::), + read: if T::TO_USE.read { + Some(read_callback::) + } else { + None + }, + write: if T::TO_USE.write { + Some(write_callback::) + } else { + None + }, + llseek: if T::TO_USE.seek { + Some(llseek_callback::) + } else { + None + }, + + check_flags: None, + compat_ioctl: if T::TO_USE.compat_ioctl { + Some(compat_ioctl_callback::) + } else { + None + }, + copy_file_range: None, + fallocate: None, + fadvise: None, + fasync: None, + flock: None, + flush: None, + fsync: if T::TO_USE.fsync { + Some(fsync_callback::) + } else { + None + }, + get_unmapped_area: None, + iterate: None, + iterate_shared: None, + iopoll: None, + lock: None, + mmap: if T::TO_USE.mmap { + Some(mmap_callback::) + } else { + None + }, + mmap_supported_flags: 0, + owner: ptr::null_mut(), + poll: if T::TO_USE.poll { + Some(poll_callback::) + } else { + None + 
}, + read_iter: if T::TO_USE.read_iter { + Some(read_iter_callback::) + } else { + None + }, + remap_file_range: None, + sendpage: None, + setlease: None, + show_fdinfo: None, + splice_read: None, + splice_write: None, + unlocked_ioctl: if T::TO_USE.ioctl { + Some(unlocked_ioctl_callback::) + } else { + None + }, + write_iter: if T::TO_USE.write_iter { + Some(write_iter_callback::) + } else { + None + }, + }; + + /// Builds an instance of [`struct file_operations`]. + /// + /// # Safety + /// + /// The caller must ensure that the adapter is compatible with the way the device is registered. + pub(crate) const unsafe fn build() -> &'static bindings::file_operations { + &Self::VTABLE + } +} + +/// Represents which fields of [`struct file_operations`] should be populated with pointers. +pub struct ToUse { + /// The `read` field of [`struct file_operations`]. + pub read: bool, + + /// The `read_iter` field of [`struct file_operations`]. + pub read_iter: bool, + + /// The `write` field of [`struct file_operations`]. + pub write: bool, + + /// The `write_iter` field of [`struct file_operations`]. + pub write_iter: bool, + + /// The `llseek` field of [`struct file_operations`]. + pub seek: bool, + + /// The `unlocked_ioctl` field of [`struct file_operations`]. + pub ioctl: bool, + + /// The `compat_ioctl` field of [`struct file_operations`]. + pub compat_ioctl: bool, + + /// The `fsync` field of [`struct file_operations`]. + pub fsync: bool, + + /// The `mmap` field of [`struct file_operations`]. + pub mmap: bool, + + /// The `poll` field of [`struct file_operations`]. + pub poll: bool, +} + +/// A constant version where all values are set to `false`, that is, all supported fields will +/// be set to null pointers. 
+pub const USE_NONE: ToUse = ToUse { + read: false, + read_iter: false, + write: false, + write_iter: false, + seek: false, + ioctl: false, + compat_ioctl: false, + fsync: false, + mmap: false, + poll: false, +}; + +/// Defines the [`FileOperations::TO_USE`] field based on a list of fields to be populated. +#[macro_export] +macro_rules! declare_file_operations { + () => { + const TO_USE: $crate::file_operations::ToUse = $crate::file_operations::USE_NONE; + }; + ($($i:ident),+) => { + const TO_USE: $crate::file_operations::ToUse = + $crate::file_operations::ToUse { + $($i: true),+ , + ..$crate::file_operations::USE_NONE + }; + }; +} + +/// Allows the handling of ioctls defined with the `_IO`, `_IOR`, `_IOW`, and `_IOWR` macros. +/// +/// For each macro, there is a handler function that takes the appropriate types as arguments. +pub trait IoctlHandler: Sync { + /// The type of the first argument to each associated function. + type Target; + + /// Handles ioctls defined with the `_IO` macro, that is, with no buffer as argument. + fn pure(_this: &Self::Target, _file: &File, _cmd: u32, _arg: usize) -> Result { + Err(Error::EINVAL) + } + + /// Handles ioctls defined with the `_IOR` macro, that is, with an output buffer provided as + /// argument. + fn read( + _this: &Self::Target, + _file: &File, + _cmd: u32, + _writer: &mut UserSlicePtrWriter, + ) -> Result { + Err(Error::EINVAL) + } + + /// Handles ioctls defined with the `_IOW` macro, that is, with an input buffer provided as + /// argument. + fn write( + _this: &Self::Target, + _file: &File, + _cmd: u32, + _reader: &mut UserSlicePtrReader, + ) -> Result { + Err(Error::EINVAL) + } + + /// Handles ioctls defined with the `_IOWR` macro, that is, with a buffer for both input and + /// output provided as argument. + fn read_write( + _this: &Self::Target, + _file: &File, + _cmd: u32, + _data: UserSlicePtr, + ) -> Result { + Err(Error::EINVAL) + } +} + +/// Represents an ioctl command. 
+/// +/// It can use the components of an ioctl command to dispatch ioctls using +/// [`IoctlCommand::dispatch`]. +pub struct IoctlCommand { + cmd: u32, + arg: usize, + user_slice: Option, +} + +impl IoctlCommand { + /// Constructs a new [`IoctlCommand`]. + fn new(cmd: u32, arg: usize) -> Self { + let size = (cmd >> bindings::_IOC_SIZESHIFT) & bindings::_IOC_SIZEMASK; + + // SAFETY: We only create one instance of the user slice per ioctl call, so TOCTOU issues + // are not possible. + let user_slice = Some(unsafe { UserSlicePtr::new(arg as _, size as _) }); + Self { + cmd, + arg, + user_slice, + } + } + + /// Dispatches the given ioctl to the appropriate handler based on the value of the command. It + /// also creates a [`UserSlicePtr`], [`UserSlicePtrReader`], or [`UserSlicePtrWriter`] + /// depending on the direction of the buffer of the command. + /// + /// It is meant to be used in implementations of [`FileOperations::ioctl`] and + /// [`FileOperations::compat_ioctl`]. + pub fn dispatch(&mut self, handler: &T::Target, file: &File) -> Result { + let dir = (self.cmd >> bindings::_IOC_DIRSHIFT) & bindings::_IOC_DIRMASK; + if dir == bindings::_IOC_NONE { + return T::pure(handler, file, self.cmd, self.arg); + } + + let data = self.user_slice.take().ok_or(Error::EINVAL)?; + const READ_WRITE: u32 = bindings::_IOC_READ | bindings::_IOC_WRITE; + match dir { + bindings::_IOC_WRITE => T::write(handler, file, self.cmd, &mut data.reader()), + bindings::_IOC_READ => T::read(handler, file, self.cmd, &mut data.writer()), + READ_WRITE => T::read_write(handler, file, self.cmd, data), + _ => Err(Error::EINVAL), + } + } + + /// Returns the raw 32-bit value of the command and the ptr-sized argument. + pub fn raw(&self) -> (u32, usize) { + (self.cmd, self.arg) + } +} + +/// Trait for extracting file open arguments from kernel data structures. +/// +/// This is meant to be implemented by registration managers. 
+pub trait FileOpenAdapter { + /// The type of argument this adapter extracts. + type Arg; + + /// Converts untyped data stored in [`struct inode`] and [`struct file`] (when [`struct + /// file_operations::open`] is called) into the given type. For example, for `miscdev` + /// devices, a pointer to the registered [`struct miscdevice`] is stored in [`struct + /// file::private_data`]. + /// + /// # Safety + /// + /// This function must be called only when [`struct file_operations::open`] is being called for + /// a file that was registered by the implementer. + unsafe fn convert(_inode: *mut bindings::inode, _file: *mut bindings::file) + -> *const Self::Arg; +} + +/// Trait for implementers of kernel files. +/// +/// In addition to the methods in [`FileOperations`], implementers must also provide +/// [`FileOpener::open`] with a customised argument. This allows a single implementation of +/// [`FileOperations`] to be used for different types of registrations, for example, `miscdev` and +/// `chrdev`. +pub trait FileOpener: FileOperations { + /// Creates a new instance of this file. + /// + /// Corresponds to the `open` function pointer in `struct file_operations`. + fn open(context: &T) -> Result; +} + +impl> + Default> FileOpener<()> for T { + fn open(_: &()) -> Result { + Ok(Box::try_new(T::default())?) + } +} + +/// Corresponds to the kernel's `struct file_operations`. +/// +/// You implement this trait whenever you would create a `struct file_operations`. +/// +/// File descriptors may be used from multiple threads/processes concurrently, so your type must be +/// [`Sync`]. It must also be [`Send`] because [`FileOperations::release`] will be called from the +/// thread that decrements the associated file's refcount to zero. +pub trait FileOperations: Send + Sync + Sized { + /// The methods to use to populate [`struct file_operations`]. + const TO_USE: ToUse; + + /// The pointer type that will be used to hold ourselves. 
+ type Wrapper: PointerWrapper = Box; + + /// Cleans up after the last reference to the file goes away. + /// + /// Note that the object is moved, so it will be freed automatically unless the implementation + /// moves it elsewhere. + /// + /// Corresponds to the `release` function pointer in `struct file_operations`. + fn release(_obj: Self::Wrapper, _file: &File) {} + + /// Reads data from this file to the caller's buffer. + /// + /// Corresponds to the `read` and `read_iter` function pointers in `struct file_operations`. + fn read( + _this: &<::Borrowed as Deref>::Target, + _file: &File, + _data: &mut T, + _offset: u64, + ) -> Result { + Err(Error::EINVAL) + } + + /// Writes data from the caller's buffer to this file. + /// + /// Corresponds to the `write` and `write_iter` function pointers in `struct file_operations`. + fn write( + _this: &<::Borrowed as Deref>::Target, + _file: &File, + _data: &mut T, + _offset: u64, + ) -> Result { + Err(Error::EINVAL) + } + + /// Changes the position of the file. + /// + /// Corresponds to the `llseek` function pointer in `struct file_operations`. + fn seek( + _this: &<::Borrowed as Deref>::Target, + _file: &File, + _offset: SeekFrom, + ) -> Result { + Err(Error::EINVAL) + } + + /// Performs IO control operations that are specific to the file. + /// + /// Corresponds to the `unlocked_ioctl` function pointer in `struct file_operations`. + fn ioctl( + _this: &<::Borrowed as Deref>::Target, + _file: &File, + _cmd: &mut IoctlCommand, + ) -> Result { + Err(Error::EINVAL) + } + + /// Performs 32-bit IO control operations that are specific to the file on 64-bit kernels. + /// + /// Corresponds to the `compat_ioctl` function pointer in `struct file_operations`. + fn compat_ioctl( + _this: &<::Borrowed as Deref>::Target, + _file: &File, + _cmd: &mut IoctlCommand, + ) -> Result { + Err(Error::EINVAL) + } + + /// Syncs pending changes to this file. + /// + /// Corresponds to the `fsync` function pointer in `struct file_operations`. 
+ fn fsync( + _this: &<::Borrowed as Deref>::Target, + _file: &File, + _start: u64, + _end: u64, + _datasync: bool, + ) -> Result { + Err(Error::EINVAL) + } + + /// Maps areas of the caller's virtual memory with device/file memory. + /// + /// Corresponds to the `mmap` function pointer in `struct file_operations`. + /// TODO: wrap `vm_area_struct` so that we don't have to expose it. + fn mmap( + _this: &<::Borrowed as Deref>::Target, + _file: &File, + _vma: &mut bindings::vm_area_struct, + ) -> Result { + Err(Error::EINVAL) + } + + /// Checks the state of the file and optionally registers for notification when the state + /// changes. + /// + /// Corresponds to the `poll` function pointer in `struct file_operations`. + fn poll( + _this: &<::Borrowed as Deref>::Target, + _file: &File, + _table: &PollTable, + ) -> Result { + Ok(bindings::POLLIN | bindings::POLLOUT | bindings::POLLRDNORM | bindings::POLLWRNORM) + } +} diff --git a/rust/kernel/io_buffer.rs b/rust/kernel/io_buffer.rs new file mode 100644 index 0000000000000..ccecc4763aca3 --- /dev/null +++ b/rust/kernel/io_buffer.rs @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Buffers used in IO. + +use crate::Result; +use alloc::vec::Vec; +use core::mem::{size_of, MaybeUninit}; + +/// Represents a buffer to be read from during IO. +pub trait IoBufferReader { + /// Returns the number of bytes left to be read from the io buffer. + /// + /// Note that even reading less than this number of bytes may fail. + fn len(&self) -> usize; + + /// Returns `true` if no data is available in the io buffer. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Reads raw data from the io buffer into a raw kernel buffer. + /// + /// # Safety + /// + /// The output buffer must be valid. + unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result; + + /// Reads all data remaining in the io buffer. + /// + /// Returns `EFAULT` if the address does not currently point to mapped, readable memory. 
+ fn read_all(&mut self) -> Result> { + let mut data = Vec::::new(); + data.try_resize(self.len(), 0)?; + + // SAFETY: The output buffer is valid as we just allocated it. + unsafe { self.read_raw(data.as_mut_ptr(), data.len())? }; + Ok(data) + } + + /// Reads a byte slice from the io buffer. + /// + /// Returns `EFAULT` if the byte slice is bigger than the remaining size of the user slice or + /// if the address does not currently point to mapped, readable memory. + fn read_slice(&mut self, data: &mut [u8]) -> Result { + // SAFETY: The output buffer is valid as it's coming from a live reference. + unsafe { self.read_raw(data.as_mut_ptr(), data.len()) } + } + + /// Reads the contents of a plain old data (POD) type from the io buffer. + fn read(&mut self) -> Result { + let mut out = MaybeUninit::::uninit(); + // SAFETY: The buffer is valid as it was just allocated. + unsafe { self.read_raw(out.as_mut_ptr() as _, size_of::()) }?; + // SAFETY: We just initialised the data. + Ok(unsafe { out.assume_init() }) + } +} + +/// Represents a buffer to be written to during IO. +pub trait IoBufferWriter { + /// Returns the number of bytes left to be written into the io buffer. + /// + /// Note that even writing less than this number of bytes may fail. + fn len(&self) -> usize; + + /// Returns `true` if the io buffer cannot hold any additional data. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Writes zeroes to the io buffer. + /// + /// Differently from the other write functions, `clear` will zero as much as it can and update + /// the writer internal state to reflect this. It will, however, return an error if it cannot + /// clear `len` bytes. + /// + /// For example, if a caller requests that 100 bytes be cleared but a segfault happens after + /// 20 bytes, then EFAULT is returned and the writer is advanced by 20 bytes. + fn clear(&mut self, len: usize) -> Result; + + /// Writes a byte slice into the io buffer. 
+ /// + /// Returns `EFAULT` if the byte slice is bigger than the remaining size of the io buffer or if + /// the address does not currently point to mapped, writable memory. + fn write_slice(&mut self, data: &[u8]) -> Result { + // SAFETY: The input buffer is valid as it's coming from a live reference. + unsafe { self.write_raw(data.as_ptr(), data.len()) } + } + + /// Writes raw data to the io buffer from a raw kernel buffer. + /// + /// # Safety + /// + /// The input buffer must be valid. + unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result; + + /// Writes the contents of the given data into the io buffer. + fn write(&mut self, data: &T) -> Result { + // SAFETY: The input buffer is valid as it's coming from a live + // reference to a type that implements `WritableToBytes`. + unsafe { self.write_raw(data as *const T as _, size_of::()) } + } +} + +/// Specifies that a type is safely readable from byte slices. +/// +/// Not all types can be safely read from byte slices; examples from +/// <https://doc.rust-lang.org/reference/behavior-considered-undefined.html> include `bool` +/// that must be either `0` or `1`, and `char` that cannot be a surrogate or above `char::MAX`. +/// +/// # Safety +/// +/// Implementers must ensure that the type is made up only of types that can be safely read from +/// arbitrary byte sequences (e.g., `u32`, `u64`, etc.). +pub unsafe trait ReadableFromBytes {} + +// SAFETY: All bit patterns are acceptable values of the types below. +unsafe impl ReadableFromBytes for u8 {} +unsafe impl ReadableFromBytes for u16 {} +unsafe impl ReadableFromBytes for u32 {} +unsafe impl ReadableFromBytes for u64 {} +unsafe impl ReadableFromBytes for usize {} +unsafe impl ReadableFromBytes for i8 {} +unsafe impl ReadableFromBytes for i16 {} +unsafe impl ReadableFromBytes for i32 {} +unsafe impl ReadableFromBytes for i64 {} +unsafe impl ReadableFromBytes for isize {} + +/// Specifies that a type is safely writable to byte slices. 
+/// +/// This means that we don't read undefined values (which leads to UB) in preparation for writing +/// to the byte slice. It also ensures that no potentially sensitive information is leaked into the +/// byte slices. +/// +/// # Safety +/// +/// A type must not include padding bytes and must be fully initialised to safely implement +/// [`WritableToBytes`] (i.e., it doesn't contain [`MaybeUninit`] fields). A composition of +/// writable types in a structure is not necessarily writable because it may result in padding +/// bytes. +pub unsafe trait WritableToBytes {} + +// SAFETY: Initialised instances of the following types have no uninitialised portions. +unsafe impl WritableToBytes for u8 {} +unsafe impl WritableToBytes for u16 {} +unsafe impl WritableToBytes for u32 {} +unsafe impl WritableToBytes for u64 {} +unsafe impl WritableToBytes for usize {} +unsafe impl WritableToBytes for i8 {} +unsafe impl WritableToBytes for i16 {} +unsafe impl WritableToBytes for i32 {} +unsafe impl WritableToBytes for i64 {} +unsafe impl WritableToBytes for isize {} diff --git a/rust/kernel/iov_iter.rs b/rust/kernel/iov_iter.rs new file mode 100644 index 0000000000000..d778e1ac97607 --- /dev/null +++ b/rust/kernel/iov_iter.rs @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! IO vector iterators. +//! +//! C header: [`include/linux/uio.h`](../../../../include/linux/uio.h) + +use crate::{ + bindings, c_types, + error::Error, + io_buffer::{IoBufferReader, IoBufferWriter}, + Result, +}; + +extern "C" { + fn rust_helper_copy_to_iter( + addr: *const c_types::c_void, + bytes: usize, + i: *mut bindings::iov_iter, + ) -> usize; + + fn rust_helper_copy_from_iter( + addr: *mut c_types::c_void, + bytes: usize, + i: *mut bindings::iov_iter, + ) -> usize; +} + +/// Wraps the kernel's `struct iov_iter`. +/// +/// # Invariants +/// +/// The pointer `IovIter::ptr` is non-null and valid. 
+pub struct IovIter { + ptr: *mut bindings::iov_iter, +} + +impl IovIter { + fn common_len(&self) -> usize { + // SAFETY: `IovIter::ptr` is guaranteed to be valid by the type invariants. + unsafe { (*self.ptr).count } + } + + /// Constructs a new [`struct iov_iter`] wrapper. + /// + /// # Safety + /// + /// The pointer `ptr` must be non-null and valid for the lifetime of the object. + pub(crate) unsafe fn from_ptr(ptr: *mut bindings::iov_iter) -> Self { + // INVARIANTS: the safety contract ensures the type invariant will hold. + Self { ptr } + } +} + +impl IoBufferWriter for IovIter { + fn len(&self) -> usize { + self.common_len() + } + + fn clear(&mut self, mut len: usize) -> Result { + while len > 0 { + // SAFETY: `IovIter::ptr` is guaranteed to be valid by the type invariants. + let written = unsafe { bindings::iov_iter_zero(len, self.ptr) }; + if written == 0 { + return Err(Error::EFAULT); + } + + len -= written; + } + Ok(()) + } + + unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result { + let res = unsafe { rust_helper_copy_to_iter(data as _, len, self.ptr) }; + if res != len { + Err(Error::EFAULT) + } else { + Ok(()) + } + } +} + +impl IoBufferReader for IovIter { + fn len(&self) -> usize { + self.common_len() + } + + unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result { + let res = unsafe { rust_helper_copy_from_iter(out as _, len, self.ptr) }; + if res != len { + Err(Error::EFAULT) + } else { + Ok(()) + } + } +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs new file mode 100644 index 0000000000000..86c580fd7f68e --- /dev/null +++ b/rust/kernel/lib.rs @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! The `kernel` crate. +//! +//! This crate contains the kernel APIs that have been ported or wrapped for +//! usage by Rust code in the kernel and is shared by all of them. +//! +//! In other words, all the rest of the Rust code in the kernel (e.g. kernel +//! 
modules written in Rust) depends on [`core`], [`alloc`] and this crate. +//! +//! If you need a kernel C API that is not ported or wrapped yet here, then +//! do so first instead of bypassing this crate. + +#![no_std] +#![feature( + allocator_api, + alloc_error_handler, + associated_type_defaults, + const_fn_trait_bound, + const_mut_refs, + const_panic, + const_raw_ptr_deref, + const_unreachable_unchecked, + receiver_trait, + try_reserve +)] + +// Ensure conditional compilation based on the kernel configuration works; +// otherwise we may silently break things like initcall handling. +#[cfg(not(CONFIG_RUST))] +compile_error!("Missing kernel configuration for conditional compilation"); + +#[cfg(not(test))] +#[cfg(not(testlib))] +mod allocator; + +#[doc(hidden)] +pub mod bindings; + +pub mod buffer; +pub mod c_types; +pub mod chrdev; +mod error; +pub mod file; +pub mod file_operations; +pub mod miscdev; +pub mod pages; +pub mod security; +pub mod str; +pub mod task; +pub mod traits; + +pub mod linked_list; +mod raw_list; +pub mod rbtree; + +#[doc(hidden)] +pub mod module_param; + +mod build_assert; +pub mod prelude; +pub mod print; +pub mod random; +mod static_assert; +pub mod sync; + +#[cfg(CONFIG_SYSCTL)] +pub mod sysctl; + +pub mod io_buffer; +pub mod iov_iter; +pub mod of; +pub mod platdev; +mod types; +pub mod user_ptr; + +#[doc(hidden)] +pub use build_error::build_error; + +pub use crate::error::{Error, Result}; +pub use crate::types::{Mode, ScopeGuard}; + +/// Page size defined in terms of the `PAGE_SHIFT` macro from C. +/// +/// [`PAGE_SHIFT`]: ../../../include/asm-generic/page.h +pub const PAGE_SIZE: usize = 1 << bindings::PAGE_SHIFT; + +/// Prefix to appear before log messages printed from within the kernel crate. +const __LOG_PREFIX: &[u8] = b"rust_kernel\0"; + +/// The top level entrypoint to implementing a kernel module. +/// +/// For any teardown or cleanup operations, your type may implement [`Drop`]. 
+pub trait KernelModule: Sized + Sync { + /// Called at module initialization time. + /// + /// Use this method to perform whatever setup or registration your module + /// should do. + /// + /// Equivalent to the `module_init` macro in the C API. + fn init() -> Result; +} + +/// Equivalent to `THIS_MODULE` in the C API. +/// +/// C header: `include/linux/export.h` +pub struct ThisModule(*mut bindings::module); + +// SAFETY: `THIS_MODULE` may be used from all threads within a module. +unsafe impl Sync for ThisModule {} + +impl ThisModule { + /// Creates a [`ThisModule`] given the `THIS_MODULE` pointer. + /// + /// # Safety + /// + /// The pointer must be equal to the right `THIS_MODULE`. + pub const unsafe fn from_ptr(ptr: *mut bindings::module) -> ThisModule { + ThisModule(ptr) + } + + /// Locks the module parameters to access them. + /// + /// Returns a [`KParamGuard`] that will release the lock when dropped. + pub fn kernel_param_lock(&self) -> KParamGuard<'_> { + // SAFETY: `kernel_param_lock` will check if the pointer is null and + // use the built-in mutex in that case. + #[cfg(CONFIG_SYSFS)] + unsafe { + bindings::kernel_param_lock(self.0) + } + + KParamGuard { this_module: self } + } +} + +/// Scoped lock on the kernel parameters of [`ThisModule`]. +/// +/// Lock will be released when this struct is dropped. +pub struct KParamGuard<'a> { + this_module: &'a ThisModule, +} + +#[cfg(CONFIG_SYSFS)] +impl<'a> Drop for KParamGuard<'a> { + fn drop(&mut self) { + // SAFETY: `kernel_param_unlock` will check if the pointer is null and + // use the built-in mutex in that case. The existence of `self` + // guarantees that the lock is held. + unsafe { bindings::kernel_param_unlock(self.this_module.0) } + } +} + +/// Calculates the offset of a field from the beginning of the struct it belongs to. 
+/// +/// # Example +/// +/// ``` +/// # use kernel::prelude::*; +/// # use kernel::offset_of; +/// struct Test { +/// a: u64, +/// b: u32, +/// } +/// +/// fn test() { +/// // This prints `8`. +/// pr_info!("{}\n", offset_of!(Test, b)); +/// } +/// ``` +#[macro_export] +macro_rules! offset_of { + ($type:ty, $($f:tt)*) => {{ + let tmp = core::mem::MaybeUninit::<$type>::uninit(); + let outer = tmp.as_ptr(); + // To avoid warnings when nesting `unsafe` blocks. + #[allow(unused_unsafe)] + // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures that + // we don't actually read from `outer` (which would be UB) nor create an intermediate + // reference. + let inner = unsafe { core::ptr::addr_of!((*outer).$($f)*) } as *const u8; + // To avoid warnings when nesting `unsafe` blocks. + #[allow(unused_unsafe)] + // SAFETY: The two pointers are within the same allocation block. + unsafe { inner.offset_from(outer as *const u8) } + }} +} + +/// Produces a pointer to an object from a pointer to one of its fields. +/// +/// # Safety +/// +/// Callers must ensure that the pointer to the field is in fact a pointer to the specified field, +/// as opposed to a pointer to another object of the same type. +/// +/// # Example +/// +/// ``` +/// # use kernel::prelude::*; +/// # use kernel::container_of; +/// struct Test { +/// a: u64, +/// b: u32, +/// } +/// +/// fn test() { +/// let test = Test { a: 10, b: 20 }; +/// let b_ptr = &test.b; +/// let test_alias = unsafe { container_of!(b_ptr, Test, b) }; +/// // This prints `true`. +/// pr_info!("{}\n", core::ptr::eq(&test, test_alias)); +/// } +/// ``` +#[macro_export] +macro_rules! 
container_of { + ($ptr:expr, $type:ty, $($f:tt)*) => {{ + let offset = $crate::offset_of!($type, $($f)*); + unsafe { ($ptr as *const _ as *const u8).offset(-offset) as *const $type } + }} +} diff --git a/rust/kernel/linked_list.rs b/rust/kernel/linked_list.rs new file mode 100644 index 0000000000000..d57bf1b881a35 --- /dev/null +++ b/rust/kernel/linked_list.rs @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Linked lists. +//! +//! TODO: This module is a work in progress. + +use alloc::{boxed::Box, sync::Arc}; +use core::ptr::NonNull; + +pub use crate::raw_list::{Cursor, GetLinks, Links}; +use crate::{raw_list, raw_list::RawList}; + +// TODO: Use the one from `kernel::file_operations::PointerWrapper` instead. +/// Wraps an object to be inserted in a linked list. +pub trait Wrapper { + /// Converts the wrapped object into a pointer that represents it. + fn into_pointer(self) -> NonNull; + + /// Converts the object back from the pointer representation. + /// + /// # Safety + /// + /// The passed pointer must come from a previous call to [`Wrapper::into_pointer()`]. + unsafe fn from_pointer(ptr: NonNull) -> Self; + + /// Returns a reference to the wrapped object. 
+ fn as_ref(&self) -> &T; +} + +impl Wrapper for Box { + fn into_pointer(self) -> NonNull { + NonNull::new(Box::into_raw(self)).unwrap() + } + + unsafe fn from_pointer(ptr: NonNull) -> Self { + unsafe { Box::from_raw(ptr.as_ptr()) } + } + + fn as_ref(&self) -> &T { + AsRef::as_ref(self) + } +} + +impl Wrapper for Arc { + fn into_pointer(self) -> NonNull { + NonNull::new(Arc::into_raw(self) as _).unwrap() + } + + unsafe fn from_pointer(ptr: NonNull) -> Self { + unsafe { Arc::from_raw(ptr.as_ptr()) } + } + + fn as_ref(&self) -> &T { + AsRef::as_ref(self) + } +} + +impl Wrapper for &T { + fn into_pointer(self) -> NonNull { + NonNull::from(self) + } + + unsafe fn from_pointer(ptr: NonNull) -> Self { + unsafe { &*ptr.as_ptr() } + } + + fn as_ref(&self) -> &T { + self + } +} + +/// A descriptor of wrapped list elements. +pub trait GetLinksWrapped: GetLinks { + /// Specifies which wrapper (e.g., `Box` and `Arc`) wraps the list entries. + type Wrapped: Wrapper; +} + +impl GetLinksWrapped for Box +where + Box: GetLinks, +{ + type Wrapped = Box< as GetLinks>::EntryType>; +} + +impl GetLinks for Box { + type EntryType = T::EntryType; + fn get_links(data: &Self::EntryType) -> &Links { + ::get_links(data) + } +} + +impl GetLinksWrapped for Arc +where + Arc: GetLinks, +{ + type Wrapped = Arc< as GetLinks>::EntryType>; +} + +impl GetLinks for Arc { + type EntryType = T::EntryType; + fn get_links(data: &Self::EntryType) -> &Links { + ::get_links(data) + } +} + +/// A linked list. +/// +/// Elements in the list are wrapped and ownership is transferred to the list while the element is +/// in the list. +pub struct List { + list: RawList, +} + +impl List { + /// Constructs a new empty linked list. + pub fn new() -> Self { + Self { + list: RawList::new(), + } + } + + /// Returns whether the list is empty. + pub fn is_empty(&self) -> bool { + self.list.is_empty() + } + + /// Adds the given object to the end (back) of the list. 
+ /// + /// It is dropped if it's already on this (or another) list; this can happen for + /// reference-counted objects, so dropping means decrementing the reference count. + pub fn push_back(&mut self, data: G::Wrapped) { + let ptr = data.into_pointer(); + + // SAFETY: We took ownership of the entry, so it is safe to insert it. + if !unsafe { self.list.push_back(ptr.as_ref()) } { + // If insertion failed, rebuild object so that it can be freed. + // SAFETY: We just called `into_pointer` above. + unsafe { G::Wrapped::from_pointer(ptr) }; + } + } + + /// Inserts the given object after `existing`. + /// + /// It is dropped if it's already on this (or another) list; this can happen for + /// reference-counted objects, so dropping means decrementing the reference count. + /// + /// # Safety + /// + /// Callers must ensure that `existing` points to a valid entry that is on the list. + pub unsafe fn insert_after(&mut self, existing: NonNull, data: G::Wrapped) { + let ptr = data.into_pointer(); + let entry = unsafe { &*existing.as_ptr() }; + if unsafe { !self.list.insert_after(entry, ptr.as_ref()) } { + // If insertion failed, rebuild object so that it can be freed. + unsafe { G::Wrapped::from_pointer(ptr) }; + } + } + + /// Removes the given entry. + /// + /// # Safety + /// + /// Callers must ensure that `data` is either on this list or in no list. It being on another + /// list leads to memory unsafety. + pub unsafe fn remove(&mut self, data: &G::Wrapped) -> Option { + let entry_ref = Wrapper::as_ref(data); + if unsafe { self.list.remove(entry_ref) } { + Some(unsafe { G::Wrapped::from_pointer(NonNull::from(entry_ref)) }) + } else { + None + } + } + + /// Removes the element currently at the front of the list and returns it. + /// + /// Returns `None` if the list is empty. + pub fn pop_front(&mut self) -> Option { + let front = self.list.pop_front()?; + // SAFETY: Elements on the list were inserted after a call to `into_pointer`. 
+ Some(unsafe { G::Wrapped::from_pointer(front) }) + } + + /// Returns a cursor starting on the first (front) element of the list. + pub fn cursor_front(&self) -> Cursor<'_, G> { + self.list.cursor_front() + } + + /// Returns a mutable cursor starting on the first (front) element of the list. + pub fn cursor_front_mut(&mut self) -> CursorMut<'_, G> { + CursorMut::new(self.list.cursor_front_mut()) + } +} + +impl Default for List { + fn default() -> Self { + Self::new() + } +} + +impl Drop for List { + fn drop(&mut self) { + while self.pop_front().is_some() {} + } +} + +/// A list cursor that allows traversing a linked list and inspecting & mutating elements. +pub struct CursorMut<'a, G: GetLinksWrapped> { + cursor: raw_list::CursorMut<'a, G>, +} + +impl<'a, G: GetLinksWrapped> CursorMut<'a, G> { + fn new(cursor: raw_list::CursorMut<'a, G>) -> Self { + Self { cursor } + } + + /// Returns the element the cursor is currently positioned on. + pub fn current(&mut self) -> Option<&mut G::EntryType> { + self.cursor.current() + } + + /// Removes the element the cursor is currently positioned on. + /// + /// After removal, it advances the cursor to the next element. + pub fn remove_current(&mut self) -> Option { + let ptr = self.cursor.remove_current()?; + + // SAFETY: Elements on the list were inserted after a call to `into_pointer`. + Some(unsafe { G::Wrapped::from_pointer(ptr) }) + } + + /// Returns the element immediately after the one the cursor is positioned on. + pub fn peek_next(&mut self) -> Option<&mut G::EntryType> { + self.cursor.peek_next() + } + + /// Returns the element immediately before the one the cursor is positioned on. + pub fn peek_prev(&mut self) -> Option<&mut G::EntryType> { + self.cursor.peek_prev() + } + + /// Moves the cursor to the next element. 
+ pub fn move_next(&mut self) { + self.cursor.move_next(); + } +} diff --git a/rust/kernel/miscdev.rs b/rust/kernel/miscdev.rs new file mode 100644 index 0000000000000..e4d94d7416efc --- /dev/null +++ b/rust/kernel/miscdev.rs @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Miscellaneous devices. +//! +//! C header: [`include/linux/miscdevice.h`](../../../../include/linux/miscdevice.h) +//! +//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html> + +use crate::bindings; +use crate::error::{Error, Result}; +use crate::file_operations::{FileOpenAdapter, FileOpener, FileOperationsVtable}; +use crate::str::CStr; +use alloc::boxed::Box; +use core::marker::PhantomPinned; +use core::pin::Pin; + +/// A registration of a miscellaneous device. +pub struct Registration { + registered: bool, + mdev: bindings::miscdevice, + _pin: PhantomPinned, + + /// Context initialised on construction and made available to all file instances on + /// [`FileOpener::open`]. + pub context: T, +} + +impl Registration { + /// Creates a new [`Registration`] but does not register it yet. + /// + /// It is allowed to move. + pub fn new(context: T) -> Self { + Self { + registered: false, + mdev: bindings::miscdevice::default(), + _pin: PhantomPinned, + context, + } + } + + /// Registers a miscellaneous device. + /// + /// Returns a pinned heap-allocated representation of the registration. + pub fn new_pinned>( + name: &'static CStr, + minor: Option, + context: T, + ) -> Result>> { + let mut r = Pin::from(Box::try_new(Self::new(context))?); + r.as_mut().register::(name, minor)?; + Ok(r) + } + + /// Registers a miscellaneous device with the rest of the kernel. + /// + /// It must be pinned because the memory block that represents the registration is + /// self-referential. If a minor is not given, the kernel allocates a new one if possible. + pub fn register>( + self: Pin<&mut Self>, + name: &'static CStr, + minor: Option, + ) -> Result { + // SAFETY: We must ensure that we never move out of `this`. 
+ let this = unsafe { self.get_unchecked_mut() }; + if this.registered { + // Already registered. + return Err(Error::EINVAL); + } + + // SAFETY: The adapter is compatible with `misc_register`. + this.mdev.fops = unsafe { FileOperationsVtable::::build() }; + this.mdev.name = name.as_char_ptr(); + this.mdev.minor = minor.unwrap_or(bindings::MISC_DYNAMIC_MINOR as i32); + + let ret = unsafe { bindings::misc_register(&mut this.mdev) }; + if ret < 0 { + return Err(Error::from_kernel_errno(ret)); + } + this.registered = true; + Ok(()) + } +} + +impl FileOpenAdapter for Registration { + type Arg = T; + + unsafe fn convert(_inode: *mut bindings::inode, file: *mut bindings::file) -> *const Self::Arg { + // TODO: `SAFETY` comment required here even if `unsafe` is not present, + // because `container_of!` hides it. Ideally we would not allow + // `unsafe` code as parameters to macros. + let reg = crate::container_of!((*file).private_data, Self, mdev); + unsafe { &(*reg).context } + } +} + +// SAFETY: The only method is `register()`, which requires a (pinned) mutable `Registration`, so it +// is safe to pass `&Registration` to multiple threads because it offers no interior mutability, +// except maybe through `Registration::context`, but it is itself `Sync`. +unsafe impl Sync for Registration {} + +// SAFETY: All functions work from any thread. So as long as the `Registration::context` is +// `Send`, so is `Registration`. `T` needs to be `Sync` because it's a requirement of +// `Registration`. +unsafe impl Send for Registration {} + +impl Drop for Registration { + /// Removes the registration from the kernel if it has completed successfully before. 
+ fn drop(&mut self) { + if self.registered { + unsafe { bindings::misc_deregister(&mut self.mdev) } + } + } +} diff --git a/rust/kernel/module_param.rs b/rust/kernel/module_param.rs new file mode 100644 index 0000000000000..a588449c41fab --- /dev/null +++ b/rust/kernel/module_param.rs @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Types for module parameters. +//! +//! C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h) + +use crate::str::CStr; +use core::fmt::Write; + +/// Types that can be used for module parameters. +/// +/// Note that displaying the type in `sysfs` will fail if +/// [`alloc::string::ToString::to_string`] (as implemented through the +/// [`core::fmt::Display`] trait) writes more than [`PAGE_SIZE`] +/// bytes (including an additional null terminator). +/// +/// [`PAGE_SIZE`]: `crate::PAGE_SIZE` +pub trait ModuleParam: core::fmt::Display + core::marker::Sized { + /// The `ModuleParam` will be used by the kernel module through this type. + /// + /// This may differ from `Self` if, for example, `Self` needs to track + /// ownership without exposing it or allocate extra space for other possible + /// parameter values. See [`StringParam`] or [`ArrayParam`] for examples. + type Value: ?Sized; + + /// Whether the parameter is allowed to be set without an argument. + /// + /// Setting this to `true` allows the parameter to be passed without an + /// argument (e.g. just `module.param` instead of `module.param=foo`). + const NOARG_ALLOWED: bool; + + /// Convert a parameter argument into the parameter value. + /// + /// `None` should be returned when parsing of the argument fails. + /// `arg == None` indicates that the parameter was passed without an + /// argument. If `NOARG_ALLOWED` is set to `false` then `arg` is guaranteed + /// to always be `Some(_)`. + /// + /// Parameters passed at boot time will be set before [`kmalloc`] is + /// available (even if the module is loaded at a later time). 
However, in + /// this case, the argument buffer will be valid for the entire lifetime of + /// the kernel. So implementations of this method which need to allocate + /// should first check that the allocator is available (with + /// [`crate::bindings::slab_is_available`]) and when it is not available + /// provide an alternative implementation which doesn't allocate. In cases + /// where the allocator is not available it is safe to save references to + /// `arg` in `Self`, but in other cases a copy should be made. + /// + /// [`kmalloc`]: ../../../include/linux/slab.h + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option; + + /// Get the current value of the parameter for use in the kernel module. + /// + /// This function should not be used directly. Instead use the wrapper + /// `read` which will be generated by [`macros::module`]. + fn value(&self) -> &Self::Value; + + /// Set the module parameter from a string. + /// + /// Used to set the parameter value when loading the module or when set + /// through `sysfs`. + /// + /// # Safety + /// + /// If `val` is non-null then it must point to a valid null-terminated + /// string. The `arg` field of `param` must be an instance of `Self`. + unsafe extern "C" fn set_param( + val: *const crate::c_types::c_char, + param: *const crate::bindings::kernel_param, + ) -> crate::c_types::c_int { + let arg = if val.is_null() { + None + } else { + Some(unsafe { CStr::from_char_ptr(val).as_bytes() }) + }; + match Self::try_from_param_arg(arg) { + Some(new_value) => { + let old_value = unsafe { (*param).__bindgen_anon_1.arg as *mut Self }; + let _ = unsafe { core::ptr::replace(old_value, new_value) }; + 0 + } + None => crate::error::Error::EINVAL.to_kernel_errno(), + } + } + + /// Write a string representation of the current parameter value to `buf`. + /// + /// Used for displaying the current parameter value in `sysfs`. 
+ /// + /// # Safety + /// + /// `buf` must be a buffer of length at least `kernel::PAGE_SIZE` that is + /// writeable. The `arg` field of `param` must be an instance of `Self`. + unsafe extern "C" fn get_param( + buf: *mut crate::c_types::c_char, + param: *const crate::bindings::kernel_param, + ) -> crate::c_types::c_int { + let slice = unsafe { core::slice::from_raw_parts_mut(buf as *mut u8, crate::PAGE_SIZE) }; + let mut buf = crate::buffer::Buffer::new(slice); + match unsafe { write!(buf, "{}\0", *((*param).__bindgen_anon_1.arg as *mut Self)) } { + Err(_) => crate::error::Error::EINVAL.to_kernel_errno(), + Ok(()) => buf.bytes_written() as crate::c_types::c_int, + } + } + + /// Drop the parameter. + /// + /// Called when unloading a module. + /// + /// # Safety + /// + /// `arg` must point to an instance of `Self`. + unsafe extern "C" fn free(arg: *mut crate::c_types::c_void) { + unsafe { core::ptr::drop_in_place(arg as *mut Self) }; + } +} + +/// Trait for parsing integers. +/// +/// Strings beginning with `0x`, `0o`, or `0b` are parsed as hex, octal, or +/// binary respectively. Strings beginning with `0` otherwise are parsed as +/// octal. Anything else is parsed as decimal. A leading `+` or `-` is also +/// permitted. Any string parsed by [`kstrtol()`] or [`kstrtoul()`] will be +/// successfully parsed. 
+/// +/// [`kstrtol()`]: https://www.kernel.org/doc/html/latest/core-api/kernel-api.html#c.kstrtol +/// [`kstrtoul()`]: https://www.kernel.org/doc/html/latest/core-api/kernel-api.html#c.kstrtoul +trait ParseInt: Sized { + fn from_str_radix(src: &str, radix: u32) -> Result; + fn checked_neg(self) -> Option; + + fn from_str_unsigned(src: &str) -> Result { + let (radix, digits) = if let Some(n) = src.strip_prefix("0x") { + (16, n) + } else if let Some(n) = src.strip_prefix("0X") { + (16, n) + } else if let Some(n) = src.strip_prefix("0o") { + (8, n) + } else if let Some(n) = src.strip_prefix("0O") { + (8, n) + } else if let Some(n) = src.strip_prefix("0b") { + (2, n) + } else if let Some(n) = src.strip_prefix("0B") { + (2, n) + } else if src.starts_with('0') { + (8, src) + } else { + (10, src) + }; + Self::from_str_radix(digits, radix) + } + + fn from_str(src: &str) -> Option { + match src.bytes().next() { + None => None, + Some(b'-') => Self::from_str_unsigned(&src[1..]).ok()?.checked_neg(), + Some(b'+') => Some(Self::from_str_unsigned(&src[1..]).ok()?), + Some(_) => Some(Self::from_str_unsigned(src).ok()?), + } + } +} + +macro_rules! impl_parse_int { + ($ty:ident) => { + impl ParseInt for $ty { + fn from_str_radix(src: &str, radix: u32) -> Result { + $ty::from_str_radix(src, radix) + } + + fn checked_neg(self) -> Option { + self.checked_neg() + } + } + }; +} + +impl_parse_int!(i8); +impl_parse_int!(u8); +impl_parse_int!(i16); +impl_parse_int!(u16); +impl_parse_int!(i32); +impl_parse_int!(u32); +impl_parse_int!(i64); +impl_parse_int!(u64); +impl_parse_int!(isize); +impl_parse_int!(usize); + +macro_rules! 
impl_module_param { + ($ty:ident) => { + impl ModuleParam for $ty { + type Value = $ty; + + const NOARG_ALLOWED: bool = false; + + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option { + let bytes = arg?; + let utf8 = core::str::from_utf8(bytes).ok()?; + <$ty as crate::module_param::ParseInt>::from_str(utf8) + } + + fn value(&self) -> &Self::Value { + self + } + } + }; +} + +#[doc(hidden)] +#[macro_export] +/// Generate a static [`kernel_param_ops`](../../../include/linux/moduleparam.h) struct. +/// +/// # Example +/// ```ignore +/// make_param_ops!( +/// /// Documentation for new param ops. +/// PARAM_OPS_MYTYPE, // Name for the static. +/// MyType // A type which implements [`ModuleParam`]. +/// ); +/// ``` +macro_rules! make_param_ops { + ($ops:ident, $ty:ty) => { + $crate::make_param_ops!( + #[doc=""] + $ops, + $ty + ); + }; + ($(#[$meta:meta])* $ops:ident, $ty:ty) => { + $(#[$meta])* + /// + /// Static [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// struct generated by [`make_param_ops`]. + pub static $ops: $crate::bindings::kernel_param_ops = $crate::bindings::kernel_param_ops { + flags: if <$ty as $crate::module_param::ModuleParam>::NOARG_ALLOWED { + $crate::bindings::KERNEL_PARAM_OPS_FL_NOARG + } else { + 0 + }, + set: Some(<$ty as $crate::module_param::ModuleParam>::set_param), + get: Some(<$ty as $crate::module_param::ModuleParam>::get_param), + free: Some(<$ty as $crate::module_param::ModuleParam>::free), + }; + }; +} + +impl_module_param!(i8); +impl_module_param!(u8); +impl_module_param!(i16); +impl_module_param!(u16); +impl_module_param!(i32); +impl_module_param!(u32); +impl_module_param!(i64); +impl_module_param!(u64); +impl_module_param!(isize); +impl_module_param!(usize); + +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`i8`]. 
+ PARAM_OPS_I8, + i8 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`u8`]. + PARAM_OPS_U8, + u8 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`i16`]. + PARAM_OPS_I16, + i16 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`u16`]. + PARAM_OPS_U16, + u16 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`i32`]. + PARAM_OPS_I32, + i32 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`u32`]. + PARAM_OPS_U32, + u32 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`i64`]. + PARAM_OPS_I64, + i64 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`u64`]. + PARAM_OPS_U64, + u64 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`isize`]. + PARAM_OPS_ISIZE, + isize +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`usize`]. + PARAM_OPS_USIZE, + usize +); + +impl ModuleParam for bool { + type Value = bool; + + const NOARG_ALLOWED: bool = true; + + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option { + match arg { + None => Some(true), + Some(b"y") | Some(b"Y") | Some(b"1") | Some(b"true") => Some(true), + Some(b"n") | Some(b"N") | Some(b"0") | Some(b"false") => Some(false), + _ => None, + } + } + + fn value(&self) -> &Self::Value { + self + } +} + +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`bool`]. 
+ PARAM_OPS_BOOL, + bool +); + +/// An array of at __most__ `N` values. +/// +/// # Invariant +/// +/// The first `self.used` elements of `self.values` are initialized. +pub struct ArrayParam { + values: [core::mem::MaybeUninit; N], + used: usize, +} + +impl ArrayParam { + fn values(&self) -> &[T] { + // SAFETY: The invariant maintained by `ArrayParam` allows us to cast + // the first `self.used` elements to `T`. + unsafe { + &*(&self.values[0..self.used] as *const [core::mem::MaybeUninit] as *const [T]) + } + } +} + +impl ArrayParam { + const fn new() -> Self { + // INVARIANT: The first `self.used` elements of `self.values` are + // initialized. + ArrayParam { + values: [core::mem::MaybeUninit::uninit(); N], + used: 0, + } + } + + const fn push(&mut self, val: T) { + if self.used < N { + // INVARIANT: The first `self.used` elements of `self.values` are + // initialized. + self.values[self.used] = core::mem::MaybeUninit::new(val); + self.used += 1; + } + } + + /// Create an instance of `ArrayParam` initialized with `vals`. + /// + /// This function is only meant to be used in the [`macros::module`] macro. + pub const fn create(vals: &[T]) -> Self { + let mut result = ArrayParam::new(); + let mut i = 0; + while i < vals.len() { + result.push(vals[i]); + i += 1; + } + result + } +} + +impl core::fmt::Display for ArrayParam { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + for val in self.values() { + write!(f, "{},", val)?; + } + Ok(()) + } +} + +impl ModuleParam + for ArrayParam +{ + type Value = [T]; + + const NOARG_ALLOWED: bool = false; + + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option { + arg.and_then(|args| { + let mut result = Self::new(); + for arg in args.split(|b| *b == b',') { + result.push(T::try_from_param_arg(Some(arg))?); + } + Some(result) + }) + } + + fn value(&self) -> &Self::Value { + self.values() + } +} + +/// A C-style string parameter. +/// +/// The Rust version of the [`charp`] parameter. 
This type is meant to be +/// used by the [`macros::module`] macro, not handled directly. Instead use the +/// `read` method generated by that macro. +/// +/// [`charp`]: ../../../include/linux/moduleparam.h +pub enum StringParam { + /// A borrowed parameter value. + /// + /// Either the default value (which is static in the module) or borrowed + /// from the original argument buffer used to set the value. + Ref(&'static [u8]), + + /// A value that was allocated when the parameter was set. + /// + /// The value needs to be freed when the parameter is reset or the module is + /// unloaded. + Owned(alloc::vec::Vec), +} + +impl StringParam { + fn bytes(&self) -> &[u8] { + match self { + StringParam::Ref(bytes) => *bytes, + StringParam::Owned(vec) => &vec[..], + } + } +} + +impl core::fmt::Display for StringParam { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let bytes = self.bytes(); + match core::str::from_utf8(bytes) { + Ok(utf8) => write!(f, "{}", utf8), + Err(_) => write!(f, "{:?}", bytes), + } + } +} + +impl ModuleParam for StringParam { + type Value = [u8]; + + const NOARG_ALLOWED: bool = false; + + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option { + // SAFETY: It is always safe to call [`slab_is_available`](../../../include/linux/slab.h). + let slab_available = unsafe { crate::bindings::slab_is_available() }; + arg.and_then(|arg| { + if slab_available { + let mut vec = alloc::vec::Vec::new(); + vec.try_extend_from_slice(arg).ok()?; + Some(StringParam::Owned(vec)) + } else { + Some(StringParam::Ref(arg)) + } + }) + } + + fn value(&self) -> &Self::Value { + self.bytes() + } +} + +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`StringParam`]. 
+ PARAM_OPS_STR, + StringParam +); diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs new file mode 100644 index 0000000000000..78aa5956f03fe --- /dev/null +++ b/rust/kernel/of.rs @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Devicetree and Open Firmware abstractions. +//! +//! C header: [`include/linux/of_*.h`](../../../../include/linux/of_*.h) + +use crate::{bindings, c_types, str::CStr}; + +use core::ops::Deref; +use core::ptr; + +/// A kernel Open Firmware / devicetree match table. +/// +/// Can only exist as an `&OfMatchTable` reference (akin to `&str` or +/// `&Path` in Rust std). +/// +/// # Invariants +/// +/// The inner reference points to a sentinel-terminated C array. +#[repr(transparent)] +pub struct OfMatchTable(bindings::of_device_id); + +impl OfMatchTable { + /// Returns the table as a reference to a static lifetime, sentinel-terminated C array. + /// + /// This is suitable to be coerced into the kernel's `of_match_table` field. + pub fn as_ptr(&'static self) -> &'static bindings::of_device_id { + // The inner reference points to a sentinel-terminated C array, as per + // the type invariant. + &self.0 + } +} + +/// An Open Firmware Match Table that can be constructed at build time. +/// +/// # Invariants +/// +/// `sentinel` always contains zeroes. +#[repr(C)] +pub struct ConstOfMatchTable { + table: [bindings::of_device_id; N], + sentinel: bindings::of_device_id, +} + +impl ConstOfMatchTable { + /// Creates a new Open Firmware Match Table from a list of compatible strings. + pub const fn new_const(compatibles: [&'static CStr; N]) -> Self { + let mut table = [Self::zeroed_of_device_id(); N]; + let mut i = 0; + while i < N { + table[i] = Self::new_of_device_id(compatibles[i]); + i += 1; + } + Self { + table, + // INVARIANTS: we zero the sentinel here, and never change it + // anywhere. Therefore it always contains zeroes. 
+ sentinel: Self::zeroed_of_device_id(), + } + } + + const fn zeroed_of_device_id() -> bindings::of_device_id { + bindings::of_device_id { + name: [0; 32], + type_: [0; 32], + compatible: [0; 128], + data: ptr::null(), + } + } + + const fn new_of_device_id(compatible: &'static CStr) -> bindings::of_device_id { + let mut id = Self::zeroed_of_device_id(); + let compatible = compatible.as_bytes_with_nul(); + let mut i = 0; + while i < compatible.len() { + // If `compatible` does not fit in `id.compatible`, an + // "index out of bounds" build time error will be triggered. + id.compatible[i] = compatible[i] as c_types::c_char; + i += 1; + } + id + } +} + +impl Deref for ConstOfMatchTable { + type Target = OfMatchTable; + + fn deref(&self) -> &OfMatchTable { + // INVARIANTS: `head` points to a sentinel-terminated C array, + // as per the `ConstOfMatchTable` type invariant, therefore + // `&OfMatchTable`'s inner reference will point to a sentinel-terminated C array. + let head = &self.table[0] as *const bindings::of_device_id as *const OfMatchTable; + + // SAFETY: The returned reference must remain valid for the lifetime of `self`. + // The raw pointer `head` points to memory inside `self`. So the reference created + // from this raw pointer has the same lifetime as `self`. + // Therefore this reference remains valid for the lifetime of `self`, and + // is safe to return. + unsafe { &*head } + } +} diff --git a/rust/kernel/pages.rs b/rust/kernel/pages.rs new file mode 100644 index 0000000000000..4f45bef09bca4 --- /dev/null +++ b/rust/kernel/pages.rs @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel page allocation and management. +//! +//! TODO: This module is a work in progress. 
+ +use crate::{ + bindings, c_types, io_buffer::IoBufferReader, user_ptr::UserSlicePtrReader, Error, Result, + PAGE_SIZE, +}; +use core::{marker::PhantomData, ptr}; + +extern "C" { + #[allow(improper_ctypes)] + fn rust_helper_alloc_pages( + gfp_mask: bindings::gfp_t, + order: c_types::c_uint, + ) -> *mut bindings::page; + + #[allow(improper_ctypes)] + fn rust_helper_kmap(page: *mut bindings::page) -> *mut c_types::c_void; + + #[allow(improper_ctypes)] + fn rust_helper_kunmap(page: *mut bindings::page); +} + +/// A set of physical pages. +/// +/// `Pages` holds a reference to a set of pages of order `ORDER`. Having the order as a generic +/// const allows the struct to have the same size as a pointer. +/// +/// # Invariants +/// +/// The pointer `Pages::pages` is valid and points to 2^ORDER pages. +pub struct Pages { + pages: *mut bindings::page, +} + +impl Pages { + /// Allocates a new set of contiguous pages. + pub fn new() -> Result { + // TODO: Consider whether we want to allow callers to specify flags. + // SAFETY: This only allocates pages. We check that it succeeds in the next statement. + let pages = unsafe { + rust_helper_alloc_pages( + bindings::GFP_KERNEL | bindings::__GFP_ZERO | bindings::__GFP_HIGHMEM, + ORDER, + ) + }; + if pages.is_null() { + return Err(Error::ENOMEM); + } + // INVARIANTS: We checked that the allocation above succeeded. + Ok(Self { pages }) + } + + /// Maps a single page at the given address in the given VM area. + /// + /// This is only meant to be used by pages of order 0. + pub fn insert_page(&self, vma: &mut bindings::vm_area_struct, address: usize) -> Result { + if ORDER != 0 { + return Err(Error::EINVAL); + } + + // SAFETY: We check above that the allocation is of order 0. The range of `address` is + // already checked by `vm_insert_page`. 
+ let ret = unsafe { bindings::vm_insert_page(vma, address as _, self.pages) }; + if ret != 0 { + Err(Error::from_kernel_errno(ret)) + } else { + Ok(()) + } + } + + /// Copies data from the given [`UserSlicePtrReader`] into the pages. + pub fn copy_into_page( + &self, + reader: &mut UserSlicePtrReader, + offset: usize, + len: usize, + ) -> Result { + // TODO: For now this only works on the first page. + let end = offset.checked_add(len).ok_or(Error::EINVAL)?; + if end > PAGE_SIZE { + return Err(Error::EINVAL); + } + + let mapping = self.kmap(0).ok_or(Error::EINVAL)?; + + // SAFETY: We ensured that the buffer was valid with the check above. + unsafe { reader.read_raw((mapping.ptr as usize + offset) as _, len) }?; + Ok(()) + } + + /// Maps the pages and reads from them into the given buffer. + /// + /// # Safety + /// + /// Callers must ensure that the destination buffer is valid for the given length. + /// Additionally, if the raw buffer is intended to be recast, they must ensure that the data + /// can be safely cast; [`crate::io_buffer::ReadableFromBytes`] has more details about it. + pub unsafe fn read(&self, dest: *mut u8, offset: usize, len: usize) -> Result { + // TODO: For now this only works on the first page. + let end = offset.checked_add(len).ok_or(Error::EINVAL)?; + if end > PAGE_SIZE { + return Err(Error::EINVAL); + } + + let mapping = self.kmap(0).ok_or(Error::EINVAL)?; + unsafe { ptr::copy((mapping.ptr as *mut u8).add(offset), dest, len) }; + Ok(()) + } + + /// Maps the pages and writes into them from the given buffer. + /// + /// # Safety + /// + /// Callers must ensure that the buffer is valid for the given length. Additionally, if the + /// page is (or will be) mapped by userspace, they must ensure that no kernel data is leaked + /// through padding if it was cast from another type; [`crate::io_buffer::WritableToBytes`] has + /// more details about it. 
+ pub unsafe fn write(&self, src: *const u8, offset: usize, len: usize) -> Result { + // TODO: For now this only works on the first page. + let end = offset.checked_add(len).ok_or(Error::EINVAL)?; + if end > PAGE_SIZE { + return Err(Error::EINVAL); + } + + let mapping = self.kmap(0).ok_or(Error::EINVAL)?; + unsafe { ptr::copy(src, (mapping.ptr as *mut u8).add(offset), len) }; + Ok(()) + } + + /// Maps the page at index `index`. + fn kmap(&self, index: usize) -> Option> { + if index >= 1usize << ORDER { + return None; + } + + // SAFETY: We checked above that `index` is within range. + let page = unsafe { self.pages.add(index) }; + + // SAFETY: `page` is valid based on the checks above. + let ptr = unsafe { rust_helper_kmap(page) }; + if ptr.is_null() { + return None; + } + + Some(PageMapping { + page, + ptr, + _phantom: PhantomData, + }) + } +} + +impl Drop for Pages { + fn drop(&mut self) { + // SAFETY: By the type invariants, we know the pages are allocated with the given order. + unsafe { bindings::__free_pages(self.pages, ORDER) }; + } +} + +struct PageMapping<'a> { + page: *mut bindings::page, + ptr: *mut c_types::c_void, + _phantom: PhantomData<&'a i32>, +} + +impl Drop for PageMapping<'_> { + fn drop(&mut self) { + // SAFETY: An instance of `PageMapping` is created only when `kmap` succeeded for the given + // page, so it is safe to unmap it here. + unsafe { rust_helper_kunmap(self.page) }; + } +} diff --git a/rust/kernel/platdev.rs b/rust/kernel/platdev.rs new file mode 100644 index 0000000000000..5f306b61321e3 --- /dev/null +++ b/rust/kernel/platdev.rs @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Platform devices. +//! +//! Also called `platdev`, `pdev`. +//! +//! 
C header: [`include/linux/platform_device.h`](../../../../include/linux/platform_device.h) + +use crate::{ + bindings, c_types, + error::{Error, Result}, + from_kernel_result, + of::OfMatchTable, + str::CStr, + types::PointerWrapper, +}; +use alloc::boxed::Box; +use core::{marker::PhantomPinned, pin::Pin}; + +/// A registration of a platform device. +#[derive(Default)] +pub struct Registration { + registered: bool, + pdrv: bindings::platform_driver, + _pin: PhantomPinned, +} + +// SAFETY: `Registration` does not expose any of its state across threads +// (it is fine for multiple threads to have a shared reference to it). +unsafe impl Sync for Registration {} + +extern "C" { + #[allow(improper_ctypes)] + fn rust_helper_platform_get_drvdata( + pdev: *const bindings::platform_device, + ) -> *mut c_types::c_void; + + #[allow(improper_ctypes)] + fn rust_helper_platform_set_drvdata( + pdev: *mut bindings::platform_device, + data: *mut c_types::c_void, + ); +} + +extern "C" fn probe_callback( + pdev: *mut bindings::platform_device, +) -> c_types::c_int { + from_kernel_result! { + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + let device_id = unsafe { (*pdev).id }; + let drv_data = P::probe(device_id)?; + let drv_data = drv_data.into_pointer() as *mut c_types::c_void; + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + unsafe { + rust_helper_platform_set_drvdata(pdev, drv_data); + } + Ok(0) + } +} + +extern "C" fn remove_callback( + pdev: *mut bindings::platform_device, +) -> c_types::c_int { + from_kernel_result! { + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + let device_id = unsafe { (*pdev).id }; + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + let ptr = unsafe { rust_helper_platform_get_drvdata(pdev) }; + // SAFETY: + // - we allocated this pointer using `P::DrvData::into_pointer`, + // so it is safe to turn back into a `P::DrvData`. 
+ // - the allocation happened in `probe`, no-one freed the memory, + // `remove` is the canonical kernel location to free driver data. so OK + // to convert the pointer back to a Rust structure here. + let drv_data = unsafe { P::DrvData::from_pointer(ptr) }; + P::remove(device_id, drv_data)?; + Ok(0) + } +} + +impl Registration { + fn register( + self: Pin<&mut Self>, + name: &'static CStr, + of_match_table: Option<&'static OfMatchTable>, + module: &'static crate::ThisModule, + ) -> Result { + // SAFETY: We must ensure that we never move out of `this`. + let this = unsafe { self.get_unchecked_mut() }; + if this.registered { + // Already registered. + return Err(Error::EINVAL); + } + this.pdrv.driver.name = name.as_char_ptr(); + if let Some(tbl) = of_match_table { + this.pdrv.driver.of_match_table = tbl.as_ptr(); + } + this.pdrv.probe = Some(probe_callback::

); + this.pdrv.remove = Some(remove_callback::

); + // SAFETY: + // - `this.pdrv` lives at least until the call to `platform_driver_unregister()` returns. + // - `name` pointer has static lifetime. + // - `module.0` lives at least as long as the module. + // - `probe()` and `remove()` are static functions. + // - `of_match_table` is either a raw pointer with static lifetime, + // as guaranteed by the [`of::OfMatchTable::as_ptr()`] return type, + // or null. + let ret = unsafe { bindings::__platform_driver_register(&mut this.pdrv, module.0) }; + if ret < 0 { + return Err(Error::from_kernel_errno(ret)); + } + this.registered = true; + Ok(()) + } + + /// Registers a platform device. + /// + /// Returns a pinned heap-allocated representation of the registration. + pub fn new_pinned( + name: &'static CStr, + of_match_tbl: Option<&'static OfMatchTable>, + module: &'static crate::ThisModule, + ) -> Result>> { + let mut r = Pin::from(Box::try_new(Self::default())?); + r.as_mut().register::

(name, of_match_tbl, module)?; + Ok(r) + } +} + +impl Drop for Registration { + fn drop(&mut self) { + if self.registered { + // SAFETY: if `registered` is true, then `self.pdrv` was registered + // previously, which means `platform_driver_unregister` is always + // safe to call. + unsafe { bindings::platform_driver_unregister(&mut self.pdrv) } + } + } +} + +/// Trait for implementers of platform drivers. +/// +/// Implement this trait whenever you create a platform driver. +pub trait PlatformDriver { + /// Device driver data. + /// + /// Corresponds to the data set or retrieved via the kernel's + /// `platform_{set,get}_drvdata()` functions. + /// + /// Require that `DrvData` implements `PointerWrapper`. We guarantee to + /// never move the underlying wrapped data structure. This allows + /// driver writers to use pinned or self-referential data structures. + type DrvData: PointerWrapper; + + /// Platform driver probe. + /// + /// Called when a new platform device is added or discovered. + /// Implementers should attempt to initialize the device here. + fn probe(device_id: i32) -> Result; + + /// Platform driver remove. + /// + /// Called when a platform device is removed. + /// Implementers should prepare the device for complete removal here. + fn remove(device_id: i32, drv_data: Self::DrvData) -> Result; +} diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs new file mode 100644 index 0000000000000..c0bf618099adc --- /dev/null +++ b/rust/kernel/prelude.rs @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! The `kernel` prelude. +//! +//! These are the most common items used by Rust code in the kernel, +//! intended to be imported by all Rust code, for convenience. +//! +//! # Examples +//! +//! ``` +//! use kernel::prelude::*; +//! 
``` + +pub use core::pin::Pin; + +pub use alloc::{boxed::Box, string::String, sync::Arc, vec::Vec}; + +pub use macros::{module, module_misc_device}; + +pub use super::build_assert; + +pub use super::{pr_alert, pr_crit, pr_emerg, pr_err, pr_info, pr_notice, pr_warn}; + +pub use super::static_assert; + +pub use super::{Error, KernelModule, Result}; + +pub use crate::traits::TryPin; diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs new file mode 100644 index 0000000000000..b7384f0bacc37 --- /dev/null +++ b/rust/kernel/print.rs @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Printing facilities. +//! +//! C header: [`include/linux/printk.h`](../../../../include/linux/printk.h) +//! +//! Reference: + +use core::cmp; +use core::fmt; + +use crate::bindings; +use crate::c_types::{c_char, c_void}; + +// Called from `vsprintf` with format specifier `%pA`. +#[no_mangle] +unsafe fn rust_fmt_argument(buf: *mut c_char, end: *mut c_char, ptr: *const c_void) -> *mut c_char { + use fmt::Write; + + // Use `usize` to use `saturating_*` functions. + struct Writer { + buf: usize, + end: usize, + } + + impl Write for Writer { + fn write_str(&mut self, s: &str) -> fmt::Result { + // `buf` value after writing `len` bytes. This does not have to be bounded + // by `end`, but we don't want it to wrap around to 0. + let buf_new = self.buf.saturating_add(s.len()); + + // Amount that we can copy. `saturating_sub` ensures we get 0 if + // `buf` goes past `end`. + let len_to_copy = cmp::min(buf_new, self.end).saturating_sub(self.buf); + + // SAFETY: In any case, `buf` is non-null and properly aligned. + // If `len_to_copy` is non-zero, then we know `buf` has not past + // `end` yet and so is valid. 
+ unsafe { + core::ptr::copy_nonoverlapping( + s.as_bytes().as_ptr(), + self.buf as *mut u8, + len_to_copy, + ) + }; + + self.buf = buf_new; + Ok(()) + } + } + + let mut w = Writer { + buf: buf as _, + end: end as _, + }; + let _ = w.write_fmt(unsafe { *(ptr as *const fmt::Arguments<'_>) }); + w.buf as _ +} + +/// Format strings. +/// +/// Public but hidden since it should only be used from public macros. +#[doc(hidden)] +pub mod format_strings { + use crate::bindings; + + /// The length we copy from the `KERN_*` kernel prefixes. + const LENGTH_PREFIX: usize = 2; + + /// The length of the fixed format strings. + pub const LENGTH: usize = 10; + + /// Generates a fixed format string for the kernel's [`printk`]. + /// + /// The format string is always the same for a given level, i.e. for a + /// given `prefix`, which are the kernel's `KERN_*` constants. + /// + /// [`printk`]: ../../../../include/linux/printk.h + const fn generate(is_cont: bool, prefix: &[u8; 3]) -> [u8; LENGTH] { + // Ensure the `KERN_*` macros are what we expect. + assert!(prefix[0] == b'\x01'); + if is_cont { + assert!(prefix[1] == b'c'); + } else { + assert!(prefix[1] >= b'0' && prefix[1] <= b'7'); + } + assert!(prefix[2] == b'\x00'); + + let suffix: &[u8; LENGTH - LENGTH_PREFIX] = if is_cont { + b"%pA\0\0\0\0\0" + } else { + b"%s: %pA\0" + }; + + [ + prefix[0], prefix[1], suffix[0], suffix[1], suffix[2], suffix[3], suffix[4], suffix[5], + suffix[6], suffix[7], + ] + } + + // Generate the format strings at compile-time. + // + // This avoids the compiler generating the contents on the fly in the stack. + // + // Furthermore, `static` instead of `const` is used to share the strings + // for all the kernel. 
+ pub static EMERG: [u8; LENGTH] = generate(false, bindings::KERN_EMERG); + pub static ALERT: [u8; LENGTH] = generate(false, bindings::KERN_ALERT); + pub static CRIT: [u8; LENGTH] = generate(false, bindings::KERN_CRIT); + pub static ERR: [u8; LENGTH] = generate(false, bindings::KERN_ERR); + pub static WARNING: [u8; LENGTH] = generate(false, bindings::KERN_WARNING); + pub static NOTICE: [u8; LENGTH] = generate(false, bindings::KERN_NOTICE); + pub static INFO: [u8; LENGTH] = generate(false, bindings::KERN_INFO); + pub static DEBUG: [u8; LENGTH] = generate(false, bindings::KERN_DEBUG); + pub static CONT: [u8; LENGTH] = generate(true, bindings::KERN_CONT); +} + +/// Prints a message via the kernel's [`printk`]. +/// +/// Public but hidden since it should only be used from public macros. +/// +/// # Safety +/// +/// The format string must be one of the ones in [`format_strings`], and +/// the module name must be null-terminated. +/// +/// [`printk`]: ../../../../include/linux/printk.h +#[doc(hidden)] +pub unsafe fn call_printk( + format_string: &[u8; format_strings::LENGTH], + module_name: &[u8], + args: fmt::Arguments<'_>, +) { + // `printk` does not seem to fail in any path. + unsafe { + bindings::printk( + format_string.as_ptr() as _, + module_name.as_ptr(), + &args as *const _ as *const c_void, + ); + } +} + +/// Prints a message via the kernel's [`printk`] for the `CONT` level. +/// +/// Public but hidden since it should only be used from public macros. +/// +/// [`printk`]: ../../../../include/linux/printk.h +#[doc(hidden)] +pub fn call_printk_cont(args: fmt::Arguments<'_>) { + // `printk` does not seem to fail in any path. + // + // SAFETY: The format string is fixed. + unsafe { + bindings::printk( + format_strings::CONT.as_ptr() as _, + &args as *const _ as *const c_void, + ); + } +} + +/// Performs formatting and forwards the string to [`call_printk`]. +/// +/// Public but hidden since it should only be used from public macros. 
+#[doc(hidden)] +#[cfg(not(testlib))] +#[macro_export] +macro_rules! print_macro ( + // The non-continuation cases (most of them, e.g. `INFO`). + ($format_string:path, false, $($arg:tt)+) => ( + // SAFETY: This hidden macro should only be called by the documented + // printing macros which ensure the format string is one of the fixed + // ones. All `__LOG_PREFIX`s are null-terminated as they are generated + // by the `module!` proc macro or fixed values defined in a kernel + // crate. + unsafe { + $crate::print::call_printk( + &$format_string, + crate::__LOG_PREFIX, + format_args!($($arg)+), + ); + } + ); + + // The `CONT` case. + ($format_string:path, true, $($arg:tt)+) => ( + $crate::print::call_printk_cont( + format_args!($($arg)+), + ); + ); +); + +/// Stub for doctests +#[cfg(testlib)] +#[macro_export] +macro_rules! print_macro ( + ($format_string:path, $e:expr, $($arg:tt)+) => ( + () + ); +); + +// We could use a macro to generate these macros. However, doing so ends +// up being a bit ugly: it requires the dollar token trick to escape `$` as +// well as playing with the `doc` attribute. Furthermore, they cannot be easily +// imported in the prelude due to [1]. So, for the moment, we just write them +// manually, like in the C side; while keeping most of the logic in another +// macro, i.e. [`print_macro`]. +// +// [1]: https://github.com/rust-lang/rust/issues/52234 + +/// Prints an emergency-level message (level 0). +/// +/// Use this level if the system is unusable. +/// +/// Equivalent to the kernel's [`pr_emerg`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. 
+/// +/// [`pr_emerg`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_emerg +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_emerg!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_emerg ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::EMERG, false, $($arg)*) + ) +); + +/// Prints an alert-level message (level 1). +/// +/// Use this level if action must be taken immediately. +/// +/// Equivalent to the kernel's [`pr_alert`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_alert`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_alert +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_alert!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_alert ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::ALERT, false, $($arg)*) + ) +); + +/// Prints a critical-level message (level 2). +/// +/// Use this level for critical conditions. +/// +/// Equivalent to the kernel's [`pr_crit`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_crit`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_crit +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_crit!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_crit ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::CRIT, false, $($arg)*) + ) +); + +/// Prints an error-level message (level 3). +/// +/// Use this level for error conditions. 
+/// +/// Equivalent to the kernel's [`pr_err`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_err`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_err +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_err!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_err ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::ERR, false, $($arg)*) + ) +); + +/// Prints a warning-level message (level 4). +/// +/// Use this level for warning conditions. +/// +/// Equivalent to the kernel's [`pr_warn`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_warn`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_warn +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_warn!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_warn ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::WARNING, false, $($arg)*) + ) +); + +/// Prints a notice-level message (level 5). +/// +/// Use this level for normal but significant conditions. +/// +/// Equivalent to the kernel's [`pr_notice`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_notice`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_notice +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_notice!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! 
pr_notice ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::NOTICE, false, $($arg)*) + ) +); + +/// Prints an info-level message (level 6). +/// +/// Use this level for informational messages. +/// +/// Equivalent to the kernel's [`pr_info`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_info`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_info +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_info!("hello {}\n", "there"); +/// ``` +#[macro_export] +#[doc(alias = "print")] +macro_rules! pr_info ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::INFO, false, $($arg)*) + ) +); + +/// Continues a previous log message in the same line. +/// +/// Use only when continuing a previous `pr_*!` macro (e.g. [`pr_info!`]). +/// +/// Equivalent to the kernel's [`pr_cont`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_cont`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_cont +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// # use kernel::pr_cont; +/// pr_info!("hello"); +/// pr_cont!(" {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_cont ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::CONT, true, $($arg)*) + ) +); diff --git a/rust/kernel/random.rs b/rust/kernel/random.rs new file mode 100644 index 0000000000000..723a89829f661 --- /dev/null +++ b/rust/kernel/random.rs @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Random numbers. +//! +//! 
C header: [`include/linux/random.h`](../../../../include/linux/random.h) + +use core::convert::TryInto; + +use crate::{bindings, c_types, error}; + +/// Fills a byte slice with random bytes generated from the kernel's CSPRNG. +/// +/// Ensures that the CSPRNG has been seeded before generating any random bytes, +/// and will block until it is ready. +pub fn getrandom(dest: &mut [u8]) -> error::Result { + let res = unsafe { bindings::wait_for_random_bytes() }; + if res != 0 { + return Err(error::Error::from_kernel_errno(res)); + } + + unsafe { + bindings::get_random_bytes( + dest.as_mut_ptr() as *mut c_types::c_void, + dest.len().try_into()?, + ); + } + Ok(()) +} + +/// Fills a byte slice with random bytes generated from the kernel's CSPRNG. +/// +/// If the CSPRNG is not yet seeded, returns an `Err(EAGAIN)` immediately. +pub fn getrandom_nonblock(dest: &mut [u8]) -> error::Result { + if !unsafe { bindings::rng_is_initialized() } { + return Err(error::Error::EAGAIN); + } + getrandom(dest) +} + +/// Contributes the contents of a byte slice to the kernel's entropy pool. +/// +/// Does *not* credit the kernel entropy counter though. +pub fn add_randomness(data: &[u8]) { + unsafe { + bindings::add_device_randomness( + data.as_ptr() as *const c_types::c_void, + data.len().try_into().unwrap(), + ); + } +} diff --git a/rust/kernel/raw_list.rs b/rust/kernel/raw_list.rs new file mode 100644 index 0000000000000..4bc4f4a24ad5e --- /dev/null +++ b/rust/kernel/raw_list.rs @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Raw lists. +//! +//! TODO: This module is a work in progress. + +use core::{ + cell::UnsafeCell, + ptr, + ptr::NonNull, + sync::atomic::{AtomicBool, Ordering}, +}; + +/// A descriptor of list elements. +/// +/// It describes the type of list elements and provides a function to determine how to get the +/// links to be used on a list. 
+/// +/// A type that may be in multiple lists simultaneously needs to implement one of these for each +/// simultaneous list. +pub trait GetLinks { + /// The type of the entries in the list. + type EntryType: ?Sized; + + /// Returns the links to be used when linking an entry within a list. + fn get_links(data: &Self::EntryType) -> &Links; +} + +/// The links used to link an object on a linked list. +/// +/// Instances of this type are usually embedded in structures and returned in calls to +/// [`GetLinks::get_links`]. +pub struct Links { + inserted: AtomicBool, + entry: UnsafeCell>, +} + +impl Links { + /// Constructs a new [`Links`] instance that isn't inserted on any lists yet. + pub fn new() -> Self { + Self { + inserted: AtomicBool::new(false), + entry: UnsafeCell::new(ListEntry::new()), + } + } + + fn acquire_for_insertion(&self) -> bool { + self.inserted + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + } + + fn release_after_removal(&self) { + self.inserted.store(false, Ordering::Release); + } +} + +impl Default for Links { + fn default() -> Self { + Self::new() + } +} + +struct ListEntry { + next: Option>, + prev: Option>, +} + +impl ListEntry { + fn new() -> Self { + Self { + next: None, + prev: None, + } + } +} + +/// A linked list. +/// +/// # Invariants +/// +/// The links of objects added to a list are owned by the list. +pub(crate) struct RawList { + head: Option>, +} + +impl RawList { + pub(crate) fn new() -> Self { + Self { head: None } + } + + pub(crate) fn is_empty(&self) -> bool { + self.head.is_none() + } + + fn insert_after_priv( + &mut self, + existing: &G::EntryType, + new_entry: &mut ListEntry, + new_ptr: Option>, + ) { + { + // SAFETY: It's safe to get the previous entry of `existing` because the list cannot + // change. 
+ let existing_links = unsafe { &mut *G::get_links(existing).entry.get() }; + new_entry.next = existing_links.next; + existing_links.next = new_ptr; + } + + new_entry.prev = Some(NonNull::from(existing)); + + // SAFETY: It's safe to get the next entry of `existing` because the list cannot change. + let next_links = + unsafe { &mut *G::get_links(new_entry.next.unwrap().as_ref()).entry.get() }; + next_links.prev = new_ptr; + } + + /// Inserts the given object after `existing`. + /// + /// # Safety + /// + /// Callers must ensure that `existing` points to a valid entry that is on the list. + pub(crate) unsafe fn insert_after( + &mut self, + existing: &G::EntryType, + new: &G::EntryType, + ) -> bool { + let links = G::get_links(new); + if !links.acquire_for_insertion() { + // Nothing to do if already inserted. + return false; + } + + // SAFETY: The links are now owned by the list, so it is safe to get a mutable reference. + let new_entry = unsafe { &mut *links.entry.get() }; + self.insert_after_priv(existing, new_entry, Some(NonNull::from(new))); + true + } + + fn push_back_internal(&mut self, new: &G::EntryType) -> bool { + let links = G::get_links(new); + if !links.acquire_for_insertion() { + // Nothing to do if already inserted. + return false; + } + + // SAFETY: The links are now owned by the list, so it is safe to get a mutable reference. + let new_entry = unsafe { &mut *links.entry.get() }; + let new_ptr = Some(NonNull::from(new)); + match self.back() { + // SAFETY: `back` is valid as the list cannot change. 
+ Some(back) => self.insert_after_priv(unsafe { back.as_ref() }, new_entry, new_ptr), + None => { + self.head = new_ptr; + new_entry.next = new_ptr; + new_entry.prev = new_ptr; + } + } + true + } + + pub(crate) unsafe fn push_back(&mut self, new: &G::EntryType) -> bool { + self.push_back_internal(new) + } + + fn remove_internal(&mut self, data: &G::EntryType) -> bool { + let links = G::get_links(data); + + // SAFETY: The links are now owned by the list, so it is safe to get a mutable reference. + let entry = unsafe { &mut *links.entry.get() }; + let next = if let Some(next) = entry.next { + next + } else { + // Nothing to do if the entry is not on the list. + return false; + }; + + if ptr::eq(data, next.as_ptr()) { + // We're removing the only element. + self.head = None + } else { + // Update the head if we're removing it. + if let Some(raw_head) = self.head { + if ptr::eq(data, raw_head.as_ptr()) { + self.head = Some(next); + } + } + + // SAFETY: It's safe to get the previous entry because the list cannot change. + unsafe { &mut *G::get_links(entry.prev.unwrap().as_ref()).entry.get() }.next = + entry.next; + + // SAFETY: It's safe to get the next entry because the list cannot change. + unsafe { &mut *G::get_links(next.as_ref()).entry.get() }.prev = entry.prev; + } + + // Reset the links of the element we're removing so that we know it's not on any list. + entry.next = None; + entry.prev = None; + links.release_after_removal(); + true + } + + /// Removes the given entry. + /// + /// # Safety + /// + /// Callers must ensure that `data` is either on this list or in no list. It being on another + /// list leads to memory unsafety. + pub(crate) unsafe fn remove(&mut self, data: &G::EntryType) -> bool { + self.remove_internal(data) + } + + fn pop_front_internal(&mut self) -> Option> { + let head = self.head?; + // SAFETY: The head is on the list as we just got it from there and it cannot change. 
+ unsafe { self.remove(head.as_ref()) }; + Some(head) + } + + pub(crate) fn pop_front(&mut self) -> Option> { + self.pop_front_internal() + } + + pub(crate) fn front(&self) -> Option> { + self.head + } + + pub(crate) fn back(&self) -> Option> { + // SAFETY: The links of head are owned by the list, so it is safe to get a reference. + unsafe { &*G::get_links(self.head?.as_ref()).entry.get() }.prev + } + + pub(crate) fn cursor_front(&self) -> Cursor<'_, G> { + Cursor::new(self, self.front()) + } + + pub(crate) fn cursor_front_mut(&mut self) -> CursorMut<'_, G> { + CursorMut::new(self, self.front()) + } +} + +struct CommonCursor { + cur: Option>, +} + +impl CommonCursor { + fn new(cur: Option>) -> Self { + Self { cur } + } + + fn move_next(&mut self, list: &RawList) { + match self.cur.take() { + None => self.cur = list.head, + Some(cur) => { + if let Some(head) = list.head { + // SAFETY: We have a shared ref to the linked list, so the links can't change. + let links = unsafe { &*G::get_links(cur.as_ref()).entry.get() }; + if links.next.unwrap() != head { + self.cur = links.next; + } + } + } + } + } + + fn move_prev(&mut self, list: &RawList) { + match list.head { + None => self.cur = None, + Some(head) => { + let next = match self.cur.take() { + None => head, + Some(cur) => { + if cur == head { + return; + } + cur + } + }; + // SAFETY: There's a shared ref to the list, so the links can't change. + let links = unsafe { &*G::get_links(next.as_ref()).entry.get() }; + self.cur = links.prev; + } + } + } +} + +/// A list cursor that allows traversing a linked list and inspecting elements. +pub struct Cursor<'a, G: GetLinks> { + cursor: CommonCursor, + list: &'a RawList, +} + +impl<'a, G: GetLinks> Cursor<'a, G> { + fn new(list: &'a RawList, cur: Option>) -> Self { + Self { + list, + cursor: CommonCursor::new(cur), + } + } + + /// Returns the element the cursor is currently positioned on. 
+ pub fn current(&self) -> Option<&'a G::EntryType> { + let cur = self.cursor.cur?; + // SAFETY: Objects must be kept alive while on the list. + Some(unsafe { &*cur.as_ptr() }) + } + + /// Moves the cursor to the next element. + pub fn move_next(&mut self) { + self.cursor.move_next(self.list); + } +} + +pub(crate) struct CursorMut<'a, G: GetLinks> { + cursor: CommonCursor, + list: &'a mut RawList, +} + +impl<'a, G: GetLinks> CursorMut<'a, G> { + fn new(list: &'a mut RawList, cur: Option>) -> Self { + Self { + list, + cursor: CommonCursor::new(cur), + } + } + + pub(crate) fn current(&mut self) -> Option<&mut G::EntryType> { + let cur = self.cursor.cur?; + // SAFETY: Objects must be kept alive while on the list. + Some(unsafe { &mut *cur.as_ptr() }) + } + + /// Removes the entry the cursor is pointing to and advances the cursor to the next entry. It + /// returns a raw pointer to the removed element (if one is removed). + pub(crate) fn remove_current(&mut self) -> Option> { + let entry = self.cursor.cur?; + self.cursor.move_next(self.list); + // SAFETY: The entry is on the list as we just got it from there and it cannot change. + unsafe { self.list.remove(entry.as_ref()) }; + Some(entry) + } + + pub(crate) fn peek_next(&mut self) -> Option<&mut G::EntryType> { + let mut new = CommonCursor::new(self.cursor.cur); + new.move_next(self.list); + // SAFETY: Objects must be kept alive while on the list. + Some(unsafe { &mut *new.cur?.as_ptr() }) + } + + pub(crate) fn peek_prev(&mut self) -> Option<&mut G::EntryType> { + let mut new = CommonCursor::new(self.cursor.cur); + new.move_prev(self.list); + // SAFETY: Objects must be kept alive while on the list. 
+ Some(unsafe { &mut *new.cur?.as_ptr() }) + } + + pub(crate) fn move_next(&mut self) { + self.cursor.move_next(self.list); + } +} diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs new file mode 100644 index 0000000000000..630865369508c --- /dev/null +++ b/rust/kernel/rbtree.rs @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Red-black trees. +//! +//! C header: [`include/linux/rbtree.h`](../../../../include/linux/rbtree.h) +//! +//! Reference: + +use crate::{bindings, Result}; +use alloc::boxed::Box; +use core::{ + cmp::{Ord, Ordering}, + iter::{IntoIterator, Iterator}, + marker::PhantomData, + mem::MaybeUninit, + ptr::{addr_of_mut, NonNull}, +}; + +extern "C" { + fn rust_helper_rb_link_node( + node: *mut bindings::rb_node, + parent: *const bindings::rb_node, + rb_link: *mut *mut bindings::rb_node, + ); +} + +struct Node { + links: bindings::rb_node, + key: K, + value: V, +} + +/// A red-black tree with owned nodes. +/// +/// It is backed by the kernel C red-black trees. +/// +/// # Invariants +/// +/// Non-null parent/children pointers stored in instances of the `rb_node` C struct are always +/// valid, and pointing to a field of our internal representation of a node. +/// +/// # Examples +/// +/// In the example below we do several operations on a tree. We note that insertions may fail if +/// the system is out of memory. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::rbtree::RBTree; +/// +/// fn rbtest() -> Result { +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_insert(20, 200)?; +/// tree.try_insert(10, 100)?; +/// tree.try_insert(30, 300)?; +/// +/// // Check the nodes we just inserted. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Print all elements. 
+/// for (key, value) in &tree { +/// pr_info!("{} = {}\n", key, value); +/// } +/// +/// // Replace one of the elements. +/// tree.try_insert(10, 1000)?; +/// +/// // Check that the tree reflects the replacement. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &1000)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Change the value of one of the elements. +/// *tree.get_mut(&30).unwrap() = 3000; +/// +/// // Check that the tree reflects the update. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &1000)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &3000)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Remove an element. +/// tree.remove(&10); +/// +/// // Check that the tree reflects the removal. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &3000)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Update all values. +/// for value in tree.values_mut() { +/// *value *= 10; +/// } +/// +/// // Check that the tree reflects the changes to values. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&20, &2000)); +/// assert_eq!(iter.next().unwrap(), (&30, &30000)); +/// assert!(iter.next().is_none()); +/// } +/// +/// Ok(()) +/// } +/// ``` +/// +/// In the example below, we first allocate a node, acquire a spinlock, then insert the node into +/// the tree. This is useful when the insertion context does not allow sleeping, for example, when +/// holding a spinlock. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::{rbtree::RBTree, sync::SpinLock}; +/// +/// fn insert_test(tree: &SpinLock>) -> Result { +/// // Pre-allocate node. This may fail (as it allocates memory). 
+/// let node = RBTree::try_allocate_node(10, 100)?; +/// +/// // Insert node while holding the lock. It is guaranteed to succeed with no allocation +/// // attempts. +/// let mut guard = tree.lock(); +/// guard.insert(node); +/// Ok(()) +/// } +/// ``` +/// +/// In the example below, we reuse an existing node allocation from an element we removed. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::rbtree::RBTree; +/// +/// fn reuse_test() -> Result { +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_insert(20, 200)?; +/// tree.try_insert(10, 100)?; +/// tree.try_insert(30, 300)?; +/// +/// // Check the nodes we just inserted. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Remove a node, getting back ownership of it. +/// let existing = tree.remove_node(&30).unwrap(); +/// +/// // Check that the tree reflects the removal. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Turn the node into a reservation so that we can reuse it with a different key/value. +/// let reservation = existing.into_reservation(); +/// +/// // Insert a new node into the tree, reusing the previous allocation. This is guaranteed to +/// // succeed (no memory allocations). +/// tree.insert(reservation.into_node(15, 150)); +/// +/// // Check that the tree reflects the new insertion.
+/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&15, &150)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert!(iter.next().is_none()); +/// } +/// +/// Ok(()) +/// } +/// ``` +pub struct RBTree { + root: bindings::rb_root, + _p: PhantomData>, +} + +impl RBTree { + /// Creates a new and empty tree. + pub fn new() -> Self { + Self { + // INVARIANT: There are no nodes in the tree, so the invariant holds vacuously. + root: bindings::rb_root::default(), + _p: PhantomData, + } + } + + /// Tries to insert a new value into the tree. + /// + /// It overwrites a node if one already exists with the same key and returns it (containing the + /// key/value pair). Returns [`None`] if a node with the same key didn't already exist. + /// + /// Returns an error if it cannot allocate memory for the new node. + pub fn try_insert(&mut self, key: K, value: V) -> Result>> + where + K: Ord, + { + Ok(self.insert(Self::try_allocate_node(key, value)?)) + } + + /// Allocates memory for a node to be eventually initialised and inserted into the tree via a + /// call to [`RBTree::insert`]. + pub fn try_reserve_node() -> Result> { + Ok(RBTreeNodeReservation { + node: Box::try_new(MaybeUninit::uninit())?, + }) + } + + /// Allocates and initialises a node that can be inserted into the tree via + /// [`RBTree::insert`]. + pub fn try_allocate_node(key: K, value: V) -> Result> { + Ok(Self::try_reserve_node()?.into_node(key, value)) + } + + /// Inserts a new node into the tree. + /// + /// It overwrites a node if one already exists with the same key and returns it (containing the + /// key/value pair). Returns [`None`] if a node with the same key didn't already exist. + /// + /// This function always succeeds.
+ pub fn insert(&mut self, node: RBTreeNode) -> Option> + where + K: Ord, + { + let RBTreeNode { node } = node; + let node = Box::into_raw(node); + // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when + // the node is removed or replaced. + let node_links = unsafe { addr_of_mut!((*node).links) }; + let mut new_link: &mut *mut bindings::rb_node = &mut self.root.rb_node; + let mut parent = core::ptr::null_mut(); + while !new_link.is_null() { + let this = crate::container_of!(*new_link, Node, links); + + parent = *new_link; + + // SAFETY: `this` is a non-null node so it is valid by the type invariants. `node` is + // valid until the node is removed. + match unsafe { (*node).key.cmp(&(*this).key) } { + // SAFETY: `parent` is a non-null node so it is valid by the type invariants. + Ordering::Less => new_link = unsafe { &mut (*parent).rb_left }, + // SAFETY: `parent` is a non-null node so it is valid by the type invariants. + Ordering::Greater => new_link = unsafe { &mut (*parent).rb_right }, + Ordering::Equal => { + // INVARIANT: We are replacing an existing node with a new one, which is valid. + // It remains valid because we "forgot" it with `Box::into_raw`. + // SAFETY: All pointers are non-null and valid (parent, despite the name, really + // is the node we're replacing). + unsafe { bindings::rb_replace_node(parent, node_links, &mut self.root) }; + + // INVARIANT: The node is being returned and the caller may free it, however, + // it was removed from the tree. So the invariants still hold. + return Some(RBTreeNode { + // SAFETY: `this` was a node in the tree, so it is valid. + node: unsafe { Box::from_raw(this as _) }, + }); + } + } + } + + // INVARIANT: We are linking in a new node, which is valid. It remains valid because we + // "forgot" it with `Box::into_raw`. + // SAFETY: All pointers are non-null and valid (`*new_link` is null, but `new_link` is a + // mutable reference). 
+ unsafe { rust_helper_rb_link_node(node_links, parent, new_link) }; + + // SAFETY: All pointers are valid. `node` has just been inserted into the tree. + unsafe { bindings::rb_insert_color(node_links, &mut self.root) }; + None + } + + /// Returns a node with the given key, if one exists. + fn find(&self, key: &K) -> Option>> + where + K: Ord, + { + let mut node = self.root.rb_node; + while !node.is_null() { + let this = crate::container_of!(node, Node, links); + // SAFETY: `this` is a non-null node so it is valid by the type invariants. + node = match key.cmp(unsafe { &(*this).key }) { + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + Ordering::Less => unsafe { (*node).rb_left }, + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + Ordering::Greater => unsafe { (*node).rb_right }, + Ordering::Equal => return NonNull::new(this as _), + } + } + None + } + + /// Returns a reference to the value corresponding to the key. + pub fn get(&self, key: &K) -> Option<&V> + where + K: Ord, + { + // SAFETY: The `find` return value is a node in the tree, so it is valid. + self.find(key).map(|node| unsafe { &node.as_ref().value }) + } + + /// Returns a mutable reference to the value corresponding to the key. + pub fn get_mut(&mut self, key: &K) -> Option<&mut V> + where + K: Ord, + { + // SAFETY: the `find` return value is a node in the tree, so it is valid. + self.find(key) + .map(|mut node| unsafe { &mut node.as_mut().value }) + } + + /// Removes the node with the given key from the tree. + /// + /// It returns the node that was removed if one exists, or [`None`] otherwise. + pub fn remove_node(&mut self, key: &K) -> Option> + where + K: Ord, + { + let mut node = self.find(key)?; + + // SAFETY: the `find` return value is a node in the tree, so it is valid. 
+ unsafe { bindings::rb_erase(&mut node.as_mut().links, &mut self.root) }; + + // INVARIANT: The node is being returned and the caller may free it, however, it was + // removed from the tree. So the invariants still hold. + Some(RBTreeNode { + // SAFETY: the `find` return value was a node in the tree, so it is valid. + node: unsafe { Box::from_raw(node.as_ptr()) }, + }) + } + + /// Removes the node with the given key from the tree. + /// + /// It returns the value that was removed if one exists, or [`None`] otherwise. + pub fn remove(&mut self, key: &K) -> Option + where + K: Ord, + { + let node = self.remove_node(key)?; + let RBTreeNode { node } = node; + let Node { + links: _, + key: _, + value, + } = *node; + Some(value) + } + + /// Returns an iterator over the tree nodes, sorted by key. + pub fn iter(&self) -> RBTreeIterator<'_, K, V> { + RBTreeIterator { + _tree: PhantomData, + // SAFETY: `root` is valid as it's embedded in `self` and we have a valid `self`. + next: unsafe { bindings::rb_first(&self.root) }, + } + } + + /// Returns a mutable iterator over the tree nodes, sorted by key. + pub fn iter_mut(&mut self) -> RBTreeIteratorMut<'_, K, V> { + RBTreeIteratorMut { + _tree: PhantomData, + // SAFETY: `root` is valid as it's embedded in `self` and we have a valid `self`. + next: unsafe { bindings::rb_first(&self.root) }, + } + } + + /// Returns an iterator over the keys of the nodes in the tree, in sorted order. + pub fn keys(&self) -> impl Iterator { + self.iter().map(|(k, _)| k) + } + + /// Returns an iterator over the values of the nodes in the tree, sorted by key. + pub fn values(&self) -> impl Iterator { + self.iter().map(|(_, v)| v) + } + + /// Returns a mutable iterator over the values of the nodes in the tree, sorted by key. 
+ pub fn values_mut(&mut self) -> impl Iterator { + self.iter_mut().map(|(_, v)| v) + } +} + +impl Default for RBTree { + fn default() -> Self { + Self::new() + } +} + +impl Drop for RBTree { + fn drop(&mut self) { + // SAFETY: `root` is valid as it's embedded in `self` and we have a valid `self`. + let mut next = unsafe { bindings::rb_first_postorder(&self.root) }; + + // INVARIANT: The loop invariant is that all tree nodes from `next` in postorder are valid. + while !next.is_null() { + let this = crate::container_of!(next, Node, links); + + // Find out what the next node is before disposing of the current one. + // SAFETY: `next` and all nodes in postorder are still valid. + next = unsafe { bindings::rb_next_postorder(next) }; + + // INVARIANT: This is the destructor, so we break the type invariant during clean-up, + // but it is not observable. The loop invariant is still maintained. + // SAFETY: `this` is valid per the loop invariant. + unsafe { Box::from_raw(this as *mut Node) }; + } + } +} + +impl<'a, K, V> IntoIterator for &'a RBTree { + type Item = (&'a K, &'a V); + type IntoIter = RBTreeIterator<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An iterator over the nodes of a [`RBTree`]. +/// +/// Instances are created by calling [`RBTree::iter`]. +pub struct RBTreeIterator<'a, K, V> { + _tree: PhantomData<&'a RBTree>, + next: *mut bindings::rb_node, +} + +impl<'a, K, V> Iterator for RBTreeIterator<'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option { + if self.next.is_null() { + return None; + } + + let cur = crate::container_of!(self.next, Node, links); + + // SAFETY: The reference to the tree used to create the iterator outlives the iterator, so + // the tree cannot change. By the tree invariant, all nodes are valid. + self.next = unsafe { bindings::rb_next(self.next) }; + + // SAFETY: By the same reasoning above, it is safe to dereference the node. 
Additionally, + // it is ok to return a reference to members because the iterator must outlive it. + Some(unsafe { (&(*cur).key, &(*cur).value) }) + } +} + +impl<'a, K, V> IntoIterator for &'a mut RBTree { + type Item = (&'a K, &'a mut V); + type IntoIter = RBTreeIteratorMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +/// A mutable iterator over the nodes of a [`RBTree`]. +/// +/// Instances are created by calling [`RBTree::iter_mut`]. +pub struct RBTreeIteratorMut<'a, K, V> { + _tree: PhantomData<&'a RBTree>, + next: *mut bindings::rb_node, +} + +impl<'a, K, V> Iterator for RBTreeIteratorMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option { + if self.next.is_null() { + return None; + } + + let cur = crate::container_of!(self.next, Node, links) as *mut Node; + + // SAFETY: The reference to the tree used to create the iterator outlives the iterator, so + // the tree cannot change (except for the value of previous nodes, but those don't affect + // the iteration process). By the tree invariant, all nodes are valid. + self.next = unsafe { bindings::rb_next(self.next) }; + + // SAFETY: By the same reasoning above, it is safe to dereference the node. Additionally, + // it is ok to return a reference to members because the iterator must outlive it. + Some(unsafe { (&(*cur).key, &mut (*cur).value) }) + } +} + +/// A memory reservation for a red-black tree node. +/// +/// It contains the memory needed to hold a node that can be inserted into a red-black tree. One +/// can be obtained by directly allocating it ([`RBTree::try_reserve_node`]) or by "uninitialising" +/// ([`RBTreeNode::into_reservation`]) an actual node (usually returned by some operation like +/// removal from a tree). +pub struct RBTreeNodeReservation { + node: Box>>, +} + +impl RBTreeNodeReservation { + /// Initialises a node reservation. + /// + /// It then becomes an [`RBTreeNode`] that can be inserted into a tree. 
+ pub fn into_node(mut self, key: K, value: V) -> RBTreeNode { + let node_ptr = self.node.as_mut_ptr(); + // SAFETY: `node_ptr` is valid, and so are its fields. + unsafe { addr_of_mut!((*node_ptr).links).write(bindings::rb_node::default()) }; + // SAFETY: `node_ptr` is valid, and so are its fields. + unsafe { addr_of_mut!((*node_ptr).key).write(key) }; + // SAFETY: `node_ptr` is valid, and so are its fields. + unsafe { addr_of_mut!((*node_ptr).value).write(value) }; + let raw = Box::into_raw(self.node); + RBTreeNode { + // SAFETY: The pointer came from a `MaybeUninit` whose fields have all been + // initialised. Additionally, it has the same layout as `Node`. + node: unsafe { Box::from_raw(raw as _) }, + } + } +} + +/// A red-black tree node. +/// +/// The node is fully initialised (with key and value) and can be inserted into a tree without any +/// extra allocations or failure paths. +pub struct RBTreeNode { + node: Box>, +} + +impl RBTreeNode { + /// "Uninitialises" a node. + /// + /// It then becomes a reservation that can be re-initialised into a different node (i.e., with + /// a different key and/or value). + /// + /// The existing key and value are dropped in-place as part of this operation, that is, memory + /// may be freed (but only for the key/value; memory for the node itself is kept for reuse). + pub fn into_reservation(self) -> RBTreeNodeReservation { + let raw = Box::into_raw(self.node); + let mut ret = RBTreeNodeReservation { + // SAFETY: The pointer came from a valid `Node`, which has the same layout as + // `MaybeUninit`. + node: unsafe { Box::from_raw(raw as _) }, + }; + // SAFETY: Although the type is `MaybeUninit`, we know it has been initialised + // because it came from a `Node`. So it is safe to drop it. 
+ unsafe { core::ptr::drop_in_place(ret.node.as_mut_ptr()) }; + ret + } +} diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs new file mode 100644 index 0000000000000..c38b0dceb345a --- /dev/null +++ b/rust/kernel/security.rs @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Linux Security Modules (LSM). +//! +//! C header: [`include/linux/security.h`](../../../../include/linux/security.h). + +use crate::{bindings, c_types, error::Error, file::File, task::Task, Result}; + +extern "C" { + #[allow(improper_ctypes)] + fn rust_helper_security_binder_set_context_mgr( + mgr: *mut bindings::task_struct, + ) -> c_types::c_int; + #[allow(improper_ctypes)] + fn rust_helper_security_binder_transaction( + from: *mut bindings::task_struct, + to: *mut bindings::task_struct, + ) -> c_types::c_int; + #[allow(improper_ctypes)] + fn rust_helper_security_binder_transfer_binder( + from: *mut bindings::task_struct, + to: *mut bindings::task_struct, + ) -> c_types::c_int; + #[allow(improper_ctypes)] + fn rust_helper_security_binder_transfer_file( + from: *mut bindings::task_struct, + to: *mut bindings::task_struct, + file: *mut bindings::file, + ) -> c_types::c_int; +} + +/// Calls the security modules to determine if the given task can become the manager of a binder +/// context. +pub fn binder_set_context_mgr(mgr: &Task) -> Result { + // SAFETY: By the `Task` invariants, `mgr.ptr` is valid. + let ret = unsafe { rust_helper_security_binder_set_context_mgr(mgr.ptr) }; + if ret != 0 { + Err(Error::from_kernel_errno(ret)) + } else { + Ok(()) + } +} + +/// Calls the security modules to determine if binder transactions are allowed from task `from` to +/// task `to`. +pub fn binder_transaction(from: &Task, to: &Task) -> Result { + // SAFETY: By the `Task` invariants, `from.ptr` and `to.ptr` are valid. 
+ let ret = unsafe { rust_helper_security_binder_transaction(from.ptr, to.ptr) }; + if ret != 0 { + Err(Error::from_kernel_errno(ret)) + } else { + Ok(()) + } +} + +/// Calls the security modules to determine if task `from` is allowed to send binder objects +/// (owned by itself or other processes) to task `to` through a binder transaction. +pub fn binder_transfer_binder(from: &Task, to: &Task) -> Result { + // SAFETY: By the `Task` invariants, `from.ptr` and `to.ptr` are valid. + let ret = unsafe { rust_helper_security_binder_transfer_binder(from.ptr, to.ptr) }; + if ret != 0 { + Err(Error::from_kernel_errno(ret)) + } else { + Ok(()) + } +} + +/// Calls the security modules to determine if task `from` is allowed to send the given file to +/// task `to` (which would get its own file descriptor) through a binder transaction. +pub fn binder_transfer_file(from: &Task, to: &Task, file: &File) -> Result { + // SAFETY: By the `Task` invariants, `from.ptr` and `to.ptr` are valid. Similarly, by the + // `File` invariants, `file.ptr` is also valid. + let ret = unsafe { rust_helper_security_binder_transfer_file(from.ptr, to.ptr, file.ptr) }; + if ret != 0 { + Err(Error::from_kernel_errno(ret)) + } else { + Ok(()) + } +} diff --git a/rust/kernel/static_assert.rs b/rust/kernel/static_assert.rs new file mode 100644 index 0000000000000..a80d8ab57564f --- /dev/null +++ b/rust/kernel/static_assert.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Static assert. + +/// Static assert (i.e. compile-time assert). +/// +/// Similar to C11 [`_Static_assert`] and C++11 [`static_assert`]. +/// +/// The feature may be added to Rust in the future: see [RFC 2790]. 
+/// +/// [`_Static_assert`]: https://en.cppreference.com/w/c/language/_Static_assert +/// [`static_assert`]: https://en.cppreference.com/w/cpp/language/static_assert +/// [RFC 2790]: https://github.com/rust-lang/rfcs/issues/2790 +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// static_assert!(42 > 24); +/// static_assert!(core::mem::size_of::() == 1); +/// +/// const X: &[u8] = b"bar"; +/// static_assert!(X[1] == 'a' as u8); +/// +/// const fn f(x: i32) -> i32 { +/// x + 2 +/// } +/// static_assert!(f(40) == 42); +/// ``` +#[macro_export] +macro_rules! static_assert { + ($condition:expr) => { + // Based on the latest one in `rustc`'s one before it was [removed]. + // + // [removed]: https://github.com/rust-lang/rust/commit/c2dad1c6b9f9636198d7c561b47a2974f5103f6d + #[allow(dead_code)] + const _: () = [()][!($condition) as usize]; + }; +} diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs new file mode 100644 index 0000000000000..5620080a8e81d --- /dev/null +++ b/rust/kernel/str.rs @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! String representations. + +use core::ops::{self, Deref, Index}; + +use crate::bindings; +use crate::c_types; + +/// Byte string without UTF-8 validity guarantee. +/// +/// `BStr` is simply an alias to `[u8]`, but has a more evident semantical meaning. +pub type BStr = [u8]; + +/// Creates a new [`BStr`] from a string literal. +/// +/// `b_str!` converts the supplied string literal to byte string, so non-ASCII +/// characters can be included. +/// +/// # Examples +/// +/// ``` +/// # use kernel::b_str; +/// # use kernel::str::BStr; +/// const MY_BSTR: &'static BStr = b_str!("My awesome BStr!"); +/// ``` +#[macro_export] +macro_rules! b_str { + ($str:literal) => {{ + const S: &'static str = $str; + const C: &'static $crate::str::BStr = S.as_bytes(); + C + }}; +} + +/// Possible errors when using conversion functions in [`CStr`]. 
+#[derive(Debug, Clone, Copy)] +pub enum CStrConvertError { + /// Supplied bytes contain an interior `NUL`. + InteriorNul, + + /// Supplied bytes are not terminated by `NUL`. + NotNulTerminated, +} + +impl From for crate::Error { + #[inline] + fn from(_: CStrConvertError) -> crate::Error { + crate::Error::EINVAL + } +} + +/// A string that is guaranteed to have exactly one `NUL` byte, which is at the +/// end. +/// +/// Used for interoperability with kernel APIs that take C strings. +#[repr(transparent)] +pub struct CStr([u8]); + +impl CStr { + /// Returns the length of this string excluding `NUL`. + #[inline] + pub const fn len(&self) -> usize { + self.len_with_nul() - 1 + } + + /// Returns the length of this string with `NUL`. + #[inline] + pub const fn len_with_nul(&self) -> usize { + // SAFETY: This is one of the invariants of `CStr`. + // We add an `unreachable_unchecked` here to hint the optimizer that + // the value returned from this function is non-zero. + if self.0.is_empty() { + unsafe { core::hint::unreachable_unchecked() }; + } + self.0.len() + } + + /// Returns `true` if the string only includes `NUL`. + #[inline] + pub const fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Wraps a raw C string pointer. + /// + /// # Safety + /// + /// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must + /// last at least `'a`. When `CStr` is alive, the memory pointed to by `ptr` + /// must not be mutated. + #[inline] + pub unsafe fn from_char_ptr<'a>(ptr: *const c_types::c_char) -> &'a Self { + // SAFETY: The safety precondition guarantees `ptr` is a valid pointer + // to a `NUL`-terminated C string. + let len = unsafe { bindings::strlen(ptr) } + 1; + // SAFETY: Lifetime guaranteed by the safety precondition. + let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len as _) }; + // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. + // As we have added 1 to `len`, the last byte is known to be `NUL`.
+ unsafe { Self::from_bytes_with_nul_unchecked(bytes) } + } + + /// Creates a [`CStr`] from a `[u8]`. + /// + /// The provided slice must be `NUL`-terminated and must not contain any + /// interior `NUL` bytes. + pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> { + if bytes.is_empty() { + return Err(CStrConvertError::NotNulTerminated); + } + if bytes[bytes.len() - 1] != 0 { + return Err(CStrConvertError::NotNulTerminated); + } + let mut i = 0; + // `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking, + // while it couldn't optimize away bounds checks for `i < bytes.len() - 1`. + while i + 1 < bytes.len() { + if bytes[i] == 0 { + return Err(CStrConvertError::InteriorNul); + } + i += 1; + } + // SAFETY: We just checked that all properties hold. + Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) + } + + /// Creates a [`CStr`] from a `[u8]`, panicking if the input is not valid. + /// + /// This function is only meant to be used by `c_str!` macro, so + /// crates using `c_str!` macro don't have to enable `const_panic` feature. + #[doc(hidden)] + pub const fn from_bytes_with_nul_unwrap(bytes: &[u8]) -> &Self { + match Self::from_bytes_with_nul(bytes) { + Ok(v) => v, + Err(_) => panic!("string contains interior NUL"), + } + } + + /// Creates a [`CStr`] from a `[u8]` without performing any additional + /// checks. + /// + /// # Safety + /// + /// `bytes` *must* end with a `NUL` byte, and should only have a single + /// `NUL` byte (or the string will be truncated). + #[inline] + pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr { + // Note: This can be done using pointer deref (which requires + // `const_raw_ptr_deref` to be const) or `transmute` (which requires + // `const_transmute` to be const) or `ptr::from_raw_parts` (which + // requires `ptr_metadata`). + // While none of them are currently stable, it is very likely that one of + // them will eventually be.
+ // SAFETY: Properties of `bytes` guaranteed by the safety precondition. + unsafe { &*(bytes as *const [u8] as *const Self) } + } + + /// Returns a C pointer to the string. + #[inline] + pub const fn as_char_ptr(&self) -> *const c_types::c_char { + self.0.as_ptr() as _ + } + + /// Convert the string to a byte slice without the trailing 0 byte. + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.0[..self.len()] + } + + /// Convert the string to a byte slice containing the trailing 0 byte. + #[inline] + pub const fn as_bytes_with_nul(&self) -> &[u8] { + &self.0 + } +} + +impl AsRef for CStr { + #[inline] + fn as_ref(&self) -> &BStr { + self.as_bytes() + } +} + +impl Deref for CStr { + type Target = BStr; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_bytes() + } +} + +impl Index> for CStr { + type Output = CStr; + + #[inline] + fn index(&self, index: ops::RangeFrom) -> &Self::Output { + // Delegate bounds checking to slice. + // Assign to _ to mute clippy's unnecessary operation warning. + let _ = &self.as_bytes()[index.start..]; + // SAFETY: We just checked the bounds. + unsafe { Self::from_bytes_with_nul_unchecked(&self.0[index.start..]) } + } +} + +impl Index for CStr { + type Output = CStr; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &Self::Output { + self + } +} + +mod private { + use core::ops; + + // Marker trait for index types that can be forward to `BStr`. + pub trait CStrIndex {} + + impl CStrIndex for usize {} + impl CStrIndex for ops::Range {} + impl CStrIndex for ops::RangeInclusive {} + impl CStrIndex for ops::RangeToInclusive {} +} + +impl Index for CStr +where + Idx: private::CStrIndex, + BStr: Index, +{ + type Output = >::Output; + + #[inline] + fn index(&self, index: Idx) -> &Self::Output { + &self.as_bytes()[index] + } +} + +/// Creates a new [`CStr`] from a string literal. +/// +/// The string literal should not contain any `NUL` bytes. 
+/// +/// # Examples +/// +/// ``` +/// # use kernel::c_str; +/// # use kernel::str::CStr; +/// const MY_CSTR: &'static CStr = c_str!("My awesome CStr!"); +/// ``` +#[macro_export] +macro_rules! c_str { + ($str:literal) => {{ + const S: &str = concat!($str, "\0"); + const C: &$crate::str::CStr = $crate::str::CStr::from_bytes_with_nul_unwrap(S.as_bytes()); + C + }}; +} diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs new file mode 100644 index 0000000000000..ddecb9e371a45 --- /dev/null +++ b/rust/kernel/sync/arc.rs @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A reference-counted pointer. +//! +//! This module implements a way for users to create reference-counted objects and pointers to +//! them. Such a pointer automatically increments and decrements the count, and drops the +//! underlying object when it reaches zero. It is also safe to use concurrently from multiple +//! threads. +//! +//! It is different from the standard library's [`Arc`] in a few ways: +//! 1. It is backed by the kernel's `refcount_t` type. +//! 2. It does not support weak references, which allows it to be half the size. +//! 3. It saturates the reference count instead of aborting when it goes over a threshold. +//! 4. It does not provide a `get_mut` method, so the ref counted object is pinned. +//! +//! [`Arc`]: https://doc.rust-lang.org/std/sync/struct.Arc.html + +use crate::{bindings, Result}; +use alloc::boxed::Box; +use core::{ + cell::UnsafeCell, convert::AsRef, marker::PhantomData, mem::ManuallyDrop, ops::Deref, pin::Pin, + ptr::NonNull, +}; + +extern "C" { + fn rust_helper_refcount_new() -> bindings::refcount_t; + fn rust_helper_refcount_inc(r: *mut bindings::refcount_t); + fn rust_helper_refcount_dec_and_test(r: *mut bindings::refcount_t) -> bool; +} + +/// A reference-counted pointer to an instance of `T`. +/// +/// The reference count is incremented when new instances of [`Ref`] are created, and decremented +/// when they are dropped. 
When the count reaches zero, the underlying `T` is also dropped. +/// +/// # Invariants +/// +/// The reference count on an instance of [`Ref`] is always non-zero. +/// The object pointed to by [`Ref`] is always pinned. +pub struct Ref { + ptr: NonNull>, + _p: PhantomData>, +} + +struct RefInner { + refcount: UnsafeCell, + data: T, +} + +// This is to allow [`Ref`] (and variants) to be used as the type of `self`. +impl core::ops::Receiver for Ref {} + +// SAFETY: It is safe to send `Ref` to another thread when the underlying `T` is `Sync` because +// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs +// `T` to be `Send` because any thread that has a `Ref` may ultimately access `T` directly, for +// example, when the reference count reaches zero and `T` is dropped. +unsafe impl Send for Ref {} + +// SAFETY: It is safe to send `&Ref` to another thread when the underlying `T` is `Sync` for +// the same reason as above. `T` needs to be `Send` as well because a thread can clone a `&Ref` +// into a `Ref`, which may lead to `T` being accessed by the same reasoning as above. +unsafe impl Sync for Ref {} + +impl Ref { + /// Constructs a new reference counted instance of `T`. + pub fn try_new(contents: T) -> Result { + Self::try_new_and_init(contents, |_| {}) + } + + /// Constructs a new reference counted instance of `T` and calls the initialisation function. + /// + /// This is useful because it provides a mutable reference to `T` at its final location. + pub fn try_new_and_init)>(contents: T, init: U) -> Result { + // INVARIANT: The refcount is initialised to a non-zero value. + let mut inner = Box::try_new(RefInner { + // SAFETY: Just an FFI call that returns a `refcount_t` initialised to 1. + refcount: UnsafeCell::new(unsafe { rust_helper_refcount_new() }), + data: contents, + })?; + + // SAFETY: By the invariant, `RefInner` is pinned and `T` is also pinned. 
+ let pinned = unsafe { Pin::new_unchecked(&mut inner.data) }; + + // INVARIANT: The only places where `&mut T` is available are here, which is explicitly + // pinned, and in `drop`. Both are compatible with the pin requirements. + init(pinned); + + Ok(Ref { + ptr: NonNull::from(Box::leak(inner)), + _p: PhantomData, + }) + } + + /// Deconstructs a [`Ref`] object into a `usize`. + /// + /// It can be reconstructed once via [`Ref::from_usize`]. + pub fn into_usize(obj: Self) -> usize { + ManuallyDrop::new(obj).ptr.as_ptr() as _ + } + + /// Borrows a [`Ref`] instance previously deconstructed via [`Ref::into_usize`]. + /// + /// # Safety + /// + /// `encoded` must have been returned by a previous call to [`Ref::into_usize`]. Additionally, + /// [`Ref::from_usize`] can only be called after *all* instances of [`RefBorrow`] have been + /// dropped. + pub unsafe fn borrow_usize(encoded: usize) -> RefBorrow { + // SAFETY: By the safety requirement of this function, we know that `encoded` came from + // a previous call to `Ref::into_usize`. + let obj = ManuallyDrop::new(unsafe { Ref::from_usize(encoded) }); + + // SAFETY: The safety requirements ensure that the object remains alive for the lifetime of + // the returned value. There is no way to create mutable references to the object. + unsafe { RefBorrow::new(obj) } + } + + /// Recreates a [`Ref`] instance previously deconstructed via [`Ref::into_usize`]. + /// + /// # Safety + /// + /// `encoded` must have been returned by a previous call to [`Ref::into_usize`]. Additionally, + /// it can only be called once for each previous call to [`Ref::into_usize`]. + pub unsafe fn from_usize(encoded: usize) -> Self { + Ref { + ptr: NonNull::new(encoded as _).unwrap(), + _p: PhantomData, + } + } +} + +impl Ref { + /// Determines if two reference-counted pointers point to the same underlying instance of `T`. 
+ pub fn ptr_eq(a: &Self, b: &Self) -> bool { + core::ptr::eq(a.ptr.as_ptr(), b.ptr.as_ptr()) + } + + /// Returns a pinned version of a given `Ref` instance. + pub fn pinned(obj: Self) -> Pin { + // SAFETY: The type invariants guarantee that the value is pinned. + unsafe { Pin::new_unchecked(obj) } + } +} + +impl Deref for Ref { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is + // safe to dereference it. + unsafe { &self.ptr.as_ref().data } + } +} + +impl Clone for Ref { + fn clone(&self) -> Self { + // INVARIANT: C `refcount_inc` saturates the refcount, so it cannot overflow to zero. + // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is + // safe to increment the refcount. + unsafe { rust_helper_refcount_inc(self.ptr.as_ref().refcount.get()) }; + Self { + ptr: self.ptr, + _p: PhantomData, + } + } +} + +impl AsRef for Ref { + fn as_ref(&self) -> &T { + // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is + // safe to dereference it. + unsafe { &self.ptr.as_ref().data } + } +} + +impl Drop for Ref { + fn drop(&mut self) { + // SAFETY: By the type invariant, there is necessarily a reference to the object. We cannot + // touch `refcount` after it's decremented to a non-zero value because another thread/CPU + // may concurrently decrement it to zero and free it. It is ok to have a raw pointer to + // freed/invalid memory as long as it is never dereferenced. + let refcount = unsafe { self.ptr.as_ref() }.refcount.get(); + + // INVARIANT: If the refcount reaches zero, there are no other instances of `Ref`, and + // this instance is being dropped, so the broken invariant is not observable. + // SAFETY: Also by the type invariant, we are allowed to decrement the refcount. 
+ let is_zero = unsafe { rust_helper_refcount_dec_and_test(refcount) }; + if is_zero { + // The count reached zero, we must free the memory. + // + // SAFETY: The pointer was initialised from the result of `Box::leak`. + unsafe { Box::from_raw(self.ptr.as_ptr()) }; + } + } +} + +/// A borrowed [`Ref`] with manually-managed lifetime. +/// +/// # Invariants +/// +/// There are no mutable references to the underlying [`Ref`], and it remains valid for the lifetime +/// of the [`RefBorrow`] instance. +pub struct RefBorrow { + inner_ref: ManuallyDrop>, +} + +impl RefBorrow { + /// Creates a new [`RefBorrow`] instance. + /// + /// # Safety + /// + /// Callers must ensure the following for the lifetime of the returned [`RefBorrow`] instance: + /// 1. That `obj` remains valid; + /// 2. That no mutable references to `obj` are created. + unsafe fn new(obj: ManuallyDrop>) -> Self { + // INVARIANT: The safety requirements guarantee the invariants. + Self { inner_ref: obj } + } +} + +impl Deref for RefBorrow { + type Target = Ref; + + fn deref(&self) -> &Self::Target { + self.inner_ref.deref() + } +} diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs new file mode 100644 index 0000000000000..993087e6c2339 --- /dev/null +++ b/rust/kernel/sync/condvar.rs @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A condition variable. +//! +//! This module allows Rust code to use the kernel's [`struct wait_queue_head`] as a condition +//! variable. + +use super::{Guard, Lock, NeedsLockClass}; +use crate::{bindings, str::CStr, task::Task}; +use core::{cell::UnsafeCell, marker::PhantomPinned, mem::MaybeUninit, pin::Pin}; + +extern "C" { + fn rust_helper_init_wait(wq: *mut bindings::wait_queue_entry); +} + +/// Safely initialises a [`CondVar`] with the given name, generating a new lock class. +#[macro_export] +macro_rules! 
condvar_init { + ($condvar:expr, $name:literal) => { + $crate::init_with_lockdep!($condvar, $name) + }; +} + +// TODO: `bindgen` is not generating this constant. Figure out why. +const POLLFREE: u32 = 0x4000; + +/// Exposes the kernel's [`struct wait_queue_head`] as a condition variable. It allows the caller to +/// atomically release the given lock and go to sleep. It reacquires the lock when it wakes up. And +/// it wakes up when notified by another thread (via [`CondVar::notify_one`] or +/// [`CondVar::notify_all`]) or because the thread received a signal. +/// +/// [`struct wait_queue_head`]: ../../../include/linux/wait.h +pub struct CondVar { + pub(crate) wait_list: UnsafeCell, + + /// A condvar needs to be pinned because it contains a [`struct list_head`] that is + /// self-referential, so it cannot be safely moved once it is initialised. + _pin: PhantomPinned, +} + +// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on any thread. +unsafe impl Send for CondVar {} + +// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on multiple threads +// concurrently. +unsafe impl Sync for CondVar {} + +impl CondVar { + /// Constructs a new conditional variable. + /// + /// # Safety + /// + /// The caller must call `CondVar::init` before using the conditional variable. + pub unsafe fn new() -> Self { + Self { + wait_list: UnsafeCell::new(bindings::wait_queue_head::default()), + _pin: PhantomPinned, + } + } + + /// Atomically releases the given lock (whose ownership is proven by the guard) and puts the + /// thread to sleep. It wakes up when notified by [`CondVar::notify_one`] or + /// [`CondVar::notify_all`], or when the thread receives a signal. + /// + /// Returns whether there is a signal pending. 
+ #[must_use = "wait returns if a signal is pending, so the caller must check the return value"] + pub fn wait(&self, guard: &mut Guard<'_, L>) -> bool { + let lock = guard.lock; + let mut wait = MaybeUninit::::uninit(); + + // SAFETY: `wait` points to valid memory. + unsafe { rust_helper_init_wait(wait.as_mut_ptr()) }; + + // SAFETY: Both `wait` and `wait_list` point to valid memory. + unsafe { + bindings::prepare_to_wait_exclusive( + self.wait_list.get(), + wait.as_mut_ptr(), + bindings::TASK_INTERRUPTIBLE as _, + ); + } + + // SAFETY: The guard is evidence that the caller owns the lock. + unsafe { lock.unlock() }; + + // SAFETY: No arguments, switches to another thread. + unsafe { bindings::schedule() }; + + lock.lock_noguard(); + + // SAFETY: Both `wait` and `wait_list` point to valid memory. + unsafe { bindings::finish_wait(self.wait_list.get(), wait.as_mut_ptr()) }; + + Task::current().signal_pending() + } + + /// Calls the kernel function to notify the appropriate number of threads with the given flags. + fn notify(&self, count: i32, flags: u32) { + // SAFETY: `wait_list` points to valid memory. + unsafe { + bindings::__wake_up( + self.wait_list.get(), + bindings::TASK_NORMAL, + count, + flags as _, + ) + }; + } + + /// Wakes a single waiter up, if any. This is not 'sticky' in the sense that if no thread is + /// waiting, the notification is lost completely (as opposed to automatically waking up the + /// next waiter). + pub fn notify_one(&self) { + self.notify(1, 0); + } + + /// Wakes all waiters up, if any. This is not 'sticky' in the sense that if no thread is + /// waiting, the notification is lost completely (as opposed to automatically waking up the + /// next waiter). + pub fn notify_all(&self) { + self.notify(0, 0); + } + + /// Wakes all waiters up. If they were added by `epoll`, they are also removed from the list of + /// waiters. This is useful when cleaning up a condition variable that may be waited on by + /// threads that use `epoll`. 
+ pub fn free_waiters(&self) { + self.notify(1, bindings::POLLHUP | POLLFREE); + } +} + +impl NeedsLockClass for CondVar { + unsafe fn init(self: Pin<&mut Self>, name: &'static CStr, key: *mut bindings::lock_class_key) { + unsafe { bindings::__init_waitqueue_head(self.wait_list.get(), name.as_char_ptr(), key) }; + } +} diff --git a/rust/kernel/sync/guard.rs b/rust/kernel/sync/guard.rs new file mode 100644 index 0000000000000..84e5d319a5fd8 --- /dev/null +++ b/rust/kernel/sync/guard.rs @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A generic lock guard and trait. +//! +//! This module contains a lock guard that can be used with any locking primitive that implements +//! the ([`Lock`]) trait. It also contains the definition of the trait, which can be leveraged by +//! other constructs to work on generic locking primitives. + +/// Allows mutual exclusion primitives that implement the [`Lock`] trait to automatically unlock +/// when a guard goes out of scope. It also provides a safe and convenient way to access the data +/// protected by the lock. +#[must_use = "the lock unlocks immediately when the guard is unused"] +pub struct Guard<'a, L: Lock + ?Sized> { + pub(crate) lock: &'a L, +} + +// SAFETY: `Guard` is sync when the data protected by the lock is also sync. This is more +// conservative than the default compiler implementation; more details can be found on +// https://github.com/rust-lang/rust/issues/41622 -- it refers to `MutexGuard` from the standard +// library. +unsafe impl Sync for Guard<'_, L> +where + L: Lock + ?Sized, + L::Inner: Sync, +{ +} + +impl core::ops::Deref for Guard<'_, L> { + type Target = L::Inner; + + fn deref(&self) -> &Self::Target { + // SAFETY: The caller owns the lock, so it is safe to deref the protected data. 
+ unsafe { &*self.lock.locked_data().get() } + } +} + +impl core::ops::DerefMut for Guard<'_, L> { + fn deref_mut(&mut self) -> &mut L::Inner { + // SAFETY: The caller owns the lock, so it is safe to deref the protected data. + unsafe { &mut *self.lock.locked_data().get() } + } +} + +impl Drop for Guard<'_, L> { + fn drop(&mut self) { + // SAFETY: The caller owns the lock, so it is safe to unlock it. + unsafe { self.lock.unlock() }; + } +} + +impl<'a, L: Lock + ?Sized> Guard<'a, L> { + /// Constructs a new lock guard. + /// + /// # Safety + /// + /// The caller must ensure that it owns the lock. + pub(crate) unsafe fn new(lock: &'a L) -> Self { + Self { lock } + } +} + +/// A generic mutual exclusion primitive. +/// +/// [`Guard`] is written such that any mutual exclusion primitive that can implement this trait can +/// also benefit from having an automatic way to unlock itself. +pub trait Lock { + /// The type of the data protected by the lock. + type Inner: ?Sized; + + /// Acquires the lock, making the caller its owner. + fn lock_noguard(&self); + + /// Releases the lock, giving up ownership of the lock. + /// + /// # Safety + /// + /// It must only be called by the current owner of the lock. + unsafe fn unlock(&self); + + /// Returns the data protected by the lock. + fn locked_data(&self) -> &core::cell::UnsafeCell; +} diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs new file mode 100644 index 0000000000000..d3e0b0d5e9b4e --- /dev/null +++ b/rust/kernel/sync/locked_by.rs @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A wrapper for data protected by a lock that does not wrap it. + +use super::{Guard, Lock}; +use core::{cell::UnsafeCell, ops::Deref, ptr}; + +/// Allows access to some data to be serialised by a lock that does not wrap it. +/// +/// In most cases, data protected by a lock is wrapped by the appropriate lock type, e.g., +/// [`super::Mutex`] or [`super::SpinLock`]. 
[`LockedBy`] is meant for cases when this is not +/// possible. For example, if a container has a lock and some data in the contained elements needs +/// to be protected by the same lock. +/// +/// [`LockedBy`] wraps the data in lieu of another locking primitive, and only allows access to it +/// when the caller shows evidence that 'external' lock is locked. +/// +/// # Example +/// +/// The following is an example for illustrative purposes: `InnerDirectory::bytes_used` is an +/// aggregate of all `InnerFile::bytes_used` and must be kept consistent; so we wrap `InnerFile` in +/// a `LockedBy` so that it shares a lock with `InnerDirectory`. This allows us to enforce at +/// compile-time that access to `InnerFile` is only granted when an `InnerDirectory` is also +/// locked; we enforce at run time that the right `InnerDirectory` is locked. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::sync::{LockedBy, Mutex}; +/// +/// struct InnerFile { +/// bytes_used: u64, +/// } +/// +/// struct File { +/// name: String, +/// inner: LockedBy>, +/// } +/// +/// struct InnerDirectory { +/// /// The sum of the bytes used by all files. +/// bytes_used: u64, +/// files: Vec, +/// } +/// +/// struct Directory { +/// name: String, +/// inner: Mutex, +/// } +/// ``` +pub struct LockedBy { + owner: *const L::Inner, + data: UnsafeCell, +} + +// SAFETY: `LockedBy` can be transferred across thread boundaries iff the data it protects can. +unsafe impl Send for LockedBy {} + +// SAFETY: `LockedBy` serialises the interior mutability it provides, so it is `Sync` as long as the +// data it protects is `Send`. +unsafe impl Sync for LockedBy {} + +impl LockedBy { + /// Constructs a new instance of [`LockedBy`]. + /// + /// It stores a raw pointer to the owner that is never dereferenced. It is only used to ensure + /// that the right owner is being used to access the protected data. 
If the owner is freed, the + /// data becomes inaccessible; if another instance of the owner is allocated *on the same + /// memory location*, the data becomes accessible again: none of this affects memory safety + /// because in any case at most one thread (or CPU) can access the protected data at a time. + pub fn new(owner: &L, data: T) -> Self { + Self { + owner: owner.locked_data().get(), + data: UnsafeCell::new(data), + } + } +} + +impl LockedBy { + /// Returns a shared reference to the protected data when the caller provides evidence (via a + /// [`Guard`]) that the owner is locked. Panics if `guard` belongs to a different owner. + pub fn access<'a>(&'a self, guard: &'a Guard<'_, L>) -> &'a T { + if !ptr::eq(guard.deref(), self.owner) { + panic!("guard does not match owner"); + } + + // SAFETY: `guard` is evidence that the owner is locked. Only a shared reference is created here because `guard` is itself only borrowed shared, so several results of `access` may coexist; materialising a `&mut T` would alias them. + unsafe { &*self.data.get() } + } + + /// Returns a mutable reference to the protected data when the caller provides evidence (via a + /// mutable [`Guard`]) that the owner is locked mutably. + pub fn access_mut<'a>(&'a self, guard: &'a mut Guard<'_, L>) -> &'a mut T { + if !ptr::eq(guard.deref().deref(), self.owner) { + panic!("guard does not match owner"); + } + + // SAFETY: `guard` is evidence that the owner is locked. + unsafe { &mut *self.data.get() } + } + + /// Returns a mutable reference to the protected data when the caller provides evidence (via a + /// mutable owner) that the owner is locked mutably. Showing a mutable reference to the owner + /// is sufficient because we know no other references can exist to it. + pub fn access_from_mut<'a>(&'a self, owner: &'a mut L::Inner) -> &'a mut T { + if !ptr::eq(owner, self.owner) { + panic!("mismatched owners"); + } + + // SAFETY: `owner` is evidence that there is only one reference to the owner. 
+ unsafe { &mut *self.data.get() } + } +} diff --git a/rust/kernel/sync/mod.rs b/rust/kernel/sync/mod.rs new file mode 100644 index 0000000000000..ce863109c06eb --- /dev/null +++ b/rust/kernel/sync/mod.rs @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Synchronisation primitives. +//! +//! This module contains the kernel APIs related to synchronisation that have been ported or +//! wrapped for usage by Rust code in the kernel and is shared by all of them. +//! +//! # Example +//! +//! ```no_run +//! # use kernel::prelude::*; +//! # use kernel::mutex_init; +//! # use kernel::sync::Mutex; +//! # use alloc::boxed::Box; +//! # use core::pin::Pin; +//! // SAFETY: `init` is called below. +//! let mut data = Pin::from(Box::new(unsafe { Mutex::new(0) })); +//! mutex_init!(data.as_mut(), "test::data"); +//! *data.lock() = 10; +//! pr_info!("{}\n", *data.lock()); +//! ``` + +use crate::str::CStr; +use crate::{bindings, c_types}; +use core::pin::Pin; + +mod arc; +mod condvar; +mod guard; +mod locked_by; +mod mutex; +mod spinlock; + +pub use arc::{Ref, RefBorrow}; +pub use condvar::CondVar; +pub use guard::{Guard, Lock}; +pub use locked_by::LockedBy; +pub use mutex::Mutex; +pub use spinlock::SpinLock; + +extern "C" { + fn rust_helper_cond_resched() -> c_types::c_int; +} + +/// Safely initialises an object that has an `init` function that takes a name and a lock class as +/// arguments, examples of these are [`Mutex`] and [`SpinLock`]. Each of them also provides a more +/// specialised name that uses this macro. +#[doc(hidden)] +#[macro_export] +macro_rules! init_with_lockdep { + ($obj:expr, $name:literal) => {{ + static mut CLASS: core::mem::MaybeUninit<$crate::bindings::lock_class_key> = + core::mem::MaybeUninit::uninit(); + let obj = $obj; + let name = $crate::c_str!($name); + // SAFETY: `CLASS` is never used by Rust code directly; the kernel may change it though. 
+ #[allow(unused_unsafe)] + unsafe { + $crate::sync::NeedsLockClass::init(obj, name, CLASS.as_mut_ptr()) + }; + }}; +} + +/// A trait for types that need a lock class during initialisation. +/// +/// Implementers of this trait benefit from the [`init_with_lockdep`] macro that generates a new +/// class for each initialisation call site. +pub trait NeedsLockClass { + /// Initialises the type instance so that it can be safely used. + /// + /// Callers are encouraged to use the [`init_with_lockdep`] macro as it automatically creates a + /// new lock class on each usage. + /// + /// # Safety + /// + /// `key` must point to a valid memory location as it will be used by the kernel. + unsafe fn init(self: Pin<&mut Self>, name: &'static CStr, key: *mut bindings::lock_class_key); +} + +/// Reschedules the caller's task if needed. +pub fn cond_resched() -> bool { + // SAFETY: No arguments, reschedules `current` if needed. + unsafe { rust_helper_cond_resched() != 0 } +} diff --git a/rust/kernel/sync/mutex.rs b/rust/kernel/sync/mutex.rs new file mode 100644 index 0000000000000..36605e8cdd628 --- /dev/null +++ b/rust/kernel/sync/mutex.rs @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A kernel mutex. +//! +//! This module allows Rust code to use the kernel's [`struct mutex`]. + +use super::{Guard, Lock, NeedsLockClass}; +use crate::bindings; +use crate::str::CStr; +use core::{cell::UnsafeCell, marker::PhantomPinned, pin::Pin}; + +/// Safely initialises a [`Mutex`] with the given name, generating a new lock class. +#[macro_export] +macro_rules! mutex_init { + ($mutex:expr, $name:literal) => { + $crate::init_with_lockdep!($mutex, $name) + }; +} + +/// Exposes the kernel's [`struct mutex`]. When multiple threads attempt to lock the same mutex, +/// only one at a time is allowed to progress, the others will block (sleep) until the mutex is +/// unlocked, at which point another thread will be allowed to wake up and make progress. 
+/// +/// A [`Mutex`] must first be initialised with a call to [`Mutex::init`] before it can be used. The +/// [`mutex_init`] macro is provided to automatically assign a new lock class to a mutex instance. +/// +/// Since it may block, [`Mutex`] needs to be used with care in atomic contexts. +/// +/// [`struct mutex`]: ../../../include/linux/mutex.h +pub struct Mutex { + /// The kernel `struct mutex` object. + mutex: UnsafeCell, + + /// A mutex needs to be pinned because it contains a [`struct list_head`] that is + /// self-referential, so it cannot be safely moved once it is initialised. + _pin: PhantomPinned, + + /// The data protected by the mutex. + data: UnsafeCell, +} + +// SAFETY: `Mutex` can be transferred across thread boundaries iff the data it protects can. +unsafe impl Send for Mutex {} + +// SAFETY: `Mutex` serialises the interior mutability it provides, so it is `Sync` as long as the +// data it protects is `Send`. +unsafe impl Sync for Mutex {} + +impl Mutex { + /// Constructs a new mutex. + /// + /// # Safety + /// + /// The caller must call [`Mutex::init`] before using the mutex. + pub unsafe fn new(t: T) -> Self { + Self { + mutex: UnsafeCell::new(bindings::mutex::default()), + data: UnsafeCell::new(t), + _pin: PhantomPinned, + } + } +} + +impl Mutex { + /// Locks the mutex and gives the caller access to the data protected by it. Only one thread at + /// a time is allowed to access the protected data. + pub fn lock(&self) -> Guard<'_, Self> { + self.lock_noguard(); + // SAFETY: The mutex was just acquired. 
+ unsafe { Guard::new(self) } + } +} + +impl NeedsLockClass for Mutex { + unsafe fn init(self: Pin<&mut Self>, name: &'static CStr, key: *mut bindings::lock_class_key) { + unsafe { bindings::__mutex_init(self.mutex.get(), name.as_char_ptr(), key) }; + } +} + +extern "C" { + fn rust_helper_mutex_lock(mutex: *mut bindings::mutex); +} + +impl Lock for Mutex { + type Inner = T; + + fn lock_noguard(&self) { + // SAFETY: `mutex` points to valid memory. + unsafe { + rust_helper_mutex_lock(self.mutex.get()); + } + } + + unsafe fn unlock(&self) { + unsafe { bindings::mutex_unlock(self.mutex.get()) }; + } + + fn locked_data(&self) -> &UnsafeCell { + &self.data + } +} diff --git a/rust/kernel/sync/spinlock.rs b/rust/kernel/sync/spinlock.rs new file mode 100644 index 0000000000000..c6e38ef85b00e --- /dev/null +++ b/rust/kernel/sync/spinlock.rs @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A kernel spinlock. +//! +//! This module allows Rust code to use the kernel's [`struct spinlock`]. +//! +//! See . + +use super::{Guard, Lock, NeedsLockClass}; +use crate::str::CStr; +use crate::{bindings, c_types}; +use core::{cell::UnsafeCell, marker::PhantomPinned, pin::Pin}; + +extern "C" { + #[allow(improper_ctypes)] + fn rust_helper_spin_lock_init( + lock: *mut bindings::spinlock_t, + name: *const c_types::c_char, + key: *mut bindings::lock_class_key, + ); + fn rust_helper_spin_lock(lock: *mut bindings::spinlock); + fn rust_helper_spin_unlock(lock: *mut bindings::spinlock); +} + +/// Safely initialises a [`SpinLock`] with the given name, generating a new lock class. +#[macro_export] +macro_rules! spinlock_init { + ($spinlock:expr, $name:literal) => { + $crate::init_with_lockdep!($spinlock, $name) + }; +} + +/// Exposes the kernel's [`spinlock_t`]. 
When multiple CPUs attempt to lock the same spinlock, only +/// one at a time is allowed to progress, the others will block (spinning) until the spinlock is +/// unlocked, at which point another CPU will be allowed to make progress. +/// +/// A [`SpinLock`] must first be initialised with a call to [`SpinLock::init`] before it can be +/// used. The [`spinlock_init`] macro is provided to automatically assign a new lock class to a +/// spinlock instance. +/// +/// [`SpinLock`] does not manage the interrupt state, so it can be used in only two cases: (a) when +/// the caller knows that interrupts are disabled, or (b) when callers never use it in interrupt +/// handlers (in which case it is ok for interrupts to be enabled). +/// +/// [`spinlock_t`]: ../../../include/linux/spinlock.h +pub struct SpinLock { + spin_lock: UnsafeCell, + + /// Spinlocks are architecture-defined. So we conservatively require them to be pinned in case + /// some architecture uses self-references now or in the future. + _pin: PhantomPinned, + + data: UnsafeCell, +} + +// SAFETY: `SpinLock` can be transferred across thread boundaries iff the data it protects can. +unsafe impl Send for SpinLock {} + +// SAFETY: `SpinLock` serialises the interior mutability it provides, so it is `Sync` as long as the +// data it protects is `Send`. +unsafe impl Sync for SpinLock {} + +impl SpinLock { + /// Constructs a new spinlock. + /// + /// # Safety + /// + /// The caller must call [`SpinLock::init`] before using the spinlock. + pub unsafe fn new(t: T) -> Self { + Self { + spin_lock: UnsafeCell::new(bindings::spinlock::default()), + data: UnsafeCell::new(t), + _pin: PhantomPinned, + } + } +} + +impl SpinLock { + /// Locks the spinlock and gives the caller access to the data protected by it. Only one thread + /// at a time is allowed to access the protected data. + pub fn lock(&self) -> Guard<'_, Self> { + self.lock_noguard(); + // SAFETY: The spinlock was just acquired. 
+ unsafe { Guard::new(self) } + } +} + +impl NeedsLockClass for SpinLock { + unsafe fn init(self: Pin<&mut Self>, name: &'static CStr, key: *mut bindings::lock_class_key) { + unsafe { rust_helper_spin_lock_init(self.spin_lock.get(), name.as_char_ptr(), key) }; + } +} + +impl Lock for SpinLock { + type Inner = T; + + fn lock_noguard(&self) { + // SAFETY: `spin_lock` points to valid memory. + unsafe { rust_helper_spin_lock(self.spin_lock.get()) }; + } + + unsafe fn unlock(&self) { + unsafe { rust_helper_spin_unlock(self.spin_lock.get()) }; + } + + fn locked_data(&self) -> &UnsafeCell { + &self.data + } +} diff --git a/rust/kernel/sysctl.rs b/rust/kernel/sysctl.rs new file mode 100644 index 0000000000000..42a2c7c818240 --- /dev/null +++ b/rust/kernel/sysctl.rs @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! System control. +//! +//! C header: [`include/linux/sysctl.h`](../../../../include/linux/sysctl.h) +//! +//! Reference: + +use alloc::boxed::Box; +use alloc::vec; +use core::mem; +use core::ptr; +use core::sync::atomic; + +use crate::{ + bindings, c_types, error, + io_buffer::IoBufferWriter, + str::CStr, + types, + user_ptr::{UserSlicePtr, UserSlicePtrWriter}, +}; + +/// Sysctl storage. +pub trait SysctlStorage: Sync { + /// Writes a byte slice. + fn store_value(&self, data: &[u8]) -> (usize, error::Result); + + /// Reads via a [`UserSlicePtrWriter`]. 
+ fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result); +} + +fn trim_whitespace(mut data: &[u8]) -> &[u8] { + while !data.is_empty() && (data[0] == b' ' || data[0] == b'\t' || data[0] == b'\n') { + data = &data[1..]; + } + while !data.is_empty() + && (data[data.len() - 1] == b' ' + || data[data.len() - 1] == b'\t' + || data[data.len() - 1] == b'\n') + { + data = &data[..data.len() - 1]; + } + data +} + +impl SysctlStorage for &T +where + T: SysctlStorage, +{ + fn store_value(&self, data: &[u8]) -> (usize, error::Result) { + (*self).store_value(data) + } + + fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result) { + (*self).read_value(data) + } +} + +impl SysctlStorage for atomic::AtomicBool { + fn store_value(&self, data: &[u8]) -> (usize, error::Result) { + let result = match trim_whitespace(data) { + b"0" => { + self.store(false, atomic::Ordering::Relaxed); + Ok(()) + } + b"1" => { + self.store(true, atomic::Ordering::Relaxed); + Ok(()) + } + _ => Err(error::Error::EINVAL), + }; + (data.len(), result) + } + + fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result) { + let value = if self.load(atomic::Ordering::Relaxed) { + b"1\n" + } else { + b"0\n" + }; + (value.len(), data.write_slice(value)) + } +} + +/// Holds a single `sysctl` entry (and its table). +pub struct Sysctl { + inner: Box, + // Responsible for keeping the `ctl_table` alive. + _table: Box<[bindings::ctl_table]>, + header: *mut bindings::ctl_table_header, +} + +// SAFETY: The only public method we have is `get()`, which returns `&T`, and +// `T: Sync`. Any new methods must adhere to this requirement. 
+unsafe impl Sync for Sysctl {} + +unsafe extern "C" fn proc_handler( + ctl: *mut bindings::ctl_table, + write: c_types::c_int, + buffer: *mut c_types::c_void, + len: *mut usize, + ppos: *mut bindings::loff_t, +) -> c_types::c_int { + // If we are reading from some offset other than the beginning of the file, + // return an empty read to signal EOF. + if unsafe { *ppos } != 0 && write == 0 { + unsafe { *len = 0 }; + return 0; + } + + let data = unsafe { UserSlicePtr::new(buffer, *len) }; + let storage = unsafe { &*((*ctl).data as *const T) }; + let (bytes_processed, result) = if write != 0 { + let data = match data.read_all() { + Ok(r) => r, + Err(e) => return e.to_kernel_errno(), + }; + storage.store_value(&data) + } else { + let mut writer = data.writer(); + storage.read_value(&mut writer) + }; + unsafe { *len = bytes_processed }; + unsafe { *ppos += *len as bindings::loff_t }; + match result { + Ok(()) => 0, + Err(e) => e.to_kernel_errno(), + } +} + +impl Sysctl { + /// Registers a single entry in `sysctl`. + pub fn register( + path: &'static CStr, + name: &'static CStr, + storage: T, + mode: types::Mode, + ) -> error::Result> { + if name.contains(&b'/') { + return Err(error::Error::EINVAL); + } + + let storage = Box::try_new(storage)?; + let mut table = vec![ + bindings::ctl_table { + procname: name.as_char_ptr(), + mode: mode.as_int(), + data: &*storage as *const T as *mut c_types::c_void, + proc_handler: Some(proc_handler::), + + maxlen: 0, + child: ptr::null_mut(), + poll: ptr::null_mut(), + extra1: ptr::null_mut(), + extra2: ptr::null_mut(), + }, + unsafe { mem::zeroed() }, + ] + .try_into_boxed_slice()?; + + let result = unsafe { bindings::register_sysctl(path.as_char_ptr(), table.as_mut_ptr()) }; + if result.is_null() { + return Err(error::Error::ENOMEM); + } + + Ok(Sysctl { + inner: storage, + _table: table, + header: result, + }) + } + + /// Gets the storage. 
+ pub fn get(&self) -> &T { + &self.inner + } +} + +impl Drop for Sysctl { + fn drop(&mut self) { + unsafe { + bindings::unregister_sysctl_table(self.header); + } + self.header = ptr::null_mut(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_trim_whitespace() { + assert_eq!(trim_whitespace(b"foo "), b"foo"); + assert_eq!(trim_whitespace(b" foo"), b"foo"); + assert_eq!(trim_whitespace(b" foo "), b"foo"); + } +} diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs new file mode 100644 index 0000000000000..dcf376b992ec4 --- /dev/null +++ b/rust/kernel/task.rs @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Tasks (threads and processes). +//! +//! C header: [`include/linux/sched.h`](../../../../include/linux/sched.h). + +use crate::{bindings, c_types}; +use core::{marker::PhantomData, mem::ManuallyDrop, ops::Deref}; + +extern "C" { + #[allow(improper_ctypes)] + fn rust_helper_signal_pending(t: *const bindings::task_struct) -> c_types::c_int; + #[allow(improper_ctypes)] + fn rust_helper_get_current() -> *mut bindings::task_struct; + #[allow(improper_ctypes)] + fn rust_helper_get_task_struct(t: *mut bindings::task_struct); + #[allow(improper_ctypes)] + fn rust_helper_put_task_struct(t: *mut bindings::task_struct); +} + +/// Wraps the kernel's `struct task_struct`. +/// +/// # Invariants +/// +/// The pointer `Task::ptr` is non-null and valid. Its reference count is also non-zero. 
+/// +/// # Examples +/// +/// The following is an example of getting the PID of the current thread with zero additional cost +/// when compared to the C version: +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::task::Task; +/// +/// # fn test() { +/// Task::current().pid(); +/// # } +/// ``` +/// +/// Getting the PID of the current process, also zero additional cost: +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::task::Task; +/// +/// # fn test() { +/// Task::current().group_leader().pid(); +/// # } +/// ``` +/// +/// Getting the current task and storing it in some struct. The reference count is automatically +/// incremented when creating `State` and decremented when it is dropped: +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::task::Task; +/// +/// struct State { +/// creator: Task, +/// index: u32, +/// } +/// +/// impl State { +/// fn new() -> Self { +/// Self { +/// creator: Task::current().clone(), +/// index: 0, +/// } +/// } +/// } +/// ``` +pub struct Task { + pub(crate) ptr: *mut bindings::task_struct, +} + +// SAFETY: Given that the task is referenced, it is OK to send it to another thread. +unsafe impl Send for Task {} + +// SAFETY: It's OK to access `Task` through references from other threads because we're either +// accessing properties that don't change (e.g., `pid`, `group_leader`) or that are properly +// synchronised by C code (e.g., `signal_pending`). +unsafe impl Sync for Task {} + +/// The type of process identifiers (PIDs). +type Pid = bindings::pid_t; + +impl Task { + /// Returns a task reference for the currently executing task/thread. + pub fn current<'a>() -> TaskRef<'a> { + // SAFETY: Just an FFI call. + let ptr = unsafe { rust_helper_get_current() }; + + // SAFETY: If the current thread is still running, the current task is valid. Given + // that `TaskRef` is not `Send`, we know it cannot be transferred to another thread (where + // it could potentially outlive the caller). 
+ unsafe { TaskRef::from_ptr(ptr) } + } + + /// Returns the group leader of the given task. + pub fn group_leader(&self) -> TaskRef<'_> { + // SAFETY: By the type invariant, we know that `self.ptr` is non-null and valid. + let ptr = unsafe { (*self.ptr).group_leader }; + + // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`, + // and given that a task has a reference to its group leader, we know it must be valid for + // the lifetime of the returned task reference. + unsafe { TaskRef::from_ptr(ptr) } + } + + /// Returns the PID of the given task. + pub fn pid(&self) -> Pid { + // SAFETY: By the type invariant, we know that `self.ptr` is non-null and valid. + unsafe { (*self.ptr).pid } + } + + /// Determines whether the given task has pending signals. + pub fn signal_pending(&self) -> bool { + // SAFETY: By the type invariant, we know that `self.ptr` is non-null and valid. + unsafe { rust_helper_signal_pending(self.ptr) != 0 } + } +} + +impl PartialEq for Task { + fn eq(&self, other: &Self) -> bool { + self.ptr == other.ptr + } +} + +impl Eq for Task {} + +impl Clone for Task { + fn clone(&self) -> Self { + // SAFETY: The type invariants guarantee that `self.ptr` has a non-zero reference count. + unsafe { rust_helper_get_task_struct(self.ptr) }; + + // INVARIANT: We incremented the reference count to account for the new `Task` being + // created. + Self { ptr: self.ptr } + } +} + +impl Drop for Task { + fn drop(&mut self) { + // INVARIANT: We may decrement the refcount to zero, but the `Task` is being dropped, so + // this is not observable. + // SAFETY: The type invariants guarantee that `Task::ptr` has a non-zero reference count. + unsafe { rust_helper_put_task_struct(self.ptr) }; + } +} + +/// A wrapper for [`Task`] that doesn't automatically decrement the refcount when dropped. +/// +/// We need the wrapper because [`ManuallyDrop`] alone would allow callers to call +/// [`ManuallyDrop::into_inner`]. 
This would allow an unsafe sequence to be triggered without +/// `unsafe` blocks because it would trigger an unbalanced call to `put_task_struct`. +/// +/// We make this explicitly not [`Send`] so that we can use it to represent the current thread +/// without having to increment/decrement its reference count. +/// +/// # Invariants +/// +/// The wrapped [`Task`] remains valid for the lifetime of the object. +pub struct TaskRef<'a> { + task: ManuallyDrop, + _not_send: PhantomData<(&'a (), *mut ())>, +} + +impl TaskRef<'_> { + /// Constructs a new `struct task_struct` wrapper that doesn't change its reference count. + /// + /// # Safety + /// + /// The pointer `ptr` must be non-null and valid for the lifetime of the object. + pub(crate) unsafe fn from_ptr(ptr: *mut bindings::task_struct) -> Self { + Self { + task: ManuallyDrop::new(Task { ptr }), + _not_send: PhantomData, + } + } +} + +// SAFETY: It is OK to share a reference to the current thread with another thread because we know +// the owner cannot go away while the shared reference exists (and `Task` itself is `Sync`). +unsafe impl Sync for TaskRef<'_> {} + +impl Deref for TaskRef<'_> { + type Target = Task; + + fn deref(&self) -> &Self::Target { + self.task.deref() + } +} diff --git a/rust/kernel/traits.rs b/rust/kernel/traits.rs new file mode 100644 index 0000000000000..39a43169bf70e --- /dev/null +++ b/rust/kernel/traits.rs @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Traits useful to drivers, and their implementations for common types. + +use core::{ops::Deref, pin::Pin}; + +use alloc::{alloc::AllocError, sync::Arc}; + +/// Trait which provides a fallible version of `pin()` for pointer types. +/// +/// Common pointer types which implement a `pin()` method include [`Box`](alloc::boxed::Box) and [`Arc`]. +pub trait TryPin { + /// Constructs a new `Pin>`. If `T` does not implement [`Unpin`], then data + /// will be pinned in memory and unable to be moved. 
An error will be returned + /// if allocation fails. + fn try_pin(data: P::Target) -> core::result::Result, AllocError>; +} + +impl TryPin> for Arc { + fn try_pin(data: T) -> core::result::Result>, AllocError> { + // SAFETY: the data `T` is exposed only through a `Pin>`, which + // does not allow data to move out of the `Arc`. Therefore it can + // never be moved. + Ok(unsafe { Pin::new_unchecked(Arc::try_new(data)?) }) + } +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs new file mode 100644 index 0000000000000..2146ee819cc99 --- /dev/null +++ b/rust/kernel/types.rs @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel types. +//! +//! C header: [`include/linux/types.h`](../../../../include/linux/types.h) + +use crate::{ + bindings, c_types, + sync::{Ref, RefBorrow}, +}; +use alloc::{boxed::Box, sync::Arc}; +use core::{ops::Deref, pin::Pin, ptr::NonNull}; + +/// Permissions. +/// +/// C header: [`include/uapi/linux/stat.h`](../../../../include/uapi/linux/stat.h) +/// +/// C header: [`include/linux/stat.h`](../../../../include/linux/stat.h) +pub struct Mode(bindings::umode_t); + +impl Mode { + /// Creates a [`Mode`] from an integer. + pub fn from_int(m: u16) -> Mode { + Mode(m) + } + + /// Returns the mode as an integer. + pub fn as_int(&self) -> u16 { + self.0 + } +} + +/// Used to convert an object into a raw pointer that represents it. +/// +/// It can eventually be converted back into the object. This is used to store objects as pointers +/// in kernel data structures, for example, an implementation of [`FileOperations`] in `struct +/// file::private_data`. +pub trait PointerWrapper { + /// Type of values borrowed between calls to [`PointerWrapper::into_pointer`] and + /// [`PointerWrapper::from_pointer`]. + type Borrowed: Deref; + + /// Returns the raw pointer. + fn into_pointer(self) -> *const c_types::c_void; + + /// Returns a borrowed value. 
+ /// + /// # Safety + /// + /// `ptr` must have been returned by a previous call to [`PointerWrapper::into_pointer`]. + /// Additionally, [`PointerWrapper::from_pointer`] can only be called after *all* values + /// returned by [`PointerWrapper::borrow`] have been dropped. + unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed; + + /// Returns the instance back from the raw pointer. + /// + /// # Safety + /// + /// The passed pointer must come from a previous call to [`PointerWrapper::into_pointer()`]. + unsafe fn from_pointer(ptr: *const c_types::c_void) -> Self; +} + +impl PointerWrapper for Box { + type Borrowed = UnsafeReference; + + fn into_pointer(self) -> *const c_types::c_void { + Box::into_raw(self) as _ + } + + unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed { + // SAFETY: The safety requirements for this function ensure that the object is still alive, + // so it is safe to dereference the raw pointer. + // The safety requirements also ensure that the object remains alive for the lifetime of + // the returned value. + unsafe { UnsafeReference::new(&*ptr.cast()) } + } + + unsafe fn from_pointer(ptr: *const c_types::c_void) -> Self { + // SAFETY: The passed pointer comes from a previous call to [`Self::into_pointer()`]. + unsafe { Box::from_raw(ptr as _) } + } +} + +impl PointerWrapper for Ref { + type Borrowed = RefBorrow; + + fn into_pointer(self) -> *const c_types::c_void { + Ref::into_usize(self) as _ + } + + unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed { + // SAFETY: The safety requirements for this function ensure that the underlying object + // remains valid for the lifetime of the returned value. + unsafe { Ref::borrow_usize(ptr as _) } + } + + unsafe fn from_pointer(ptr: *const c_types::c_void) -> Self { + // SAFETY: The passed pointer comes from a previous call to [`Self::into_pointer()`]. 
+ unsafe { Ref::from_usize(ptr as _) } + } +} + +impl PointerWrapper for Arc { + type Borrowed = UnsafeReference; + + fn into_pointer(self) -> *const c_types::c_void { + Arc::into_raw(self) as _ + } + + unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed { + // SAFETY: The safety requirements for this function ensure that the object is still alive, + // so it is safe to dereference the raw pointer. + // The safety requirements also ensure that the object remains alive for the lifetime of + // the returned value. + unsafe { UnsafeReference::new(&*ptr.cast()) } + } + + unsafe fn from_pointer(ptr: *const c_types::c_void) -> Self { + // SAFETY: The passed pointer comes from a previous call to [`Self::into_pointer()`]. + unsafe { Arc::from_raw(ptr as _) } + } +} + +/// A reference with manually-managed lifetime. +/// +/// # Invariants +/// +/// There are no mutable references to the underlying object, and it remains valid for the lifetime +/// of the [`UnsafeReference`] instance. +pub struct UnsafeReference { + ptr: NonNull, +} + +impl UnsafeReference { + /// Creates a new [`UnsafeReference`] instance. + /// + /// # Safety + /// + /// Callers must ensure the following for the lifetime of the returned [`UnsafeReference`] + /// instance: + /// 1. That `obj` remains valid; + /// 2. That no mutable references to `obj` are created. + unsafe fn new(obj: &T) -> Self { + // INVARIANT: The safety requirements of this function ensure that the invariants hold. + Self { + ptr: NonNull::from(obj), + } + } +} + +impl Deref for UnsafeReference { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: By the type invariant, the object is still valid and alive, and there are no + // mutable references to it. 
+ unsafe { self.ptr.as_ref() } + } +} + +impl PointerWrapper for Pin { + type Borrowed = T::Borrowed; + + fn into_pointer(self) -> *const c_types::c_void { + // SAFETY: We continue to treat the pointer as pinned by returning just a pointer to it to + // the caller. + let inner = unsafe { Pin::into_inner_unchecked(self) }; + inner.into_pointer() + } + + unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed { + // SAFETY: The safety requirements for this function are the same as the ones for + // `T::borrow`. + unsafe { T::borrow(ptr) } + } + + unsafe fn from_pointer(p: *const c_types::c_void) -> Self { + // SAFETY: The object was originally pinned. + // The passed pointer comes from a previous call to `inner::into_pointer()`. + unsafe { Pin::new_unchecked(T::from_pointer(p)) } + } +} + +/// Runs a cleanup function/closure when dropped. +/// +/// The [`ScopeGuard::dismiss`] function prevents the cleanup function from running. +/// +/// # Examples +/// +/// In the example below, we have multiple exit paths and we want to log regardless of which one is +/// taken: +/// ``` +/// # use kernel::prelude::*; +/// # use kernel::ScopeGuard; +/// fn example1(arg: bool) { +/// let _log = ScopeGuard::new(|| pr_info!("example1 completed\n")); +/// +/// if arg { +/// return; +/// } +/// +/// // Do something... +/// } +/// ``` +/// +/// In the example below, we want to log the same message on all early exits but a different one on +/// the main exit path: +/// ``` +/// # use kernel::prelude::*; +/// # use kernel::ScopeGuard; +/// fn example2(arg: bool) { +/// let log = ScopeGuard::new(|| pr_info!("example2 returned early\n")); +/// +/// if arg { +/// return; +/// } +/// +/// // (Other early returns...) +/// +/// log.dismiss(); +/// pr_info!("example2 no early return\n"); +/// } +/// ``` +pub struct ScopeGuard { + cleanup_func: Option, +} + +impl ScopeGuard { + /// Creates a new cleanup object with the given cleanup function. 
+ pub fn new(cleanup_func: T) -> Self { + Self { + cleanup_func: Some(cleanup_func), + } + } + + /// Prevents the cleanup function from running. + pub fn dismiss(mut self) { + self.cleanup_func.take(); + } +} + +impl Drop for ScopeGuard { + fn drop(&mut self) { + // Run the cleanup function if one is still present. + if let Some(cleanup) = self.cleanup_func.take() { + cleanup(); + } + } +} diff --git a/rust/kernel/user_ptr.rs b/rust/kernel/user_ptr.rs new file mode 100644 index 0000000000000..71ec659bcee30 --- /dev/null +++ b/rust/kernel/user_ptr.rs @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! User pointers. +//! +//! C header: [`include/linux/uaccess.h`](../../../../include/linux/uaccess.h) + +use crate::{ + c_types, + error::Error, + io_buffer::{IoBufferReader, IoBufferWriter}, + Result, +}; +use alloc::vec::Vec; + +extern "C" { + fn rust_helper_copy_from_user( + to: *mut c_types::c_void, + from: *const c_types::c_void, + n: c_types::c_ulong, + ) -> c_types::c_ulong; + + fn rust_helper_copy_to_user( + to: *mut c_types::c_void, + from: *const c_types::c_void, + n: c_types::c_ulong, + ) -> c_types::c_ulong; + + fn rust_helper_clear_user(to: *mut c_types::c_void, n: c_types::c_ulong) -> c_types::c_ulong; +} + +/// A reference to an area in userspace memory, which can be either +/// read-only or read-write. +/// +/// All methods on this struct are safe: invalid pointers return +/// `EFAULT`. Concurrent access, *including data races to/from userspace +/// memory*, is permitted, because fundamentally another userspace +/// thread/process could always be modifying memory at the same time +/// (in the same way that userspace Rust's [`std::io`] permits data races +/// with the contents of files on disk). In the presence of a race, the +/// exact byte values read/written are unspecified but the operation is +/// well-defined. 
Kernelspace code should validate its copy of data +/// after completing a read, and not expect that multiple reads of the +/// same address will return the same value. +/// +/// All APIs enforce the invariant that a given byte of memory from userspace +/// may only be read once. By preventing double-fetches we avoid TOCTOU +/// vulnerabilities. This is accomplished by taking `self` by value to prevent +/// obtaining multiple readers on a given [`UserSlicePtr`], and the readers +/// only permitting forward reads. +/// +/// Constructing a [`UserSlicePtr`] performs no checks on the provided +/// address and length, it can safely be constructed inside a kernel thread +/// with no current userspace process. Reads and writes wrap the kernel APIs +/// `copy_from_user` and `copy_to_user`, which check the memory map of the +/// current process and enforce that the address range is within the user +/// range (no additional calls to `access_ok` are needed). +/// +/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html +pub struct UserSlicePtr(*mut c_types::c_void, usize); + +impl UserSlicePtr { + /// Constructs a user slice from a raw pointer and a length in bytes. + /// + /// # Safety + /// + /// Callers must be careful to avoid time-of-check-time-of-use + /// (TOCTOU) issues. The simplest way is to create a single instance of + /// [`UserSlicePtr`] per user memory block as it reads each byte at + /// most once. + pub unsafe fn new(ptr: *mut c_types::c_void, length: usize) -> Self { + UserSlicePtr(ptr, length) + } + + /// Reads the entirety of the user slice. + /// + /// Returns `EFAULT` if the address does not currently point to + /// mapped, readable memory. + pub fn read_all(self) -> Result> { + self.reader().read_all() + } + + /// Constructs a [`UserSlicePtrReader`]. + pub fn reader(self) -> UserSlicePtrReader { + UserSlicePtrReader(self.0, self.1) + } + + /// Writes the provided slice into the user slice. 
+ /// + /// Returns `EFAULT` if the address does not currently point to + /// mapped, writable memory (in which case some data from before the + /// fault may be written), or `data` is larger than the user slice + /// (in which case no data is written). + pub fn write_all(self, data: &[u8]) -> Result { + self.writer().write_slice(data) + } + + /// Constructs a [`UserSlicePtrWriter`]. + pub fn writer(self) -> UserSlicePtrWriter { + UserSlicePtrWriter(self.0, self.1) + } + + /// Constructs both a [`UserSlicePtrReader`] and a [`UserSlicePtrWriter`]. + pub fn reader_writer(self) -> (UserSlicePtrReader, UserSlicePtrWriter) { + ( + UserSlicePtrReader(self.0, self.1), + UserSlicePtrWriter(self.0, self.1), + ) + } +} + +/// A reader for [`UserSlicePtr`]. +/// +/// Used to incrementally read from the user slice. +pub struct UserSlicePtrReader(*mut c_types::c_void, usize); + +impl IoBufferReader for UserSlicePtrReader { + /// Returns the number of bytes left to be read from this. + /// + /// Note that even reading less than this number of bytes may fail. + fn len(&self) -> usize { + self.1 + } + + /// Reads raw data from the user slice into a raw kernel buffer. + /// + /// # Safety + /// + /// The output buffer must be valid. + unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result { + if len > self.1 || len > u32::MAX as usize { + return Err(Error::EFAULT); + } + let res = unsafe { rust_helper_copy_from_user(out as _, self.0, len as _) }; + if res != 0 { + return Err(Error::EFAULT); + } + // Since this is not a pointer to a valid object in our program, + // we cannot use `add`, which has C-style rules for defined + // behavior. + self.0 = self.0.wrapping_add(len); + self.1 -= len; + Ok(()) + } +} + +/// A writer for [`UserSlicePtr`]. +/// +/// Used to incrementally write into the user slice. 
+pub struct UserSlicePtrWriter(*mut c_types::c_void, usize); + +impl IoBufferWriter for UserSlicePtrWriter { + fn len(&self) -> usize { + self.1 + } + + fn clear(&mut self, mut len: usize) -> Result { + let mut ret = Ok(()); + if len > self.1 { + ret = Err(Error::EFAULT); + len = self.1; + } + + // SAFETY: The buffer will be validated by `clear_user`. We ensure that `len` is within + // bounds in the check above. + let left = unsafe { rust_helper_clear_user(self.0, len as _) } as usize; + if left != 0 { + ret = Err(Error::EFAULT); + len -= left; + } + + self.0 = self.0.wrapping_add(len); + self.1 -= len; + ret + } + + unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result { + if len > self.1 || len > u32::MAX as usize { + return Err(Error::EFAULT); + } + let res = unsafe { rust_helper_copy_to_user(self.0, data as _, len as _) }; + if res != 0 { + return Err(Error::EFAULT); + } + // Since this is not a pointer to a valid object in our program, + // we cannot use `add`, which has C-style rules for defined + // behavior. + self.0 = self.0.wrapping_add(len); + self.1 -= len; + Ok(()) + } +} From 296e421767f3c320b9c614faef7904174db77a93 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:18:23 +0200 Subject: [PATCH 069/851] rust: export generated symbols All symbols are reexported reusing the `EXPORT_SYMBOL_GPL` macro from C. The lists of symbols are generated on the fly. There are three main sets of symbols to distinguish: - The ones from the `core` and `alloc` crates (from the Rust standard library). The code is licensed as Apache/MIT. - The ones from our abstractions in the `kernel` crate. - The helpers (already exported since they are not generated). We export everything as GPL. This ensures we do not mistakenly expose GPL kernel symbols/features as non-GPL, even indirectly. 
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/exports.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 rust/exports.c diff --git a/rust/exports.c b/rust/exports.c new file mode 100644 index 0000000000000..d7dff1b3b9199 --- /dev/null +++ b/rust/exports.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// A hack to export Rust symbols for loadable modules without having to redo +// the entire `include/linux/export.h` logic in Rust. +// +// This requires the Rust's new/future `v0` mangling scheme because the default +// one ("legacy") uses invalid characters for C identifiers (thus we cannot use +// the `EXPORT_SYMBOL_*` macros). 
+ +#include + +#define EXPORT_SYMBOL_RUST_GPL(sym) extern int sym; EXPORT_SYMBOL_GPL(sym); + +#include "exports_core_generated.h" +#include "exports_alloc_generated.h" +#include "exports_kernel_generated.h" From dc08d49444e9b576158849cddd96e7c1d02bad18 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 16:42:57 +0200 Subject: [PATCH 070/851] Kbuild: add Rust support Having all the new files in place, we now enable Rust support in the build system, including `Kconfig` entries related to Rust, the Rust configuration printer, the target definition files, the version detection script and a few other bits. In the future, we will likely want to generate the target files on the fly via a script. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- .gitignore | 5 + .rustfmt.toml | 12 ++ Documentation/process/changes.rst | 13 ++ Makefile | 159 ++++++++++++++- arch/arm/rust/target.json | 28 +++ arch/arm64/rust/target.json | 35 ++++ arch/powerpc/rust/target.json | 30 +++ arch/riscv/Makefile | 1 + arch/riscv/rust/rv32ima.json | 37 ++++ arch/riscv/rust/rv32imac.json | 37 ++++ 
arch/riscv/rust/rv64ima.json | 37 ++++ arch/riscv/rust/rv64imac.json | 37 ++++ arch/x86/rust/target.json | 37 ++++ init/Kconfig | 28 +++ lib/Kconfig.debug | 144 ++++++++++++++ rust/.gitignore | 6 + rust/Makefile | 316 ++++++++++++++++++++++++++++++ rust/bindgen_parameters | 13 ++ scripts/Makefile.build | 22 +++ scripts/Makefile.lib | 12 ++ scripts/kconfig/confdata.c | 67 ++++++- scripts/rust-version.sh | 31 +++ 22 files changed, 1099 insertions(+), 8 deletions(-) create mode 100644 .rustfmt.toml create mode 100644 arch/arm/rust/target.json create mode 100644 arch/arm64/rust/target.json create mode 100644 arch/powerpc/rust/target.json create mode 100644 arch/riscv/rust/rv32ima.json create mode 100644 arch/riscv/rust/rv32imac.json create mode 100644 arch/riscv/rust/rv64ima.json create mode 100644 arch/riscv/rust/rv64imac.json create mode 100644 arch/x86/rust/target.json create mode 100644 rust/.gitignore create mode 100644 rust/Makefile create mode 100644 rust/bindgen_parameters create mode 100755 scripts/rust-version.sh diff --git a/.gitignore b/.gitignore index 7afd412dadd2c..48c68948f476d 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ *.o *.o.* *.patch +*.rmeta *.s *.so *.so.dbg @@ -96,6 +97,7 @@ modules.order !.gitattributes !.gitignore !.mailmap +!.rustfmt.toml # # Generated include files @@ -161,3 +163,6 @@ x509.genkey # Documentation toolchain sphinx_*/ + +# Rust analyzer configuration +/rust-project.json diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000000000..4fea7c464f0d3 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,12 @@ +edition = "2018" +newline_style = "Unix" + +# Unstable options that help catching some mistakes in formatting and that we may want to enable +# when they become stable. +# +# They are kept here since they are useful to run from time to time. 
+#format_code_in_doc_comments = true +#reorder_impl_items = true +#comment_width = 100 +#wrap_comments = true +#normalize_comments = true diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index d3a8557b66a1a..7654a7105dcf5 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -26,11 +26,18 @@ running a Linux kernel. Also, not all tools are necessary on all systems; obviously, if you don't have any PC Card hardware, for example, you probably needn't concern yourself with pcmciautils. +Furthermore, note that newer versions of the Rust toolchain may or may not work +because, for the moment, we depend on some unstable features. Thus, unless you +know what you are doing, use the exact version listed here. Please see +:ref:`Documentation/rust/quick-start.rst ` for details. + ====================== =============== ======================================== Program Minimal version Command to check the version ====================== =============== ======================================== GNU C 4.9 gcc --version Clang/LLVM (optional) 10.0.1 clang --version +rustc (optional) 1.54.0-beta.1 rustc --version +bindgen (optional) 0.56.0 bindgen --version GNU make 3.81 make --version binutils 2.23 ld -v flex 2.5.35 flex --version @@ -329,6 +336,12 @@ Sphinx Please see :ref:`sphinx_install` in :ref:`Documentation/doc-guide/sphinx.rst ` for details about Sphinx requirements. +rustdoc +------- + +``rustdoc`` is used to generate Rust documentation. Please see +:ref:`Documentation/rust/docs.rst ` for more information. + Getting updated software ======================== diff --git a/Makefile b/Makefile index 6e823d8bd6459..c814b209b6c91 100644 --- a/Makefile +++ b/Makefile @@ -120,6 +120,13 @@ endif export KBUILD_CHECKSRC +# Enable "clippy" (a linter) as part of the Rust compilation. +# +# Use 'make CLIPPY=1' to enable it. 
+ifeq ("$(origin CLIPPY)", "command line") + KBUILD_CLIPPY := $(CLIPPY) +endif + # Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the # directory of external module to build. Setting M= takes precedence. ifeq ("$(origin M)", "command line") @@ -263,7 +270,7 @@ no-dot-config-targets := $(clean-targets) \ cscope gtags TAGS tags help% %docs check% coccicheck \ $(version_h) headers headers_% archheaders archscripts \ %asm-generic kernelversion %src-pkg dt_binding_check \ - outputmakefile + outputmakefile rustfmt rustfmtcheck # Installation targets should not require compiler. Unfortunately, vdso_install # is an exception where build artifacts may be updated. This must be fixed. no-compiler-targets := $(no-dot-config-targets) install dtbs_install \ @@ -452,6 +459,12 @@ OBJDUMP = $(CROSS_COMPILE)objdump READELF = $(CROSS_COMPILE)readelf STRIP = $(CROSS_COMPILE)strip endif +RUSTC = rustc +RUSTDOC = rustdoc +RUSTFMT = rustfmt +CLIPPY_DRIVER = clippy-driver +BINDGEN = bindgen +CARGO = cargo PAHOLE = pahole RESOLVE_BTFIDS = $(objtree)/tools/bpf/resolve_btfids/resolve_btfids LEX = flex @@ -475,9 +488,11 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) NOSTDINC_FLAGS := CFLAGS_MODULE = +RUSTFLAGS_MODULE = AFLAGS_MODULE = LDFLAGS_MODULE = CFLAGS_KERNEL = +RUSTFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = @@ -506,15 +521,41 @@ KBUILD_CFLAGS := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \ -Werror=return-type -Wno-format-security \ -std=gnu89 KBUILD_CPPFLAGS := -D__KERNEL__ +KBUILD_RUST_TARGET := $(srctree)/arch/$(SRCARCH)/rust/target.json +KBUILD_RUSTFLAGS := --emit=dep-info,obj,metadata --edition=2018 \ + -Cpanic=abort -Cembed-bitcode=n -Clto=n -Crpath=n \ + -Cforce-unwind-tables=n -Ccodegen-units=1 \ + -Zbinary_dep_depinfo=y -Zsymbol-mangling-version=v0 \ + -Dunsafe_op_in_unsafe_fn -Drust_2018_idioms \ + -Wmissing_docs +KBUILD_CLIPPYFLAGS := -Dclippy::correctness 
-Dclippy::style \ + -Dclippy::complexity -Dclippy::perf -Dclippy::float_arithmetic KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := +KBUILD_RUSTFLAGS_KERNEL := KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE +KBUILD_RUSTFLAGS_MODULE := --cfg MODULE KBUILD_LDFLAGS_MODULE := KBUILD_LDFLAGS := CLANG_FLAGS := +ifeq ($(KBUILD_CLIPPY),1) + RUSTC_OR_CLIPPY_QUIET := CLIPPY + RUSTC_OR_CLIPPY = $(CLIPPY_DRIVER) $(KBUILD_CLIPPYFLAGS) +else + RUSTC_OR_CLIPPY_QUIET := RUSTC + RUSTC_OR_CLIPPY = $(RUSTC) +endif + +ifdef RUST_LIB_SRC + export RUST_LIB_SRC +endif + +export RUSTC_BOOTSTRAP := 1 + export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC +export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN CARGO export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD @@ -522,9 +563,10 @@ export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE +export KBUILD_RUST_TARGET KBUILD_RUSTFLAGS RUSTFLAGS_KERNEL RUSTFLAGS_MODULE export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE -export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE -export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL +export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_RUSTFLAGS_MODULE KBUILD_LDFLAGS_MODULE +export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL KBUILD_RUSTFLAGS_KERNEL # Files to ignore in find ... 
statements @@ -724,7 +766,7 @@ $(KCONFIG_CONFIG): quiet_cmd_syncconfig = SYNC $@ cmd_syncconfig = $(MAKE) -f $(srctree)/Makefile syncconfig -%/config/auto.conf %/config/auto.conf.cmd %/generated/autoconf.h: $(KCONFIG_CONFIG) +%/config/auto.conf %/config/auto.conf.cmd %/generated/autoconf.h %/generated/rustc_cfg: $(KCONFIG_CONFIG) +$(call cmd,syncconfig) else # !may-sync-config # External modules and some install targets need include/generated/autoconf.h @@ -750,12 +792,43 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +ifdef CONFIG_RUST_DEBUG_ASSERTIONS +KBUILD_RUSTFLAGS += -Cdebug-assertions=y +else +KBUILD_RUSTFLAGS += -Cdebug-assertions=n +endif + +ifdef CONFIG_RUST_OVERFLOW_CHECKS +KBUILD_RUSTFLAGS += -Coverflow-checks=y +else +KBUILD_RUSTFLAGS += -Coverflow-checks=n +endif + ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 +KBUILD_RUSTFLAGS_OPT_LEVEL_MAP := 2 else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 KBUILD_CFLAGS += -O3 +KBUILD_RUSTFLAGS_OPT_LEVEL_MAP := 3 else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os +KBUILD_RUSTFLAGS_OPT_LEVEL_MAP := z +endif + +ifdef CONFIG_RUST_OPT_LEVEL_SIMILAR_AS_CHOSEN_FOR_C +KBUILD_RUSTFLAGS += -Copt-level=$(KBUILD_RUSTFLAGS_OPT_LEVEL_MAP) +else ifdef CONFIG_RUST_OPT_LEVEL_0 +KBUILD_RUSTFLAGS += -Copt-level=0 +else ifdef CONFIG_RUST_OPT_LEVEL_1 +KBUILD_RUSTFLAGS += -Copt-level=1 +else ifdef CONFIG_RUST_OPT_LEVEL_2 +KBUILD_RUSTFLAGS += -Copt-level=2 +else ifdef CONFIG_RUST_OPT_LEVEL_3 +KBUILD_RUSTFLAGS += -Copt-level=3 +else ifdef CONFIG_RUST_OPT_LEVEL_S +KBUILD_RUSTFLAGS += -Copt-level=s +else ifdef CONFIG_RUST_OPT_LEVEL_Z +KBUILD_RUSTFLAGS += -Copt-level=z endif # Tell gcc to never replace conditional load with a non-conditional one @@ -805,6 +878,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) KBUILD_CFLAGS += $(call 
cc-disable-warning, unused-const-variable) ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls +KBUILD_RUSTFLAGS += -Cforce-frame-pointers=y else # Some targets (ARM with Thumb2, for example), can't be built with frame # pointers. For those, we don't have FUNCTION_TRACER automatically @@ -842,6 +916,8 @@ ifdef CONFIG_CC_IS_GCC DEBUG_CFLAGS += $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments)) endif +DEBUG_RUSTFLAGS := + ifdef CONFIG_DEBUG_INFO ifdef CONFIG_DEBUG_INFO_SPLIT @@ -852,6 +928,11 @@ endif ifneq ($(LLVM_IAS),1) KBUILD_AFLAGS += -Wa,-gdwarf-2 +ifdef CONFIG_DEBUG_INFO_REDUCED +DEBUG_RUSTFLAGS += -Cdebuginfo=1 +else +DEBUG_RUSTFLAGS += -Cdebuginfo=2 +endif endif ifndef CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT @@ -876,6 +957,9 @@ endif # CONFIG_DEBUG_INFO KBUILD_CFLAGS += $(DEBUG_CFLAGS) export DEBUG_CFLAGS +KBUILD_RUSTFLAGS += $(DEBUG_RUSTFLAGS) +export DEBUG_RUSTFLAGS + ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_FTRACE_MCOUNT_USE_CC CC_FLAGS_FTRACE += -mrecord-mcount @@ -1031,10 +1115,11 @@ include $(addprefix $(srctree)/, $(include-y)) # Do not add $(call cc-option,...) below this line. When you build the kernel # from the clean source tree, the GCC plugins do not exist at this point. 
-# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments +# Add user supplied CPPFLAGS, AFLAGS, CFLAGS and RUSTFLAGS as the last assignments KBUILD_CPPFLAGS += $(KCPPFLAGS) KBUILD_AFLAGS += $(KAFLAGS) KBUILD_CFLAGS += $(KCFLAGS) +KBUILD_RUSTFLAGS += $(KRUSTFLAGS) KBUILD_LDFLAGS_MODULE += --build-id=sha1 LDFLAGS_vmlinux += --build-id=sha1 @@ -1138,6 +1223,10 @@ export MODULES_NSDEPS := $(extmod_prefix)modules.nsdeps ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ +ifdef CONFIG_RUST +core-y += rust/ +endif + vmlinux-dirs := $(patsubst %/,%,$(filter %/, \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ $(libs-y) $(libs-m))) @@ -1238,6 +1327,9 @@ archprepare: outputmakefile archheaders archscripts scripts include/config/kerne prepare0: archprepare $(Q)$(MAKE) $(build)=scripts/mod $(Q)$(MAKE) $(build)=. +ifdef CONFIG_RUST + $(Q)$(MAKE) $(build)=rust +endif # All the preparing.. prepare: prepare0 prepare-objtool prepare-resolve_btfids @@ -1511,7 +1603,7 @@ endif # CONFIG_MODULES # Directories & files removed with 'make clean' CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \ modules.builtin modules.builtin.modinfo modules.nsdeps \ - compile_commands.json .thinlto-cache + compile_commands.json .thinlto-cache rust/test rust/doc # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -1522,7 +1614,8 @@ MRPROPER_FILES += include/config include/generated \ certs/signing_key.pem certs/signing_key.x509 \ certs/x509.genkey \ vmlinux-gdb.py \ - *.spec + *.spec \ + rust/*_generated.h rust/*_generated.rs rust/libmacros.so # clean - Delete most, but leave enough to build external modules # @@ -1634,6 +1727,17 @@ help: @echo ' kselftest-merge - Merge all the config dependencies of' @echo ' kselftest to existing .config.' 
@echo '' + @echo 'Rust targets:' + @echo ' rustfmt - Reformat all the Rust code in the kernel' + @echo ' rustfmtcheck - Checks if all the Rust code in the kernel' + @echo ' is formatted, printing a diff otherwise.' + @echo ' rustdoc - Generate Rust documentation' + @echo ' (requires kernel .config)' + @echo ' rusttest - Runs the Rust tests' + @echo ' (requires kernel .config; downloads external repos)' + @echo ' rust-analyzer - Generate rust-project.json rust-analyzer support file' + @echo ' (requires kernel .config)' + @echo '' @$(if $(dtstree), \ echo 'Devicetree:'; \ echo '* dtbs - Build device tree blobs for enabled boards'; \ @@ -1705,6 +1809,46 @@ PHONY += $(DOC_TARGETS) $(DOC_TARGETS): $(Q)$(MAKE) $(build)=Documentation $@ + +# Rust targets +# --------------------------------------------------------------------------- + +# Documentation target +# +# Using the singular to avoid running afoul of `no-dot-config-targets`. +PHONY += rustdoc +rustdoc: prepare0 + $(Q)$(MAKE) $(build)=rust $@ + +# Testing target +PHONY += rusttest +rusttest: prepare0 + $(Q)$(MAKE) $(build)=rust $@ + +# Formatting targets +PHONY += rustfmt rustfmtcheck + +# We skip `rust/alloc` since we want to minimize the diff w.r.t. upstream. +# +# We match using absolute paths since `find` does not resolve them +# when matching, which is a problem when e.g. `srctree` is `..`. +# We `grep` afterwards in order to remove the directory entry itself. 
+rustfmt: + $(Q)find $(abs_srctree) -type f -name '*.rs' \ + -o -path $(abs_srctree)/rust/alloc -prune \ + -o -path $(abs_objtree)/rust/test -prune \ + | grep -Fv $(abs_srctree)/rust/alloc \ + | grep -Fv $(abs_objtree)/rust/test \ + | xargs $(RUSTFMT) $(rustfmt_flags) + +rustfmtcheck: rustfmt_flags = --check +rustfmtcheck: rustfmt + +# IDE support targets +PHONY += rust-analyzer +rust-analyzer: prepare0 + $(Q)$(MAKE) $(build)=rust $@ + # Misc # --------------------------------------------------------------------------- @@ -1862,6 +2006,7 @@ clean: $(clean-dirs) $(call cmd,rmfiles) @find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \ \( -name '*.[aios]' -o -name '*.ko' -o -name '.*.cmd' \ + -o -name '*.rmeta' \ -o -name '*.ko.*' \ -o -name '*.dtb' -o -name '*.dtbo' -o -name '*.dtb.S' -o -name '*.dt.yaml' \ -o -name '*.dwo' -o -name '*.lst' \ diff --git a/arch/arm/rust/target.json b/arch/arm/rust/target.json new file mode 100644 index 0000000000000..37710eb727b1c --- /dev/null +++ b/arch/arm/rust/target.json @@ -0,0 +1,28 @@ +{ + "arch": "arm", + "crt-static-respected": true, + "data-layout": "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64", + "dynamic-linking": true, + "env": "gnu", + "executables": true, + "features": "+strict-align,+v6", + "function-sections": false, + "has-elf-tls": true, + "has-rpath": true, + "is-builtin": true, + "linker-is-gnu": true, + "llvm-target": "arm-unknown-linux-gnueabi", + "max-atomic-width": 64, + "os": "linux", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack" + ] + }, + "relocation-model": "static", + "target-family": "unix", + "target-mcount": "\u0001__gnu_mcount_nc", + "target-pointer-width": "32" +} diff --git a/arch/arm64/rust/target.json b/arch/arm64/rust/target.json new file mode 100644 index 0000000000000..9ea86ed6c736c --- /dev/null +++ b/arch/arm64/rust/target.json @@ -0,0 +1,35 @@ +{ + "arch": "aarch64", + "data-layout": 
"e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", + "disable-redzone": true, + "eliminate-frame-pointer": false, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+strict-align,+neon,+fp-armv8", + "function-sections": false, + "is-builtin": true, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "aarch64-unknown-none", + "max-atomic-width": 128, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "64", + "vendor": "" +} diff --git a/arch/powerpc/rust/target.json b/arch/powerpc/rust/target.json new file mode 100644 index 0000000000000..1e53f83080927 --- /dev/null +++ b/arch/powerpc/rust/target.json @@ -0,0 +1,30 @@ +{ + "arch": "powerpc64", + "code-mode": "kernel", + "cpu": "ppc64le", + "data-layout": "e-m:e-i64:64-n32:64", + "env": "gnu", + "features": "-altivec,-vsx,-hard-float", + "function-sections": false, + "is-builtin": true, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "powerpc64le-elf", + "max-atomic-width": 64, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "target-family": "unix", + "target-mcount": "_mcount", + "target-endian": "little", + "target-pointer-width": "64" +} diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 99ecd8bcfd77f..2a476eb5da534 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -54,6 +54,7 @@ riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c KBUILD_CFLAGS += -march=$(subst 
fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) +KBUILD_RUST_TARGET := $(srctree)/arch/riscv/rust/$(subst fd,,$(riscv-march-y)).json KBUILD_CFLAGS += -mno-save-restore KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) diff --git a/arch/riscv/rust/rv32ima.json b/arch/riscv/rust/rv32ima.json new file mode 100644 index 0000000000000..8e9b46011e7c9 --- /dev/null +++ b/arch/riscv/rust/rv32ima.json @@ -0,0 +1,37 @@ +{ + "arch": "riscv32", + "code-model": "medium", + "cpu": "generic-rv32", + "data-layout": "e-m:e-p:32:32-i64:64-n32-S128", + "disable-redzone": true, + "eliminate-frame-pointer": false, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+m,+a", + "function-sections": false, + "is-builtin": true, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "riscv32", + "max-atomic-width": 32, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m32" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "32", + "vendor": "" +} diff --git a/arch/riscv/rust/rv32imac.json b/arch/riscv/rust/rv32imac.json new file mode 100644 index 0000000000000..2b3a139da999f --- /dev/null +++ b/arch/riscv/rust/rv32imac.json @@ -0,0 +1,37 @@ +{ + "arch": "riscv32", + "code-model": "medium", + "cpu": "generic-rv32", + "data-layout": "e-m:e-p:32:32-i64:64-n32-S128", + "disable-redzone": true, + "eliminate-frame-pointer": false, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+m,+a,+c", + "function-sections": false, + "is-builtin": true, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "riscv32", + "max-atomic-width": 32, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, 
+ "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m32" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "32", + "vendor": "" +} diff --git a/arch/riscv/rust/rv64ima.json b/arch/riscv/rust/rv64ima.json new file mode 100644 index 0000000000000..091da50069a31 --- /dev/null +++ b/arch/riscv/rust/rv64ima.json @@ -0,0 +1,37 @@ +{ + "arch": "riscv64", + "code-model": "medium", + "cpu": "generic-rv64", + "data-layout": "e-m:e-p:64:64-i64:64-i128:128-n64-S128", + "disable-redzone": true, + "eliminate-frame-pointer": false, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+m,+a", + "function-sections": false, + "is-builtin": true, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "riscv64", + "max-atomic-width": 64, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "64", + "vendor": "" +} diff --git a/arch/riscv/rust/rv64imac.json b/arch/riscv/rust/rv64imac.json new file mode 100644 index 0000000000000..aa5a8f4549f1e --- /dev/null +++ b/arch/riscv/rust/rv64imac.json @@ -0,0 +1,37 @@ +{ + "arch": "riscv64", + "code-model": "medium", + "cpu": "generic-rv64", + "data-layout": "e-m:e-p:64:64-i64:64-i128:128-n64-S128", + "disable-redzone": true, + "eliminate-frame-pointer": false, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+m,+a,+c", + "function-sections": false, + "is-builtin": true, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "riscv64", + "max-atomic-width": 64, + "needs-plt": true, + "os": 
"none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "64", + "vendor": "" +} diff --git a/arch/x86/rust/target.json b/arch/x86/rust/target.json new file mode 100644 index 0000000000000..76ac800d38efb --- /dev/null +++ b/arch/x86/rust/target.json @@ -0,0 +1,37 @@ +{ + "arch": "x86_64", + "code-model": "kernel", + "cpu": "x86-64", + "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", + "disable-redzone": true, + "eliminate-frame-pointer": false, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", + "function-sections": false, + "is-builtin": true, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "x86_64-elf", + "max-atomic-width": 64, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "64", + "vendor": "unknown" +} diff --git a/init/Kconfig b/init/Kconfig index a61c92066c2e4..5318f209a3b6b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -60,6 +60,15 @@ config LLD_VERSION default $(ld-version) if LD_IS_LLD default 0 +config HAS_RUST + depends on ARM64 || CPU_32v6 || CPU_32v6K || (PPC64 && CPU_LITTLE_ENDIAN) || X86_64 || RISCV + def_bool $(success,$(RUSTC) --version) + +config RUSTC_VERSION + depends on HAS_RUST + int + default $(shell,$(srctree)/scripts/rust-version.sh $(RUSTC)) + 
config CC_CAN_LINK bool default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT @@ -2011,6 +2020,25 @@ config PROFILING Say Y here to enable the extended profiling support mechanisms used by profilers. +config RUST + bool "Rust support" + depends on HAS_RUST + depends on !COMPILE_TEST + depends on !MODVERSIONS + default n + help + Enables Rust support in the kernel. + + This allows other Rust-related options, like drivers written in Rust, + to be selected. + + It is also required to be able to load external kernel modules + written in Rust. + + See Documentation/rust/ for more information. + + If unsure, say N. + # # Place an empty function call at each tracepoint site. Can be # dynamically changed for a probe function. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 678c13967580e..30b705709c2b0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2602,6 +2602,150 @@ config HYPERV_TESTING endmenu # "Kernel Testing and Coverage" +menu "Rust hacking" + +config RUST_DEBUG_ASSERTIONS + bool "Debug assertions" + default n + depends on RUST + help + Enables rustc's `-Cdebug-assertions` codegen option. + + This flag lets you turn `cfg(debug_assertions)` conditional + compilation on or off. This can be used to enable extra debugging + code in development but not in production. For example, it controls + the behavior of the standard library's `debug_assert!` macro. + + Note that this will apply to all Rust code, including `core`. + + If unsure, say N. + +config RUST_OVERFLOW_CHECKS + bool "Overflow checks" + default y + depends on RUST + help + Enables rustc's `-Coverflow-checks` codegen option. + + This flag allows you to control the behavior of runtime integer + overflow. When overflow-checks are enabled, a panic will occur + on overflow. + + Note that this will apply to all Rust code, including `core`. + + If unsure, say Y. 
+ +choice + prompt "Optimization level" + default RUST_OPT_LEVEL_SIMILAR_AS_CHOSEN_FOR_C + depends on RUST + help + Controls rustc's `-Copt-level` codegen option. + + This flag controls the optimization level. + + If unsure, say "Similar as chosen for C". + +config RUST_OPT_LEVEL_SIMILAR_AS_CHOSEN_FOR_C + bool "Similar as chosen for C" + help + This choice will pick a similar optimization level as chosen in + the "Compiler optimization level" for C: + + -O2 is currently mapped to -Copt-level=2 + -O3 is currently mapped to -Copt-level=3 + -Os is currently mapped to -Copt-level=z + + The mapping may change over time to follow the intended semantics + of the choice for C as sensibly as possible. + + This is the default. + +config RUST_OPT_LEVEL_0 + bool "No optimizations (-Copt-level=0)" + help + Not recommended for most purposes. It may come in handy for debugging + suspected optimizer bugs, unexpected undefined behavior, etc. + + Note that this level will *not* enable debug assertions nor overflow + checks on its own (like it happens when interacting with rustc + directly). Use the corresponding configuration options to control + that instead, orthogonally. + + Note this level may cause excessive stack usage, which can lead to stack + overflow and subsequent crashes. + +config RUST_OPT_LEVEL_1 + bool "Basic optimizations (-Copt-level=1)" + help + Useful for debugging without getting too lost, but without + the overhead and boilerplate of no optimizations at all. + + Note this level may cause excessive stack usage, which can lead to stack + overflow and subsequent crashes. + +config RUST_OPT_LEVEL_2 + bool "Some optimizations (-Copt-level=2)" + help + The sensible choice in most cases. + +config RUST_OPT_LEVEL_3 + bool "All optimizations (-Copt-level=3)" + help + Yet more performance (hopefully). + +config RUST_OPT_LEVEL_S + bool "Optimize for size (-Copt-level=s)" + help + Smaller kernel, ideally without too much performance loss. 
+ +config RUST_OPT_LEVEL_Z + bool "Optimize for size, no loop vectorization (-Copt-level=z)" + help + Like the previous level, but also turn off loop vectorization. + +endchoice + +choice + prompt "Build-time assertions" + default RUST_BUILD_ASSERT_ALLOW if RUST_OPT_LEVEL_0 + default RUST_BUILD_ASSERT_DENY if !RUST_OPT_LEVEL_0 + depends on RUST + help + Controls how `build_error!` and `build_assert!` are handled during build. + + If calls to them exist in the binary, it may indicate a violated invariant + or that the optimizer failed to verify the invariant during compilation. + You can choose to abort compilation or ignore them during build and let the + check be carried to runtime. + + If optimizations are turned off, you cannot select "Deny". + + If unsure, say "Deny". + +config RUST_BUILD_ASSERT_ALLOW + bool "Allow" + help + Unoptimized calls to `build_error!` will be converted to `panic!` + and checked at runtime. + +config RUST_BUILD_ASSERT_WARN + bool "Warn" + help + Unoptimized calls to `build_error!` will be converted to `panic!` + and checked at runtime, but warnings will be generated when building. + +config RUST_BUILD_ASSERT_DENY + bool "Deny" + depends on !RUST_OPT_LEVEL_0 + help + Unoptimized calls to `build_error!` will abort compilation.
+ +endchoice + + +endmenu # "Rust" + source "Documentation/Kconfig" endmenu # Kernel hacking diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 0000000000000..c6186b71e1c3f --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +bindings_generated.rs +exports_*_generated.h +doc/ +test/ diff --git a/rust/Makefile b/rust/Makefile new file mode 100644 index 0000000000000..233b8f365482e --- /dev/null +++ b/rust/Makefile @@ -0,0 +1,316 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_RUST) += core.o compiler_builtins.o helpers.o +extra-$(CONFIG_RUST) += exports_core_generated.h + +extra-$(CONFIG_RUST) += libmacros.so + +extra-$(CONFIG_RUST) += bindings_generated.rs +obj-$(CONFIG_RUST) += alloc.o kernel.o +extra-$(CONFIG_RUST) += exports_alloc_generated.h exports_kernel_generated.h + +ifdef CONFIG_RUST_BUILD_ASSERT_DENY +extra-$(CONFIG_RUST) += build_error.o +else +obj-$(CONFIG_RUST) += build_error.o +endif + +obj-$(CONFIG_RUST) += exports.o + +ifeq ($(quiet),silent_) +cargo_quiet=-q +rust_test_quiet=-q +rustdoc_test_quiet=--test-args -q +else ifeq ($(quiet),quiet_) +rust_test_quiet=-q +rustdoc_test_quiet=--test-args -q +else +cargo_quiet=--verbose +endif + +quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $< + cmd_rustdoc = \ + RUST_BINDINGS_FILE=$(abspath $(objtree)/rust/bindings_generated.rs) \ + $(RUSTDOC) $(if $(rustdoc_host),,$(rust_cross_flags)) \ + $(filter-out -Cpanic=abort, $(filter-out --emit=%, $(rust_flags))) \ + $(rustc_target_flags) -L $(objtree)/rust \ + --output $(objtree)/rust/doc --crate-name $(subst rustdoc-,,$@) \ + @$(objtree)/include/generated/rustc_cfg $< + +# This is a temporary fix for the CSS, visible on `type`s (`Result`). +# It is already fixed in nightly. 
+RUSTDOC_FIX_BEFORE := .impl,.method,.type:not(.container-rustdoc),.associatedconstant,.associatedtype +RUSTDOC_FIX_AFTER := .impl,.impl-items .method,.methods .method,.impl-items \ + .type,.methods .type,.impl-items .associatedconstant,.methods \ + .associatedconstant,.impl-items .associatedtype,.methods .associatedtype + +rustdoc: rustdoc-core rustdoc-macros rustdoc-compiler_builtins rustdoc-alloc rustdoc-kernel + $(Q)cp $(srctree)/Documentation/rust/assets/* $(objtree)/rust/doc + $(Q)sed -i "s/$(RUSTDOC_FIX_BEFORE)/$(RUSTDOC_FIX_AFTER)/" \ + $(objtree)/rust/doc/rustdoc.css + +rustdoc-macros: private rustdoc_host = yes +rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ + --extern proc_macro +rustdoc-macros: $(srctree)/rust/macros/lib.rs FORCE + $(call if_changed,rustdoc) + +rustdoc-compiler_builtins: $(srctree)/rust/compiler_builtins.rs rustdoc-core FORCE + $(call if_changed,rustdoc) + +# We need to allow `broken_intra_doc_links` because some +# `no_global_oom_handling` functions refer to non-`no_global_oom_handling` +# functions. Ideally `rustdoc` would have a way to distinguish broken links +# due to things that are "configured out" vs. entirely non-existing ones. 
+rustdoc-alloc: private rustc_target_flags = --cfg no_global_oom_handling \ + -Abroken_intra_doc_links +rustdoc-alloc: $(srctree)/rust/alloc/lib.rs rustdoc-core \ + rustdoc-compiler_builtins FORCE + $(call if_changed,rustdoc) + +rustdoc-kernel: private rustc_target_flags = --extern alloc \ + --extern build_error \ + --extern macros=$(objtree)/rust/libmacros.so +rustdoc-kernel: $(srctree)/rust/kernel/lib.rs rustdoc-core \ + rustdoc-macros rustdoc-compiler_builtins rustdoc-alloc \ + $(objtree)/rust/libmacros.so $(objtree)/rust/bindings_generated.rs FORCE + $(call if_changed,rustdoc) + +quiet_cmd_rustc_test_library = RUSTC TL $< + cmd_rustc_test_library = \ + RUST_BINDINGS_FILE=$(abspath $(objtree)/rust/bindings_generated.rs) \ + $(RUSTC) $(filter-out --sysroot=%, $(filter-out -Cpanic=abort, $(filter-out --emit=%, $(rust_flags)))) \ + $(rustc_target_flags) --crate-type $(if $(rustc_test_library_proc),proc-macro,rlib) \ + --out-dir $(objtree)/rust/test/ --cfg testlib \ + --sysroot $(objtree)/rust/test/sysroot \ + -L $(objtree)/rust/test/ --crate-name $(subst rusttest-,,$(subst rusttestlib-,,$@)) $< + +rusttestlib-build_error: $(srctree)/rust/build_error.rs rusttest-prepare FORCE + $(call if_changed,rustc_test_library) + +rusttestlib-macros: private rustc_target_flags = --extern proc_macro +rusttestlib-macros: private rustc_test_library_proc = yes +rusttestlib-macros: $(srctree)/rust/macros/lib.rs rusttest-prepare FORCE + $(call if_changed,rustc_test_library) + +quiet_cmd_rustdoc_test = RUSTDOC T $< + cmd_rustdoc_test = \ + RUST_BINDINGS_FILE=$(abspath $(objtree)/rust/bindings_generated.rs) \ + $(RUSTDOC) --test $(filter-out --sysroot=%, $(filter-out -Cpanic=abort, $(filter-out --emit=%, $(rust_flags)))) \ + $(rustc_target_flags) $(rustdoc_test_target_flags) \ + --sysroot $(objtree)/rust/test/sysroot $(rustdoc_test_quiet) \ + -L $(objtree)/rust/test \ + --output $(objtree)/rust/doc --crate-name $(subst rusttest-,,$@) \ + @$(objtree)/include/generated/rustc_cfg $< + +# 
We cannot use `-Zpanic-abort-tests` because some tests are dynamic, +# so for the moment we skip `-Cpanic=abort`. +quiet_cmd_rustc_test = RUSTC T $< + cmd_rustc_test = \ + RUST_BINDINGS_FILE=$(abspath $(objtree)/rust/bindings_generated.rs) \ + $(RUSTC) --test $(filter-out --sysroot=%, $(filter-out -Cpanic=abort, $(filter-out --emit=%, $(rust_flags)))) \ + $(rustc_target_flags) --out-dir $(objtree)/rust/test \ + --sysroot $(objtree)/rust/test/sysroot \ + -L $(objtree)/rust/test/ --crate-name $(subst rusttest-,,$@) $<; \ + $(objtree)/rust/test/$(subst rusttest-,,$@) $(rust_test_quiet) \ + $(rustc_test_run_flags) + +rusttest: rusttest-macros rusttest-kernel + +# This prepares a custom sysroot with our custom `alloc` instead of +# the standard one. +# +# This requires several hacks: +# - Unlike `core` and `alloc`, `std` depends on more than a dozen crates, +# including third-party crates that need to be downloaded, plus custom +# `build.rs` steps. Thus hardcoding things here is not maintainable. +# - `cargo` knows how to build the standard library, but it is an unstable +# feature so far (`-Zbuild-std`). +# - `cargo` only considers the use case of building the standard library +# to use it in a given package. Thus we need to create a dummy package +# and pick the generated libraries from there. +# - Since we only keep a subset of upstream `alloc` in-tree, we need +# to recreate it on the fly by putting our sources on top. +# - The usual ways of modifying the dependency graph in `cargo` do not seem +# to apply for the `-Zbuild-std` steps, thus we have to mislead it +# by modifying the sources in the sysroot. +# - To avoid messing with the user's Rust installation, we create a clone +# of the sysroot. However, `cargo` ignores `RUSTFLAGS` in the `-Zbuild-std` +# steps, thus we use a wrapper binary passed via `RUSTC` to pass the flag. 
+# +# In the future, we hope to avoid the whole ordeal by either: +# - Making the `test` crate not depend on `std` (either improving upstream +# or having our own custom crate). +# - Making the tests run in kernel space (requires the previous point). +# - Making `std` and friends be more like a "normal" crate, so that +# `-Zbuild-std` and related hacks are not needed. +quiet_cmd_rustsysroot = RUSTSYSROOT + cmd_rustsysroot = \ + rm -rf $(objtree)/rust/test; \ + mkdir -p $(objtree)/rust/test; \ + cp -a $(rustc_sysroot) $(objtree)/rust/test/sysroot; \ + cp -r $(srctree)/rust/alloc/* \ + $(objtree)/rust/test/sysroot/lib/rustlib/src/rust/library/alloc/src; \ + echo '\#!/bin/sh' > $(objtree)/rust/test/rustc_sysroot; \ + echo "$(RUSTC) --sysroot=$(abspath $(objtree)/rust/test/sysroot) \"\$$@\"" \ + >> $(objtree)/rust/test/rustc_sysroot; \ + chmod u+x $(objtree)/rust/test/rustc_sysroot; \ + $(CARGO) -q new $(objtree)/rust/test/dummy; \ + RUSTC=$(objtree)/rust/test/rustc_sysroot $(CARGO) $(cargo_quiet) \ + test -Zbuild-std --target $(rustc_host_target) \ + --manifest-path $(objtree)/rust/test/dummy/Cargo.toml; \ + rm $(objtree)/rust/test/sysroot/lib/rustlib/$(rustc_host_target)/lib/*; \ + cp $(objtree)/rust/test/dummy/target/$(rustc_host_target)/debug/deps/* \ + $(objtree)/rust/test/sysroot/lib/rustlib/$(rustc_host_target)/lib + +rusttest-prepare: FORCE + $(call if_changed,rustsysroot) + +rusttest-macros: private rustc_target_flags = --extern proc_macro +rusttest-macros: private rustdoc_test_target_flags = --crate-type proc-macro +rusttest-macros: $(srctree)/rust/macros/lib.rs rusttest-prepare FORCE + $(call if_changed,rustc_test) + $(call if_changed,rustdoc_test) + +rusttest-kernel: private rustc_target_flags = --extern alloc \ + --extern build_error --extern macros +rusttest-kernel: private rustc_test_run_flags = \ + --skip bindgen_test_layout_ +rusttest-kernel: $(srctree)/rust/kernel/lib.rs rusttest-prepare \ + rusttestlib-build_error rusttestlib-macros FORCE + $(call 
if_changed,rustc_test) + $(call if_changed,rustc_test_library) + $(call if_changed,rustdoc_test) + +ifdef CONFIG_CC_IS_CLANG +bindgen_c_flags = $(c_flags) +else +# bindgen relies on libclang to parse C. Ideally, bindgen would support a GCC +# plugin backend and/or the Clang driver would be perfectly compatible with GCC. +# +# For the moment, here we are tweaking the flags on the fly. Some config +# options may not work (e.g. `GCC_PLUGIN_RANDSTRUCT` if we end up using one +# of those structs). We might want to redo how Clang flags are kept track of +# in the general `Makefile` even for GCC builds, similar to what we did with +# `TENTATIVE_CLANG_FLAGS`. +bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ + -mskip-rax-setup -mgeneral-regs-only -msign-return-address=% \ + -mindirect-branch=thunk-extern -mindirect-branch-register -mrecord-mcount \ + -mabi=lp64 -mstack-protector-guard% -fconserve-stack -falign-jumps=% \ + -falign-loops=% -fno-ipa-cp-clone -fno-partial-inlining \ + -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ + -Wno-packed-not-aligned -Wno-format-truncation -Wno-format-overflow \ + -Wno-stringop-truncation -Wno-unused-but-set-variable \ + -Wno-stringop-overflow -Wno-restrict -Wno-maybe-uninitialized \ + -Werror=designated-init -Wno-zero-length-bounds \ + --param=% --param asan-% + +# PowerPC +bindgen_skip_c_flags += -mtraceback=no -mno-pointers-to-nested-functions \ + -mno-string -mno-strict-align + +bindgen_extra_c_flags = $(TENTATIVE_CLANG_FLAGS) -Wno-address-of-packed-member +bindgen_c_flags = $(filter-out $(bindgen_skip_c_flags), $(c_flags)) \ + $(bindgen_extra_c_flags) +endif + +# To avoid several recompilations in PowerPC, which inserts `-D_TASK_CPU` +bindgen_c_flags_final = $(filter-out -D_TASK_CPU=%, $(bindgen_c_flags)) + +quiet_cmd_bindgen = BINDGEN $@ + cmd_bindgen = \ + $(BINDGEN) $< $(shell grep -v '^\#\|^$$' $(srctree)/rust/bindgen_parameters) \ + --use-core --with-derive-default 
--ctypes-prefix c_types \ + --no-debug '.*' \ + --size_t-is-usize -o $@ -- $(bindgen_c_flags_final) -DMODULE + +$(objtree)/rust/bindings_generated.rs: $(srctree)/rust/kernel/bindings_helper.h \ + $(srctree)/rust/bindgen_parameters FORCE + $(call if_changed_dep,bindgen) + +quiet_cmd_exports = EXPORTS $@ + cmd_exports = \ + $(NM) -p --defined-only $< \ + | grep -E ' (T|R|D) ' | cut -d ' ' -f 3 \ + | xargs -Isymbol \ + echo 'EXPORT_SYMBOL_RUST_GPL(symbol);' > $@ + +$(objtree)/rust/exports_core_generated.h: $(objtree)/rust/core.o FORCE + $(call if_changed,exports) + +$(objtree)/rust/exports_alloc_generated.h: $(objtree)/rust/alloc.o FORCE + $(call if_changed,exports) + +$(objtree)/rust/exports_kernel_generated.h: $(objtree)/rust/kernel.o FORCE + $(call if_changed,exports) + +# `-Cpanic=unwind -Cforce-unwind-tables=y` overrides `rust_flags` in order to +# avoid the https://github.com/rust-lang/rust/issues/82320 rustc crash. +quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ + cmd_rustc_procmacro = \ + $(RUSTC_OR_CLIPPY) $(rust_flags) \ + --emit=dep-info,link --extern proc_macro \ + -Cpanic=unwind -Cforce-unwind-tables=y \ + --crate-type proc-macro --out-dir $(objtree)/rust/ \ + --crate-name $(patsubst lib%.so,%,$(notdir $@)) $<; \ + mv $(objtree)/rust/$(patsubst lib%.so,%,$(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) + +# Procedural macros can only be used with the `rustc` that compiled it. +# Therefore, to get `libmacros.so` automatically recompiled when the compiler +# version changes, we add `core.o` as a dependency (even if it is not needed). 
+$(objtree)/rust/libmacros.so: $(srctree)/rust/macros/lib.rs \ + $(objtree)/rust/core.o FORCE + $(call if_changed_dep,rustc_procmacro) + +quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L $@ + cmd_rustc_library = \ + RUST_BINDINGS_FILE=$(abspath $(objtree)/rust/bindings_generated.rs) \ + $(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \ + $(rust_flags) $(rust_cross_flags) $(rustc_target_flags) \ + --crate-type rlib --out-dir $(objtree)/rust/ -L $(objtree)/rust/ \ + --crate-name $(patsubst %.o,%,$(notdir $@)) $<; \ + mv $(objtree)/rust/$(patsubst %.o,%,$(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) \ + $(if $(rustc_objcopy),;$(OBJCOPY) $(rustc_objcopy) $@) + +# `$(rust_flags)` is passed in case the user added `--sysroot`. +rustc_sysroot = $(shell $(RUSTC) $(rust_flags) --print sysroot) +rustc_host_target = $(shell $(RUSTC) --version --verbose | grep -F 'host: ' | cut -d' ' -f2) +RUST_LIB_SRC ?= $(rustc_sysroot)/lib/rustlib/src/rust/library + +rust-analyzer: + $(Q)$(srctree)/scripts/generate_rust_analyzer.py $(srctree) $(objtree) $(RUST_LIB_SRC) $(objtree)/rust/bindings_generated.rs > $(objtree)/rust-project.json + +$(objtree)/rust/compiler_builtins.o: private rustc_objcopy = -w -W '__*' +$(objtree)/rust/compiler_builtins.o: $(srctree)/rust/compiler_builtins.rs \ + $(objtree)/rust/core.o FORCE + $(call if_changed_dep,rustc_library) + +$(objtree)/rust/alloc.o: private skip_clippy = 1 +$(objtree)/rust/alloc.o: private rustc_target_flags = --cfg no_global_oom_handling +$(objtree)/rust/alloc.o: $(srctree)/rust/alloc/lib.rs \ + $(objtree)/rust/compiler_builtins.o FORCE + $(call if_changed_dep,rustc_library) + +$(objtree)/rust/build_error.o: $(srctree)/rust/build_error.rs \ + $(objtree)/rust/compiler_builtins.o FORCE + $(call if_changed_dep,rustc_library) + +# ICE on `--extern macros`: https://github.com/rust-lang/rust/issues/56935 +$(objtree)/rust/kernel.o: private rustc_target_flags = --extern alloc \ + --extern build_error \ + 
--extern macros=$(objtree)/rust/libmacros.so +$(objtree)/rust/kernel.o: $(srctree)/rust/kernel/lib.rs $(objtree)/rust/alloc.o \ + $(objtree)/rust/build_error.o \ + $(objtree)/rust/libmacros.so $(objtree)/rust/bindings_generated.rs FORCE + $(call if_changed_dep,rustc_library) + +# Targets that need to expand twice +.SECONDEXPANSION: +$(objtree)/rust/core.o: private skip_clippy = 1 +$(objtree)/rust/core.o: $$(RUST_LIB_SRC)/core/src/lib.rs FORCE + $(call if_changed_dep,rustc_library) + +rustdoc-core: $$(RUST_LIB_SRC)/core/src/lib.rs FORCE + $(call if_changed,rustdoc) diff --git a/rust/bindgen_parameters b/rust/bindgen_parameters new file mode 100644 index 0000000000000..c2cc4a88234ef --- /dev/null +++ b/rust/bindgen_parameters @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + +--opaque-type xregs_state +--opaque-type desc_struct +--opaque-type arch_lbr_state +--opaque-type local_apic + +# If SMP is disabled, `arch_spinlock_t` is defined as a ZST which triggers a Rust +# warning. We don't need to peek into it anyway. +--opaque-type spinlock + +# `seccomp`'s comment gets understood as a doctest +--no-doc-comments diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 949f723efe538..bb22acf8415bc 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -26,6 +26,7 @@ EXTRA_CPPFLAGS := EXTRA_LDFLAGS := asflags-y := ccflags-y := +rustflags-y := cppflags-y := ldflags-y := @@ -288,6 +289,27 @@ quiet_cmd_cc_lst_c = MKLST $@ $(obj)/%.lst: $(src)/%.c FORCE $(call if_changed_dep,cc_lst_c) +# Compile Rust sources (.rs) +# --------------------------------------------------------------------------- + +# Need to use absolute path here and have symbolic links resolved; +# otherwise rustdoc and rustc compute different hashes for the target. 
+rust_cross_flags := --target=$(realpath $(KBUILD_RUST_TARGET)) + +quiet_cmd_rustc_o_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ + cmd_rustc_o_rs = \ + RUST_MODFILE=$(modfile) \ + $(RUSTC_OR_CLIPPY) $(rust_flags) $(rust_cross_flags) \ + -Zallow-features=allocator_api,bench_black_box,concat_idents,global_asm,try_reserve \ + --extern alloc --extern kernel \ + --crate-type rlib --out-dir $(obj) -L $(objtree)/rust/ \ + --crate-name $(patsubst %.o,%,$(notdir $@)) $<; \ + mv $(obj)/$(subst .o,,$(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) + +$(obj)/%.o: $(src)/%.rs FORCE + $(call if_changed_dep,rustc_o_rs) + # Compile assembler sources (.S) # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 10950559b223b..cee5af32c65c3 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -8,6 +8,7 @@ ldflags-y += $(EXTRA_LDFLAGS) # flags that take effect in current and sub directories KBUILD_AFLAGS += $(subdir-asflags-y) KBUILD_CFLAGS += $(subdir-ccflags-y) +KBUILD_RUSTFLAGS += $(subdir-rustflags-y) # Figure out what we need to build from the various variables # =========================================================================== @@ -133,6 +134,10 @@ _c_flags = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \ $(filter-out $(ccflags-remove-y), \ $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(ccflags-y)) \ $(CFLAGS_$(target-stem).o)) +_rust_flags = $(filter-out $(RUSTFLAGS_REMOVE_$(target-stem).o), \ + $(filter-out $(rustflags-remove-y), \ + $(KBUILD_RUSTFLAGS) $(rustflags-y)) \ + $(RUSTFLAGS_$(target-stem).o)) _a_flags = $(filter-out $(AFLAGS_REMOVE_$(target-stem).o), \ $(filter-out $(asflags-remove-y), \ $(KBUILD_CPPFLAGS) $(KBUILD_AFLAGS) $(asflags-y)) \ @@ -202,6 +207,11 @@ modkern_cflags = \ $(KBUILD_CFLAGS_MODULE) $(CFLAGS_MODULE), \ $(KBUILD_CFLAGS_KERNEL) $(CFLAGS_KERNEL) $(modfile_flags)) +modkern_rustflags = \ + $(if $(part-of-module), \ + 
$(KBUILD_RUSTFLAGS_MODULE) $(RUSTFLAGS_MODULE), \ + $(KBUILD_RUSTFLAGS_KERNEL) $(RUSTFLAGS_KERNEL)) + modkern_aflags = $(if $(part-of-module), \ $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE), \ $(KBUILD_AFLAGS_KERNEL) $(AFLAGS_KERNEL)) @@ -211,6 +221,8 @@ c_flags = -Wp,-MMD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ $(_c_flags) $(modkern_cflags) \ $(basename_flags) $(modname_flags) +rust_flags = $(_rust_flags) $(modkern_rustflags) @$(objtree)/include/generated/rustc_cfg + a_flags = -Wp,-MMD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ $(_a_flags) $(modkern_aflags) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index cf72680cd7692..d9fc638dfa86a 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -651,6 +651,56 @@ static struct conf_printer kconfig_printer_cb = .print_comment = kconfig_print_comment, }; +/* + * rustc cfg printer + * + * This printer is used when generating the resulting rustc configuration + * after kconfig invocation and `defconfig` files. + */ +static void rustc_cfg_print_symbol(FILE *fp, struct symbol *sym, const char *value, void *arg) +{ + const char *str; + + switch (sym->type) { + case S_INT: + case S_HEX: + case S_BOOLEAN: + case S_TRISTATE: + str = sym_escape_string_value(value); + + /* + * We don't care about disabled ones, i.e. no need for + * what otherwise are "comments" in other printers. + */ + if (*value == 'n') + return; + + /* + * To have similar functionality to the C macro `IS_ENABLED()` + * we provide an empty `--cfg CONFIG_X` here in both `y` + * and `m` cases. + * + * Then, the common `fprintf()` below will also give us + * a `--cfg CONFIG_X="y"` or `--cfg CONFIG_X="m"`, which can + * be used as the equivalent of `IS_BUILTIN()`/`IS_MODULE()`. 
+ */ + if (*value == 'y' || *value == 'm') + fprintf(fp, "--cfg=%s%s\n", CONFIG_, sym->name); + + break; + default: + str = value; + break; + } + + fprintf(fp, "--cfg=%s%s=%s\n", CONFIG_, sym->name, str); +} + +static struct conf_printer rustc_cfg_printer_cb = +{ + .print_symbol = rustc_cfg_print_symbol, +}; + /* * Header printer * @@ -1058,7 +1108,7 @@ int conf_write_autoconf(int overwrite) struct symbol *sym; const char *name; const char *autoconf_name = conf_get_autoconfig_name(); - FILE *out, *out_h; + FILE *out, *out_h, *out_rustc_cfg; int i; if (!overwrite && is_present(autoconf_name)) @@ -1079,6 +1129,13 @@ int conf_write_autoconf(int overwrite) return 1; } + out_rustc_cfg = fopen(".tmp_rustc_cfg", "w"); + if (!out_rustc_cfg) { + fclose(out); + fclose(out_h); + return 1; + } + conf_write_heading(out, &kconfig_printer_cb, NULL); conf_write_heading(out_h, &header_printer_cb, NULL); @@ -1090,9 +1147,11 @@ int conf_write_autoconf(int overwrite) /* write symbols to auto.conf and autoconf.h */ conf_write_symbol(out, sym, &kconfig_printer_cb, (void *)1); conf_write_symbol(out_h, sym, &header_printer_cb, NULL); + conf_write_symbol(out_rustc_cfg, sym, &rustc_cfg_printer_cb, NULL); } fclose(out); fclose(out_h); + fclose(out_rustc_cfg); name = getenv("KCONFIG_AUTOHEADER"); if (!name) @@ -1111,6 +1170,12 @@ int conf_write_autoconf(int overwrite) if (rename(".tmpconfig", autoconf_name)) return 1; + name = "include/generated/rustc_cfg"; + if (make_parent_dir(name)) + return 1; + if (rename(".tmp_rustc_cfg", name)) + return 1; + return 0; } diff --git a/scripts/rust-version.sh b/scripts/rust-version.sh new file mode 100755 index 0000000000000..67b6d31688e24 --- /dev/null +++ b/scripts/rust-version.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# rust-version rust-command +# +# Print the compiler version of `rust-command' in a 5 or 6-digit form +# such as `14502' for rustc-1.45.2 etc. 
+# +# Returns 0 if not found (so that Kconfig does not complain) +compiler="$*" + +if [ ${#compiler} -eq 0 ]; then + echo "Error: No compiler specified." >&2 + printf "Usage:\n\t$0 <rust-command>\n" >&2 + exit 1 +fi + +if ! command -v $compiler >/dev/null 2>&1; then + echo 0 + exit 0 +fi + +VERSION=$($compiler --version | cut -f2 -d' ') + +# Cut suffix if any (e.g. `-dev`) +VERSION=$(echo $VERSION | cut -f1 -d'-') + +MAJOR=$(echo $VERSION | cut -f1 -d'.') +MINOR=$(echo $VERSION | cut -f2 -d'.') +PATCHLEVEL=$(echo $VERSION | cut -f3 -d'.') +printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL From d1828cb117439cfbdd8d7f158791a54a4eb38a39 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:23:16 +0200 Subject: [PATCH 071/851] docs: add Rust documentation Most of the documentation for Rust is written within the source code itself, as it is idiomatic for Rust projects. This applies to both the shared infrastructure at `rust/` as well as any other Rust module (e.g. drivers) written across the kernel. These documents contain general information that does not fit particularly well in the source code, like the Quick Start guide. 
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- Documentation/doc-guide/kernel-doc.rst | 3 + Documentation/index.rst | 1 + Documentation/kbuild/kbuild.rst | 4 + Documentation/rust/arch-support.rst | 35 +++ Documentation/rust/assets/favicon-16x16.png | Bin 0 -> 798 bytes Documentation/rust/assets/favicon-32x32.png | Bin 0 -> 2076 bytes Documentation/rust/assets/rust-logo.png | Bin 0 -> 53976 bytes Documentation/rust/coding.rst | 92 ++++++++ Documentation/rust/docs.rst | 110 ++++++++++ Documentation/rust/index.rst | 20 ++ Documentation/rust/quick-start.rst | 222 ++++++++++++++++++++ 11 files changed, 487 insertions(+) create mode 100644 Documentation/rust/arch-support.rst create mode 100644 Documentation/rust/assets/favicon-16x16.png create mode 100644 Documentation/rust/assets/favicon-32x32.png create mode 100644 Documentation/rust/assets/rust-logo.png create mode 100644 Documentation/rust/coding.rst create mode 100644 Documentation/rust/docs.rst create mode 100644 Documentation/rust/index.rst create mode 100644 Documentation/rust/quick-start.rst diff --git 
a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst index 79aaa55d6bcf2..724e2ffddff12 100644 --- a/Documentation/doc-guide/kernel-doc.rst +++ b/Documentation/doc-guide/kernel-doc.rst @@ -11,6 +11,9 @@ when it is embedded in source files. reasons. The kernel source contains tens of thousands of kernel-doc comments. Please stick to the style described here. +.. note:: kernel-doc does not cover Rust code: please see + Documentation/rust/docs.rst instead. + The kernel-doc structure is extracted from the comments, and proper `Sphinx C Domain`_ function and type descriptions with anchors are generated from them. The descriptions are filtered for special kernel-doc diff --git a/Documentation/index.rst b/Documentation/index.rst index 54ce34fd6fbda..1b13c2445e87b 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -82,6 +82,7 @@ merged much easier. maintainer/index fault-injection/index livepatch/index + rust/index Kernel API documentation diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index 2d1fc03d346ea..468a0d216c29f 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -57,6 +57,10 @@ CFLAGS_MODULE ------------- Additional module specific options to use for $(CC). +KRUSTFLAGS +---------- +Additional options to the Rust compiler (for built-in and modules). + LDFLAGS_MODULE -------------- Additional options used for $(LD) when linking modules. diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst new file mode 100644 index 0000000000000..0dd603d378195 --- /dev/null +++ b/Documentation/rust/arch-support.rst @@ -0,0 +1,35 @@ +.. _rust_arch_support: + +Arch Support +============ + +Currently, the Rust compiler (``rustc``) uses LLVM for code generation, +which limits the supported architectures we can target. 
In addition, support +for building the kernel with LLVM/Clang varies (see :ref:`kbuild_llvm`), +which ``bindgen`` relies on through ``libclang``. + +Below is a general summary of architectures that currently work. Level of +support corresponds to ``S`` values in the ``MAINTAINERS`` file. + +.. list-table:: + :widths: 10 10 10 + :header-rows: 1 + + * - Architecture + - Level of support + - Constraints + * - ``arm`` + - Maintained + - ``armv6`` and compatible only, ``RUST_OPT_LEVEL >= 2`` + * - ``arm64`` + - Maintained + - None + * - ``powerpc`` + - Maintained + - ``ppc64le`` only, ``RUST_OPT_LEVEL < 2`` requires ``CONFIG_THREAD_SHIFT=15`` + * - ``riscv`` + - Maintained + - ``riscv64`` only + * - ``x86`` + - Maintained + - ``x86_64`` only diff --git a/Documentation/rust/assets/favicon-16x16.png b/Documentation/rust/assets/favicon-16x16.png new file mode 100644 index 0000000000000000000000000000000000000000..d93115e8f47a939635b73ad3b3226837f83f7584 GIT binary patch literal 798 zcmV+(1L6FMP)60Bw|2Q zLBXm~u^5_#D=(`S=^_iY%Mx4&QgPv>=t9NnMoV2tLKM_ms)9Af%fw5(#7W1=T>klg z7yT{L`Yg|R59hq^5&lmV5CyuFQk4LO^dYzb*M0snm08V&e67;5T5iYw;fNPy^F}ZNPfK0%jMB`BiI{q-%F<$tb^i%%!2% z=HkHQ)9hU`E4w8WwDzSQ5^j{RP{$p90T-9FPFg3J3zVz#ZUcfX>bi z7A;yxeM1~$49R3G@pv=Fc*1TDf4T3TrT_sPU=(;8iPWo&8+$OuAOc!zT3TA@?cGW+ z7>p@pRs+p|0vrRR0UOu~e4a?mpOs7|DU~ch5D~QY(Ae0Bhyct;FLAM1-5y z2Kgj+hMr}b_BJnN;yk!=JO5*`#Ge}A_;>#7>}u)p7^<{}rih2JH?=%?W^?ZRc@FOX zfv!*AP*de$e7x{#Zj~JBuFoDhc9oI(osf4OvsN0v@aIZeBNvb@80Jz^D$ z%nUi!p0EA+aD^Hjx|7=*m|r;E-;d3Ik^-QoorqB;acbCFn$6(@P>M`f!VHEg^yrD9 c{K$v;cZ2KTr4Y@#wEzGB07*qoM6N<$f+87m)&Kwi literal 0 HcmV?d00001 diff --git a/Documentation/rust/assets/favicon-32x32.png b/Documentation/rust/assets/favicon-32x32.png new file mode 100644 index 0000000000000000000000000000000000000000..655ccbcfc8cddde1e9c7d826a3f44c1e056b1dc8 GIT binary patch literal 2076 zcmV+%2;=vOP)3;q-MhQ?`hkrNhS-jwDF%$eMm!QnNsuCF zOiL0VZ9|aS(2`dhMWInem4v8Y8dWVHS_z0M6uF@!K_U@pAZ-Fh4UvGhD2PINnDB_P 
z&BKl{yI#Dz_nv;Zvt|t;Nt+&NwCnNA`Tyt4|C}=m|Gl*U3(WUbW?<%8{s(_w$z}jg z0Oi1D;56vxl+ve_(x-rDgE=;vHBXp5U&|bz9*7!%EU*_;pHh}+ZF^eVVp)z97nfum zCnP{$u!gMJ6E!}Zn}Mh=AW#YX85jUcfJOkNG)n1fEFFi2hN*OQbT=)+fR#t!DsVqR`s7qLXf`hg9`Swu+qaA@8+YG~vwG>g zd}w24lt_Hp|GTkx~k78^E-I2y_-)jD!?BLve2yG0~|IHx!2%b20{cH>B$jz z0Qe0smM^iA;$j*nMhS&eB>K~w??`beF`4#5HbtdXfRIckETF*vw-a3TLx5`zI1l(2 z@b?^nGXx4P0bV62BtGx~L1l~O2;B0GnM|LvgbSaXFPzmkr?WEIb zQmIsKc?{S>Ajk+ok-7~y06Y&Q0q!6up##7n0<<@#V!AUl=B!lqH~lX?*+@_z2ieAU-je3pB6; z_`R8QfdXJP@FSofP@u3ZoB8waB^HaJwZ?T_9LK@)Jd(*IeSN(&G&B&8$5Be5ltODY zj-VWW*I-l-)BrJm@6ZHI<0-E7g?CI826GqbI1UvR71Y+&;&~pxFh~HT$z-xjojQdF z9=IRZb%{hGfO^QXtOW$QzZrPbxKsx7*=t?pnvIyVEQ?sIfTc^9(9&|pFsN+@H7pm6 z^71mOtE+kZ@s-rp)=*YfMmQXf_&yI4bj2nD0e-HnB2c{mzs;ME#bO12yY<#O*}s3E zi1^}p8OdaZB%4i(=Ve9=-_J@aH7LuLEtAsHQBqo3CQ4c7z?ns!mJHUa+-c_nPv%|0 zWT1*58zw7dRZp5UiTe8b;Z~cwhU-m4a9x+m%1ZkCFXy6FuC=!en2@(r8O-@MkRZs< zVl!HRoq(^k6|*coe%iEY#9}dg-x~(HxhV<@3$bk*5kW*8U?n5_PCu{@NZ17B@;I=K zpcF?iccqmg9xtG#riT2ti&9_oj)9?c2G?~F5iHBX^Rm~!h>ih21uo}C&gsH|e;nZ5Jp;^WZYCOyAR@!rkQWwG13mQae~i;7_Okt>K~kw9 zns0-7O~OBqrZKcX2G#-}0^K&PVLK2gjsmIO=+=4X+>>imW{9PWqW%E zGn<#HuuRP6^R^ zN0hpnFr`Hn{R5tVup{%S=aVIP;9P`}MdyU)(ON%V?MP>Y{A>1om@`H5_~IfeD#~xr zfYufRR}yS_{ZCxF&_lGqp}HnQMMVgu6$LSi=YLqDx4u-mr*;gB2}IBJ1TyceVIUUM zC0gI8a>s%dtq#xpU>0FFf>IV*2bp#3*fE}d`Wd!tewC2rp|rws6iO?+tWRPfLtk&2 zh?P|@tmHgFy9Y+*CdrgDF_G}6r3N7!R#^76YXC)oljQxat9g0dPHwAjAmrE_J9dJd zJ9p91(LwVB8`lBxg9xUQ89F8FezU!89b+~Ppw_+$uBk((d zmI^^{|B2S@n>Wq*H6r(fEIB;0(EH1p-JcKqb?pE8^ZY;Wk^Z@%?TL~A0000{`>ua0DKbA$MpmFgJ3KtEdllZ z-_Q4+vScVIawr)IQB{we^IZ1~Q?<-@;iE8F5_2ysc6lfS0s;|7;-CmPB@vVea#@yw z4A|4o!Klv8+Ow>Mc+Q$LA{NG=T1v{o#zI3V5Sr*N3-4doN}t)Q_%{2+n$yZlqW3~N?2wfC*tXez2hhSt!zT-4vX00#Pr{=NzCA!WM`X2XabL#jhHYEhrlis z-vm&G*I4?n11Gc&Z)xklqq_VzgcBaJvR+9q6du0q4xjHc9jg37MAz#}&_!-D2P)9B zpZ|Nnz2Sh3Uwx1(C^TeCCPZiz)E_0c5Lg@79Gx1wD(f+Qa?GE)VnFK*()jrx_KM>u z0_DFZ&yiOHx>IWnc@VMNH;z0#;=M6hE#Y7v@@DTm)Pr6A8+T?Z&o$FF zH7M~)vA=I2Z&g8K_dV#E_r3ZVI?oD%!svGu-h79~P zv>ak-Q7+{|sk#)WQ(;i^vL_jIo&Ar%I40hX5N 
zdDjM%zm3gtiBT(6)j<~f<3Bq@DEW+JW+#k_qZKtYf=(5CLsun z1b?*{%#CJm?cq!mXh< z!So!2^PfXt*zT=blI(~d1a?F0n`6f3`V$%B3~)S&(G{ELyd7h znV=kZ7&RlZR{hw>E+&i`foTUSig84Et?x-pGi78yY~mD*2@AAD9{h} z2-;%$`aX4Ba^UEO=5%IiCnQnRCF?q$zikCk<`gMP3B>Ry9lBN2Pd6UqW^eRTiYZrg z&lu#Pr z8DX=&HOY5uGz)_Yi%ab@qI+Jo_d{%K$X*4GqtVx34<3^C2GqR}*8r)i%&U4Ns3mO9Uc?$t>h3f&Bs#>v?&CZ4CYfv_|IlhJmE`y#|7ijP{G@L=Q;AobrN!cPs2Fp#&}VaG~EN|_8BJ0Tz-5D*ia z8X6`@7zcO=hzgL>;6-CaMvS+s%ay?Py1wQ4)&)I)vmgYwb%8zoMdR*gT-%}0G5E@% zaMr(L;mMIXB zEEtc9G#)d01nIuRm_j|$$iu+@(R+t)d1?%+4-$W4QOr_A1`6PY(V*akRMF{uVA$vX zM$y>%x>Uw0ZjFHixy}jx(~}p8IV|Rh-8kYVJWteutN)RBYnK%O zmH?U6b^~%CI{Ae{AFL08NBx9T*YCj1HKBNG`S`&kyJ`qX83%o>HTo^imP-dGafKeK zYfk%M#;pk>cM)4`e$v7T^_sMp*0iYPNjOL~5$$8dSCJqy;bI28qTv8;@)|{d1Ro+g z*X0j-4CF3f1iC%in&cO*H9?q^i4clDaYby5`;JuADNMRpGYrS2P6LiREgw8*ioaM{ z_lu-qw%Ec;`*-5zz~@QkE@q?z zWk#vbf$drz-sah+I;qGshCz*_MCUq|7VyFl<{mg*9=-NN1UwA%hQMGE7~!a2UCR#> z`(t<@x5&AYq50(3>XyiPzq!O(M+ST)%{0^crUU`;F-U>&K9k1dQ#Bel5F|pHw`mf~ z!Brs}K`+-#2ycXu@*S8JUaL2L+b!j-*VB@fcA|@o zva-ENN#*U~R+BX#5+cbh89RSzx|RB9)&LFIIw*Unyzr0%1Q}X#Xe=a1ee>SXCyT={ zQN4gae>197na)4&(7x(wy-(^u=CYlHdT^p5trYz2IaAn>4iq}IB}AS^o6p!kAwdFw z5P~aK$k^^r{wjZNQnHi#SQ#l@ahbZ5rOPD8mHjz=Dsk#yGKxOUK})Gw-8^m*tKjlx zH`))|wBvg(4KSe}>-?TVv|&z=A{elk;`S1z&1K3Kij}R{GEp++!*rQ6a0u_aV{PA4 zC5mZ~nGr-iF!9L?0^sDfkjHh~gt|TOy{@>_gz{<9r|{v$s^f`QY~x({zVPW`CCF3j zP*bKFb^VQ$QG5_zmE7`239IYS)CNixJTDLZseSMV1TSmfXow=M^)8PT@=`WaFhf2X(B&Y!qy^`SQiHA-Sm z9J!3xRoxy}-(H5c3n(ZYS@~+-b>*t`LKbU@!x&Xf#DkYwsTzm zwih;DboIisD?{(|z9`9u2Sqh^>!R3-5plv}*3P^CFCM3#pPHrAzitU4^T27MokLwp z(<}TTd8H+%b|k!JlAX?gqE2XSXv2wH3y>ijMTQ3z%N=*wf%|&BVf&r>Bm0{YFTV%p z^<6;;q5HrbS1FIb{`d>QmoL3OJ(l%W& zdXs^zWXLdXN>@Tv{9WL?SSS2H+@wHKqz|ghF7sByJ6qiN<4iK!Sliy?Ov8yO)zKDI z{P2Da*d3PV$oUZAkKDIm@0hczVzpi+*ZwYCHFnN;ikz@MXQ~h&>=Qn#H2aI~&R~H;8 zuv3i@QbohYO1n1%4Vp0!P>`9H8R6-L6x|)(Q{8Z!7}I$W9=heM_5$vo%z^zJN>8*0c!=D!jyf76z5pQ$%<0qD5U!8Jqu~eslF|Pi_ zcYWx(_l(9=Z}q;QmfbbNjQlN8q(qrWn+Sm`Q7y>v&Z?-OQ?B4xv2ER0(?;h}u&+6~ 
zx1cJxU@q=k3L%HU+y%;)(afVgO~6b*`@`g|okaejRq5x(jNaQhMURYqNQ^#dVM|fg zOw(Hb=R{mq9c@td`f<@Qn%;{Q1$M19a-lqUjk_0?;3()CcN=$HeerS?U9Bp0_luvR z2p!OA1ORXLJ{Hc=mRjNP|8pz)tgJ7fgn!H2-)IZWKyYv{2=4K;CnRJgMl;PR zWa+J^@4yee9-OQRmJ%SP!ip@!F;lbaN>mOPDg~3d7FfN~YQS2qs{NlEELo~0tjuuYpf*b+Y?mHb*=AyYHucciARq{=oJ z4Z)hFLAbIhfNfDO8qzj6fA-;_1;zTpr*YsYh9c4Z9TqIwD`)geJ@OseZ{! zC?8pO<3z^w<2}q3oZs2l1j>*X`YyLs)MAx}%b&Sf!<|7%9)&!be4nIj`zOXs`Ny~b zqj4^E02tw6lR40*QfA7-<6%{$?4mlfhm1ty6eOAWh#Jff*_fpziXD!np;y9%WdBqQfEr0| z93I*YYzUTwVPf?cZ-ej!@{<*XB30=*YhI*5SQOdQ&WmcohC^Yfc?~hUOYxs3QWPwr zCOrMv1O$HYZ#{lfuJ{!LqcmG%rqzAl^FAKS{nklKY41_@%m?iMH!W~hp*(Rn9OLV2 zyH>W_HT}|7ifQiLP0aNr3(1x=ukE3nk*3>B=YJ&l0;C2}X2n`5ee|qS*%aXNG_Lj# z5fC2w5jSUV52lbc{+5LB1Jh#H)t>6IpD11L_saM7a@3dd7t1~OOgmlL?s4RFvC4Dk z6O)m(b)#)|<>uPtGs@8RTQzz0s_Oh8oF`HI5v_adrnx9JdqMX z5|sZHH>B~{(IXAu=$#!Sp|_iOR)2OaDFy>iisWSMj8d(?^$HqfW0?9laO5vhkMyhq(`=B8|E zd8*+z5U!z<{I5)HFFDwf_SQaI`s?Ynai$MoPC6GpxkYVE^ae5AGb$+#qLEg)HSN8< zJ>73B-VNYLF?Y$~S>1%T-oMlG^72r>7%|u*)>S99BLz)>FPe0S2kE} zI~N~~ibKcF+5MTyQkLpF2y7RK8E$1h`6B^CiS9$}WZTMKLC3!adpMmvHkr!swNlmb zYY2sMGn=l__F~@ix3b;j3JMbK+Wh45~BtZ5LzZ;24&~*Be&T&{Io0 z+Qq7lD1U^`Bi>dAJznqi^JjF4@~@qc6@ZHj;3cRuP>x)^D50-G%C(=5FDmKF_iZCX z@O2%9GUd}e1mcV`2&7Kvg@|J7T~Czp7_}jktY0EmoNC4Y{neUFn+S}vW6xQn;s;0aTe>5H=D0zZ~uQOfHTPm8D zv^D^U?dH=`+jYmRFR7)$na+@*-!l@_9pW}&f9j7OFIV3VU-;KE*uajzie#|*T0DG| zq(SUO`DQDPCGrXic=ykYQ@h2<3cKe`v&Mf-5++sopwTxxCKp^ORbz|c#jMu=U1TH`V6s~&R99yk;G`#eWeS$^RMV=(XaR|RtaB+b|D z%8;{>5&zQ^upaqq^wf=1g!n+i&5IIp)036sIQTI%f@(VHvt0XeA+lE2zn^JTu|f|6 z=s2&`HI6@6#~hCf!f{^k@Y2-$vwi_d`8Y&ofADYZ%G9U(8!u>q1K7QI&z49=uCKo; zi2%7}k>jL2RaYR2bwOI+?++sld+%=PBb+I*l+;@cVCnFb+hem{MR#9Ji|$H+>;6Tk zYVDRJjlL(fk*~VUC$3r2WWF&#sw@c}LnEHg5?bVJ{*h>Tk%X<>2?KoV`GE)PiXPN>0}E43p4xVpo_(B zp7*86>yt^xd87YsMOofptKII62P@wh5Vrlry50(uLx#=%i}_*f0KaFa1KJEMUE}hjROZ#lX*5NevgtYgGQUlx`UA@Vd>!Csr)n1cRq5qaDW5@f!A=APc3uNFFVOV2$w zsZz7%?+#;s5qIV7{rBlwo5RB1o(X%L+Zc)a@y^cUDht>ytF`r_oRolI9#}q>rqb(v zR;gcz8k#m_nbSD6YU`|dVP&SMTMLp>@TD!*b?b7>&qK2^kg~h 
zB29^@AoN&VYLN#45s;FOvc&8v#p-o^yO~eV(~)O`i+UuTvkVCg6vYE;j_;1hX*OH3 zBICt;ZyD-1>dg^;Jz?e?BO@T#{=q)Gogj{8^~A{`TE>XM+(w>%f!ZWkj~q#5GucR; zFC0;A^-N*2&SyH!-|J|wJYQqfZ2?OL9ap{AKxq1jCRX<&2-D(Hb*J|uXuytm<%t(f zbLTw;_K0sArnA%Ni@8r}yecJYfbv1~-Ape3<86AM_REHy4nGISkA4y28Y64^iQm8V z_3u07+D}S9*+@wV-glJU-sKxC|N3z>ovm?xVMNf9`}BUx{60Av`>E<}ngs=b<>KOv zu_Z0$#>PPq(4;uJ;4nPC?(l~D*1-2q$-04NTGd($OB|6OoH1AOm~@-&Xs^M=Ag>Kc z-mZ`X2y?<(0%gk(q-0lg-FaM`JD)=GJI`x-$KPOP|5r zQG!C?hQ5xal+(g$W-E_@+u}Wg5Ao$<_jG?hz=Z;Ar?Yl#|FYi1pN>T1cU_?N(cxbira#4(%lJ~ z{Cz62N80?SUo@3@26Q85SJer1vW#*C=+TFCsu3W7!O>jLry6}Td4h}HJai?Rt>B2KFm^1v z`vA8DapJRg5eJ{kwUO`u{BK@Z`kh}ygnk2%3zM+eVIrMRFcHBqW5-$@@8tIRvb_jL z>JpfYdR2w3Gmuo z?pNkxak=N~9ntdC|KR}7%{v8Pn=w1z=t{R;VmwgO6RGWcd#b$eT6Aq%;h5ORF)`VT ziH&9D8G1IYEQBir>Mij2}A}vu}{COB~LM;sWoDV-!=1$cdv<;h^7m#j`l|Zrm zTS2ieWwpCDrwm$(G^ScUPuKC^J^vISTGALD+RY+*&_*j2*^k0%ZwNsAI zocrzT)qDNn_VXnJjPV-gf3!InnaT6Ni%=6p{%_}oM+%sUz}w5t$vM*LCf#7YNDp9q z^}FszpS(4Px?JW`93C`NHbQC|8YJVE;JLZEpbu#e(+<||7z65Pli#+Rm7 zA{5yrmd3x0c`p}d`;a3aGl`zzbOqwW`U9XlOI-0+XAl|5Ms(Ycg~Mr`($A|MYoZmZ zL{mg#jq=1}11vFxTf6A~4^$nxwe5OpmBtK&m$Gz?D=0-p3(UG3^EW^#YRV?bmrI>J z?KDn-csC~e78r~!^r0rVP;uDoY^la9QG-*TUz@*JS*DrkuT`Dv)0Wqn-C};tO^vJU ztAw$jW1zp_wNG)h`7$_OgB5RH#{|DO=;uFA56IN4f4!Kruy|r&yi7@@xqNhmG(?EyBYz8a2o7 zd2Q>aF})`SGR$;WLR<2?)ov`NjxT*GdFnu>jGZ7X(2gc@Y;Q#9RKL}3O`SU6bF+ga z#aY?jM9AM;@BOYDxu+(Q7CE$z@pGiklPG_f$Amu&I)N%DRyR;3*30;7J>D8CyO*u@ zHDPHC|Y@t05U3UG3re69d2NOg4XVU7?^@Ebh8Uv|#Tzl}C{396`=*u~ReN~cP^cF1)b^6qIL>;xuhYqY|FNze=^9fCB^ zE_|E#X7^mRZYJ#Sxc38@p$vfZULF3njk7&~HajGplcn&7QKvO&_Q)wmz_1`?S8J9p z4=A>$o5wowAZCO4avVgTQ)LsU%?U|J4mPgd!gv2=C_6rgMp+QsJ3kCysxvidHj@t) z!egd1Ld@hxsd2Mg4?M5uQ2aY=lDfD+GSnuZWEuO;leQeEQNoy*A7iZ}WQ~?{ln983 z85x#6-wrUPiMeW2WrdgrJKb!&mnCYvXJDK>37o3m@to-IVe^Zo=-l5Z2{ldL5oN19K12V?< zYF(=~=<5aFG-$;Md3kLJYJkZ+zF&}q>sNm~a>l2m>??g-_2?NS-R=X3D_LM;K?ZF; zGXPMrAC&lYgCAI|p~?xdJAw2B2)z#7iAImbDke%II{`wk#mZCw(%*)E>Kn8Q*Pj)9 z$3B=MtLMFPO&&oea`=kdfNPOQd=yQ_@wha0&r)7zG*#Ni4pce6Qh06@`KR-5vJQ07{^oRUOc)<2G7G5{FPg| 
z1nHci0~!Y+smJ%@90X0E{&VnN(;*`2Zv-#Z%3}K78qa7yqt1Be$#gHVX@34WHJ9be zH+!6iqQgbA(o8qeYG?KDayn6gAFTh~bA)h>*LBlObK)vFXMJdYA7qi|i2$%fsM9S; zi&f~`e-@_r(I=lNKI_`|z5Yv^*`n3HFGep;5{6=yj4GRvoQK%e*A{(hs~7Qs-IDa8 zndKyfpP?sqL96-ul&Sq|)AwGG*zH%MX%jCX&a$8YRR=@+W#^g4^+33V;QM&OuF946 z;N*myQmfSRb=waKp*oq*B;PJ(nrz(TOxYAUDhY|Ww3_>kPuOG*KSfv&Y)kLo3Y#ci zj_k)Yb?hfC--I%A80Z}ik-dG*d>>lFf4%IwGFz_I`;yuMAATX6kDH|YafOO9jD zl>5p2&jWj0j08TOx-##YVsqYN#~(7y-@aCUKwIbhax9AH4&X;UwFdBhNY*+J*s z3Bk_HsqV!#qV?E7M%}!Zjh3*bsSM>5vx@YRH6x*>zC9#BJ{VGpPo{2It9`&VCami^}sV@zHj%9mz*96|!esfBz!lh*t?gBBE;OVtSe z6{hUI+|3@HR{4tvkTR!K&f-n=TCiCG%P05xbyaJ0xS=Kl>#PsHgUK!UBe>lLPn zh^RfJLuIkBYPnzkA>akm>VjY%`kIg&n}iF^di%2jEWR%Vg}+N zCFzmX-qvQ4c3NM_QsX^5b_@ z=SNYlOTMVwOnerHn}*-5p@3Mjt5jK260P;P^*H1H2!P*%=l4R=;-cMZ!E9oX(N*+# zT9k09?sYY^y(GQwJtedB>hm|o9&&PoB-`aB^ErxXrT74-9Op?opA$KQ{zmzIlk#W< zvz{Y#6x(j%g>k%^I1xArTrkD~4xBiwf{`1}lz4@Jti{}Z#ut$&M1+bL*?)(5)9o&l zS}olVM54HB+txK5?{A16ciCIY)g0^g%?LH0WbJ$L0p4D)6U@+wFe~}O-{eo*6+IRZ zd~zEX({o*Fz5nIBQaz4h5feId!{Fv&FTUxjcS^IxTDVcGt#lFRF_)XY{UkVpK3bU) zXOh-Vhpq&vZ9JZxCElwC4(u_T=FPvL!@Z?9sijN4f zsNi~g@|~BDjoFZrg_tiAGNsM{l>3jrI!HH|`TbiHv2MS+u7D|xoEN`NEJ^ChI83mf zX`2i1)@T5FBJ>P8L|5HB2ZWmiiQ!AJZT6Cft2su4xn1i`d&5>JC76HcA!M& zZW+J*;>;)=1wZgUe(%}VyR|7HCa|KruNcH*o!)7IMt#hBGZg2#t;oRfvQ8iIc20CphL z_*PJ>3lSU7ya!;QC+tZ@kkG?idH-sXlgqerZ~3qzlGLZXF1!yoo|?-qn7DD61d3$B zD^H((f9O*CZLdQ6V{i+lU(6^%GzcM8nyK}yO~jfcY<~En2TZa)`8XSYcgrrt!Z72i|8}@70>$^Bh}i#^qrfJ$Pej3t&Bzqy@!&V{N}^~adqR4 zy;$@V7vm?&Sfj4ve$6?1J;}m%H|UDrgIUz%VUY#4U(@!h2=KHs2`1s>F|klMq(tM_ zX{$N>K7644iByAw5alUOT}H}3+_e1pVL?!)_1v;gr$7?gNei|c(=)Mc-giw~1+r1C zF$XZZNbL!v^+MmD??k%KMJ91WTv^#uS?Q#kEH!UJxwX#QKtJhH3%**yPqIK^*GO`F z#iPZp-?zY_jU2Ns)s@S2!<(avMw5QTJ2n&A83rYQ-Hz??rHn>7 zbe8HZ!T!KA;=PFG_%rn5hn~g04#mt@>m?DFM^C)xt3A(Zb#`khIj=%lie_0E%ev7P z9$Br(gTF27x)_LR)KQH3H8wxvf|G%Q;BdUe#gtvkITMV>tKg;myrih&66=Hq+RiQ z5yvQ=ghXem2CTH37uMgO(~BN@2nppO`WZIkD71!K@KMnUO#Aw9q>dog4ob4x{Gq|W zF73MoQ8RX^vPI3X^vqf(t6#`%jqr#ryG??|NHa}oD>XafM+QT=z75l=BH#%_m~u^~ 
zqm~TZu-Q?(0`>ye5()$E;RH2vb&+2%JS2gj!o%Kt))T?!O1z!_5`F7q$mqyp?9|@& zJjee?Pr(kBlC_!M>$g6k}s zg~7Tnq)6B(4MK2w*9rTAf9q~Tw@RecW1-E7g)&{+4^Ndw|Cws?6ca?*pb(divX>J} z*ky*DOc)W8IJ}y1M;X!P{QIzsQF6P_-C{PJxe{vI@S1d~>y1O%{Y=%f)pIU7ye~9+ zQPkXVY?c7ih}A%g#<1Qg%#7mrBY{bhj>o5D(E13kPu}@z^pi-vD4D=x38WqXycI$q^z6hHYc*aBp?Lx4RzPUGK*! zfUsKKe!yM#^+%te$?a=I6urCU=0SpEplEQ=n7e4T=spPuJD?b2`5}+PYzOyG1;m{n zhPjf~xGk;+iK`PVr0tTYGnF98#M)C9P~n4yOVD?zw~s!2A|6#eHP_%-N=&b;kc zk^lHbPZXF;K7m`0@9Q1c3T2F=p>r9^zrxS?RqC)Wq`NXP{xt-y3 zrQ5cd=Nu)TllFl(Md{igICL(xOS-Zi)wpF&NFi{+?1{bVWaxk zD^I2Y%&pY$95$9%%Lpx|63Xam7sM(h<(d&m#>%IGhxo?nSfG8dKyRuD!(2Dq-|*f( zysD3TqBNWxoK*(oVFvei$S_A@#E0+-*NIcwFE9x;RknDf8>b1AGjE+K$^>Dqe$8NH zz|tT=`MWjNOlzc%lM*D`@i}&FMtK-Z#?{ynoydcsXKt@p4Gn5@e=PFlX}Ej5><%ze zSMr*v5Ps=g+SQzubZ7V038PPARSI>X1z~rm&}K;6lr`IxxDK$*h2~q3?&fth>fYso zW{QUfjYfh*31n$^F~1C}YnGaBdbd?wzm&H`NOP5~C?Cx<|9^H*84wl<$~aJo{?8Y< zSRe6YDT+rV4(`pM4B`HodwUXK#lCd+$0t7=NElo$c#0j;qFrxn?ejcmBCV;TP#cw5vfL>uj%cFjh&V4mudPu3Z@EX5r7){ zNDx6s$)y=;Js4J{ow}>^AehWZqPU0|cwn!jl_tqSL`9n3dmCm{&_bO)m84JxDPsrd z1?LXpxIG+x@)wp+)F_t`TuS z@WY6dfciq6Z0777RZ$_6G(k6ig0fkFaee$liMK!5S|Zb z^T)QgG5)h8N!;q0B1co_oZ3yu*GZvF;C5AF2jlZ#U^ePkVE+bh4TPI2^ug^5Z8M4t zm)(W_;b|EZYYDqaIm|2LVEyz@r`(ZvBzcXaBF1=Y#J)L67^V=V&8JI@E>^iSp=?+C zL+AzgpdFR3lmy#^*Ux>A*8gRiKx)RwlmHJ!*8KIWJZ3Zw7Ho6}NmGQhK4tAZ{0+qH zi=XqA=!@Hs0(~AU>ar7QtP~*uf$7_e=Z}2he9yZuLSkZ2Vg%rItp_77aIm5vFQ}4I z(B-zJr7B_N#6^sYLMGz8`B{0cjf%f5>zZSoP_8)VG6IhSOSIs02O(G)9Nxh6|-hVXX+A{K%OF>qnjuF-e7$Ut^2(pryVBg(#jT&nc?hwn#@ zd$HV}ov`Zo7$rERR#=e|5YR3PDk|X47zQQ*7WN~c0Jf4)U^PT3X;FeoyWQ=PFI|`lIhzSBGly)&szU4ug^{se;WfkKW#}n-CVs#aMtu^GE7I(` zTsiOi2lwL!@#{XMWG65-s~j~NzkCT!qM}j%_&_-Q@-AxhmucGb70F(F-`d25|GKz2 z<{u&>Ox-odL~VSIWc)gjSaugb?cbp@!j>#9BZK7O`3=VW0$& zEl%`LqJv2S^&ah;PrUiio@bok;C8>-z(KqvYL|P7=Waufz|%D?e;akAHb#8E(A{VM z&v>{45i3aig$*b-ixCPZ^ENdZrT2p4riO~_7Aj5PI*s@`JGZv3 zAz+=ZW4=K@U^O<)p@Fwa?1-vVY!US1<24X~-JZ0VaPo_*``%LmlhMxL2nrf1tTf9A ze0iTge(J1AWw%$Amn3hV0==3&Ysb7s6HtxdszC zESh{8YZ`? 
zb4QX1!?e|DZEQ}vW#o6i%)0U4YyEqHafi`&I=05KIhC=7--|YH>~Um^ZW}^gd9xYA z5s-D(VkTzIwpOVUb^D)_0~R(osjPlh3B=yDJ&|t$9Y<%>P?2&N_sr?vW5CXaB8Nwp z#-tweLj{Dz!t$M^TC6t|`lQ>K{v<)3VRW=H=5o5;UB1bkBR<)!9`$is+Kxa*WH?Qk z3R5oWy^u>25CWntHA_=;HNu#vsnnzAZjcZoMsB|TgO4E$HyUHaN{cnighw72ZoL@X z(ME{6n-#7jCvTzq2njQFj*5(wK|r7eylunAlv1Tucn%}zlscNmJ^SOP^KP#+s6)d} zeCDCupB}(pw@h{R^y~f{u=uIxFf^6Z_}k0N&NcNT!pweFBtZ;^;UG4$T~*ZENsP*1W0nC!*jp6 zewTg-u?w_|ax=;x~$@QWh<7+$Ro}R+&Ad+k?+&poN9EXmuBL&t3bB*hV|w zFh)A%_gu3ZNt2gzg8-YB-Vq%p^kl}`l4}f^(BNYFQ zI40OYfv*KONzs@P%f1TC0uD8zM^Oo;+bzU79@I|?d5TKjVC&~7Nr|wc6f+Je*%x;> zMDEksg=$L~Wl151moqQ5tK8MgdnQ2~ZK4v`?#?mY$1o0kQr-Q9S5*Wek}3enoLF%G z_?P1{v*VKgEb!OPB()kik&@#f02r4Tky3R2N%&B5%~^JDqem{i3R|m;4Qh7>Zo}_y zK|2cp#;>Q5c zB~jGSXb0Ai(7kbHO!hjdXWr+9=$z5O@V&o5vNt*=*8&iO`8~nIo&-GFSYy|QB+uei$x2f)Dc&+-)Ww8k@nSX9G;|=1fskR z+{QI zu@PgQ-y1ILL&zXYbz#X)i^n=+xlgZYLrlEl^)U9|nhE=3hw!iCBggr$eDA`rvq;D5 zxrf%)KEJ zUZP&O6iR=tFfi{Ziwb}6v)qR+{p(4R*>03rO_984Lc?w7{8m>Bq&6tD};_23RSMv^h7anoT7HbAU>cp&)1 zEu?ne(G!SH(ht-7soyg^z=7(s>LJmm#NkWTgkZRi(1S+&oACAZ(i<@^xT&O^kjV?& zu?9JXzl%pnME*hN`+M*p8{Ui&^UJ#yX4wLD+GJ_r-l*A*}P@3)D^ zDEpGr7>R$jSzcpm>Vzqv27#r%b)*5u&LoO>Q&Q)2V=VE*3wf`NFRIKXjo<~dTA5yAYB?5&L0^5nZoQNI*wbKhvdcwAsS@hm9`}n!F}rgZmBx>z-VMbg^xO_xMsG z=d_mI1U+Z#Jd)=Cvi>+UqEFHoX}0GESc@Wamv`MK9?-!BNX6-bZMmlQ_u-0) zOym`~^raPNY!RQU=Rv^gSQH+(}~Qh;^}Sf(sdSgm*>1R?EwECvt4 z7=Cu-ap^O4kuGoZdMK2PF15SJD2?3Xk;DufT>SfzdMv&L`S^xdXBiP%1KGIE0g!ys(zCrw+!=n?}Y!?ytE&UJb&k{|q@{BkCE1 zE6KyCO72`EL6ihCm|QOhHO&JpvnvWL5_N8Us(-%}w(M*~IX89psD}fGRvSLc6{o-| zzxv+(6HqKs2DCIUK*C@8uI2&Ki`LER=gjzzo!4&@KzVfM^ry1 zM}!NvpM5$1s%s?qc7F8|J&-yYi5&T(V20-ZxNXlJ7ur#a;C;pg-EkIMsp|Ejrw^eU zL|dgf7myNjFT$~SEkPBH_#J3+HQ;5#nr*v(wDTKBc_o7c%2K)KpM!3$}pBH9$ z-%2w3V*&8i0%ma>mLh+QW#t<&kj6W5YB39J@d+570FuI8z!Ww=%YdFZE{yq{K5@6f?@G7HUaKkC(-3a9isG60~Hd0`Pk1~TeyPR zjcxYEUH8%`$8fHKR2?Yc^Hs@=?1Wm3td;Zwio3*@cdZL%n+Q~zF{h~zk+yl)xt%K<~re! 
zCO`HOOs|ZsHLLZln}rG`rS+kxii$dDM+=>}oXw1cK!bp1e{7SAS=RS)`{LOLrQ8qO zp!c75+#hXS3NY!XjpLL@JMpT{BVn*mTPe6dyCsQJUQ~Ze&z;{EXbVJM)3&Ws-J2}1 zu00~%`5YblQ>~GAgzf?_(2?_@Uan}iO!tMI(`NPG+|8Thx^^&U*pQ~Q+Ap>*(%c^+ zD1x@g*BLha_Jy34upLJ`PdW`G{aC-hr8U$n$H3dOW5!f{wO4teAp%XS`q&PbME?zv z)KswBH`h>^)bpW8(tAb)T-c>*A>|8nMjm@w7n2n`Ept8fZ4s70iASs;0zbj* zIL@%K{7nYE0)UM0nXEKlOt0`~s9K|F+dDdh5I#YIEln}2Wq#`XmNo9lTF>>qyIbRO z>QYUK1ZUb_2wCLrv=p)Jla9}{q`owrVDttg71vdpf3!uH&&^lzHIUgk^gF(c#^oA# zzX%o+c0z2dC_);!KfB)uzdHoJYmL3}vooo+()+g^o3o+GJElJxI#-HgX#7cn>jMly%o)Jrjhx8JO`rnvl54^5Vg-yDGh6{dO!7_Mvwsq z{r8T{+Ru1*;CkU&jj8cO21HqYVXv~YJ{UdUvT5yTj@CWSj&Y=+j;>!_Z_+xq+DG!= zM=dC1U-OsRCvSZBe7@y-GXE`Wzz>&E_?v=*F(j1M^bd*kezoKQ&n2WIi~lvg1(yf@2`%^IcQmx8<3M`ui2nJb8w?e80A? z2cgDfBdBaEiNw;Qmz@;El0Wcz*A*viwz6yjcNaqene|&aZn75K_`(xo)6G8G-^)JU z0<#OOK7)YwFk$WBjzf(J+EC+jjEb4Hk&OF%Lus43)LD6n!`5NIyz+2AZfDT1(L`8Y z$sHKHxY^UsFDua(Mr@A_^QwUp9Egmj3^j>Dp=Daa~HNVw>c_VH`Ue8h=#5gJD=3iV=fzMDa_w`b?JkGH*-`vi_G+H!Qwzh^acZx%jq zwr}MrB`fMH6{$;$%9xQUUW8{gytJYHhHWovNpOPS$Y5TVFU+@j5H}TS1xq+Z76!Dx zZZ@ZNi!a`=x@xF2}Gj}z1zFAv&K1*b~ZdLwqqh&a~ucP}&DT^5i^rGCl)FPaHBbYAPR;J^b zdhuB9r$}mx+}he|h~&YCwD~oz)>MUqJF_g2tv~;}w3@QZv2n!9c>IH|_EtVcM%Zd| zur?zb8KJ~CnK*EA;%bY4a8Ng(6i~1;CZb38N|1(%1n&?R%8Chn^ha@3&ZDStV@IsZ zV}JM2QA%Dt9QUZnag%bumTJkwx@Zw50I#k6v;=OQ_v+G>w||dFj~)Kr9z$en*k;3# zCyBllM=m5sBZTSmoW6M4GHJsQCpm6kC7v1xktb@(<0wE1gRbIh3ptwczWj|exz_2L zG7YFG%ZFF!<4N&YOQ&6PXZ}R@7qV1^n?7Swsn&bG)jg4Y3C`8`l&(XDOzw7V!KLSq1&ipg+UY$5DFEb4V;2Yu1%+#+R3rGHpY3F>q(nym zk8;QRFt@Hx+E;80HfuQBELazCnR~O%dLh3t)F=h$LFK2Wzt1c`x@QXH@V}I{sJ1(- z$l!Q*F8AmkRsO~PW6`7B%%HA0K|T~9EaZNo?O%evjwcu@ow*OwS9xdo#h&`5UuDx@ zFtd_y&Wp)Z<$YzWG#MgF5-*}OToTM7w{GIk?J6yjvTyL%kQky4} zYqtGaJ>JToIl+srvARTPkh+$>B*D5;{jmx^uy?PlcEWJO`@y>vT6p2l5|p(%S>oshLPwPqI(f`p?Y_%Tfsg4p@P|aYOuIa)IUk?=O#uqt)=AyM zN7I=>TY1RfP!AqOUb1unXX04XboOa)7W8pn)5FqWAv|FXR0 z8^M2x%E0YuleQJt^~*iSNtBYBsNZ9ldk-*|QG%#kf#ecgs(-3C+`ee;oM57v@eiLN z=I^X;;CZd zH3Act1gqp`L6vKi_|#HL0(jIUONBj$0T1RACxycSibcf 
zTk9mUmL9|GqmQOb<0zf=I3ws~YGXRC_GwB7s*Ve?2)Z-5*VGdw4csXyrM)WoitD4w zGB}is*>;oB*w2M=k z2u9(De;G_o_=L+YpiA>x*C}T+^4Zk^=SAH`jA^;OzUfkNm2++;D%9Ex7cEzG1ab1* zt~WX@?fV1QuDz~IGrAs~*p5g>kxR@?ee* zzX#W4c}VU*+I=0eq`CaR^h9uQyYObNczNujEJ!c^||q!$(b?-`j|r zz~}5&uTbUY1md_aTqR^3+rD{uWa)0(GS;{=dj$$5UFRNBJnlrmOWs2 z!TTp>aXL{XL`BWrKtCWjKDS)7MT-Y3P237{W%w^ns&<9Bn&pXghQ$^t=!b z9@TxCN33(+7ItFaUjy#l+9Qe<&ul8M49w$l`@TmDLif&LUYB6Hq+L;<1Vx z$#yEU7E>70r4)_8dZ5F^C~-`p9Fs&@d`#YYRa|hSO0@5DwRXx-Sf5UbEne>NYNd|f z>ehXQmpgh@K*xS>Tf<6$98kQ5tmAaRs`|}GWlVzjhjq~EM#~Hb?4-hyIk+qUb`eG% zeXQ+sU9skp+o7K4jaT`u-LoJ^dDm7Mdv9=`6lIiFUt&&}Fi$7cR0+sdtfWMYp=_a@53h?e0mVgxxBE>lom*a5cL06(Si85a{rC0 zN46zESPy*9=7gkY_5h<5wI2=N1w?nLvFkohQ6d(o?V{0KFxaeH3aPJX*m#r#YApu~ zbaJ!jYKbck+ZuFozO}2(F`BUUE*+>mzu~+I1d5Vaym|IvNL5{Qc-{mHz1k1ll+HnS zEPE|O|%UXHpb_VnnQiM~jt`wz%~=%!c(k_Nd=2$D@YZJ+<(L#meoYc!8~n(Nd1HOmOBZh_JF*+1K&6y>?ASA;MHFDERjO9#*r z+sw=Hocc$LpWMF`y>xX!Y`mSjx(~}u81D>3a2!;`BmJ&5oRXmMtDCH^S8A0xYGXn` zvbE~%J_!3Zg0C6+P4!XmQqn}Tp&f6VIkxq1Y~ap`$Q`2X-#KC(&NyYXQ>YR>Hqx$x zI}nNkQV~F3_Wux20C5b{$3j$T)RODFKpd-eKc&Aza67~1*vt3}ZH!d;#a}Jt)$5Mq z{ogO#doOe`COlW)N8YM>ozJS)uF==G-$yi$feCV3d*c}nz@Kn*TCydc@HSNhAGkku zL>(L*0D1h!)@4<3)m4afdu#4Xo#{|mJbiG*gEQZhUAk)?i~|Ahz$00gQeK=n6cq1` zloj4~L`y2_#zPUJAlW4l@K1;3vo)aX`Mb|loG%lPyl;UgO_8G~_c-pPb7Piv^yjCu zE|&*;8b*i7Y)|te(aqAeaDY~yN~rXROg_KHF5n{A0|J&ae{{)^1EEsnaTXIEJETar zg0+E;CivoIx-G4EeBRfI;LeqOR1jaNVJ77}&rFwo_Q?x}_>f})C)r`uKYfT`E>_=#&yvE6r12woA=5gtap>dk|LNh1J`N^}#)4T!2NSvoBtY;pUj;zokQb}P z8qZK-Rt89G!4_~&+ox+Zp87_RWFtnNe6iw?v!u=YWc8%ri`@tfmB#!h0;z9UW%tKziE-Wyet%bkK>^7PC0Uj> zvK%!fg$eO-ay-*&%eN0CdE;ub6(-NAYpXOVS?GRbk=*H~S>B1xC+BHbo*C)sJvaP7 zYe>xSCif}Tk!`SsSUVsuK_x4P4=4Ed070N*IB04(S_1+<`Paqc68H22q+lSU^D4t( z2~KT1k$S{iL_PShyCE{U5wklojO&*(#|E{Z=-^8KInBnmlh|B5+4p@U_MjN(G$_6z08OWDZaY8qUG|j-vBb~>=E2`PXiG{ zd8^o}6^QJ#a}gVPm>BBUH0=0n7;@!pQ7Ht6oH748$XCS8qHG5+M58AUip)s>SP0Pm zkOP+MVXKV}?*Ye^Y9n|oEG!^ek8fmzOoNSNT+m~CpptScqf2P| zNrY9Et+OD{j`9-X($y?Q-|?L-rRtkRu(z(L<7g&3a#Ze_=UPaM!VlhC2a}rwj4H_? 
zDoh{aoy_#~)cpi%;%QJIwO7dn5 z+~^P#nfYPmUiV~&c8aJVq-fAS%G9EwV==-mPn%a`a33D;R>BfVE~1w`56J!qi7b2O zt|C8Qsz*o5o3F9yqetjVnfCe>7ku#oL<)e;5a8|Dy>Zv`s&*NA29nf3?jsmr{?L$s zRz$4QyZ0hDO}Mp2H6Hc^gE_?!#zQOuRK}xJh_-%t$={ovR*zhB`D9-Y(%)q+@SB`kDt%pqdTH>)bnFv# zvKZ*@n*6Ke+kgp9hkeE74m?LpDh+}+Q~A1slg$WKJm_u{5KI!~#4Euo&;{)0RAh;M zzlP#*wXT0GG_Lmm9a-Fn80kj1+J1maq{_pGInKXN8!^k#+43TmSZtx zPu&F-kOXqra{%I)7vmlsz`6lU1~}+3ak`cyE|i{EZwlnGVhKosGN0bs(*iW0?rSw{xiF9NZ{FvV)Azy$)D%8alhbgQD(C{38DtA}9uK)(d9kyy zGl%Oh00hR(Em<_Ds~1g(f9Eh9muY_hzcOm5cY%Ld7q=MC{T46uiy9fGkve<#Cra9h zxyqUwW!eOBL_c2uBz05B)t-~{^s83$y!}D%zktrrkexKC7%+sO@JJIz?z!{Gopjd5ISCgxHe^{;}UY5f6CmPR5eGVrC!{ZQg7_M2S;%yC^uuYI=5T@#}eMO@99+x)j1Y~a=Z&$bAF zc#NV1^fczim8A%~19A1Z8AeJpUO%lxI#9Lh2A8tuw|w~wA5YG-92yE--c^s2@#-<_ zx94ED*Q-G?%&&NOq!~n@1ldF#DSJ@_J*fBYYE(Ql4`sNwEO*0?um>eFgM7K^@KfR( zt?fh%>H#MXZtkwBDfDKYNT8YDERq-R#|Iz>6p08U9S%I|{UTGv!$=zTmM((CZJM@t z)hqRM7wyNL)fMo)0ce=}eoE0-g1OWS;1hty$=IO3h{b|G{*Toh(i6HpsWGx5yH4(Vtvs#W1+p@!~VNP_7Nk6ozJI(cRr5K=?K^ zV1r!sZ^M$B8QPSoqymM!VZ+bQWrPpEORBSj6HiK)U;NWicQIA1r9 zin0tw6H`uF+%rd4)?F-R+^;(Jr=O^~%o8lhT#WuY47Lzjcs}hV)mY4zqzwNjyDHDot$ z5qX+Yi0z_*{e3IzeyWU1QgJqi!6w-7lJPrrNX9y8#f2O+10)=O=LpZe1dabHaYeEy zL0)1PW)Ysyf8S$A>t$Lil22LyGi5y+k(AU!!52c^u3QNt`X5!m1%L#sufrTbn!o8` zT|Z8q67caNAd)Mvru5ysQkpd!k0ir|A@`nT3V(V;O2s;-{hgCaWr@{-@Xx$UOd(DE zW;zZ=g+``q&MBd)E{Yu>s(eWq8>3+X$}zy!*lehpC)fhL=M&>VO2fou^fQgKvh{p9 zmBH?e=olK>9EVW{*nos<)|vgu$>}yHuW`S@S*X-klHY4T*GIt&=O2+K ziPH2x-<_P`-3{Q|{%l-xIf;`(%T`~PE|4AO1c)wO|4FnZMl0Xk6iA`}du1s@%k)2( zRaN8iXSI+4@+&Y$R)LJB%WwvZEgaCWzWh{ zCFN0ZpyTf?OI80c1cK4P|BZ^CoYbK-b8?DLv-NSz;`czfuQzZFEmLP%8kpD6*LAd7 zvE6oO{O?lGfB9u)g#iF_!ld)fD3Kq}Y=7fRz!lEY#15;;*eH7y(%Zvk#O*9t!(3z4 zm33zDmJm!*if&>!;jfJPb{6oBv_)l4R;wbHkK-lSu<~7qSrVrT`++urwqm*ED!xrq zB!DHk%h(fuyU|`2SEj~}R{3T*^6TRqKqip~6S^QgtZ}ukw#L`=@z_Ff5lB(#CLN># zP^x$p{*PB={)bgZ06`D{I2$SB0ER9=#g&sSV@Iv$Ybp|bFG4}x%`2TcpYHmSf%91% zWfGbTT0eC>cpua7o?;We>t*`QhcEDmP9!&6%*N_LJT=g8_j~oS&Hn=W0PYc)^I9~3 
zF#rZxp-3cv^ukq7R&{EkW77GyPFK<7muB1Hy!@k(x#<2Extt#Zur|vY)OU_2%&mEB zg5lkd&jVCCwZ5dJ0!ix!9uM6;diIrd9RD}Fv*?-42xB`kN}D+|$n zHYuB9nHjLu_q5Odp3I`N2rv(V5dC`dXK8d68Ca4EW@#iPC3`N{MZ7km2G#+>3O=XR zCr9bqyW^KobCLm;^eYMWj%OjV$P_1LpQyyGMV7(!vxs-CPN+yIn*!0s5>ydIj1cd* zm95l5*3xM~BJbNMw)-atAe5bE(^8zGYV*arCIp@{G+xT`JF5|cW=%lcH{Rmin~n=M zVDcXdVN^Cu-@Eq=BhSNv_uim$p9E+kz6Z@)euHX!Ap%xjQ}*e|cmzi06k&Y$J7y`~ z*jho8U+t&7i5}5;zq#FKn5RZ%l0-XPQ(F)dWgbYq@#-w@Z3Pn-TaEz`tC04G^;%6} z44m$_^-*p=shQd?6Q)f^>Hz~6#*@mQ;SqAmiPYaxGad%}qC)L1(<{cds>ovwef(uVzwCruk?1xOnex~>13Rc!&D z9#CP&4jYpaOY;ELrM=E#=rWn7p1EU<+eB3DkuHN_gBu=08dKLX^*`!V<)S^lF5)U$ z&W5k8zyxofQ}Z-JGcCwJxswxoJo@sZ)rBFL*vG845TDM96+`qEF<;_GvjYY2GwPFt z&$=0bqTu0S89*c3r3S!h6w10H=*)tG-H(rq4$ZH1_mGMLci6!M4$|1K28+6-RHtp< z@(BicN|mUcEH!4h?NJr}Hm#S5>QAzpuQvaT0*V`=_~n#{vzC8FI(+0=KF?nP^58Mv z6^iX)-*!O(^&D`08kaZyd679H|v7Rh3v3~XqK>9th@~l*b1KV@DC6-&H=|;5)XL&dKNk~ zOMI@6`b?*O8K%*et2rA%xCb*cjA7Is1TYUj3Et*(l4VT-5FwEqr7nAdc$FL+l~1#Q z+qSCfyp06FlWu&q-QQGU2kiRxfrsj|KY)3SzEfMF(delCw*cMG$CVU3zF04d?Ktk z@p&c{dVZE~6f9ffbJ^@piJ|q|8%u-w4Wn2L=bYE-pS<1&9UrYK%TzeoEjLXlAzfsz z*U_uW*5b@y>RBhl_e;ffy(dH-9Gdo1OH-L=AC>5yOB6}V0lN8SvQcCWdn7-Q8v@YY znC=`aKZAiA>vFanir(*`%d*%H1wxpkJ9wNHFT*k?PNfV_lK|mD53-w|HgEd6C}v|I zC?9`N^G>!8xp({;U1)jshDi6mU!vD-sROR#=X%oWXEQ>em(z6Up5ECFG!Dh$%KUfO zgN224x>oB#zcF_Yf{mtFd0g22g)1Z$qaMNqa=13OCSKkC*YB68}&Q9%~rWx83HW8bQ1+`^*Pq?+tB=aZonCv#(I8#wgU0XoMe1T zSXkD1pI)fOZ~Y4Fn>g&6`^(8HpEbk>2_Sagg!)JKUqt2Xi1zA3`}I)_bhBFp8`c8NvMm* zKXV2AWT<5;byd;mZL-JLGU0la;NtZ~ekHT>&ciZiO`QD|^FwsVpEj*BY>yZ-6b5)^ z%1X^hyYxKIx`Fa|x{PKpNESx<8-X{bxd18;xet`Gti&(ZFfD1-KVP6XCk9X#3DR^A zHa#{+g~j$mBoWAAy>JifDDUU(j9aOgk@H0e%-(5SGd(kje`N|?^siGp*kcV(8a2j`ax}DH;FCqf(PkI{k+raxwak01gFiWuLXOBHxf5EA0o^~fq%k}{^0qXbwQea z)pv!_YKL)v(YiUj|92Q?;v@G0DtPdqKZ6dlXg^=r7k|TQu&wND+TYv+0XBPw)mdwv zXYv`JvLDs7_bkr1qRf5MW87#3+$95}=9_Qhd>$G^kI zBdm2wM?%X@vHl+TejpFyz>nf~$cIi?3YI?7l?%LYb%AR!*FSM*)NZh<@C4){LAr_J zdzPr_KUPkyXe}z^3+(e2*(wy%C+4E%e;Eg;weE>EP88}9jL{!)+ErsPH_`Xe&8MKq 
zh1<)$ikgf6V6B}eRatAlQI^$PE)Hh0nv(vcz|v!%?!d4+>8se_ zgt-Q0PlYbIPpFgtY2>jrc_PN5eiA|@tE9zCihwgratB~52G{dr01)Hz%W17f`m8mg zMT~SKh1fl&q*SmGp&|kXM#!{aYWq(mT2`Y1-zdo6ijJOS&TQ%s8s9yu7m{4GR@siw z#~Nr&YgrJE3Fr0d#PFSC6N~|bjxzw~T9#@*jFa|6cl}a%@uCdhV_04bS}=+yLztTe z$fM3(X{jDn>s{vadVl93m%kR{$yl+&`FkJK4MWzH0h?9@hZTK)?+weAk?Rri&Apvg{;W;k=^vL0|I>Mt zEp{})4BH$24nUusi8uSO_;@bMqHM%?2`k}ZWhpRnw5e}HX^Hn(SXJy_d<;h z1B(#+xBo+(jDYCC45u2%hV6sT>`idRmk^c{8 zv;6RMJbe1T%F7=seev(&gF)|t__1zIxh9*w7T)bN?c=*uOV7f8%uNtA_9yVJ7^ZoL zZEh!ON)W^&tQ}zs>3}x4oXz`hB33(rs+sn`#pz)RT*yV{-*}aM0Rxq*9v+fKv*s=v z-s%Y{t#IB9`QAeZJKB6WZ^S|T*Vstq)56cnKQM^kI*`1G+`JlyGcE7;9J{2zy}o(V zOX*A#_GRutOLu%#ScqpWKiIai{Zapho3$YB32eWZ32c)`!y@CL!$3ZHfpr*d%|Ydt z;4w=p_zr|NY;N?X44!ECzPOH$&aMx!q-}2E@|;@*ozEmD6mt>L^s&Lk;T&vW0=q5| z$IM?;*;+i|g7$T|_LxIJ>Oi(G%1clxC)ySfJl1_RG)vRvF7aB?<7hUhzi(`z^sN zpq>k)EK}bDckvIyvQU-)Wd9J)OOqw;KcOdfPBM>*s=;D~i!9o+3A%xc=diKgCJAJl zAJ#oNS?43gHoWG)hR8u{HrW}3K8++-AqA?ZR(T}yH{||0zdB^}Z=8j>_FYiRe&uKx zyhPgM=(=IxHW@VFY|k7`lTINz3>AmkcsQ2b*Qzhsz%7b;#J~k(19ySD4h|DWq|@tT z^@?9>ph9?bRiEp{xBRx|TTbmt=X8&e3;OX($tb=w1*TxAc&r+&3?L~ycn1ZUFczbzh-Cw zza>!_i59l{kjm-Dlb+NSV@N7L14XxjGpxC7R~$666b|3u5zCilw0e;k@KX zdc(Rg!5eZ#O0^0=pa%^C209tzj+u@q9sE~ffh6P_eA=($A|mCvnX{UI*sLv-NLWxG{Q0~`Hn^D|AI;uP^xs6f%KnqQ+OJ}z55>!4_$ zGy^1G+rt|`8)Zb^7?)Ud7J_ak(Fuy|vY^?PCohAin?^Z3Krt!x`WN90Gr@VYJ z0XpuYGlnQY3sKfQ=?0@ zocAXkVjpCO{KdUirQedq8VZyGQOch`UPP3|N=c#^K$c}-00%g6C#@)=`e(Oa^8UIM zalyY1?{mDNp&@EO29T!&3T;ZL(uE*Nd( z>)LquLl-3voQ#lAT>J^xHJ&cl#{tObY5?Ema)xK>;NU0lu$s61qVizilPxz?>(S#Aq;E zICIQ=7w>stdG~8Rq)y z&kYi!WTEhql)|F_0g6`1_AJB&$o>BXZLhBW+p-PM zsTJlprbsQ$6oEa#QmHI%wLpKe18Pi%vtcmDHgRB(IYx7-Yy#ygG4bGob9>qJGzKb< zF-qsh+ncbrz$_nOz0IY&(~T?k@w6bopzrf}IYXu{EG!(_t1+%ERPzLbsOKrey=~?9 zV81>+3kyBG!2lt9dd=-k?iMmr!7S62hBM#`q~PkgRWlmW^|v(#!AP!Q0}1 zS5XNT^Q&7?fetM$Dk7s69nY_bpN$26-TCB7VP`v+L$hHYuhwAO4_0BCGqog@*zx@# zdjR&4h-#2$)kMXa{z=-~{Dj#GJwH20n7XNbaL8fQn{t0l}hQY&{hE(LWRYNj; z^5~xOh`6@-_$uWm1rVuh{2f2;;B+D5TkFY#XJ*t%3(zwM))&os3ko1v3=B5rzauno 
z^}PTC$JF9fTT5gl^gEQB2H-^U7yc$TNb;ZzjflJcju$M%Gf$EMrQzoU6A2UW^XSWI zYwPA7mCU-MbJv3p(IwXhCUYi@_}Al#lN!sUr~`NI$Xx(RzYCE1&|J<^H3|lv%`h@O zZH(p@fd-Ohhu7Cumxy4QDgZQMHD|6=44M;^%|k*7g=nh4ueWtsGpsWpRYp+IB_+jP zE0~47%>|BUW9DoK`ESU_+&C0P}5Ng8uJvp?7;M0O~nVY?2@^H2N@`P;Af;P5%sP`4DwF#!QI8JH;m2EnjNtubp#n1J9|9tp||OWd$H zjA75b?jlK&T&%psO%kU&qRyfS!dJntXgeK(L6QX7>nX7lfT1jh+>uZL7U!EqgDx~B zp2V>C)D}WZ|8kQo09Bq#T!r_(g%hIu!qur%Ky_v*>HT4qk+`4YzM2^8a}xI()FcUG1i}=?*ms zxd=Il#a%&yim~KBo`(*~ug34yqk}Oe{Ce5zX|R~@G6N4rvSn`XdD&z7NP|Y@j;4^q zX?$ukMN1PtO(@kJDL6Dc%lrN~BqcZ!r9{6G&nCed21*Mv{Takz?*#N5br^|bf~UFnY4b}l)Xaikv(=Ow{?j<^9@boIH+)`d zE#^mDe#1KFwQc=EC`5Zrv}+)%7w zaNY_)2NV;X#qNhUW`|KoC*wqwFhDbW(no?_*_iIMp-(3o1li9AxuIKYpX5Oa+UE@- z!ncxR=IpG(Z%A2^RUmGB2i3bMGjb&$5$?%iz@Sr8?|y#r10|{MI)>SPXn?iP%Q{-F zy|X5NzZ1LNmFlm}IYvM?VAb<`E}0QU=D|u6*Mhf)Kc1vApDsE1XK6IdNbvZL0j3flw>@gsvnH zmn{<4Wx#INJlN{+0IxG+wR?!M)S9Cm&U#miM%AG9N7+iq+Rf2e|uMapzDGopwGjnjrFeafX z^n({3(*#E=MCe)h7|E_hH&2rXj=v&65VQ0S(T|U(-=lULWPu_4 zsn+UsaVX9d7Q`D5cDfnx1=ulc`dCBcs;a!{ZqykhTd>(qlIj~!NQbn~(2{ zP!-A-nlnHx#dK(l9!d7Cs^W>Y8Iquk#;D2H9gxlkK3l1}Uf3!CYu#Pmgu zNz>l~hfC@dhi&l&MbzRr>rw4$=Lst~|%LR!RLo$nhKhl)=wd4Ig z-d8vqih388mPyGS5v(_i&;x;jEeZ$17R5m)MIa=BMWX%rX4KN>K{wPpq2A~yc#1Ti z%1!x|I4flA2OzftaGMG4rL7U3iJk~IJk%>zvWD~JuXgL|I z#1d2`W0tG8R781Vq&jEC)Y~JEA^}SMgO_a1cbTksq0}}iLCXMw33oz3&;EXz?V}Dj z-XMzS?Uv0AaHSl0WsZw;pyVr(*ACt*pkYuefl0G8c5D1})MLkau^s%pCP;?({tD+J zK@~eW>p-Fh6BCmVMeAZjOXdByPPMDn{wRTkHz`E(PiV9Av?}gIEV0pb$Eg}O3N6k-b$H}djS1ZxZ&Jle1_}#jBV1RZ?TcppzZakYWwZ-~5UmolhWJ@kRm4f*5N27_c!m|j;GoiZ$&?HC zyVBn&_#+QU5s_%3fX;SbKLG{gpEfXV* zRC&}-#Ar~c5H6NSr+C7=h{R0;zoyL&>7@euF}_o8_CBl6G+a%d{h%yIH$qZ{T2%fk zoFr-gIo&RLb7uCleuw8i`Y0eC*q=8aI38qV?beK@E=x8uCa)9k1t?d9-YQf1`ASz% zE1)PlI~Qn>h>AqV@nX_N{{;yTLlnS>n$K#GXb6w(mW6vCr_z67F$3#UyRS{Cp!xX* zP`NG6`@`^r0tC|*sDQlaiY~b>8#x`lj~eW(XA$~yUz08Kp8}~MBJs2=B61T2!wNHa zuv)LlW@pd%0CP+0)~L2%p5{+#*o)2))5%|=alyrfCjmQZtMmjle=zIx}Eo@;8FnZuw{k2fFl1D_JtP^ZNr-3boXue 
zt$}oKX2e_aM3J9Xh+fjV?=joq@L7-la8MZR0Ng*7p{XrK>Ro6*FJY2{?RpZ_1U`j5y?KLyNmQW`vM7QR8TR8voJ1iU~eJ z3YzspdoNafxRj+?zsc;}jg@0GHO%#nC?0tOB@i5Y%QFf7Fh}--po&3_1Ko-h$Mil+ znVIsyBFbW7!|YPfG9g~a>PT*eOxz0+J7y0m;G$t*lhfMWHt*{CrHkcc%P7Mv`?UQW z+WXUM8mVQ)f@GGmIK2zGCIW)^2(}5DQT}IJuzNqsFX)mtXk(;uU?d&5gOI^|NWIE@ z!#^WTYeri8EefR`O%;=l6djV(N`8;mpq5tAek#-Mjn2e1+ScbLtxVUG7w9g}joI2@ zj(CGiPU57^V+$0p=SOl{ZFqI;AY=oZrAo}^RnXBBMX9kLl+pkCfvBaCcx0k+>4}bY zs#$ocpH{7ruXGS)1Lt>)lkMn8aN&@t%Y$VgO$nxDg1!ikwtQGVkROW-dOLDle%mwz zam?bI`Uk+v;UMJCxPLZbNTfp;U_;u#$j3sWFj%VkId3@pK1ztXqFv=6pUcMM#N$H3 z?tXZEy5%S(zE3}hm{%yu00ZJZwA#yA7!YGi!5Y_U}dDL?4)acnVK$n>%hNnStI|muEtMsfsR|jL{MAL ziKZ$erW!urP$jBvRFpTzum>Tx6|Ken#a$BzO+pF5O=O)wsPq)W^QGJQ_WG2<$VnQI zGlY$&=91IO3DUg{{to~XLF~R{ibppe;f~NO1-b~GCJt~N9Zsu@B@&ivC#tv`X$*5c z8KZ?*|3_x5K%iVBP;}{(q_4Q+N|)EZFHde3Q76wm_Z%;I$(elSGoRqnOD~=4Z0CfT zNMt^u8fldGP?<&`GJy>L4T9ozQ(EajUqmWYL;?kb#XEyAkFXFZfOU z0{-sCQBFT98HKl_w24J2P(rs$z*qsKDb5LtMjRH8NM6|95$AIkI+Cg@&`kugkU91o zkrFOG+2ysTxhxnALu(zvW;DfQJYiIL!xENh*xXcRmkfQ6LTM@P2*3e1%*SwDf#>MZ zK}YJ$S1F7Bk4W3G9OAPMzB4RG@dgp-a}!FNv!qhEfy7TL_E@2K`$Z0a`OmX+CrwRF zarM>L^1uTR@!t2|F)gX9nJ^?$Y5#mfJtnL2R>07|VeB)V*LJvFRaI@aZ{KO8*< z1x+3(-AehWeO!M1aOcFozU%T~E;%DzyoW;VJW%FZ3GWSK_Qfn11B4?uaJArt&Pati z6Icwq(C&%UgU&T(PRAR8I#T0Gv2;{$=Nl8;aGuMeVcmONH+0hd$UP5yU*WmnoBypx z`?>3Blb7d#>x1Wlmr!_~!t<5&UV-Z>9IvQ}L}9#^??T$3q_A>ch%4Dp8Z)B0YJ#XKn}R_@irD8ir< z6WD^TY>i_76&({8kOFRO_ay2?w-88@Iee*OL-6q%5*)Bf;D)uL98(e3Q@E}c`Uy|q z`{4P4gs-0`JV7EM@O?od?BC)0nnVn-Bz*8){Ta^}nm9b&vexso79w4JOjG<&gqsSZ zT^g2!*5`!~QSB`$z;#s=QtY`B-xDO0lDEFrBRR)b8sGiycRBv}6M68#^$kwEkJ+ML z@-U=fKMFTh;xvcSHB0&WC0f%cYX_i&CYOCzIIP*dy+YS_1s}Y2h-=PGMfZ|2enBXj zqlF<46`8k+RUtH)tXkSJ1Oo6T=CD|92fBqot_GIQ<^w{&d#_5cY(bcd;b_OU7itnl z;Cnt=d$5VZ(~q8O{?|m3^mO@aKM}s}>*rob7+&~Z*8|5j(NLz~VbSjaWnX+FBx2Xb zqIh#4wTf<+jCWmdeZ{h|0ykeY_Z#MsM;_(WQ%~dj-~WCC6Yry5RMS38=CvV@QWD99 z<<0eDAX8e~P{w;+;j$C9Yi-0TIJ&AhI4Fn>AH_!G%=N&6KaJNu^Iuh4Wnk-!K zJ%#V-|21hye81Q!&}@l>d2X}}iSLD*FY#Q7=ZfgFy5EP^bX+GiBT0dDiY2s7;$VsP 
z(P&JnWa%K_E@6dh71fwi5*=O^X(D>wN)*kZrZTn@ZbRvshinIKA-;Z1O+z;j$OXW@v-yA! zaLHOv3sE=B&i2rL57*KdyrLf@+Gl(up*hNPh4yz??=!v*&!_ml(5K>h;Y#1pT8MN) zJF6AxRgy>;hdIAA53Fp{&UJ^-ksq4ql1ahaUgP37aQ_>>$oc2Limh9>Hhvy`)I_Bm z51T+tZ*rrJ*qaiG5eibEjwY=P{~^^NU&mIYkX2eDZx7~PVuQ5 z2UxODL>&gTY1lYF!l0DM7ZN(qBl0lOS7BM9&@3=I*m0t8FSP9IXms4SRspxp_5(-m zBUmw}wGUTnLGFf;P>vUB35EpRQ1F{hfg!;}|9e_%G0$D=wdnofYuapOazBddaXERE zAg-zqWxf#OT$c_wLU)ssLYFylL(OaD;(!}p-uNYO*IjpU;)y47|NRd%JXRl-Q7LTK zCXqs+XwI@pCF(1$h;ls4;>DPvqKG6V?0~Eyk=R-YMWHPPj0_0=?wT}vENPx7kpj*; z*5ezu4f5(!e0`4zUG+i9Vkb{c2>Js;pH4}DfH3c|8@q+wKo^a4WMIZE?Vi9)=yU=( z2{>W4A39-w4>ycJ5)wQ|Mj- ztky!cxv4eiEOP`FS&7gwVs0MS2{o()q!N;spXhK<4CN{xee^L-Ib|(h``XtUAFPiO zR14p=$-_2jne20$WDGf&ybi)BB|{j>(n!LWiaD;K@6HLOMX{oS-aj@X`N*{?E;!DM zwbP#?;mpH4{^_Pc-f?k?MZ>xZf#ZnKbVax@0wEO2OgP-4Tq|Lkwuy)o30p15(7Zc3 z)RESr6VNiSW6_b{nuU*{EAq|?SDxXrY)tzgTt{g$lcTi_LmsZHeG{@wTd<=vup*jD zlK+JDUh_PBzhfdiqLonb&TMuYO>-z?qKM27)(xeiC8QuIC{&=>f0g8W|J}GMl*{G# z*0;XHh7B7y>#UdddzQz5dct=r5l4A4VSZvGj>_;cZ$tB<1z`vhDE?v+dYYD=;S+m zKj7oc!AtP^b3F!=0?#%5!i~vF;(A82AbOOR(AO%k7P^ts+aeA_DpVm8Aw%o15GAT} ztUiw!AgpJ?nnV!p)AzlxO9%!MkjX0Uesp`C!48{fqE(?1S#lq8-P3gb|NmZ1hM!izkL7!1ps1YricCJq(mi-$mWD6IB2!QOAhh4WUbHZUzXtLeO*QdEz*$S`Z{>U z4MXHzPlmPMWbxi}>=w~xMP-L1TRWmfmQGe3pUCrz2RkP@GQ@ts*XWS)?d9mG*tIg; z%p6T@qeG!)5w1vzc^E3}-lw!<+c?xLEg{OxmDY36grSPppqZF`Ped24J&sKrs_X;J zf@O|SD%7Y7I*d^X5K5D)@`O&J^x$-~J|tAAV>o zWA;G_DO4(JQt5pFQaUAGbL*+TZlh~clxVUBjuaH6AaDduASp;i+C17+wvS0v)(=w{m;7EZNhQE7`4o-0$f$JB| z#EvU)q%0{7Btxyk+`Hx;RZ5_nV(DO#ASjyPq$*Zzkbz|F5lKGq^{Lo?tC%2xw*c3* zdm@$4sXmY;%rOqy3okr<4TV*3O;2)Tw+%!0gk@r~b2My?A<``D=f(^qO842HiM)*l zZP1LPY+S;dQ=m(mnD4+>cu7Gj1w(_1|95`8>d`aLJi{4hyp$jR_{Xu1+lNga^}Rnp zh6`_Dt-OnVfpXZ_d34`|HQl6?QJ#|{W%w8dqc}QX#dE-u#rrYs3zjc%*n7Fdp?kU< zwuj3eOC<{iC4T6S^c;a7l7}&|Ij+DlR)f%NH>;pu@pMM z?F7O|n=uOxE80p3skQc~T|pQgZ*rzwiR(F9&>Ir5Hs3Ych?PfW$^wPXe4V4E$-@sH zT~CG%bQwiO3S*vjij}REWm7KsKCJs5<0mnJ0-^R|G!WMj9J40D>4&v1!@dS?qyx*F zbRvOVgN{6yGjfGu_LFgvMX~Ibsj9rrPfx*005ht)iQSi>eOQVx$iNpQlA1`Ck14J= 
zEB?!(P$+Qq)z|jJd}oRBe(;qZYrWSX$Y=F_YhOt5=L{$ zFf`l^9on8Nal_hcUReD=mkrWY8~jkqF#CPqgqG_n51wyn9A4<;ca8b1s7;#?c|Gl7 zK1j(4nv=GITBJ~s8QYMD**wSLFRvNE>pV-=B6LZojzXsqhy&cz=pd6B5jh5<=&3LS zF*1?P7udcTqsoWsyA1caF8B#aG6l&bTyTP9c@wgW4T=2ZCqIdO>|RGTvZzKTtE!#~DcB&5(eR<8Da$gq5m<7$n>3yH!M6OF1nxB@>kT^ZAqCKyc? zz6mc5t3&v%EXlWWT%!SjD~m*8NF%aNMTAlo;9?e%=2ermE<_1|NC#FdPVkW%Ix>3j z&2(7gY^M>(h3FLihOlEYD0Wt}!wn5AyRU5*?s-JyVYHC2;iu5$=@Us^t#@b$-f(_% zE{$9+$Ln5q1&=@eWK+iPMU-n6vn3AzmCsBM0<7dbFF9`b7zLnce*q?M%F%V^w3Z;l z3JzKeF%_vjzqt8+=;N?!%Oyf$@wKVS566QK!xtSlEN7pBR6=n4!IIBvas6-_7_DU%fGSe4J zs_ZdZ#Z&Z;L`iujTsHKlxnZAr$2iZ6$$q-!fsFD^Ono`m7x)R|I5(P!#Pzj*!}GLR ztQe|m`swK7M^zzYaXzKBq3KuwGpl?Im>(z^#`J@~RU%&L0ws zjEoDNL?A=JDRB(6Wf##6V4Q=-ac)XeN2`)>|15)Ax)0VlYTselp$&luTbw!`B+`&d zLo%Vb;e64w@8ws&`Zb^a^qtKayoXUv6qN|0(!N4|3KdqrvKLOK)29BM{hB3=!~cqj zv!;DB+PUq8T1Xfz<@uTvyoAK_9lWH2mvHcX2hY>b-GrlG4>c0s566irS`(3oArGTL zNFi)Bx`ZHRGEoal78v$0j&;-Y&2hBm<9jae`O^{3IHIFS}#DwAPb2pv2pYE z0s&A^#a_-T2$QWSW~Hhibd6LpbM+c5nIc6oTh|c^$E+@$(7ccmqz5FsFBe>NYV(-& zjyvAVmMvSFHh!(F1*AE&tAXca<+tp|*Q-pbW-tg)p%7g6K^S)!fQ`p%f{P#49@ywM!O(k=rjhY!DL zgmaGR7!q;Ova%!4Nd&SPh{dMy!p_h+7_x;;6a-eru9$w~^eoDvvJhe6N3=E~10C8c z9f6;KWC~I#!DVN-j5cY#vvuoMKK}7fHfQi2!%R-}YVR`$@|m!NjVd!`h2~vRTtr2| zCs;z7V%9g2aJRG(Ybz2;saePg$;LH)jj;HrYxbF}^C(;wiUmrGl_e-642%zi+S-RX z+j57xF=fdxNI(~kEG9ChladczKf>$JP0TQQOHcqhrguUo5y&p!H*pNKX-g1k9)VJt zJfi>g#TJ#fo;O~Q5Vk{6X@THSG-(N`bR2#PQYlz4s<`15%?tea>}NkmHk)nQ_`QT` z=Bs-8#250}$UJ2=C5%w6fd|(3Q5dX=S~5O^NOQ2A&_wwfj$_IQ+50ph=vpka()l5hL{jqR*N$@g<*6AC&~)4j z?40pkEkY*|2*7*e7-Z9qu<)BHT^;BeIFYH!+RXpG?gEJ*isxFiDT6D}z7Pe{(Jg+H zX+bh2IOAx+{wtd^o#&r_o_p@Or#XZ7Fe+&vwemm;*`50LHZ5rdU`)C;dk9rrrH87D zF%v0MbW~a`njO6YktV0YWQiLylxdOR6rURn#I#ayT#GoQup+NSn71^YJPet^EXV^& zH1X(W5c-5o*1b6wU7kfmiHyDrm%Ma{FT8CSw|#jcf7P}lS3su`$Pa+OY;=%KTePBX zG><~K>@p@`WosYCS2$l3k+LK)l!&r;jV)QASBQR6Fpv@qq$O{@sClhR@4BmBD{D5G zu7T7h4}kF<&xifYOM^;EeIlx4F{O-DHo+H^3U9~GpRoKDPME9VxP}BwLpRGzWi$_0 zo3EngPEm+(QLBkeOD6YYuH+GYKSZC6zQ4$n6_z|Q^AT3>A4lSOlH=DT`R*N~EE#RN 
z_`CxxBRd8im6Q_VZs7lbgV9A5XMzH@0pq|%;Q5IRPhN3`v)q+Jn1(Uh7OP4!PfaMG zNlowhNUSmlOE82*=p+A#iTzhXKtVB_R&3e~e|Ro_rcI{@xcu@m2%&?a|gfrw~w)Oq!1ZORg+^OQ0DUjpv)U#Ii*RH}Fp>5li^OCiqoM{@=KGkf*{bRnTl{cz9_4BH##dsWvw#J|4u5h& zg5N!wW5brH@yP?gcf!Xzjbu5ne*>>ogvGWpaT&$Maiyo7y? zZ-(2RzzVcUHjB3X<yC5eBwnacQC zlptE<8I7qC&7%^E#TFHT3h5#w5g~h73UED1BBjXZVd)~ln=Wwq>#xPzbf@n`pBY9AG0)3it3)1| zOolhV`Bv7gyN~nsPelu-Xb}aJhx00HOr=7fi7XyJl>Lz=qLdb>D8TdtQD#k|FD_FR z0wC(MV-otzOVaXhm~%1~qHv!qi?(XmVi4Kttns1+Rd_t50|5jo$`=U=3e$0JXh?9! zpAB>MIoT(Z+3nAte*3Ivp(sXGb!5}1TIe(%NNmWXHe=l|K>z_NYRhS&uu5`3<`a29 zi9<0ei><<&MHTyqcs?YPf^-^QdYs^}z2p6&mM-ncRtH^yYSCAj%oI1@{8oPVyWawk zP6nYiU@7b-D$8gFh$5M2H9qQv#S$)N-lht%G$F@2S#s~)k|!)!w&;nwKe%8` z3^VH*^ell$CAJg@i6SsDUn8M! z1Soa|lp@kDq#z2vbu6ETaZX1i*+WOQ5#U9ihdz)Y6T;(~3B>KIjAVdetdOCEog^uY zk0va&5|)`aK>)5RNDnwG7mvEXx#t55Zish=or|6&kN`Fpb3crx5w5@N#6rv}Ohp;$ z^A4dru>GSBW??CGM@L6%A;}c%wNh~9S@GS)=bn3ZY-9C0W=bfWnwsRMn{MWR{->By zB?Tlsq0LK}>t2{hUd)AWOj@?qVSE+=kt@I)S(~oRe~}4FGe|@-4v`=h4kqcSNDji% zI7BpbX@r@XVzU-^wka(^%d2mu5^NFv2M$9_6LVXZhpi)XdVCy1}s4j(e)ohj&7`bg6Lj|K4l!= z;fL*`M-_T8nuIZJX+n|KP7@+4tUMC?{Uyzw%<-F~b(Fxm_>G1Y6_bwR~ z5lIAA@Q4=BWu;BR{kCtggktZfRPd|l4H}U)V!fx7EJ~9LjXy-#D|h28FZ+Bn(Kg(R zCCS8*>C8518S+q(zb3j*BIkc;j+q1}l@M?Jx4#eq6*|h7;~bO&fGC8 zsge$Cf?|p1`E16rV7@1_E_9SY6evbzd4g0*kxIk)CpsLm$L#O+8*jXxJMa8-%rl=~ z5JK?$^Urh1C71HhL+h))Z)_+u)f%?21auU6Wvdn8|CWD!{-K&l%lKzZtry$8Wob_( zniPKpdJ1NY~9-}@FH{pg2roK7r+(77m2Bz*G8r+MWo zFXWL&9;Mcjec@o3Lm~_zsNy3!fJFai#-4tU%sZ&*&&w2)5{OFgEBjwM>h^ElKG;)X zzMY;(IA`RQ8g?j}tcpv6%J$X1^{NOFd0I_5Z$Tj}k0CLot`&&{q|>l`3_f|w?)>Pn zQ+VKk|6{|3O>Ek<3D5Hw8yn;QxA&#dmQ~fc-&|{-bL&rNIt}5yTwwV3sQ1_86(^@@R?6sl=BOYbm89BtpP% zZ{7^Ic{9>3zkiisf|r7{u{<>Mzmn$Zc2xKKt-p77s4TYH$LpEkA=>!il=KZ*JC*R0 zYd(zcU-Hpq9`t16v!DGep7D%l&6_-i4X|z+Igf&rElPd`XN%LWiDimmuME`jAF$NP zVL5f2~9V47DuGn$Vq`D7&kY5`>V4G_Z?C z7&Pu(_nv+6YM=hbTl5LPBW83JF2(V?^1XfCxrPXlK*R z#F*$4Ngy}hIyjnu0b#t=W^Th-l~EY;`A+f*Y+q4Ha;!osvjo(MwfwuevIasR!y)kV z8~+^_pT8g-iuYUGdh4xt(vzNoPkriBd-1OE70|sl5S+$6obI-uIn|2;V0EAScjuDY 
z1z}`$e=>I!j|IVz#9ClhYa>c5!>o=J7+pw+osvfYx+s=M8xp#}=sF+N+D2MhJEv(T2(r0RnAp?{UlSm4t~T5Kr_BsY|1=4=X9{B@k>kq6t`eEd{FMLsTT8 zj_e|}+CPNh&|x?vOf`�={(ii*e_c8s}vvjz=!aZr#uz6+_JBo8f9j61RS)! zxTPFQm0XKw0V?5DGdwS(RKmKFsf5NzLpSL8m?}#mTNg$cDzoKe?SXY)OcMBx2`Q$r z;CKug+XzY|fzh=>99_p~TSlwI(srz*!U&NNBdc{nAN#%cjjmnBL$=_UI08BA3`{qU zk9WBbx)O=ElFX*I(pk~mSS$HTSqYdknUE^7yaOCq3&}Bt17KA5`sINXKLzUNeg zFj}sWuNvHS*IhX6v=`w|{^U;=pi3j+;0=U<^&u~tOL4n#brmUb!NrU2v5mOTNR7gnCNCO%W*PD_@r$o_p38S_UNvlNCbr2U; zBy>?-T>&@Uf$gtc-h;OAm^cFYl>-LPgQIEZ0B@wxSTh@oh1C5AdghoHYN<160-BRm ztuzf1SK(m57!DXyL&9Ld7Mgpi|#f zMH<%WZ_dJL?740$)p(cL6q)0y8q3i@N!l|jBTHtjA5b)RRRU{(+$@h2#j;32;}sFR zAPYtplpI<{+X``vT1MM3MrL)i;<{?Ph_++3R1&&~NFlxOj0uW;7$%NDyz?*iPV+ys zM!(mR%b7H^nkLMZwcLYVWyq>Fz{ER3BwEgU;HBW=R09keVBmr4F8esH{Q9}eJo1%? zuIuoIH@pcaoNyw3_`~ZL^s|5IKv|hve;3O{AFp~_&i>j8$(9N*UVL;ZVc)x(DAu40VZG)5+)E}PRR(hnzw9&>>g8^gEI1B~}gAN#Y!k5l@HSYVM6R`2nBbM=>SUI@j zimUMQm%kj}{`P+@%I6=t!R01MHR)UY!wA9=BL_wxmw{UeF%TZR8;fs1Esim%ac5@Dh&5DNbPcw~I0F2rEuAmh;IuwZ$moQgK-msmzJ3nQj%X>^wC*CmA3Viy(!kK&$c6Gm;q zNUe>THeu98%#IR9Eif7Zv$MdcOBjuSwgcKIpA!>!Hqskk{_d@>TB0MjESQ*j+W+#u z*Bs>=eoy1zn}NDc8p5CWmj2#qp*w+Mi=B9?_xq5(~iXM5C7<0JKnyuM{Vgb zF)pk&(d(zcL(;&mvvMU=o9*MKd$v4!3uNBN6d-p)f@8;Xc`Twn6-Ru4emtK0=l6>DG2NsK4D*DW+z+l$}kmDV!CQ2!N zA`|tZD-`wmBcLjLE98+hVHK#wRmByR#$q2Ozayy|ilb~#Ml_tVt187}!C25;_Nxjj zL2pVdTKlStLNbvVF_L7NhlH4bn|{V&3hDUQo-=#ZA|JICfr%iH51%=GUq^guHgU-= znCgOF*9PKqQHt)6wyc` zAw>d1OOddW0!xx%Op?=K#S&wL$b4CHM@C)3g_j3h_^t5z*?|3mJb(#_feaJ>H3(Cr z#S={-p5=%@&M}e&jO;T3M|Q;+dY9IoQCoI)&nj533IGo9O^JzkN9sxhjyVdL-Fg$w zJo9xp>Zk|e%rjqy>#zUua{pHgg3)M%bI&~&kA3WK;i*skA4^Ui0C3m_b@Mo?F7l$r zDw}BhHv~jnQe8ibg5AVA%nE^(AK}th!-^QDSV%;Q8A@r9psK2nGs7d=E~4ugqYn^;&jK^s8C!QSw#^W>&j8zJ z6L!oJW;@`@>w(W)QjV~54v&BQ;F*0WZfd+P^3xDIEZeKyWM`@`6>Wy`XT z<#&I}<3aa!Si1&TH$_-K?XYf|ux_2h+9|@?X@}`G4pUPe!y%y=I1B~^?;V_Xk}sm- zlSMN_ROw~rMe!hT&KA!qDzzfaEoBymxtq8Oq{pf!M~V`Ava;$iigl3!t92okNsMNt z$n%gGNF;%Gb8qQqn#FSS?S#)<(&EY+;_U$52H-6K;^G{vm5EjF!a8tz>JON)A?v>x 
z;0QpwfPw8~l;`9i<(VCWlB~X>u6#_Pr3Q-lTu}(6NLH}lGYeJ6ATg>EClY(|1Pqul zQYm%g8BJXozyS}sx0KrWxmJEpDdD1vF2Y3@U4)Gr&%i0CoPuXR`{{V{lb?*WYu7H@ z|9W@$@sEFuPkrjs_~a))xu}Y&y~TmkG+w5s;(%QVN0wtHDY3MgJZwQCCzr)JkW6W1 z-TuV#;Hq&`m?O6pL5H}Pk_Nb?=2b#stCB0d;-keg5{a8jwrRd5vlS6%hpWf{la z!Fyo++G+s=pw0%&65ZV{f~3`ywoflj#TEi=YcbA`0d*_a-(OY8HT4u*7eI|kq`*1; z5v_!}JvL%ctD_K1u{ffYWzW`y5Dq7`QlflL?m7s%DJ8u9JD2Tv=ld^1w?Ic^Rby2X z$on>9ZQ6$8EfF;b)u1OB6B5f~7bvSz?6f5(S`W!k=X{N1is2G3A=t%4Ik=b=D*LT< zBEIta+9z;MUUQDnXh7X5fq0Vmi1cO=E9Bp)Sd1ytHf!5JrfE6Qv=2W4kLX3=K0S~TefV$`RAXH z^Ups&Z(F~9Js$9Yqj1zw55Q4J-4BN!emFL5+JsG;4#L3)AA;d<2R?C2c*SWG7(l{R@ zAxo~<6}eSbH_0N*<@1>JG^Q*<*&~Tm`*ZvvZz}=uF&(&V(;8ML(KTspCJ?QRYgR-E zteN4KC|I6LP6r{Rq(lNG4IrebBt&Xur0kwDH~!5tf|X(}KaBq?wfvK*6z?Lw88P0S>6T)kLglw5-t8J$x}Y0woF6Vd>=1DUkEWkg*eTd_p~ zkVu|8$hE*44-pcolN8C;cG;cdatwtuQ+wx0b|))kwUJ5{7b0Cb8hx5oLR>77x@wHc ztu6WRh(idC=eu=d^_V|4OzY2Mej<(IMJ|DLD|0@UPXVz&Qer)a%j$=i=M+^eCv0}- z=I<=0H*=H6A;_sv?zqOe?n) zCzN~}QWFJSxdoLxpv|asd*gt69SGcf$MXCylZN%vCj3yBuEqmNWv^_DDI*V^L|tMq z5!f*}k&03DJk|(F{;jzz77|ydsuD)Ez9^&t%qeil*+Me6Rmmi`JP#?i9_Loi?HFB> z%DFLEc6dS;B=N6{K#0=g!>kTT286^$t8Dd{a1B%1{JIafpTvs2;#MYBHGw4HMyD&M zq^_(8fGl9H_qlICN3CJhb@CN;|7BL zC~4NNb=lL3y^a_ZHI|a9cDR+x%U0HP4+Yj@jpF{23zD1?4a)12xx|1{F8yxuQ8J+{ zwS_YG`^-m?qvwES6PG3ETt>wBV}0Mo^()xiT4k6X$O6bF>Bvy`5a;q8DNzYfdoET; zOh|C*m~|Qx0T3y>uxbUha>AIEJyw21iHK0StdgqtD)PvZdn2|`TvO)060*=99TDgv zX~QOc-a=c_iUlH)Ua>%us;?M8inuL=^p3Q)J!|tvF}sw%&7@&f6UZw+htc~VNFRv_ zuNa%880hpt1Y-zs8#R#-J)`k}OX51AB=q9dQSVWR!K@8wMx`n)$B0S{juOc0ikgb4 zZ|WnGYTBiggo6+f#5LwfF4{3tu^0^RJ|D%1Wyz=9>icum)9laYs;7cY7P+WQ$udSE zz|1iW9I&P*kA=YWP!>4JCUA4_s)e3xg|DmoQb@4;c0`Os1mcvf=iG!17>k8e#;c%rT?txlxgaN^wJOFDu>tLO`y5+iAllse+IY0-@E!zYwKYM>1<8 z0*QF0i*yzanEl($@5hd%{beQ-tC~Onz-=G=b((IUNx*#w00My;b)f`;H`fX=VgodK zK{*cuBL|{P5&_mfy%2%dggAm^Qo|&{PW;AzxxrYh6pg2l_^Zmw&6Je-oXr&$i&U7H zi=SDS>6B#rwRJ-(9>~X`c5^Kfvt(SDJF?umCK3}F_&sVwd&~#J zMiwlVMa?E#Wzw2XYH3(rL}Yj+6G^63@@N2J@z;_B64lZc>e#tr-H?)qBt~5}eq(O1#1Pv+=0uY)cZrv{FdGA!_l7`^$C!NhVuUdy%@} 
zIO+SyVkF~9jr$aQ=dnBUPiRp&Qj0_TEm-ETxeZKFV=GPwU&g4f(M(`-V};n0$3mcS z<(QiFCljhpxGH-MULJSO7E>ZRaUfADb_vdtE{quLNb1PZco)y&vV!D|0CHXcSvCxm z9vzlE7(+RCBo?Cyfq`=e%eJdrSRJF)ct@+R>8vNYzCI>GU|`E#37@{C!_7bAYNS0D z0~13a0PxCpVFb*+{+*}eEe#K!>zRJv6FHp znnSi~v=!mry0}X4n@h|>WuRQGBulw$&i-`?fQm0#bof=7h-|gUeH(fUWFg?Zxef`D zWd%P}>8=HqLzK9IR~fsf1gex*o+BDHW!RJCND-;awYW~fJhn(lb^I%t2muwz2oc@8<79>gWdQJxk>7a_+|0PK0b zw)`2+57m0$Y-yPC%1A`q5mZ`)#u5|B`x7HrEg~h8nNd$33jyy`%>+%J^T@TJLLhV= zXnllt4vhmkN@^)5jc+7K&+#Y=rF^b2NL{X(>TAy5g`1Os(kYOU7aT?PrQ~5&L}J9O zu;MJwwRndF&aL;j=6>jVcZ?!Fb4kDtZrSNP>Iq~KF_8pP1UBI zp9_7zxw@RU*J~A9CrW*2;65dJEmvbEWe>>?C$I+Sb1_IRlYsfPId4>0a6bF5sUi=P zU6wN1b-C=I#L;o_Rg3yZEfOV{z(OcnXO`e(*biW(bws`@6Lknc@P$M|qwXs&xlpdt z1r=nKAxVXVgr0i$}sT+G8eCWS9>*pzX9+!|M!gP z`^4FFT0`{ors3lpVMw$K%OqPJ)x=MLw~Gr#24bOjB>`{BEF}cS&K8g4o|v1g?6bHE zoWy91P@EBpW4I_}Vit@Mm`haRDC&|!V(pJTo^N9Ah8j6H&{mR6XiT1ui*(ukF>T8X67yo4CByNiEbD7_?V-yDOj6fKg*jH*d+LXqXU#E3&lp6fi& zXd+#g0N)l1*O6J{a?&Mc43vRfayf2j3lfXP*^!PwS!k%Ul-05XI9nj%vSe-kRQ2PC z2`Q=7p?M!&hpe<}ZCelvrR_vr74+P>{Dy!FE{pnoyTr=uL^dJV?+B#EpS*46W&m#m z@Yc7!5bM^gA3l2kdcJFLq65~E#v-yy%Lv5WPEePVS`VIzJBt97_JfbJH2?q-MM*?K zRC2ax0Ga5`qR6hV%4(3943Akp)_@5F_=Z3MTHRLs*iLg;?6Z@)(6T#D-^<1tBt%rV ze3%PP>hogh)`n{Jm^2Wctqx1n3MK9lbs}h!xgUz_A!smET8GMHv*j1N|D8Cy(m0C; zu^i=i9e{FQeXq(ZvS7jj>5`l%pBr<=x%A!$QFA?%Kw<*gOdw{J zXfQpA71HYY(gns>ua5YS@AVZ(mJ|Cmfzq}r&TB-%rNClP^@_T*GC}0+l}k)let>PGF^hw!5RX|o-s|f?t;W$ywfr) zURRW=nsJpy6J2c~nY#+b3dxh7xVL2SsLGmD{MU&kU~Up=vpSL$uM0p>(&$1$tJcU! 
zrIu~>UyoZPZDjmxtB#<`z-NoVR@j@#e<4s?H|4wIPGey`ptYrA&zYBwVA@!O3KDAq z8kM#B=rHP7gdv?+sg_$LiGK-^9fn@qkd2cViq}9$mXxC`JTxsNOL--sR8Fic(@j7^ z%31J%f!OK2E-pH?S}o4eB_MXKlcm#@Y~P zH!hSws$nxAiMkg1Fr^*pma(jAFsqC!GWozok}GN}(z5Jojj*~z1F&@l_~bWQ-1_r* zohMfV__YN)xIK14Z-MLq=beXm9>P}ud<6hDpNfOm9XNQV2R+F-JbK{p%WTQI6{!Gn z;hE34SRYCZ>b3#_)S3HBtq#XkoJBv!QFd2(bD4R-yM)FAofFHXsVWR-)mXIBELv*C z#%14MPui85MuMTGC|~hm7ujwIq?J#L&$0&XBq}jVLnkGvE~vml1LWogZ4mQ=WUZHnkEig9!9yUx3rXCX`KVi_&V;V}oPL4K=TOx&6N>;i&uQS@M-j*dh ziuJ+So!~uT;54A_Al64C_l~sImluMmNoOp{(Ix{_{%oyJVTA$c3>Whbla->-#I0n3 z+Nya^*GabUC=x~^U^a;BYPJJrT3}|R_p?GSv#n$lfB$C2Wk2i#q4R+$rHo8=M<|ax z%HfDjavlsCVBmm(CrmYr;ZW*jO*vpNU`!2xfp_A{YQ*vw42mV&csc)^V^s{Q3sl{& z1W09RJzpelCX5n^$0`8_l@@D_D#GO<&YtwH&yAHN@c^C! z;Ew^E09g!20G9%I6MzdA@0aLd=m}(zv3WC`yLRv!%yd%I;4z-kBb}ooikqw4*Lf!d z>y}ttU(Rw&)T$V04X>smiN*nbAoq>Ayd3L3SERxwF?JFIk;Q1r`pjJDkS&W?CI)jy zgdhRX$kH8dq-vQ_AdFfeh}jmH>4ZFHM?xmEt;9t}9b?ooF1$Qp>+JsEzFG{_!TLfd zM{EL)zMq5l660vp6*csPftPs5aG=kXG=>dfpbL;;j<4y$q&hFH`I?)%=&<3`3lGLF zU09X}6qQu!iUpP&n2baceK&;JC(I#_xmXL}mjK)jVB3Np<{rC6Paw;H*E}EhUQ5H1 z0D8Rle4Ho#RS=q-V3;TNIuE6ai|FLvXh6Nunp{mI7k=Z^60vzU1U<{gKp)`(UKXGNbQa;bd>%{RK0x6~|=UgJQ zT3Ji|m*SaW%#tmYnnNPQ!2i(=M<>7k{Jv}8N=Hv1s{p5;j6(<0gXav0p4l`w!4a>^ z773M1_QrzjmG9bu=)MuM@QrMDO?jV{WXZ)GH4aiw2FawnQF=^Aty5%WCQPPlBr$6i zNlTTCSE{Q1@F$G_`cdz?T5SB*3?Lxn)(T#*<^L&))uhDcpmmTP2IND+0^ z)fTE8R4vw7&KZ;Qrs_T>AhJk!>*iybSR(#)_THoCo_%)Tt9&(}Cy-T!&8K3xEeuZv z@EqUp(>&o2?}~pq$44~Yk>ej08>!q9wIXXQh_90Yp&}5Q1gh$~r5|;%Kt^35leVKw zBwds`Mz{Y0`0CX`CBi)x6W#%je$W61Ogqg1s^Vg1q3U_j_(B%u{45jLj#axoiFK>8`)w5BCKtCtvrgp2BxqnrCAs&{YKr~5k)C$R%^>N2m;6JYknC*k+OOAVObboR5 zeVfzC;Q>~Wz*Xd7ZJOmdDQOf7)f%ML&*y6RD0F=0u)ykOK{yvt><`%sk0c@0sO3gx~To`QXiBa3xl3OCK#ukf`e}Q zvH$#0_j1P`w7#m+i}8t~R(63)R($qnn>)gM*>#q%Hj66d+NI1%7lF_*uKNk&dpD&% z=COQuFMvMS-S>! z=#d@|zPG~x)1?Bi1<(y+69J98A1nx3NQ6s5SitQe;jGYwcYfhYYzJ^9fQK*Xf84|H z0{{=+-OeZe+Tc;Xp}!`=DUdk1R%+I+m%zAtvx<3`C2Q`hxv+Be*6KDTx`vqF@Q{n! 
zk8kc(RuhAsKqenY0Qm2@8wtSS2NND}n8T5W5T>UnZ_n(3IWDY9(_a$AtVB0m)iHl` z{rd3UIc|x+gmise3bc^b_F(|-17LcIzIRWUfiyO{6~M6ohD-FluLbayJ-z6pW7gb{ zQhJeNynq}YB-Vw-KdkU)EwHHFS5*)_pEkw+nG(_02>kaCzqtLx=AJkv4n2WPJ_x{# z5F*(vHfMz$dD02Ld<}QknIBi~Rj_aT9>2LCE{gYWwp5d>m3ub}2|EkA|LQf$3 z2hIX;#ynqp3xF3r?L>Tcx*Hq;myU?7U!OdJOYyD%nn^KS^RJiUCwuYz7XUbAuSY$9 zFr=*a`**Yd=>Yy^5x;*|crJiX?`Gdi0sI3N_xk%hfRpdZ2b_2!n&BOT-$)6^ZtM8* zqY<9w2Rz*n!9g2}LCVx@A0_;Jd%~UDvYGLA%>SJ0(WA!-#=#I`;62kl9FBWMGbC2cy<~gP}F47o&4q(kZ$FMK>BLG`>^WE12 z_|--H?jAjQEEA>y{5gONA?m1$A>G>!TY~R?4;Fr+{Cn)38|aKh7{lD#e5|1wi*;NU z8~1{oI~POFr3)dF@s%Q6=+UFc1mRdL>?HZm*!w`V2k`9$9!Gt<3cz45$3y@=w7}yy zelN$+qeqV(D;`&2VJ65gK-^;cf=2-e3p9@UmH<3Jj^n)xFpjym-6_E4i)jECEx0pOqo7{>-k ze#AnN$L&}c_1Xpim)?VML`dFCj~+dGOe8*seVZJw#li;re|I0pv8&q)7jRsM0Ql}c zj^n}w97m5HJ$kG-JRf^MG5$AzUtWZ<{J(t|%iP;nu<$X5`#|KddpnlX7I+*zdi3bA zqOb5cpDZkui#f8b*lE2JVuK=wp;gNEO!A|yGUc{(W6I?6@xcpev{&z03Nx> zWBKcOj%8Q3*Dvze9sx-ToX=SHSTwOmj~+cH0S5uN4tGBxZU^vNOE8ux0N>lgSax;0 z7K@whZFnr?SnO#m3n4D((W6I?Re__h`=q{`v9!r>Pk>ge-;3K*mT-K>VfU)BZv(jh z5{{)uj~+c%1R6+C{i6WB1mKg9nq2oZ&tqTkPnbV>oVyI;TMIchpM)H%k3y7Wi(Mwa zM~@yodh7we1j*;v6M5W$rKug(qeqV(JysE(44}jA$)f}C)MXo8j~+dG^jOh&9Hf`{ zt^(C3EaSL(^jLA|i>~$Pu`?WmNFJ`wzXRZ_=zDti=+UD`j~+dG^ytx}M~@!Mg#Qox WKU7`}H-hE>0000`. + +We use the tool's default settings. This means we are following the idiomatic +Rust style. For instance, we use 4 spaces for indentation rather than tabs. + +Typically, you will want to instruct your editor/IDE to format while you type, +when you save or at commit time. However, if for some reason you want +to reformat the entire kernel Rust sources at some point, you may run:: + + make LLVM=1 rustfmt + +To check if everything is formatted (printing a diff otherwise), e.g. if you +have configured a CI for a tree as a maintainer, you may run:: + + make LLVM=1 rustfmtcheck + +Like ``clang-format`` for the rest of the kernel, ``rustfmt`` works on +individual files, and does not require a kernel configuration. 
Sometimes it may +even work with broken code. + + +Extra lints +----------- + +While ``rustc`` is a very helpful compiler, some extra lints and analysis are +available via ``clippy``, a Rust linter. To enable it, pass ``CLIPPY=1`` to +the same invocation you use for compilation, e.g.:: + + make LLVM=1 CLIPPY=1 + +At the moment, we do not enforce a "clippy-free" compilation, so you can treat +the output the same way as the extra warning levels for C, e.g. like ``W=2``. +Still, we use the default configuration, which is relatively conservative, so +it is a good idea to read any output it may produce from time to time and fix +the pointed out issues. The list of enabled lints will likely be tweaked over +time, and extra levels may end up being introduced, e.g. ``CLIPPY=2``. + + +Abstractions vs. bindings +------------------------- + +We don't have abstractions for all the kernel internal APIs and concepts, +but we would like to expand coverage as time goes on. Unless there is +a good reason not to, always use the abstractions instead of calling +the C bindings directly. + +If you are writing some code that requires a call to an internal kernel API +or concept that isn't abstracted yet, consider providing an (ideally safe) +abstraction for everyone to use. + + +Conditional compilation +----------------------- + +Rust code has access to conditional compilation based on the kernel +configuration: + +.. code-block:: rust + + #[cfg(CONFIG_X)] // `CONFIG_X` is enabled (`y` or `m`) + #[cfg(CONFIG_X="y")] // `CONFIG_X` is enabled as a built-in (`y`) + #[cfg(CONFIG_X="m")] // `CONFIG_X` is enabled as a module (`m`) + #[cfg(not(CONFIG_X))] // `CONFIG_X` is disabled + + +Documentation +------------- + +Please see :ref:`Documentation/rust/docs.rst <rust_docs>`. diff --git a/Documentation/rust/docs.rst b/Documentation/rust/docs.rst new file mode 100644 index 0000000000000..ab29d8b6a00df --- /dev/null +++ b/Documentation/rust/docs.rst @@ -0,0 +1,110 @@ +.. 
_rust_docs: + +Docs +==== + +Rust kernel code is not documented like C kernel code (i.e. via kernel-doc). +Instead, we use the usual system for documenting Rust code: the ``rustdoc`` +tool, which uses Markdown (a *very* lightweight markup language). + +This document describes how to make the most out of the kernel documentation +for Rust. + + +Reading the docs +---------------- + +An advantage of using Markdown is that it attempts to make text look almost as +you would have written it in plain text. This makes the documentation quite +pleasant to read even in its source form. + +However, the generated HTML docs produced by ``rustdoc`` provide a *very* nice +experience, including integrated instant search, clickable items (types, +functions, constants, etc. -- including to all the standard Rust library ones +that we use in the kernel, e.g. ``core``), categorization, links to the source +code, etc. + +Like for the rest of the kernel documentation, pregenerated HTML docs for +the libraries (crates) inside ``rust/`` that are used by the rest of the kernel +are available at `kernel.org`_ (TODO: link when in mainline and generated +alongside the rest of the documentation). + +.. _kernel.org: http://kernel.org/ + +Otherwise, you can generate them locally. This is quite fast (same order as +compiling the code itself) and you do not need any special tools or environment. +This has the added advantage that they will be tailored to your particular +kernel configuration. To generate them, simply use the ``rustdoc`` target with +the same invocation you use for compilation, e.g.:: + + make LLVM=1 rustdoc + + +Writing the docs +---------------- + +If you already know Markdown, learning how to write Rust documentation will be +a breeze. If not, understanding the basics is a matter of minutes reading other +code. There are also many guides available out there, a particularly nice one +is at `GitHub`_. + +.. 
_GitHub: https://guides.github.com/features/mastering-markdown/#syntax + +This is what a well-documented Rust function may look like (derived from the Rust +standard library):: + + /// Returns the contained [`Some`] value, consuming the `self` value, + /// without checking that the value is not [`None`]. + /// + /// # Safety + /// + /// Calling this method on [`None`] is *[undefined behavior]*. + /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// let x = Some("air"); + /// assert_eq!(unsafe { x.unwrap_unchecked() }, "air"); + /// ``` + pub unsafe fn unwrap_unchecked(self) -> T { + match self { + Some(val) => val, + + // SAFETY: the safety contract must be upheld by the caller. + None => unsafe { hint::unreachable_unchecked() }, + } + } + +This example showcases a few ``rustdoc`` features and some common conventions +(that we also follow in the kernel): + +* The first paragraph must be a single sentence briefly describing what + the documented item does. Further explanations must go in extra paragraphs. + +* ``unsafe`` functions must document the preconditions needed for a call to be + safe under a ``Safety`` section. + +* While not shown here, if a function may panic, the conditions under which + that happens must be described under a ``Panics`` section. Please note that + panicking should be very rare and used only with a good reason. In almost + all cases, you should use a fallible approach, returning a `Result`. + +* If providing examples of usage would help readers, they must be written in + a section called ``Examples``. + +* Rust items (functions, types, constants...) will be automatically linked + (``rustdoc`` will find out the URL for you). + +* Following the Rust standard library conventions, any ``unsafe`` block must be + preceded by a ``SAFETY`` comment describing why the code inside is sound. 
+ + While sometimes the reason might look trivial and therefore unneeded, writing + these comments is not just a good way of documenting what has been taken into + account, but also that there are no *extra* implicit constraints. + +To learn more about how to write documentation for Rust and extra features, +please take a look at the ``rustdoc`` `book`_. + +.. _book: https://doc.rust-lang.org/rustdoc/how-to-write-documentation.html diff --git a/Documentation/rust/index.rst b/Documentation/rust/index.rst new file mode 100644 index 0000000000000..257cf2b200b8a --- /dev/null +++ b/Documentation/rust/index.rst @@ -0,0 +1,20 @@ +Rust +==== + +Documentation related to Rust within the kernel. If you are starting out, +read the :ref:`Documentation/rust/quick-start.rst <rust_quick_start>` guide. + +.. toctree:: + :maxdepth: 1 + + quick-start + coding + docs + arch-support + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst new file mode 100644 index 0000000000000..3f5df2888e0ee --- /dev/null +++ b/Documentation/rust/quick-start.rst @@ -0,0 +1,222 @@ +.. _rust_quick_start: + +Quick Start +=========== + +This document describes how to get started with kernel development in Rust. +If you have worked previously with Rust, this will only take a moment. + +Please note that, at the moment, a very restricted subset of architectures +is supported, see :doc:`/rust/arch-support`. + + +Requirements: Building +---------------------- + +This section explains how to fetch the tools needed for building. + +Some of these requirements might be available from your Linux distribution +under names like ``rustc``, ``rust-src``, ``rust-bindgen``, etc. However, +at the time of writing, they are likely to not be recent enough. + + +rustc +***** + +A particular version (`1.54.0-beta.1`) of the Rust compiler is required. 
+Newer versions may or may not work because, for the moment, we depend on +some unstable Rust features. + +If you are using ``rustup``, run:: + + rustup default beta-2021-06-23 + +Otherwise, fetch a standalone installer or install ``rustup`` from: + + https://www.rust-lang.org + + +Rust standard library source +**************************** + +The Rust standard library source is required because the build system will +cross-compile ``core`` and ``alloc``. + +If you are using ``rustup``, run:: + + rustup component add rust-src + +Otherwise, if you used a standalone installer, you can clone the Rust +repository into the installation folder of your nightly toolchain:: + + git clone --recurse-submodules https://github.com/rust-lang/rust $(rustc --print sysroot)/lib/rustlib/src/rust + + +libclang +******** + +``libclang`` (part of LLVM) is used by ``bindgen`` to understand the C code +in the kernel, which means you will need an LLVM installed; like when +you compile the kernel with ``CC=clang`` or ``LLVM=1``. + +Your Linux distribution is likely to have a suitable one available, so it is +best if you check that first. + +There are also some binaries for several systems and architectures uploaded at: + + https://releases.llvm.org/download.html + +Otherwise, building LLVM takes quite a while, but it is not a complex process: + + https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm + +See Documentation/kbuild/llvm.rst for more information and further ways +to fetch pre-built releases and distribution packages. + + +bindgen +******* + +The bindings to the C side of the kernel are generated at build time using +the ``bindgen`` tool. The version we currently support is ``0.56.0``. + +Install it via (this will build the tool from source):: + + cargo install --locked --version 0.56.0 bindgen + + +Requirements: Developing +------------------------ + +This section explains how to fetch the tools needed for developing. 
That is, +if you only want to build the kernel, you do not need them. + + +rustfmt +******* + +The ``rustfmt`` tool is used to automatically format all the Rust kernel code, +including the generated C bindings (for details, please see +:ref:`Documentation/rust/coding.rst `). + +If you are using ``rustup``, its ``default`` profile already installs the tool, +so you should be good to go. If you are using another profile, you can install +the component manually:: + + rustup component add rustfmt + +The standalone installers also come with ``rustfmt``. + + +clippy +****** + +``clippy`` is a Rust linter. Installing it allows you to get extra warnings +for Rust code passing ``CLIPPY=1`` to ``make`` (for details, please see +:ref:`Documentation/rust/coding.rst `). + +If you are using ``rustup``, its ``default`` profile already installs the tool, +so you should be good to go. If you are using another profile, you can install +the component manually:: + + rustup component add clippy + +The standalone installers also come with ``clippy``. + + +cargo +***** + +``cargo`` is the Rust native build system. It is currently required to run +the tests (``rusttest`` target) since we use it to build a custom standard +library that contains the facilities provided by our custom ``alloc``. + +If you are using ``rustup``, all the profiles already install the tool, +so you should be good to go. The standalone installers also include ``cargo``. + + +rustdoc +******* + +``rustdoc`` is the documentation tool for Rust. It generates pretty HTML +documentation for Rust code (for details, please see +:ref:`Documentation/rust/docs.rst `). + +``rustdoc`` is also able to test the examples provided in documented Rust code +(called doctests or documentation tests). We use this feature, thus ``rustdoc`` +is required to run the tests (``rusttest`` target). + +If you are using ``rustup``, all the profiles already install the tool, +so you should be good to go. 
The standalone installers also include ``rustdoc``. + + +rust-analyzer +************* + +The `rust-analyzer `_ language server can +be used with many editors to enable syntax highlighting, completion, go to +definition, and other features. + +``rust-analyzer`` will need to be +`configured `_ +to work with the kernel by adding a ``rust-project.json`` file in the root folder. +A ``rust-project.json`` can be generated by building the Make target ``rust-analyzer``, +which will create a ``rust-project.json`` in the root of the output directory. + + +Configuration +------------- + +``Rust support`` (``CONFIG_RUST``) needs to be enabled in the ``General setup`` +menu. The option is only shown if the build system can locate ``rustc``. +In turn, this will make visible the rest of options that depend on Rust. + +Afterwards, go to:: + + Kernel hacking + -> Sample kernel code + -> Rust samples + +And enable some sample modules either as built-in or as loadable. + + +Building +-------- + +Building a kernel with a complete LLVM toolchain is the best supported setup +at the moment. That is:: + + make LLVM=1 + +For architectures that do not support a full LLVM toolchain, use:: + + make CC=clang + +Using GCC also works for some configurations, but it is *very* experimental at +the moment. + + +Hacking +------- + +If you want to dive deeper, take a look at the source code of the samples +at ``samples/rust/``, the Rust support code under ``rust/`` and +the ``Rust hacking`` menu under ``Kernel hacking``. + +If you use GDB/Binutils and Rust symbols aren't getting demangled, the reason +is your toolchain doesn't support Rust's new v0 mangling scheme yet. 
There are +a few ways out: + + - If you don't mind building your own tools, we provide the following fork + with the support cherry-picked from GCC: + + https://github.com/Rust-for-Linux/binutils-gdb/releases/tag/gdb-10.1-release-rust + https://github.com/Rust-for-Linux/binutils-gdb/releases/tag/binutils-2_35_1-rust + + - If you only need GDB and can enable ``CONFIG_DEBUG_INFO``, do so: + some versions of GDB (e.g. vanilla GDB 10.1) are able to use + the pre-demangled names embedded in the debug info. + + - If you don't need loadable module support, you may compile without + the ``-Zsymbol-mangling-version=v0`` flag. However, we don't maintain + support for that, so avoid it unless you are in a hurry. From 5dbc177c7119af422f68b677a74225138afb3ca2 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:21:12 +0200 Subject: [PATCH 072/851] samples: add Rust examples A set of Rust modules that showcase how Rust modules look like and how to use the abstracted kernel features. These samples also double as tests in the CI. The semaphore sample comes with a C version for comparison. 
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- samples/Kconfig | 2 + samples/Makefile | 1 + samples/rust/Kconfig | 113 +++++++++++++ samples/rust/Makefile | 12 ++ samples/rust/rust_chrdev.rs | 51 ++++++ samples/rust/rust_minimal.rs | 38 +++++ samples/rust/rust_miscdev.rs | 150 +++++++++++++++++ samples/rust/rust_module_parameters.rs | 72 +++++++++ samples/rust/rust_print.rs | 57 +++++++ samples/rust/rust_random.rs | 61 +++++++ samples/rust/rust_semaphore.rs | 177 +++++++++++++++++++++ samples/rust/rust_semaphore_c.c | 212 +++++++++++++++++++++++++ samples/rust/rust_stack_probing.rs | 40 +++++ samples/rust/rust_sync.rs | 81 ++++++++++ 14 files changed, 1067 insertions(+) create mode 100644 samples/rust/Kconfig create mode 100644 samples/rust/Makefile create mode 100644 samples/rust/rust_chrdev.rs create mode 100644 samples/rust/rust_minimal.rs create mode 100644 samples/rust/rust_miscdev.rs create mode 100644 samples/rust/rust_module_parameters.rs create mode 100644 samples/rust/rust_print.rs create mode 100644 samples/rust/rust_random.rs create mode 100644 
samples/rust/rust_semaphore.rs create mode 100644 samples/rust/rust_semaphore_c.c create mode 100644 samples/rust/rust_stack_probing.rs create mode 100644 samples/rust/rust_sync.rs diff --git a/samples/Kconfig b/samples/Kconfig index b5a1a7aa7e23a..eaa06c05d37f1 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -223,4 +223,6 @@ config SAMPLE_WATCH_QUEUE Build example userspace program to use the new mount_notify(), sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function. +source "samples/rust/Kconfig" + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 087e0988ccc56..291663e56a3cd 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/ subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/ +obj-$(CONFIG_SAMPLES_RUST) += rust/ diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig new file mode 100644 index 0000000000000..183a3c4dc80cd --- /dev/null +++ b/samples/rust/Kconfig @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: GPL-2.0 + +menuconfig SAMPLES_RUST + bool "Rust samples" + depends on RUST + help + You can build sample Rust kernel code here. + + If unsure, say N. + +if SAMPLES_RUST + +config SAMPLE_RUST_MINIMAL + tristate "Minimal" + help + This option builds the Rust minimal module sample. + + To compile this as a module, choose M here: + the module will be called rust_minimal. + + If unsure, say N. + +config SAMPLE_RUST_PRINT + tristate "Printing macros" + help + This option builds the Rust printing macros sample. + + To compile this as a module, choose M here: + the module will be called rust_print. + + If unsure, say N. + +config SAMPLE_RUST_MODULE_PARAMETERS + tristate "Module parameters" + help + This option builds the Rust module parameters sample. + + To compile this as a module, choose M here: + the module will be called rust_module_parameters. + + If unsure, say N. 
+ +config SAMPLE_RUST_SYNC + tristate "Synchronisation primitives" + help + This option builds the Rust synchronisation primitives sample. + + To compile this as a module, choose M here: + the module will be called rust_sync. + + If unsure, say N. + +config SAMPLE_RUST_CHRDEV + tristate "Character device" + help + This option builds the Rust character device sample. + + To compile this as a module, choose M here: + the module will be called rust_chrdev. + + If unsure, say N. + +config SAMPLE_RUST_MISCDEV + tristate "Miscellaneous device" + help + This option builds the Rust miscellaneous device sample. + + To compile this as a module, choose M here: + the module will be called rust_miscdev. + + If unsure, say N. + +config SAMPLE_RUST_STACK_PROBING + tristate "Stack probing" + help + This option builds the Rust stack probing sample. + + To compile this as a module, choose M here: + the module will be called rust_stack_probing. + + If unsure, say N. + +config SAMPLE_RUST_SEMAPHORE + tristate "Semaphore" + help + This option builds the Rust semaphore sample. + + To compile this as a module, choose M here: + the module will be called rust_semaphore. + + If unsure, say N. + +config SAMPLE_RUST_SEMAPHORE_C + tristate "Semaphore (in C, for comparison)" + help + This option builds the Rust semaphore sample (in C, for comparison). + + To compile this as a module, choose M here: + the module will be called rust_semaphore_c. + + If unsure, say N. + +config SAMPLE_RUST_RANDOM + tristate "Random" + help + This option builds the Rust random sample. + + To compile this as a module, choose M here: + the module will be called rust_random. + + If unsure, say N. 
+ +endif # SAMPLES_RUST diff --git a/samples/rust/Makefile b/samples/rust/Makefile new file mode 100644 index 0000000000000..48bc871ea1f8f --- /dev/null +++ b/samples/rust/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_SAMPLE_RUST_MINIMAL) += rust_minimal.o +obj-$(CONFIG_SAMPLE_RUST_PRINT) += rust_print.o +obj-$(CONFIG_SAMPLE_RUST_MODULE_PARAMETERS) += rust_module_parameters.o +obj-$(CONFIG_SAMPLE_RUST_SYNC) += rust_sync.o +obj-$(CONFIG_SAMPLE_RUST_CHRDEV) += rust_chrdev.o +obj-$(CONFIG_SAMPLE_RUST_MISCDEV) += rust_miscdev.o +obj-$(CONFIG_SAMPLE_RUST_STACK_PROBING) += rust_stack_probing.o +obj-$(CONFIG_SAMPLE_RUST_SEMAPHORE) += rust_semaphore.o +obj-$(CONFIG_SAMPLE_RUST_SEMAPHORE_C) += rust_semaphore_c.o +obj-$(CONFIG_SAMPLE_RUST_RANDOM) += rust_random.o diff --git a/samples/rust/rust_chrdev.rs b/samples/rust/rust_chrdev.rs new file mode 100644 index 0000000000000..ccdfb2da78555 --- /dev/null +++ b/samples/rust/rust_chrdev.rs @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust character device sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; +use kernel::{c_str, chrdev, file_operations::FileOperations}; + +module! { + type: RustChrdev, + name: b"rust_chrdev", + author: b"Rust for Linux Contributors", + description: b"Rust character device sample", + license: b"GPL v2", +} + +#[derive(Default)] +struct RustFile; + +impl FileOperations for RustFile { + kernel::declare_file_operations!(); +} + +struct RustChrdev { + _dev: Pin>>, +} + +impl KernelModule for RustChrdev { + fn init() -> Result { + pr_info!("Rust character device sample (init)\n"); + + let mut chrdev_reg = + chrdev::Registration::new_pinned(c_str!("rust_chrdev"), 0, &THIS_MODULE)?; + + // Register the same kind of device twice, we're just demonstrating + // that you can use multiple minors. 
There are two minors in this case + // because its type is `chrdev::Registration<2>` + chrdev_reg.as_mut().register::()?; + chrdev_reg.as_mut().register::()?; + + Ok(RustChrdev { _dev: chrdev_reg }) + } +} + +impl Drop for RustChrdev { + fn drop(&mut self) { + pr_info!("Rust character device sample (exit)\n"); + } +} diff --git a/samples/rust/rust_minimal.rs b/samples/rust/rust_minimal.rs new file mode 100644 index 0000000000000..49cfd8cf3aad6 --- /dev/null +++ b/samples/rust/rust_minimal.rs @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust minimal sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; + +module! { + type: RustMinimal, + name: b"rust_minimal", + author: b"Rust for Linux Contributors", + description: b"Rust minimal sample", + license: b"GPL v2", +} + +struct RustMinimal { + message: String, +} + +impl KernelModule for RustMinimal { + fn init() -> Result { + pr_info!("Rust minimal sample (init)\n"); + pr_info!("Am I built-in? {}\n", !cfg!(MODULE)); + + Ok(RustMinimal { + message: "on the heap!".try_to_owned()?, + }) + } +} + +impl Drop for RustMinimal { + fn drop(&mut self) { + pr_info!("My message is {}\n", self.message); + pr_info!("Rust minimal sample (exit)\n"); + } +} diff --git a/samples/rust/rust_miscdev.rs b/samples/rust/rust_miscdev.rs new file mode 100644 index 0000000000000..df8681af9cc27 --- /dev/null +++ b/samples/rust/rust_miscdev.rs @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust miscellaneous device sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; +use kernel::{ + c_str, + file::File, + file_operations::{FileOpener, FileOperations}, + io_buffer::{IoBufferReader, IoBufferWriter}, + miscdev, + sync::{CondVar, Mutex, Ref}, +}; + +module! 
{ + type: RustMiscdev, + name: b"rust_miscdev", + author: b"Rust for Linux Contributors", + description: b"Rust miscellaneous device sample", + license: b"GPL v2", +} + +const MAX_TOKENS: usize = 3; + +struct SharedStateInner { + token_count: usize, +} + +struct SharedState { + state_changed: CondVar, + inner: Mutex, +} + +impl SharedState { + fn try_new() -> Result>> { + Ok(Ref::pinned(Ref::try_new_and_init( + Self { + // SAFETY: `condvar_init!` is called below. + state_changed: unsafe { CondVar::new() }, + // SAFETY: `mutex_init!` is called below. + inner: unsafe { Mutex::new(SharedStateInner { token_count: 0 }) }, + }, + |mut state| { + // SAFETY: `state_changed` is pinned when `state` is. + let pinned = unsafe { state.as_mut().map_unchecked_mut(|s| &mut s.state_changed) }; + kernel::condvar_init!(pinned, "SharedState::state_changed"); + // SAFETY: `inner` is pinned when `state` is. + let pinned = unsafe { state.as_mut().map_unchecked_mut(|s| &mut s.inner) }; + kernel::mutex_init!(pinned, "SharedState::inner"); + }, + )?)) + } +} + +struct Token; + +impl FileOpener>> for Token { + fn open(shared: &Pin>) -> Result { + Ok(shared.clone()) + } +} + +impl FileOperations for Token { + type Wrapper = Pin>; + + kernel::declare_file_operations!(read, write); + + fn read( + shared: &Ref, + _: &File, + data: &mut T, + offset: u64, + ) -> Result { + // Succeed if the caller doesn't provide a buffer or if not at the start. + if data.is_empty() || offset != 0 { + return Ok(0); + } + + { + let mut inner = shared.inner.lock(); + + // Wait until we are allowed to decrement the token count or a signal arrives. + while inner.token_count == 0 { + if shared.state_changed.wait(&mut inner) { + return Err(Error::EINTR); + } + } + + // Consume a token. + inner.token_count -= 1; + } + + // Notify a possible writer waiting. + shared.state_changed.notify_all(); + + // Write a one-byte 1 to the reader. 
+ data.write_slice(&[1u8; 1])?; + Ok(1) + } + + fn write( + shared: &Ref, + _: &File, + data: &mut T, + _offset: u64, + ) -> Result { + { + let mut inner = shared.inner.lock(); + + // Wait until we are allowed to increment the token count or a signal arrives. + while inner.token_count == MAX_TOKENS { + if shared.state_changed.wait(&mut inner) { + return Err(Error::EINTR); + } + } + + // Increment the number of token so that a reader can be released. + inner.token_count += 1; + } + + // Notify a possible reader waiting. + shared.state_changed.notify_all(); + Ok(data.len()) + } +} + +struct RustMiscdev { + _dev: Pin>>>>, +} + +impl KernelModule for RustMiscdev { + fn init() -> Result { + pr_info!("Rust miscellaneous device sample (init)\n"); + + let state = SharedState::try_new()?; + + Ok(RustMiscdev { + _dev: miscdev::Registration::new_pinned::(c_str!("rust_miscdev"), None, state)?, + }) + } +} + +impl Drop for RustMiscdev { + fn drop(&mut self) { + pr_info!("Rust miscellaneous device sample (exit)\n"); + } +} diff --git a/samples/rust/rust_module_parameters.rs b/samples/rust/rust_module_parameters.rs new file mode 100644 index 0000000000000..57e59e8050275 --- /dev/null +++ b/samples/rust/rust_module_parameters.rs @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust module parameters sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; + +module! 
{ + type: RustModuleParameters, + name: b"rust_module_parameters", + author: b"Rust for Linux Contributors", + description: b"Rust module parameters sample", + license: b"GPL v2", + params: { + my_bool: bool { + default: true, + permissions: 0, + description: b"Example of bool", + }, + my_i32: i32 { + default: 42, + permissions: 0o644, + description: b"Example of i32", + }, + my_str: str { + default: b"default str val", + permissions: 0o644, + description: b"Example of a string param", + }, + my_usize: usize { + default: 42, + permissions: 0o644, + description: b"Example of usize", + }, + my_array: ArrayParam { + default: [0, 1], + permissions: 0, + description: b"Example of array", + }, + }, +} + +struct RustModuleParameters; + +impl KernelModule for RustModuleParameters { + fn init() -> Result { + pr_info!("Rust module parameters sample (init)\n"); + + { + let lock = THIS_MODULE.kernel_param_lock(); + pr_info!("Parameters:\n"); + pr_info!(" my_bool: {}\n", my_bool.read()); + pr_info!(" my_i32: {}\n", my_i32.read(&lock)); + pr_info!( + " my_str: {}\n", + core::str::from_utf8(my_str.read(&lock))? + ); + pr_info!(" my_usize: {}\n", my_usize.read(&lock)); + pr_info!(" my_array: {:?}\n", my_array.read()); + } + + Ok(RustModuleParameters) + } +} + +impl Drop for RustModuleParameters { + fn drop(&mut self) { + pr_info!("Rust module parameters sample (exit)\n"); + } +} diff --git a/samples/rust/rust_print.rs b/samples/rust/rust_print.rs new file mode 100644 index 0000000000000..c43338fca5071 --- /dev/null +++ b/samples/rust/rust_print.rs @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust printing macros sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::pr_cont; +use kernel::prelude::*; + +module! 
{ + type: RustPrint, + name: b"rust_print", + author: b"Rust for Linux Contributors", + description: b"Rust printing macros sample", + license: b"GPL v2", +} + +struct RustPrint; + +impl KernelModule for RustPrint { + fn init() -> Result { + pr_info!("Rust printing macros sample (init)\n"); + + pr_emerg!("Emergency message (level 0) without args\n"); + pr_alert!("Alert message (level 1) without args\n"); + pr_crit!("Critical message (level 2) without args\n"); + pr_err!("Error message (level 3) without args\n"); + pr_warn!("Warning message (level 4) without args\n"); + pr_notice!("Notice message (level 5) without args\n"); + pr_info!("Info message (level 6) without args\n"); + + pr_info!("A line that"); + pr_cont!(" is continued"); + pr_cont!(" without args\n"); + + pr_emerg!("{} message (level {}) with args\n", "Emergency", 0); + pr_alert!("{} message (level {}) with args\n", "Alert", 1); + pr_crit!("{} message (level {}) with args\n", "Critical", 2); + pr_err!("{} message (level {}) with args\n", "Error", 3); + pr_warn!("{} message (level {}) with args\n", "Warning", 4); + pr_notice!("{} message (level {}) with args\n", "Notice", 5); + pr_info!("{} message (level {}) with args\n", "Info", 6); + + pr_info!("A {} that", "line"); + pr_cont!(" is {}", "continued"); + pr_cont!(" with {}\n", "args"); + + Ok(RustPrint) + } +} + +impl Drop for RustPrint { + fn drop(&mut self) { + pr_info!("Rust printing macros sample (exit)\n"); + } +} diff --git a/samples/rust/rust_random.rs b/samples/rust/rust_random.rs new file mode 100644 index 0000000000000..135325b6ca0a4 --- /dev/null +++ b/samples/rust/rust_random.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust random device +//! +//! Adapted from Alex Gaynor's original available at +//! . 
+ +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::{ + file::File, + file_operations::FileOperations, + io_buffer::{IoBufferReader, IoBufferWriter}, + prelude::*, +}; + +#[derive(Default)] +struct RandomFile; + +impl FileOperations for RandomFile { + kernel::declare_file_operations!(read, write, read_iter, write_iter); + + fn read(_this: &Self, file: &File, buf: &mut T, _: u64) -> Result { + let total_len = buf.len(); + let mut chunkbuf = [0; 256]; + + while !buf.is_empty() { + let len = chunkbuf.len().min(buf.len()); + let chunk = &mut chunkbuf[0..len]; + + if file.is_blocking() { + kernel::random::getrandom(chunk)?; + } else { + kernel::random::getrandom_nonblock(chunk)?; + } + buf.write_slice(chunk)?; + } + Ok(total_len) + } + + fn write(_this: &Self, _file: &File, buf: &mut T, _: u64) -> Result { + let total_len = buf.len(); + let mut chunkbuf = [0; 256]; + while !buf.is_empty() { + let len = chunkbuf.len().min(buf.len()); + let chunk = &mut chunkbuf[0..len]; + buf.read_slice(chunk)?; + kernel::random::add_randomness(chunk); + } + Ok(total_len) + } +} + +module_misc_device! { + type: RandomFile, + name: b"rust_random", + author: b"Rust for Linux Contributors", + description: b"Just use /dev/urandom: Now with early-boot safety", + license: b"GPL v2", +} diff --git a/samples/rust/rust_semaphore.rs b/samples/rust/rust_semaphore.rs new file mode 100644 index 0000000000000..1408d5095b887 --- /dev/null +++ b/samples/rust/rust_semaphore.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust semaphore sample +//! +//! A counting semaphore that can be used by userspace. +//! +//! The count is incremented by writes to the device. A write of `n` bytes results in an increment +//! of `n`. It is decremented by reads; each read results in the count being decremented by 1. If +//! the count is already zero, a read will block until another write increments it. +//! +//! 
This can be used in user space from the shell for example as follows (assuming a node called +//! `semaphore`): `cat semaphore` decrements the count by 1 (waiting for it to become non-zero +//! before decrementing); `echo -n 123 > semaphore` increments the semaphore by 3, potentially +//! unblocking up to 3 blocked readers. + +#![no_std] +#![feature(allocator_api, global_asm)] + +use core::sync::atomic::{AtomicU64, Ordering}; +use kernel::{ + c_str, condvar_init, declare_file_operations, + file::File, + file_operations::{FileOpener, FileOperations, IoctlCommand, IoctlHandler}, + io_buffer::{IoBufferReader, IoBufferWriter}, + miscdev::Registration, + mutex_init, + prelude::*, + sync::{CondVar, Mutex, Ref}, + user_ptr::{UserSlicePtrReader, UserSlicePtrWriter}, +}; + +module! { + type: RustSemaphore, + name: b"rust_semaphore", + author: b"Rust for Linux Contributors", + description: b"Rust semaphore sample", + license: b"GPL v2", +} + +struct SemaphoreInner { + count: usize, + max_seen: usize, +} + +struct Semaphore { + changed: CondVar, + inner: Mutex, +} + +struct FileState { + read_count: AtomicU64, + shared: Ref, +} + +impl FileState { + fn consume(&self) -> Result { + let mut inner = self.shared.inner.lock(); + while inner.count == 0 { + if self.shared.changed.wait(&mut inner) { + return Err(Error::EINTR); + } + } + inner.count -= 1; + Ok(()) + } +} + +impl FileOpener> for FileState { + fn open(shared: &Ref) -> Result> { + Ok(Box::try_new(Self { + read_count: AtomicU64::new(0), + shared: shared.clone(), + })?) 
+ } +} + +impl FileOperations for FileState { + declare_file_operations!(read, write, ioctl); + + fn read(this: &Self, _: &File, data: &mut T, offset: u64) -> Result { + if data.is_empty() || offset > 0 { + return Ok(0); + } + this.consume()?; + data.write_slice(&[0u8; 1])?; + this.read_count.fetch_add(1, Ordering::Relaxed); + Ok(1) + } + + fn write(this: &Self, _: &File, data: &mut T, _offs: u64) -> Result { + { + let mut inner = this.shared.inner.lock(); + inner.count = inner.count.saturating_add(data.len()); + if inner.count > inner.max_seen { + inner.max_seen = inner.count; + } + } + + this.shared.changed.notify_all(); + Ok(data.len()) + } + + fn ioctl(this: &Self, file: &File, cmd: &mut IoctlCommand) -> Result { + cmd.dispatch::(this, file) + } +} + +struct RustSemaphore { + _dev: Pin>>>, +} + +impl KernelModule for RustSemaphore { + fn init() -> Result { + pr_info!("Rust semaphore sample (init)\n"); + + let sema = Ref::try_new_and_init( + Semaphore { + // SAFETY: `condvar_init!` is called below. + changed: unsafe { CondVar::new() }, + + // SAFETY: `mutex_init!` is called below. + inner: unsafe { + Mutex::new(SemaphoreInner { + count: 0, + max_seen: 0, + }) + }, + }, + |mut sema| { + // SAFETY: `changed` is pinned when `sema` is. + let pinned = unsafe { sema.as_mut().map_unchecked_mut(|s| &mut s.changed) }; + condvar_init!(pinned, "Semaphore::changed"); + + // SAFETY: `inner` is pinned when `sema` is. 
+ let pinned = unsafe { sema.as_mut().map_unchecked_mut(|s| &mut s.inner) }; + mutex_init!(pinned, "Semaphore::inner"); + }, + )?; + + Ok(Self { + _dev: Registration::new_pinned::(c_str!("rust_semaphore"), None, sema)?, + }) + } +} + +impl Drop for RustSemaphore { + fn drop(&mut self) { + pr_info!("Rust semaphore sample (exit)\n"); + } +} + +const IOCTL_GET_READ_COUNT: u32 = 0x80086301; +const IOCTL_SET_READ_COUNT: u32 = 0x40086301; + +impl IoctlHandler for FileState { + type Target = Self; + + fn read(this: &Self, _: &File, cmd: u32, writer: &mut UserSlicePtrWriter) -> Result { + match cmd { + IOCTL_GET_READ_COUNT => { + writer.write(&this.read_count.load(Ordering::Relaxed))?; + Ok(0) + } + _ => Err(Error::EINVAL), + } + } + + fn write(this: &Self, _: &File, cmd: u32, reader: &mut UserSlicePtrReader) -> Result { + match cmd { + IOCTL_SET_READ_COUNT => { + this.read_count.store(reader.read()?, Ordering::Relaxed); + Ok(0) + } + _ => Err(Error::EINVAL), + } + } +} diff --git a/samples/rust/rust_semaphore_c.c b/samples/rust/rust_semaphore_c.c new file mode 100644 index 0000000000000..cdc121d4030d0 --- /dev/null +++ b/samples/rust/rust_semaphore_c.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Rust semaphore sample (in C, for comparison) + * + * This is a C implementation of `rust_semaphore.rs`. Refer to the description + * in that file for details on the device. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#define IOCTL_GET_READ_COUNT _IOR('c', 1, u64) +#define IOCTL_SET_READ_COUNT _IOW('c', 1, u64) + +struct semaphore_state { + struct kref ref; + struct miscdevice miscdev; + wait_queue_head_t changed; + struct mutex mutex; + size_t count; + size_t max_seen; +}; + +struct file_state { + atomic64_t read_count; + struct semaphore_state *shared; +}; + +static int semaphore_consume(struct semaphore_state *state) +{ + DEFINE_WAIT(wait); + + mutex_lock(&state->mutex); + while (state->count == 0) { + prepare_to_wait(&state->changed, &wait, TASK_INTERRUPTIBLE); + mutex_unlock(&state->mutex); + schedule(); + finish_wait(&state->changed, &wait); + if (signal_pending(current)) + return -EINTR; + mutex_lock(&state->mutex); + } + + state->count--; + mutex_unlock(&state->mutex); + + return 0; +} + +static int semaphore_open(struct inode *nodp, struct file *filp) +{ + struct semaphore_state *shared = + container_of(filp->private_data, struct semaphore_state, miscdev); + struct file_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + kref_get(&shared->ref); + state->shared = shared; + atomic64_set(&state->read_count, 0); + + filp->private_data = state; + + return 0; +} + +static ssize_t semaphore_write(struct file *filp, const char __user *buffer, size_t count, + loff_t *ppos) +{ + struct file_state *state = filp->private_data; + struct semaphore_state *shared = state->shared; + + mutex_lock(&shared->mutex); + + shared->count += count; + if (shared->count < count) + shared->count = SIZE_MAX; + + if (shared->count > shared->max_seen) + shared->max_seen = shared->count; + + mutex_unlock(&shared->mutex); + + wake_up_all(&shared->changed); + + return count; +} + +static ssize_t semaphore_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct file_state *state = filp->private_data; + char c = 0; 
+ int ret; + + if (count == 0 || *ppos > 0) + return 0; + + ret = semaphore_consume(state->shared); + if (ret) + return ret; + + if (copy_to_user(buffer, &c, sizeof(c))) + return -EFAULT; + + atomic64_add(1, &state->read_count); + *ppos += 1; + return 1; +} + +static long semaphore_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct file_state *state = filp->private_data; + void __user *buffer = (void __user *)arg; + u64 value; + + switch (cmd) { + case IOCTL_GET_READ_COUNT: + value = atomic64_read(&state->read_count); + if (copy_to_user(buffer, &value, sizeof(value))) + return -EFAULT; + return 0; + case IOCTL_SET_READ_COUNT: + if (copy_from_user(&value, buffer, sizeof(value))) + return -EFAULT; + atomic64_set(&state->read_count, value); + return 0; + default: + return -EINVAL; + } +} + +static void semaphore_free(struct kref *kref) +{ + struct semaphore_state *device; + + device = container_of(kref, struct semaphore_state, ref); + kfree(device); +} + +static int semaphore_release(struct inode *nodp, struct file *filp) +{ + struct file_state *state = filp->private_data; + + kref_put(&state->shared->ref, semaphore_free); + kfree(state); + return 0; +} + +static const struct file_operations semaphore_fops = { + .owner = THIS_MODULE, + .open = semaphore_open, + .read = semaphore_read, + .write = semaphore_write, + .compat_ioctl = semaphore_ioctl, + .release = semaphore_release, +}; + +static struct semaphore_state *device; + +static int __init semaphore_init(void) +{ + int ret; + struct semaphore_state *state; + + pr_info("Rust semaphore sample (in C, for comparison) (init)\n"); + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + mutex_init(&state->mutex); + kref_init(&state->ref); + init_waitqueue_head(&state->changed); + + state->miscdev.fops = &semaphore_fops; + state->miscdev.minor = MISC_DYNAMIC_MINOR; + state->miscdev.name = "semaphore"; + + ret = misc_register(&state->miscdev); + if (ret < 0) { + 
kfree(state); + return ret; + } + + device = state; + + return 0; +} + +static void __exit semaphore_exit(void) +{ + pr_info("Rust semaphore sample (in C, for comparison) (exit)\n"); + + misc_deregister(&device->miscdev); + kref_put(&device->ref, semaphore_free); +} + +module_init(semaphore_init); +module_exit(semaphore_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Rust for Linux Contributors"); +MODULE_DESCRIPTION("Rust semaphore sample (in C, for comparison)"); diff --git a/samples/rust/rust_stack_probing.rs b/samples/rust/rust_stack_probing.rs new file mode 100644 index 0000000000000..fcb87a53ce7f1 --- /dev/null +++ b/samples/rust/rust_stack_probing.rs @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust stack probing sample + +#![no_std] +#![feature(allocator_api, global_asm)] +#![feature(bench_black_box)] + +use kernel::prelude::*; + +module! { + type: RustStackProbing, + name: b"rust_stack_probing", + author: b"Rust for Linux Contributors", + description: b"Rust stack probing sample", + license: b"GPL v2", +} + +struct RustStackProbing; + +impl KernelModule for RustStackProbing { + fn init() -> Result { + pr_info!("Rust stack probing sample (init)\n"); + + // Including this large variable on the stack will trigger + // stack probing on the supported archs. + // This will verify that stack probing does not lead to + // any errors if we need to link `__rust_probestack`. + let x: [u64; 514] = core::hint::black_box([5; 514]); + pr_info!("Large array has length: {}\n", x.len()); + + Ok(RustStackProbing) + } +} + +impl Drop for RustStackProbing { + fn drop(&mut self) { + pr_info!("Rust stack probing sample (exit)\n"); + } +} diff --git a/samples/rust/rust_sync.rs b/samples/rust/rust_sync.rs new file mode 100644 index 0000000000000..40bbfec9ad278 --- /dev/null +++ b/samples/rust/rust_sync.rs @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! 
Rust synchronisation primitives sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; +use kernel::{ + condvar_init, mutex_init, spinlock_init, + sync::{CondVar, Mutex, SpinLock}, +}; + +module! { + type: RustSync, + name: b"rust_sync", + author: b"Rust for Linux Contributors", + description: b"Rust synchronisation primitives sample", + license: b"GPL v2", +} + +struct RustSync; + +impl KernelModule for RustSync { + fn init() -> Result { + pr_info!("Rust synchronisation primitives sample (init)\n"); + + // Test mutexes. + { + // SAFETY: `init` is called below. + let mut data = Pin::from(Box::try_new(unsafe { Mutex::new(0) })?); + mutex_init!(data.as_mut(), "RustSync::init::data1"); + *data.lock() = 10; + pr_info!("Value: {}\n", *data.lock()); + + // SAFETY: `init` is called below. + let mut cv = Pin::from(Box::try_new(unsafe { CondVar::new() })?); + condvar_init!(cv.as_mut(), "RustSync::init::cv1"); + + { + let mut guard = data.lock(); + while *guard != 10 { + let _ = cv.wait(&mut guard); + } + } + cv.notify_one(); + cv.notify_all(); + cv.free_waiters(); + } + + // Test spinlocks. + { + // SAFETY: `init` is called below. + let mut data = Pin::from(Box::try_new(unsafe { SpinLock::new(0) })?); + spinlock_init!(data.as_mut(), "RustSync::init::data2"); + *data.lock() = 10; + pr_info!("Value: {}\n", *data.lock()); + + // SAFETY: `init` is called below. 
+ let mut cv = Pin::from(Box::try_new(unsafe { CondVar::new() })?); + condvar_init!(cv.as_mut(), "RustSync::init::cv2"); + { + let mut guard = data.lock(); + while *guard != 10 { + let _ = cv.wait(&mut guard); + } + } + cv.notify_one(); + cv.notify_all(); + cv.free_waiters(); + } + + Ok(RustSync) + } +} + +impl Drop for RustSync { + fn drop(&mut self) { + pr_info!("Rust synchronisation primitives sample (exit)\n"); + } +} From 2a8394d90e0fb10bd07e1983840914a0f90c79b2 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:26:15 +0200 Subject: [PATCH 073/851] scripts: add `generate_rust_analyzer.py` The `generate_rust_analyzer.py` script generates the configuration file (`rust-project.json`) for rust-analyzer. rust-analyzer is a modular compiler frontend for the Rust language. It provides an LSP server which can be used in editors such as VS Code, Emacs or Vim. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 143 ++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100755 scripts/generate_rust_analyzer.py 
diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py new file mode 100755 index 0000000000000..72c453e1aea04 --- /dev/null +++ b/scripts/generate_rust_analyzer.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +"""generate_rust_analyzer - Generates the `rust-project.json` file for `rust-analyzer`. +""" + +import argparse +import json +import logging +import pathlib +import sys + +def generate_crates(srctree, objtree, sysroot_src, bindings_file): + # Generate the configuration list. + cfg = [] + with open(objtree / "include" / "generated" / "rustc_cfg") as fd: + for line in fd: + line = line.replace("--cfg=", "") + line = line.replace("\n", "") + cfg.append(line) + + # Now fill the crates list -- dependencies need to come first. + # + # Avoid O(n^2) iterations by keeping a map of indexes. + crates = [] + crates_indexes = {} + + def append_crate(display_name, root_module, is_workspace_member, deps, cfg): + crates_indexes[display_name] = len(crates) + crates.append({ + "display_name": display_name, + "root_module": str(root_module), + "is_workspace_member": is_workspace_member, + "deps": [{"crate": crates_indexes[dep], "name": dep} for dep in deps], + "cfg": cfg, + "edition": "2018", + "env": { + "RUST_MODFILE": "This is only for rust-analyzer" + } + }) + + # First, the ones in `rust/` since they are a bit special. 
+ append_crate( + "core", + sysroot_src / "core" / "src" / "lib.rs", + False, + [], + [], + ) + + append_crate( + "compiler_builtins", + srctree / "rust" / "compiler_builtins.rs", + True, + [], + [], + ) + + append_crate( + "alloc", + srctree / "rust" / "alloc" / "lib.rs", + True, + ["core", "compiler_builtins"], + [], + ) + + append_crate( + "macros", + srctree / "rust" / "macros" / "lib.rs", + True, + [], + [], + ) + crates[-1]["proc_macro_dylib_path"] = "rust/libmacros.so" + + append_crate( + "build_error", + srctree / "rust" / "build_error.rs", + True, + ["core", "compiler_builtins"], + [], + ) + + append_crate( + "kernel", + srctree / "rust" / "kernel" / "lib.rs", + True, + ["core", "alloc", "macros", "build_error"], + cfg, + ) + crates[-1]["env"]["RUST_BINDINGS_FILE"] = str(bindings_file.resolve(True)) + crates[-1]["source"] = { + "include_dirs": [ + str(srctree / "rust" / "kernel"), + str(objtree / "rust") + ], + "exclude_dirs": [], + } + + # Then, the rest outside of `rust/`. + # + # We explicitly mention the top-level folders we want to cover. + for folder in ("samples", "drivers"): + for path in (srctree / folder).rglob("*.rs"): + logging.info("Checking %s", path) + name = path.name.replace(".rs", "") + + # Skip those that are not crate roots. 
+ if f"{name}.o" not in open(path.parent / "Makefile").read(): + continue + + logging.info("Adding %s", name) + append_crate( + name, + path, + True, + ["core", "alloc", "kernel"], + cfg, + ) + + return crates + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--verbose', '-v', action='store_true') + parser.add_argument("srctree", type=pathlib.Path) + parser.add_argument("objtree", type=pathlib.Path) + parser.add_argument("sysroot_src", type=pathlib.Path) + parser.add_argument("bindings_file", type=pathlib.Path) + args = parser.parse_args() + + logging.basicConfig( + format="[%(asctime)s] [%(levelname)s] %(message)s", + level=logging.INFO if args.verbose else logging.WARNING + ) + + rust_project = { + "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.bindings_file), + "sysroot_src": str(args.sysroot_src), + } + + json.dump(rust_project, sys.stdout, sort_keys=True, indent=4) + +if __name__ == "__main__": + main() From 5d3986cf8ed63ff8d86270e578649e71143112d6 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:24:21 +0200 Subject: [PATCH 074/851] MAINTAINERS: Rust Miguel, Alex and Wedson will be maintaining the Rust support. 
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Signed-off-by: Miguel Ojeda --- MAINTAINERS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0cce91cd56243..a439204d9be59 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15894,6 +15894,20 @@ L: linux-rdma@vger.kernel.org S: Maintained F: drivers/infiniband/ulp/rtrs/ +RUST +M: Miguel Ojeda +M: Alex Gaynor +M: Wedson Almeida Filho +L: rust-for-linux@vger.kernel.org +S: Supported +W: https://github.com/Rust-for-Linux/linux +B: https://github.com/Rust-for-Linux/linux/issues +T: git https://github.com/Rust-for-Linux/linux.git rust-next +F: rust/ +F: samples/rust/ +F: Documentation/rust/ +K: \b(?i:rust)\b + RXRPC SOCKETS (AF_RXRPC) M: David Howells M: Marc Dionne From 20921e7eaf7b5c03addccfa0215986a3df3fafa9 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Sat, 26 Jun 2021 18:20:52 +0800 Subject: [PATCH 075/851] parisc: Replace symbolic permissions with octal permissions Resolve following checkpatch issue, Replace symbolic permissions with octal permissions Signed-off-by: Jinchao Wang Signed-off-by: Helge Deller --- drivers/parisc/led.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index 36c6613f7a36b..cf91cb024be30 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -250,14 +250,14 @@ static int __init led_create_procfs(void) if (!lcd_no_led_support) { - ent = proc_create_data("led", S_IRUGO|S_IWUSR, proc_pdc_root, + ent = proc_create_data("led", 0644, proc_pdc_root, &led_proc_ops, (void *)LED_NOLCD); /* LED */ if (!ent) return -1; } if (led_type == LED_HASLCD) { - ent = proc_create_data("lcd", S_IRUGO|S_IWUSR, proc_pdc_root, + ent = proc_create_data("lcd", 0644, proc_pdc_root, &led_proc_ops, (void *)LED_HASLCD); /* LCD */ if (!ent) return -1; } From 95b115332a835fb0cbd36dfabacf1c57d915e705 Mon Sep 17 00:00:00 2001 
From: Xianting Tian Date: Wed, 30 Jun 2021 10:33:36 -0400 Subject: [PATCH 076/851] zonefs: remove redundant null bio check bio_alloc() with __GFP_DIRECT_RECLAIM, which is included in GFP_NOFS, never fails, see comments in bio_alloc_bioset(). Signed-off-by: Xianting Tian Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index cd145d318b178..d6d08da505f38 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -705,9 +705,6 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) return 0; bio = bio_alloc(GFP_NOFS, nr_pages); - if (!bio) - return -ENOMEM; - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = zi->i_zsector; bio->bi_write_hint = iocb->ki_hint; From ca2b19a4bdb6200dc1d64619ea4b8de2a84a0439 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 7 Jul 2021 15:38:08 +0200 Subject: [PATCH 077/851] parisc: Increase gcc stack frame check to 2048 for 32- and 64-bit parisc uses much bigger frames than other architectures, so increase the stack frame check value to 2048 to avoid compiler warnings. Cc: Arnd Bergmann Cc: Abd-Alrhman Masalkhi Cc: Christoph Hellwig Signed-off-by: Helge Deller --- lib/Kconfig.debug | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 678c13967580e..1d99c3194e18f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -335,9 +335,8 @@ config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 default 2048 if GCC_PLUGIN_LATENT_ENTROPY - default 1280 if (!64BIT && PARISC) - default 1024 if (!64BIT && !PARISC) - default 2048 if 64BIT + default 1024 if !(64BIT || PARISC) + default 2048 if (64BIT || PARISC) help Tell gcc to warn at build time for stack frames larger than this. Setting this too low will cause a lot of warnings. 
From b1e32e8c629b22845dadf4afb7f3af1efeec5f34 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 26 May 2021 00:03:37 -0700 Subject: [PATCH 078/851] xtensa: fix kconfig unmet dependency warning for HAVE_FUTEX_CMPXCHG XTENSA should only select HAVE_FUTEX_CMPXCHG when FUTEX is set/enabled. This prevents a kconfig warning. WARNING: unmet direct dependencies detected for HAVE_FUTEX_CMPXCHG Depends on [n]: FUTEX [=n] Selected by [y]: - XTENSA [=y] && !MMU [=n] Fixes: d951ba21b959 ("xtensa: nommu: select HAVE_FUTEX_CMPXCHG") Signed-off-by: Randy Dunlap Cc: Max Filippov Cc: Chris Zankel Cc: linux-xtensa@linux-xtensa.org Message-Id: <20210526070337.28130-1-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 2332b21569938..1bdb55c2d0c14 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -30,7 +30,7 @@ config XTENSA select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER - select HAVE_FUTEX_CMPXCHG if !MMU + select HAVE_FUTEX_CMPXCHG if !MMU && FUTEX select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_PCI From ac637a0ada717bdf798f9903d94e3ac65c357423 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 9 Jul 2021 04:13:23 -0700 Subject: [PATCH 079/851] xtensa: add fairness to IRQ handling Track which IRQs have been served at each level to make sure that no IRQ is served more than once while other IRQs at the same level are pending. 
Signed-off-by: Max Filippov --- arch/xtensa/kernel/traps.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index efc3a29cde803..874b6efc6fb31 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -268,6 +268,7 @@ void do_interrupt(struct pt_regs *regs) XCHAL_INTLEVEL7_MASK, }; struct pt_regs *old_regs; + unsigned unhandled = ~0u; trace_hardirqs_off(); @@ -283,6 +284,10 @@ void do_interrupt(struct pt_regs *regs) for (level = LOCKLEVEL; level > 0; --level) { if (int_at_level & int_level_mask[level]) { int_at_level &= int_level_mask[level]; + if (int_at_level & unhandled) + int_at_level &= unhandled; + else + unhandled |= int_level_mask[level]; break; } } @@ -290,6 +295,8 @@ void do_interrupt(struct pt_regs *regs) if (level == 0) break; + /* clear lowest pending irq in the unhandled mask */ + unhandled ^= (int_at_level & -int_at_level); do_IRQ(__ffs(int_at_level), regs); } From 53a05c8f6e8e8138a30e5859e760cef483a09e14 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Sun, 11 Jul 2021 16:34:30 +0200 Subject: [PATCH 080/851] arm64: dts: rockchip: remove interrupt-names from iommu nodes The iommu driver gets the interrupts by platform_get_irq(), so remove interrupt-names property from iommu nodes. 
Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/20210711143430.14347-2-jbx6244@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 5 ----- arch/arm64/boot/dts/rockchip/rk3368.dtsi | 5 ----- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 8 -------- 3 files changed, 18 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 8c821acb21ffb..becc1c61b1828 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -623,7 +623,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff330200 0 0x100>; interrupts = ; - interrupt-names = "h265e_mmu"; clocks = <&cru ACLK_H265>, <&cru PCLK_H265>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -634,7 +633,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff340800 0x0 0x40>; interrupts = ; - interrupt-names = "vepu_mmu"; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -656,7 +654,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff350800 0x0 0x40>; interrupts = ; - interrupt-names = "vpu_mmu"; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -667,7 +664,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff360480 0x0 0x40>, <0x0 0xff3604c0 0x0 0x40>; interrupts = ; - interrupt-names = "rkvdec_mmu"; clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -700,7 +696,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff373f00 0x0 0x100>; interrupts = ; - interrupt-names = "vop_mmu"; clocks = <&cru ACLK_VOP>, <&cru HCLK_VOP>; clock-names = "aclk", "iface"; #iommu-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index 4c64fbefb4834..4217897cd4544 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -709,7 +709,6 @@ compatible = 
"rockchip,iommu"; reg = <0x0 0xff900800 0x0 0x100>; interrupts = ; - interrupt-names = "iep_mmu"; clocks = <&cru ACLK_IEP>, <&cru HCLK_IEP>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -721,7 +720,6 @@ reg = <0x0 0xff914000 0x0 0x100>, <0x0 0xff915000 0x0 0x100>; interrupts = ; - interrupt-names = "isp_mmu"; clocks = <&cru ACLK_ISP>, <&cru HCLK_ISP>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -733,7 +731,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff930300 0x0 0x100>; interrupts = ; - interrupt-names = "vop_mmu"; clocks = <&cru ACLK_VOP>, <&cru HCLK_VOP>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -745,7 +742,6 @@ reg = <0x0 0xff9a0440 0x0 0x40>, <0x0 0xff9a0480 0x0 0x40>; interrupts = ; - interrupt-names = "hevc_mmu"; clocks = <&cru ACLK_VIDEO>, <&cru HCLK_VIDEO>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -757,7 +753,6 @@ reg = <0x0 0xff9a0800 0x0 0x100>; interrupts = , ; - interrupt-names = "vepu_mmu", "vdpu_mmu"; clocks = <&cru ACLK_VIDEO>, <&cru HCLK_VIDEO>; clock-names = "aclk", "iface"; #iommu-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 3871c7fd83b00..aa5d7dca34320 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1240,7 +1240,6 @@ reg = <0x0 0xff650000 0x0 0x800>; interrupts = , ; - interrupt-names = "vepu", "vdpu"; clocks = <&cru ACLK_VCODEC>, <&cru HCLK_VCODEC>; clock-names = "aclk", "hclk"; iommus = <&vpu_mmu>; @@ -1251,7 +1250,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff650800 0x0 0x40>; interrupts = ; - interrupt-names = "vpu_mmu"; clocks = <&cru ACLK_VCODEC>, <&cru HCLK_VCODEC>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -1273,7 +1271,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff660480 0x0 0x40>, <0x0 0xff6604c0 0x0 0x40>; interrupts = ; - interrupt-names = "vdec_mmu"; clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>; clock-names = "aclk", "iface"; power-domains 
= <&power RK3399_PD_VDU>; @@ -1284,7 +1281,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff670800 0x0 0x40>; interrupts = ; - interrupt-names = "iep_mmu"; clocks = <&cru ACLK_IEP>, <&cru HCLK_IEP>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -1666,7 +1662,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff8f3f00 0x0 0x100>; interrupts = ; - interrupt-names = "vopl_mmu"; clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>; clock-names = "aclk", "iface"; power-domains = <&power RK3399_PD_VOPL>; @@ -1723,7 +1718,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff903f00 0x0 0x100>; interrupts = ; - interrupt-names = "vopb_mmu"; clocks = <&cru ACLK_VOP0>, <&cru HCLK_VOP0>; clock-names = "aclk", "iface"; power-domains = <&power RK3399_PD_VOPB>; @@ -1761,7 +1755,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff914000 0x0 0x100>, <0x0 0xff915000 0x0 0x100>; interrupts = ; - interrupt-names = "isp0_mmu"; clocks = <&cru ACLK_ISP0_WRAPPER>, <&cru HCLK_ISP0_WRAPPER>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -1773,7 +1766,6 @@ compatible = "rockchip,iommu"; reg = <0x0 0xff924000 0x0 0x100>, <0x0 0xff925000 0x0 0x100>; interrupts = ; - interrupt-names = "isp1_mmu"; clocks = <&cru ACLK_ISP1_WRAPPER>, <&cru HCLK_ISP1_WRAPPER>; clock-names = "aclk", "iface"; #iommu-cells = <0>; From 3a9476ad636e4044b5175d1cd1e1ce7ef1ba448c Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Sun, 11 Jul 2021 16:59:00 +0200 Subject: [PATCH 081/851] arm64: dts: rockchip: rename flash nodenames Nodes with compatible "jedec,spi-nor" are now checked with jedec,spi-nor.yaml and mtd.yaml. The pattern is now "^flash(@.*)?$", so change that for the boards with a Rockchip SoC. 
Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/20210711145900.15443-1-jbx6244@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 1b0f7e4551ea4..f69a38f42d2d5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -345,7 +345,7 @@ &spi0 { status = "okay"; - spiflash@0 { + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index c1bcc8ca3769d..e310b51ab5789 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -543,7 +543,7 @@ ap_i2c_audio: &i2c8 { pinctrl-names = "default", "sleep"; pinctrl-1 = <&spi1_sleep>; - spiflash@0 { + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; From ac0af67f8c898f38427c7d15e3598773c19cd6e8 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 1 Jul 2021 16:41:09 +0200 Subject: [PATCH 082/851] arm64: dts: rockchip: remove clock_in_out from gmac2phy node in rk3318-a95x-z2.dts Recently a clock_in_out property was added to the gmac2phy node in rk3328.dtsi, so now the clock_in_out in rk3318-a95x-z2.dts can be removed. 
Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/20210701144110.12333-1-jbx6244@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts index 763cf9b4620eb..d41f786b2f4bc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts @@ -185,7 +185,6 @@ assigned-clock-parents = <&cru SCLK_MAC2PHY_SRC>; assigned-clock-rate = <50000000>; assigned-clocks = <&cru SCLK_MAC2PHY>; - clock_in_out = "output"; status = "okay"; }; From 1aeab005562f8dc26f7df4eb76b130b591b56f63 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 1 Jul 2021 16:41:10 +0200 Subject: [PATCH 083/851] arm64: dts: rockchip: remove ddc-i2c-scl-* properties from rk3318-a95x-z2.dts The ddc-i2c-scl-* properties in the hdmi node are not in use in the mainline kernel, so remove them. Reported-by: Alex Bee Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/20210701144110.12333-2-jbx6244@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts index d41f786b2f4bc..43c928ac98f0f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts @@ -193,8 +193,6 @@ }; &hdmi { - ddc-i2c-scl-high-time-ns = <9625>; - ddc-i2c-scl-low-time-ns = <10000>; status = "okay"; }; From 02c6edd4b1a07f24b187a550d413a07260eb696d Mon Sep 17 00:00:00 2001 From: David Bartley Date: Sun, 16 May 2021 23:41:31 -0700 Subject: [PATCH 084/851] hwmon: (k10temp) support Zen3 APUs Add support for Zen3 Ryzen APU. 
Signed-off-by: David Bartley Link: https://lore.kernel.org/r/20210517064131.4369-1-andareed@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/k10temp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 5ff3669c2b608..fe3d92152e359 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -450,6 +450,7 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id) switch (boot_cpu_data.x86_model) { case 0x0 ... 0x1: /* Zen3 SP3/TR */ case 0x21: /* Zen3 Ryzen Desktop */ + case 0x50: /* Zen3 Ryzen APU */ k10temp_get_ccd_support(pdev, data, 8); break; } @@ -491,6 +492,7 @@ static const struct pci_device_id k10temp_id_table[] = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) }, { PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) }, {} }; From 0f79ce970e79ffb771733f9634d5918d0eb3e30a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 8 Jan 2021 12:22:33 +0100 Subject: [PATCH 085/851] gnss: drop stray semicolons Drop semicolons after function definitions that have managed to sneak in and get reproduced. 
Signed-off-by: Johan Hovold --- drivers/gnss/mtk.c | 2 +- drivers/gnss/serial.c | 2 +- drivers/gnss/sirf.c | 2 +- drivers/gnss/ubx.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gnss/mtk.c b/drivers/gnss/mtk.c index d1fc55560daf6..c62b1211f4fe4 100644 --- a/drivers/gnss/mtk.c +++ b/drivers/gnss/mtk.c @@ -126,7 +126,7 @@ static void mtk_remove(struct serdev_device *serdev) if (data->vbackup) regulator_disable(data->vbackup); gnss_serial_free(gserial); -}; +} #ifdef CONFIG_OF static const struct of_device_id mtk_of_match[] = { diff --git a/drivers/gnss/serial.c b/drivers/gnss/serial.c index def64b36d9941..5d8e9bfb24d02 100644 --- a/drivers/gnss/serial.c +++ b/drivers/gnss/serial.c @@ -165,7 +165,7 @@ void gnss_serial_free(struct gnss_serial *gserial) { gnss_put_device(gserial->gdev); kfree(gserial); -}; +} EXPORT_SYMBOL_GPL(gnss_serial_free); int gnss_serial_register(struct gnss_serial *gserial) diff --git a/drivers/gnss/sirf.c b/drivers/gnss/sirf.c index 2ecb1d3e8eeb3..bcb53ccfee4d5 100644 --- a/drivers/gnss/sirf.c +++ b/drivers/gnss/sirf.c @@ -551,7 +551,7 @@ static void sirf_remove(struct serdev_device *serdev) regulator_disable(data->vcc); gnss_put_device(data->gdev); -}; +} #ifdef CONFIG_OF static const struct of_device_id sirf_of_match[] = { diff --git a/drivers/gnss/ubx.c b/drivers/gnss/ubx.c index 7b05bc40532e5..c951be202ca25 100644 --- a/drivers/gnss/ubx.c +++ b/drivers/gnss/ubx.c @@ -126,7 +126,7 @@ static void ubx_remove(struct serdev_device *serdev) if (data->v_bckp) regulator_disable(data->v_bckp); gnss_serial_free(gserial); -}; +} #ifdef CONFIG_OF static const struct of_device_id ubx_of_match[] = { From f0d4baf574a1a59f4a4ca005b3f10b34d3c5da27 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 4 Jun 2021 11:37:45 +0100 Subject: [PATCH 086/851] fscache: Select netfs stats if fscache stats are enabled Unconditionally select the stats produced by the netfs lib if fscache stats are enabled as the former are displayed in 
the latter's procfile. Signed-off-by: David Howells cc: linux-cachefs@redhat.com --- fs/fscache/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 427efa73b9bdf..92c87d8e09130 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -14,6 +14,7 @@ config FSCACHE config FSCACHE_STATS bool "Gather statistical information on local caching" depends on FSCACHE && PROC_FS + select NETFS_STATS help This option causes statistical information to be gathered on local caching and exported through file: From a06b480def4eccd8901bf20c4a0980319966bec0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 May 2021 14:10:09 +0100 Subject: [PATCH 087/851] netfs: Move cookie debug ID to struct netfs_cache_resources Move the cookie debug ID from struct netfs_read_request to struct netfs_cache_resources and drop the 'cookie_' prefix. This makes it available for things that want to use netfs_cache_resources without having a netfs_read_request. Signed-off-by: David Howells --- fs/cachefiles/io.c | 2 +- include/linux/netfs.h | 2 +- include/trace/events/netfs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index b13fb45fc3f33..ca68bb97ca00c 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -410,7 +410,7 @@ int cachefiles_begin_read_operation(struct netfs_read_request *rreq, rreq->cache_resources.cache_priv = op; rreq->cache_resources.cache_priv2 = file; rreq->cache_resources.ops = &cachefiles_netfs_cache_ops; - rreq->cookie_debug_id = object->fscache.debug_id; + rreq->cache_resources.debug_id = object->fscache.debug_id; _leave(""); return 0; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 9062adfa2fb9d..5d6a4158a9a6f 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -102,6 +102,7 @@ struct netfs_cache_resources { const struct netfs_cache_ops *ops; void *cache_priv; void *cache_priv2; + unsigned int debug_id; /* Cookie debug 
ID */ }; /* @@ -137,7 +138,6 @@ struct netfs_read_request { struct list_head subrequests; /* Requests to fetch I/O from disk or net */ void *netfs_priv; /* Private data for the netfs */ unsigned int debug_id; - unsigned int cookie_debug_id; atomic_t nr_rd_ops; /* Number of read ops in progress */ atomic_t nr_wr_ops; /* Number of write ops in progress */ size_t submitted; /* Amount submitted for I/O so far */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index de1c64635e42b..4d470bffd9f13 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -139,7 +139,7 @@ TRACE_EVENT(netfs_read, TP_fast_assign( __entry->rreq = rreq->debug_id; - __entry->cookie = rreq->cookie_debug_id; + __entry->cookie = rreq->cache_resources.debug_id; __entry->start = start; __entry->len = len; __entry->what = what; From 20d7c833de3b7c70d2199a205d1c798eb72ab339 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 3 Jun 2021 10:51:28 +0100 Subject: [PATCH 088/851] cachefiles: Use file_inode() rather than accessing ->f_inode Use the file_inode() helper rather than accessing ->f_inode directly. Signed-off-by: David Howells --- fs/cachefiles/io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index ca68bb97ca00c..fac2e8e7b533e 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -70,7 +70,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres, _enter("%pD,%li,%llx,%zx/%llx", file, file_inode(file)->i_ino, start_pos, len, - i_size_read(file->f_inode)); + i_size_read(file_inode(file))); /* If the caller asked us to seek for data before doing the read, then * we should do that now. If we find a gap, we fill it with zeros. 
@@ -194,7 +194,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres, _enter("%pD,%li,%llx,%zx/%llx", file, file_inode(file)->i_ino, start_pos, len, - i_size_read(file->f_inode)); + i_size_read(file_inode(file))); ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); if (!ki) From bcfd29a8c2177f32db17f576f6a903390c337dbd Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 10 Feb 2020 10:00:22 +0000 Subject: [PATCH 089/851] fscache: Add a cookie debug ID and use that in traces Add a cookie debug ID and use that in traces and in procfiles rather than displaying the (hashed) pointer to the cookie. This is easier to correlate and we don't lose anything when interpreting oops output since that shows unhashed addresses and registers that aren't comparable to the hashed values. Signed-off-by: David Howells --- fs/fscache/cookie.c | 29 +++++--- fs/fscache/fsdef.c | 1 + fs/fscache/object-list.c | 14 ++-- include/linux/fscache.h | 1 + include/trace/events/cachefiles.h | 68 +++++++++--------- include/trace/events/fscache.h | 116 +++++++++++++++--------------- 6 files changed, 121 insertions(+), 108 deletions(-) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 751bc5b1cddf9..f2be98d2c64dd 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -29,21 +29,29 @@ static int fscache_attach_object(struct fscache_cookie *cookie, static void fscache_print_cookie(struct fscache_cookie *cookie, char prefix) { - struct hlist_node *object; + struct fscache_object *object; + struct hlist_node *o; const u8 *k; unsigned loop; - pr_err("%c-cookie c=%p [p=%p fl=%lx nc=%u na=%u]\n", - prefix, cookie, cookie->parent, cookie->flags, + pr_err("%c-cookie c=%08x [p=%08x fl=%lx nc=%u na=%u]\n", + prefix, + cookie->debug_id, + cookie->parent ? 
cookie->parent->debug_id : 0, + cookie->flags, atomic_read(&cookie->n_children), atomic_read(&cookie->n_active)); - pr_err("%c-cookie d=%p n=%p\n", - prefix, cookie->def, cookie->netfs_data); + pr_err("%c-cookie d=%p{%s} n=%p\n", + prefix, + cookie->def, + cookie->def ? cookie->def->name : "?", + cookie->netfs_data); - object = READ_ONCE(cookie->backing_objects.first); - if (object) - pr_err("%c-cookie o=%p\n", - prefix, hlist_entry(object, struct fscache_object, cookie_link)); + o = READ_ONCE(cookie->backing_objects.first); + if (o) { + object = hlist_entry(o, struct fscache_object, cookie_link); + pr_err("%c-cookie o=%u\n", prefix, object->debug_id); + } pr_err("%c-key=[%u] '", prefix, cookie->key_len); k = (cookie->key_len <= sizeof(cookie->inline_key)) ? @@ -129,6 +137,8 @@ static long fscache_compare_cookie(const struct fscache_cookie *a, return memcmp(ka, kb, a->key_len); } +static atomic_t fscache_cookie_debug_id = ATOMIC_INIT(1); + /* * Allocate a cookie. */ @@ -163,6 +173,7 @@ struct fscache_cookie *fscache_alloc_cookie( atomic_set(&cookie->usage, 1); atomic_set(&cookie->n_children, 0); + cookie->debug_id = atomic_inc_return(&fscache_cookie_debug_id); /* We keep the active count elevated until relinquishment to prevent an * attempt to wake up every time the object operations queue quiesces. 
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c index 09ed8795ad86c..5f8f6fe243fe2 100644 --- a/fs/fscache/fsdef.c +++ b/fs/fscache/fsdef.c @@ -45,6 +45,7 @@ static struct fscache_cookie_def fscache_fsdef_index_def = { }; struct fscache_cookie fscache_fsdef_index = { + .debug_id = 1, .usage = ATOMIC_INIT(1), .n_active = ATOMIC_INIT(1), .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index e106a1a1600d8..1a0dc32c0a332 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -170,7 +170,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) if ((unsigned long) v == 1) { seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" " EM EV FL S" - " | NETFS_COOKIE_DEF TY FL NETFS_DATA"); + " | COOKIE NETFS_COOKIE_DEF TY FL NETFS_DATA"); if (config & (FSCACHE_OBJLIST_CONFIG_KEY | FSCACHE_OBJLIST_CONFIG_AUX)) seq_puts(m, " "); @@ -189,7 +189,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) if ((unsigned long) v == 2) { seq_puts(m, "======== ======== ==== ===== === === === == =====" " == == == =" - " | ================ == == ================"); + " | ======== ================ == === ================"); if (config & (FSCACHE_OBJLIST_CONFIG_KEY | FSCACHE_OBJLIST_CONFIG_AUX)) seq_puts(m, " ================"); @@ -231,9 +231,9 @@ static int fscache_objlist_show(struct seq_file *m, void *v) } seq_printf(m, - "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %2lx %1x | ", + "%08x %08x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %2lx %1x | ", obj->debug_id, - obj->parent ? obj->parent->debug_id : -1, + obj->parent ? 
obj->parent->debug_id : UINT_MAX, obj->state->short_name, obj->n_children, obj->n_ops, @@ -246,7 +246,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) obj->flags, work_busy(&obj->work)); - if (fscache_use_cookie(obj)) { + if (obj->cookie) { uint16_t keylen = 0, auxlen = 0; switch (cookie->type) { @@ -263,7 +263,8 @@ static int fscache_objlist_show(struct seq_file *m, void *v) break; } - seq_printf(m, "%-16s %s %2lx %16p", + seq_printf(m, "%08x %-16s %s %3lx %16p", + cookie->debug_id, cookie->def->name, type, cookie->flags, @@ -292,7 +293,6 @@ static int fscache_objlist_show(struct seq_file *m, void *v) } seq_puts(m, "\n"); - fscache_unuse_cookie(obj); } else { seq_puts(m, "\n"); } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index abc1c4737fb88..ba58c427cf9a2 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -126,6 +126,7 @@ struct fscache_cookie { atomic_t usage; /* number of users of this cookie */ atomic_t n_children; /* number of children of this cookie */ atomic_t n_active; /* number of active users of netfs ptrs */ + unsigned int debug_id; spinlock_t lock; spinlock_t stores_lock; /* lock on page store tree */ struct hlist_head backing_objects; /* object(s) backing this file/index */ diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 5d9de24cb9c01..9a448fe9355db 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -78,20 +78,20 @@ TRACE_EVENT(cachefiles_ref, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, obj ) + __field(unsigned int, cookie ) __field(enum cachefiles_obj_ref_trace, why ) __field(int, usage ) ), TP_fast_assign( - __entry->obj = obj; - __entry->cookie = cookie; + __entry->obj = obj->fscache.debug_id; + __entry->cookie = cookie->debug_id; __entry->usage = usage; __entry->why = why; ), - TP_printk("c=%p o=%p 
u=%d %s", + TP_printk("c=%08x o=%08x u=%d %s", __entry->cookie, __entry->obj, __entry->usage, __print_symbolic(__entry->why, cachefiles_obj_ref_traces)) ); @@ -104,18 +104,18 @@ TRACE_EVENT(cachefiles_lookup, TP_ARGS(obj, de, inode), TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) __field(struct dentry *, de ) __field(struct inode *, inode ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; __entry->inode = inode; ), - TP_printk("o=%p d=%p i=%p", + TP_printk("o=%08x d=%p i=%p", __entry->obj, __entry->de, __entry->inode) ); @@ -126,18 +126,18 @@ TRACE_EVENT(cachefiles_mkdir, TP_ARGS(obj, de, ret), TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) __field(struct dentry *, de ) __field(int, ret ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; __entry->ret = ret; ), - TP_printk("o=%p d=%p r=%u", + TP_printk("o=%08x d=%p r=%u", __entry->obj, __entry->de, __entry->ret) ); @@ -148,18 +148,18 @@ TRACE_EVENT(cachefiles_create, TP_ARGS(obj, de, ret), TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) __field(struct dentry *, de ) __field(int, ret ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; __entry->ret = ret; ), - TP_printk("o=%p d=%p r=%u", + TP_printk("o=%08x d=%p r=%u", __entry->obj, __entry->de, __entry->ret) ); @@ -172,18 +172,18 @@ TRACE_EVENT(cachefiles_unlink, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) __field(struct dentry *, de ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; __entry->why = why; ), - TP_printk("o=%p d=%p w=%s", + TP_printk("o=%08x d=%p w=%s", __entry->obj, __entry->de, 
__print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); @@ -198,20 +198,20 @@ TRACE_EVENT(cachefiles_rename, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) __field(struct dentry *, de ) __field(struct dentry *, to ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; __entry->to = to; __entry->why = why; ), - TP_printk("o=%p d=%p t=%p w=%s", + TP_printk("o=%08x d=%p t=%p w=%s", __entry->obj, __entry->de, __entry->to, __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); @@ -224,16 +224,16 @@ TRACE_EVENT(cachefiles_mark_active, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) __field(struct dentry *, de ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; ), - TP_printk("o=%p d=%p", + TP_printk("o=%08x d=%p", __entry->obj, __entry->de) ); @@ -246,22 +246,22 @@ TRACE_EVENT(cachefiles_wait_active, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) + __field(unsigned int, xobj ) __field(struct dentry *, de ) - __field(struct cachefiles_object *, xobj ) __field(u16, flags ) __field(u16, fsc_flags ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; - __entry->xobj = xobj; + __entry->xobj = xobj->fscache.debug_id; __entry->flags = xobj->flags; __entry->fsc_flags = xobj->fscache.flags; ), - TP_printk("o=%p d=%p wo=%p wf=%x wff=%x", + TP_printk("o=%08x d=%p wo=%08x wf=%x wff=%x", __entry->obj, __entry->de, __entry->xobj, __entry->flags, __entry->fsc_flags) ); @@ -275,18 +275,18 @@ TRACE_EVENT(cachefiles_mark_inactive, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) 
__field(struct dentry *, de ) __field(struct inode *, inode ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; __entry->inode = inode; ), - TP_printk("o=%p d=%p i=%p", + TP_printk("o=%08x d=%p i=%p", __entry->obj, __entry->de, __entry->inode) ); @@ -299,18 +299,18 @@ TRACE_EVENT(cachefiles_mark_buried, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(struct cachefiles_object *, obj ) + __field(unsigned int, obj ) __field(struct dentry *, de ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( - __entry->obj = obj; + __entry->obj = obj->fscache.debug_id; __entry->de = de; __entry->why = why; ), - TP_printk("o=%p d=%p w=%s", + TP_printk("o=%08x d=%p w=%s", __entry->obj, __entry->de, __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index d16fe6ed78a28..0b9e058aba4d6 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -167,8 +167,8 @@ TRACE_EVENT(fscache_cookie, TP_ARGS(cookie, where, usage), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) - __field(struct fscache_cookie *, parent ) + __field(unsigned int, cookie ) + __field(unsigned int, parent ) __field(enum fscache_cookie_trace, where ) __field(int, usage ) __field(int, n_children ) @@ -177,8 +177,8 @@ TRACE_EVENT(fscache_cookie, ), TP_fast_assign( - __entry->cookie = cookie; - __entry->parent = cookie->parent; + __entry->cookie = cookie->debug_id; + __entry->parent = cookie->parent ? 
cookie->parent->debug_id : 0; __entry->where = where; __entry->usage = usage; __entry->n_children = atomic_read(&cookie->n_children); @@ -186,7 +186,7 @@ TRACE_EVENT(fscache_cookie, __entry->flags = cookie->flags; ), - TP_printk("%s c=%p u=%d p=%p Nc=%d Na=%d f=%02x", + TP_printk("%s c=%08x u=%d p=%08x Nc=%d Na=%d f=%02x", __print_symbolic(__entry->where, fscache_cookie_traces), __entry->cookie, __entry->usage, __entry->parent, __entry->n_children, __entry->n_active, @@ -199,17 +199,17 @@ TRACE_EVENT(fscache_netfs, TP_ARGS(netfs), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, cookie ) __array(char, name, 8 ) ), TP_fast_assign( - __entry->cookie = netfs->primary_index; + __entry->cookie = netfs->primary_index->debug_id; strncpy(__entry->name, netfs->name, 8); __entry->name[7] = 0; ), - TP_printk("c=%p n=%s", + TP_printk("c=%08x n=%s", __entry->cookie, __entry->name) ); @@ -219,8 +219,8 @@ TRACE_EVENT(fscache_acquire, TP_ARGS(cookie), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) - __field(struct fscache_cookie *, parent ) + __field(unsigned int, cookie ) + __field(unsigned int, parent ) __array(char, name, 8 ) __field(int, p_usage ) __field(int, p_n_children ) @@ -228,8 +228,8 @@ TRACE_EVENT(fscache_acquire, ), TP_fast_assign( - __entry->cookie = cookie; - __entry->parent = cookie->parent; + __entry->cookie = cookie->debug_id; + __entry->parent = cookie->parent->debug_id; __entry->p_usage = atomic_read(&cookie->parent->usage); __entry->p_n_children = atomic_read(&cookie->parent->n_children); __entry->p_flags = cookie->parent->flags; @@ -237,7 +237,7 @@ TRACE_EVENT(fscache_acquire, __entry->name[7] = 0; ), - TP_printk("c=%p p=%p pu=%d pc=%d pf=%02x n=%s", + TP_printk("c=%08x p=%08x pu=%d pc=%d pf=%02x n=%s", __entry->cookie, __entry->parent, __entry->p_usage, __entry->p_n_children, __entry->p_flags, __entry->name) ); @@ -248,8 +248,8 @@ TRACE_EVENT(fscache_relinquish, TP_ARGS(cookie, retire), TP_STRUCT__entry( - 
__field(struct fscache_cookie *, cookie ) - __field(struct fscache_cookie *, parent ) + __field(unsigned int, cookie ) + __field(unsigned int, parent ) __field(int, usage ) __field(int, n_children ) __field(int, n_active ) @@ -258,8 +258,8 @@ TRACE_EVENT(fscache_relinquish, ), TP_fast_assign( - __entry->cookie = cookie; - __entry->parent = cookie->parent; + __entry->cookie = cookie->debug_id; + __entry->parent = cookie->parent->debug_id; __entry->usage = atomic_read(&cookie->usage); __entry->n_children = atomic_read(&cookie->n_children); __entry->n_active = atomic_read(&cookie->n_active); @@ -267,7 +267,7 @@ TRACE_EVENT(fscache_relinquish, __entry->retire = retire; ), - TP_printk("c=%p u=%d p=%p Nc=%d Na=%d f=%02x r=%u", + TP_printk("c=%08x u=%d p=%08x Nc=%d Na=%d f=%02x r=%u", __entry->cookie, __entry->usage, __entry->parent, __entry->n_children, __entry->n_active, __entry->flags, __entry->retire) @@ -279,7 +279,7 @@ TRACE_EVENT(fscache_enable, TP_ARGS(cookie), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, cookie ) __field(int, usage ) __field(int, n_children ) __field(int, n_active ) @@ -287,14 +287,14 @@ TRACE_EVENT(fscache_enable, ), TP_fast_assign( - __entry->cookie = cookie; + __entry->cookie = cookie->debug_id; __entry->usage = atomic_read(&cookie->usage); __entry->n_children = atomic_read(&cookie->n_children); __entry->n_active = atomic_read(&cookie->n_active); __entry->flags = cookie->flags; ), - TP_printk("c=%p u=%d Nc=%d Na=%d f=%02x", + TP_printk("c=%08x u=%d Nc=%d Na=%d f=%02x", __entry->cookie, __entry->usage, __entry->n_children, __entry->n_active, __entry->flags) ); @@ -305,7 +305,7 @@ TRACE_EVENT(fscache_disable, TP_ARGS(cookie), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, cookie ) __field(int, usage ) __field(int, n_children ) __field(int, n_active ) @@ -313,14 +313,14 @@ TRACE_EVENT(fscache_disable, ), TP_fast_assign( - __entry->cookie = cookie; + __entry->cookie = 
cookie->debug_id; __entry->usage = atomic_read(&cookie->usage); __entry->n_children = atomic_read(&cookie->n_children); __entry->n_active = atomic_read(&cookie->n_active); __entry->flags = cookie->flags; ), - TP_printk("c=%p u=%d Nc=%d Na=%d f=%02x", + TP_printk("c=%08x u=%d Nc=%d Na=%d f=%02x", __entry->cookie, __entry->usage, __entry->n_children, __entry->n_active, __entry->flags) ); @@ -333,8 +333,8 @@ TRACE_EVENT(fscache_osm, TP_ARGS(object, state, wait, oob, event_num), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) - __field(struct fscache_object *, object ) + __field(unsigned int, cookie ) + __field(unsigned int, object ) __array(char, state, 8 ) __field(bool, wait ) __field(bool, oob ) @@ -342,15 +342,15 @@ TRACE_EVENT(fscache_osm, ), TP_fast_assign( - __entry->cookie = object->cookie; - __entry->object = object; + __entry->cookie = object->cookie->debug_id; + __entry->object = object->debug_id; __entry->wait = wait; __entry->oob = oob; __entry->event_num = event_num; memcpy(__entry->state, state->short_name, 8); ), - TP_printk("c=%p o=%p %s %s%sev=%d", + TP_printk("c=%08x o=%08d %s %s%sev=%d", __entry->cookie, __entry->object, __entry->state, @@ -370,18 +370,18 @@ TRACE_EVENT(fscache_page, TP_ARGS(cookie, page, why), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, cookie ) __field(pgoff_t, page ) __field(enum fscache_page_trace, why ) ), TP_fast_assign( - __entry->cookie = cookie; + __entry->cookie = cookie->debug_id; __entry->page = page->index; __entry->why = why; ), - TP_printk("c=%p %s pg=%lx", + TP_printk("c=%08x %s pg=%lx", __entry->cookie, __print_symbolic(__entry->why, fscache_page_traces), __entry->page) @@ -394,20 +394,20 @@ TRACE_EVENT(fscache_check_page, TP_ARGS(cookie, page, val, n), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, cookie ) __field(void *, page ) __field(void *, val ) __field(int, n ) ), TP_fast_assign( - __entry->cookie = cookie; + 
__entry->cookie = cookie->debug_id; __entry->page = page; __entry->val = val; __entry->n = n; ), - TP_printk("c=%p pg=%p val=%p n=%d", + TP_printk("c=%08x pg=%p val=%p n=%d", __entry->cookie, __entry->page, __entry->val, __entry->n) ); @@ -417,14 +417,14 @@ TRACE_EVENT(fscache_wake_cookie, TP_ARGS(cookie), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, cookie ) ), TP_fast_assign( - __entry->cookie = cookie; + __entry->cookie = cookie->debug_id; ), - TP_printk("c=%p", __entry->cookie) + TP_printk("c=%08x", __entry->cookie) ); TRACE_EVENT(fscache_op, @@ -434,18 +434,18 @@ TRACE_EVENT(fscache_op, TP_ARGS(cookie, op, why), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) - __field(struct fscache_operation *, op ) + __field(unsigned int, cookie ) + __field(unsigned int, op ) __field(enum fscache_op_trace, why ) ), TP_fast_assign( - __entry->cookie = cookie; - __entry->op = op; + __entry->cookie = cookie->debug_id; + __entry->op = op->debug_id; __entry->why = why; ), - TP_printk("c=%p op=%p %s", + TP_printk("c=%08x op=%08x %s", __entry->cookie, __entry->op, __print_symbolic(__entry->why, fscache_op_traces)) ); @@ -457,20 +457,20 @@ TRACE_EVENT(fscache_page_op, TP_ARGS(cookie, page, op, what), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, cookie ) + __field(unsigned int, op ) __field(pgoff_t, page ) - __field(struct fscache_operation *, op ) __field(enum fscache_page_op_trace, what ) ), TP_fast_assign( - __entry->cookie = cookie; + __entry->cookie = cookie->debug_id; __entry->page = page ? 
page->index : 0; - __entry->op = op; + __entry->op = op->debug_id; __entry->what = what; ), - TP_printk("c=%p %s pg=%lx op=%p", + TP_printk("c=%08x %s pg=%lx op=%08x", __entry->cookie, __print_symbolic(__entry->what, fscache_page_op_traces), __entry->page, __entry->op) @@ -483,20 +483,20 @@ TRACE_EVENT(fscache_wrote_page, TP_ARGS(cookie, page, op, ret), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) + __field(unsigned int, cookie ) + __field(unsigned int, op ) __field(pgoff_t, page ) - __field(struct fscache_operation *, op ) __field(int, ret ) ), TP_fast_assign( - __entry->cookie = cookie; + __entry->cookie = cookie->debug_id; __entry->page = page->index; - __entry->op = op; + __entry->op = op->debug_id; __entry->ret = ret; ), - TP_printk("c=%p pg=%lx op=%p ret=%d", + TP_printk("c=%08x pg=%lx op=%08x ret=%d", __entry->cookie, __entry->page, __entry->op, __entry->ret) ); @@ -507,22 +507,22 @@ TRACE_EVENT(fscache_gang_lookup, TP_ARGS(cookie, op, results, n, store_limit), TP_STRUCT__entry( - __field(struct fscache_cookie *, cookie ) - __field(struct fscache_operation *, op ) + __field(unsigned int, cookie ) + __field(unsigned int, op ) __field(pgoff_t, results0 ) __field(int, n ) __field(pgoff_t, store_limit ) ), TP_fast_assign( - __entry->cookie = cookie; - __entry->op = op; + __entry->cookie = cookie->debug_id; + __entry->op = op->debug_id; __entry->results0 = results[0] ? ((struct page *)results[0])->index : (pgoff_t)-1; __entry->n = n; __entry->store_limit = store_limit; ), - TP_printk("c=%p op=%p r0=%lx n=%d sl=%lx", + TP_printk("c=%08x op=%08x r0=%lx n=%d sl=%lx", __entry->cookie, __entry->op, __entry->results0, __entry->n, __entry->store_limit) ); From 36a04849fc93cb08e059348abaa495841fecc2e9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 10 Feb 2020 10:00:22 +0000 Subject: [PATCH 090/851] fscache: Procfile to display cookies Add /proc/fs/fscache/cookies to display active cookies. 
Signed-off-by: David Howells --- fs/fscache/cookie.c | 103 ++++++++++++++++++++++++++++++++++++++++ fs/fscache/internal.h | 1 + fs/fscache/proc.c | 7 +++ include/linux/fscache.h | 1 + 4 files changed, 112 insertions(+) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index f2be98d2c64dd..c7047544972b1 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -19,6 +19,8 @@ static atomic_t fscache_object_debug_id = ATOMIC_INIT(0); #define fscache_cookie_hash_shift 15 static struct hlist_bl_head fscache_cookie_hash[1 << fscache_cookie_hash_shift]; +static LIST_HEAD(fscache_cookies); +static DEFINE_RWLOCK(fscache_cookies_lock); static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie, loff_t object_size); @@ -65,6 +67,9 @@ void fscache_free_cookie(struct fscache_cookie *cookie) { if (cookie) { BUG_ON(!hlist_empty(&cookie->backing_objects)); + write_lock(&fscache_cookies_lock); + list_del(&cookie->proc_link); + write_unlock(&fscache_cookies_lock); if (cookie->aux_len > sizeof(cookie->inline_aux)) kfree(cookie->aux); if (cookie->key_len > sizeof(cookie->inline_key)) @@ -192,6 +197,10 @@ struct fscache_cookie *fscache_alloc_cookie( /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + + write_lock(&fscache_cookies_lock); + list_add_tail(&cookie->proc_link, &fscache_cookies); + write_unlock(&fscache_cookies_lock); return cookie; nomem: @@ -969,3 +978,97 @@ int __fscache_check_consistency(struct fscache_cookie *cookie, return -ESTALE; } EXPORT_SYMBOL(__fscache_check_consistency); + +/* + * Generate a list of extant cookies in /proc/fs/fscache/cookies + */ +static int fscache_cookies_seq_show(struct seq_file *m, void *v) +{ + struct fscache_cookie *cookie; + unsigned int keylen = 0, auxlen = 0; + char _type[3], *type; + u8 *p; + + if (v == &fscache_cookies) { + seq_puts(m, + "COOKIE PARENT USAGE CHILD ACT TY FL DEF NETFS_DATA\n" + 
"======== ======== ===== ===== === == === ================ ==========\n" + ); + return 0; + } + + cookie = list_entry(v, struct fscache_cookie, proc_link); + + switch (cookie->type) { + case 0: + type = "IX"; + break; + case 1: + type = "DT"; + break; + default: + snprintf(_type, sizeof(_type), "%02u", + cookie->type); + type = _type; + break; + } + + seq_printf(m, + "%08x %08x %5u %5u %3u %s %03lx %-16s %px", + cookie->debug_id, + cookie->parent ? cookie->parent->debug_id : 0, + atomic_read(&cookie->usage), + atomic_read(&cookie->n_children), + atomic_read(&cookie->n_active), + type, + cookie->flags, + cookie->def->name, + cookie->netfs_data); + + keylen = cookie->key_len; + auxlen = cookie->aux_len; + + if (keylen > 0 || auxlen > 0) { + seq_puts(m, " "); + p = keylen <= sizeof(cookie->inline_key) ? + cookie->inline_key : cookie->key; + for (; keylen > 0; keylen--) + seq_printf(m, "%02x", *p++); + if (auxlen > 0) { + seq_puts(m, ", "); + p = auxlen <= sizeof(cookie->inline_aux) ? + cookie->inline_aux : cookie->aux; + for (; auxlen > 0; auxlen--) + seq_printf(m, "%02x", *p++); + } + } + + seq_puts(m, "\n"); + return 0; +} + +static void *fscache_cookies_seq_start(struct seq_file *m, loff_t *_pos) + __acquires(fscache_cookies_lock) +{ + read_lock(&fscache_cookies_lock); + return seq_list_start_head(&fscache_cookies, *_pos); +} + +static void *fscache_cookies_seq_next(struct seq_file *m, void *v, loff_t *_pos) +{ + return seq_list_next(v, &fscache_cookies, _pos); +} + +static void fscache_cookies_seq_stop(struct seq_file *m, void *v) + __releases(rcu) +{ + read_unlock(&fscache_cookies_lock); +} + + +const struct seq_operations fscache_cookies_seq_ops = { + .start = fscache_cookies_seq_start, + .next = fscache_cookies_seq_next, + .stop = fscache_cookies_seq_stop, + .show = fscache_cookies_seq_show, +}; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index c483863b740ad..207a6bc81ca99 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -45,6 
+45,7 @@ extern struct fscache_cache *fscache_select_cache_for_object( * cookie.c */ extern struct kmem_cache *fscache_cookie_jar; +extern const struct seq_operations fscache_cookies_seq_ops; extern void fscache_free_cookie(struct fscache_cookie *); extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *, diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c index 90a7bc22f7e19..da51fdfc86413 100644 --- a/fs/fscache/proc.c +++ b/fs/fscache/proc.c @@ -21,6 +21,10 @@ int __init fscache_proc_init(void) if (!proc_mkdir("fs/fscache", NULL)) goto error_dir; + if (!proc_create_seq("fs/fscache/cookies", S_IFREG | 0444, NULL, + &fscache_cookies_seq_ops)) + goto error_cookies; + #ifdef CONFIG_FSCACHE_STATS if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL, fscache_stats_show)) @@ -53,6 +57,8 @@ int __init fscache_proc_init(void) remove_proc_entry("fs/fscache/stats", NULL); error_stats: #endif + remove_proc_entry("fs/fscache/cookies", NULL); +error_cookies: remove_proc_entry("fs/fscache", NULL); error_dir: _leave(" = -ENOMEM"); @@ -73,5 +79,6 @@ void fscache_proc_cleanup(void) #ifdef CONFIG_FSCACHE_STATS remove_proc_entry("fs/fscache/stats", NULL); #endif + remove_proc_entry("fs/fscache/cookies", NULL); remove_proc_entry("fs/fscache", NULL); } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index ba58c427cf9a2..ea61e54a6bc57 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -133,6 +133,7 @@ struct fscache_cookie { const struct fscache_cookie_def *def; /* definition */ struct fscache_cookie *parent; /* parent of this entry */ struct hlist_bl_node hash_link; /* Link in hash table */ + struct list_head proc_link; /* Link in proc list */ void *netfs_data; /* back pointer to netfs */ struct radix_tree_root stores; /* pages to be stored on this cookie */ #define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ From 61e4814dc8881bdc8511b6cebfe629d395467c66 Mon Sep 17 00:00:00 2001 From: David 
Howells Date: Wed, 12 May 2021 09:40:19 +0100 Subject: [PATCH 091/851] fscache, cachefiles: Remove the histogram stuff Remove the histogram stuff as it's mostly going to be outdated. Signed-off-by: David Howells --- fs/cachefiles/Kconfig | 19 ------ fs/cachefiles/Makefile | 2 - fs/cachefiles/internal.h | 25 -------- fs/cachefiles/main.c | 7 --- fs/cachefiles/namei.c | 13 ---- fs/cachefiles/proc.c | 114 ---------------------------------- fs/fscache/Kconfig | 17 ----- fs/fscache/Makefile | 1 - fs/fscache/histogram.c | 87 -------------------------- fs/fscache/internal.h | 24 ------- fs/fscache/object.c | 5 -- fs/fscache/operation.c | 3 - fs/fscache/page.c | 6 -- fs/fscache/proc.c | 13 ---- include/linux/fscache-cache.h | 1 - 15 files changed, 337 deletions(-) delete mode 100644 fs/cachefiles/proc.c delete mode 100644 fs/fscache/histogram.c diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig index ff9ca55a9ae9c..6827b40f7ddc3 100644 --- a/fs/cachefiles/Kconfig +++ b/fs/cachefiles/Kconfig @@ -19,22 +19,3 @@ config CACHEFILES_DEBUG caching on files module. If this is set, the debugging output may be enabled by setting bits in /sys/modules/cachefiles/parameter/debug or by including a debugging specifier in /etc/cachefilesd.conf. - -config CACHEFILES_HISTOGRAM - bool "Gather latency information on CacheFiles" - depends on CACHEFILES && PROC_FS - help - - This option causes latency information to be gathered on CacheFiles - operation and exported through file: - - /proc/fs/cachefiles/histogram - - The generation of this histogram adds a certain amount of overhead to - execution as there are a number of points at which data is gathered, - and on a multi-CPU system these may be on cachelines that keep - bouncing between CPUs. On the other hand, the histogram may be - useful for debugging purposes. Saying 'N' here is recommended. - - See Documentation/filesystems/caching/cachefiles.rst for more - information. 
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile index 2227dc2d54986..02fd177317697 100644 --- a/fs/cachefiles/Makefile +++ b/fs/cachefiles/Makefile @@ -15,6 +15,4 @@ cachefiles-y := \ security.o \ xattr.o -cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o - obj-$(CONFIG_CACHEFILES) := cachefiles.o diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 4ed83aa5253b1..0a511c36dab85 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -180,31 +180,6 @@ extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, extern int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename); -/* - * proc.c - */ -#ifdef CONFIG_CACHEFILES_HISTOGRAM -extern atomic_t cachefiles_lookup_histogram[HZ]; -extern atomic_t cachefiles_mkdir_histogram[HZ]; -extern atomic_t cachefiles_create_histogram[HZ]; - -extern int __init cachefiles_proc_init(void); -extern void cachefiles_proc_cleanup(void); -static inline -void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) -{ - unsigned long jif = jiffies - start_jif; - if (jif >= HZ) - jif = HZ - 1; - atomic_inc(&histogram[jif]); -} - -#else -#define cachefiles_proc_init() (0) -#define cachefiles_proc_cleanup() do {} while (0) -#define cachefiles_hist(hist, start_jif) do {} while (0) -#endif - /* * rdwr.c */ diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index ddf0cd58d60cc..9c8d34c49b125 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c @@ -69,15 +69,9 @@ static int __init cachefiles_init(void) goto error_object_jar; } - ret = cachefiles_proc_init(); - if (ret < 0) - goto error_proc; - pr_info("Loaded\n"); return 0; -error_proc: - kmem_cache_destroy(cachefiles_object_jar); error_object_jar: misc_deregister(&cachefiles_dev); error_dev: @@ -94,7 +88,6 @@ static void __exit cachefiles_exit(void) { pr_info("Unloading\n"); - cachefiles_proc_cleanup(); kmem_cache_destroy(cachefiles_object_jar); 
misc_deregister(&cachefiles_dev); } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7bf0732ae25cf..92aa550dae7e8 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -496,7 +496,6 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, struct dentry *dir, *next = NULL; struct inode *inode; struct path path; - unsigned long start; const char *name; int ret, nlen; @@ -535,9 +534,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); - start = jiffies; next = lookup_one_len(name, dir, nlen); - cachefiles_hist(cachefiles_lookup_histogram, start); if (IS_ERR(next)) { trace_cachefiles_lookup(object, next, NULL); goto lookup_error; @@ -568,9 +565,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, ret = security_path_mkdir(&path, next, 0); if (ret < 0) goto create_error; - start = jiffies; ret = vfs_mkdir(&init_user_ns, d_inode(dir), next, 0); - cachefiles_hist(cachefiles_mkdir_histogram, start); if (!key) trace_cachefiles_mkdir(object, next, ret); if (ret < 0) @@ -604,10 +599,8 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, ret = security_path_mknod(&path, next, S_IFREG, 0); if (ret < 0) goto create_error; - start = jiffies; ret = vfs_create(&init_user_ns, d_inode(dir), next, S_IFREG, true); - cachefiles_hist(cachefiles_create_histogram, start); trace_cachefiles_create(object, next, ret); if (ret < 0) goto create_error; @@ -765,7 +758,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, const char *dirname) { struct dentry *subdir; - unsigned long start; struct path path; int ret; @@ -775,9 +767,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, inode_lock(d_inode(dir)); retry: - start = jiffies; subdir = lookup_one_len(dirname, dir, strlen(dirname)); - cachefiles_hist(cachefiles_lookup_histogram, start); if (IS_ERR(subdir)) { if (PTR_ERR(subdir) == -ENOMEM) goto nomem_d_alloc; @@ 
-876,7 +866,6 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, struct cachefiles_object *object; struct rb_node *_n; struct dentry *victim; - unsigned long start; int ret; //_enter(",%pd/,%s", @@ -885,9 +874,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, /* look up the victim */ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); - start = jiffies; victim = lookup_one_len(filename, dir, strlen(filename)); - cachefiles_hist(cachefiles_lookup_histogram, start); if (IS_ERR(victim)) goto lookup_error; diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c deleted file mode 100644 index 6e67aea0f24ed..0000000000000 --- a/fs/cachefiles/proc.c +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* CacheFiles statistics - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -#include -#include -#include -#include "internal.h" - -atomic_t cachefiles_lookup_histogram[HZ]; -atomic_t cachefiles_mkdir_histogram[HZ]; -atomic_t cachefiles_create_histogram[HZ]; - -/* - * display the latency histogram - */ -static int cachefiles_histogram_show(struct seq_file *m, void *v) -{ - unsigned long index; - unsigned x, y, z, t; - - switch ((unsigned long) v) { - case 1: - seq_puts(m, "JIFS SECS LOOKUPS MKDIRS CREATES\n"); - return 0; - case 2: - seq_puts(m, "===== ===== ========= ========= =========\n"); - return 0; - default: - index = (unsigned long) v - 3; - x = atomic_read(&cachefiles_lookup_histogram[index]); - y = atomic_read(&cachefiles_mkdir_histogram[index]); - z = atomic_read(&cachefiles_create_histogram[index]); - if (x == 0 && y == 0 && z == 0) - return 0; - - t = (index * 1000) / HZ; - - seq_printf(m, "%4lu 0.%03u %9u %9u %9u\n", index, t, x, y, z); - return 0; - } -} - -/* - * set up the iterator to start reading from the first line - */ -static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos) -{ - if 
((unsigned long long)*_pos >= HZ + 2) - return NULL; - if (*_pos == 0) - *_pos = 1; - return (void *)(unsigned long) *_pos; -} - -/* - * move to the next line - */ -static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return (unsigned long long)*pos > HZ + 2 ? - NULL : (void *)(unsigned long) *pos; -} - -/* - * clean up after reading - */ -static void cachefiles_histogram_stop(struct seq_file *m, void *v) -{ -} - -static const struct seq_operations cachefiles_histogram_ops = { - .start = cachefiles_histogram_start, - .stop = cachefiles_histogram_stop, - .next = cachefiles_histogram_next, - .show = cachefiles_histogram_show, -}; - -/* - * initialise the /proc/fs/cachefiles/ directory - */ -int __init cachefiles_proc_init(void) -{ - _enter(""); - - if (!proc_mkdir("fs/cachefiles", NULL)) - goto error_dir; - - if (!proc_create_seq("fs/cachefiles/histogram", S_IFREG | 0444, NULL, - &cachefiles_histogram_ops)) - goto error_histogram; - - _leave(" = 0"); - return 0; - -error_histogram: - remove_proc_entry("fs/cachefiles", NULL); -error_dir: - _leave(" = -ENOMEM"); - return -ENOMEM; -} - -/* - * clean up the /proc/fs/cachefiles/ directory - */ -void cachefiles_proc_cleanup(void) -{ - remove_proc_entry("fs/cachefiles/histogram", NULL); - remove_proc_entry("fs/cachefiles", NULL); -} diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 92c87d8e09130..5e3a5b3f950da 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -29,23 +29,6 @@ config FSCACHE_STATS See Documentation/filesystems/caching/fscache.rst for more information. 
-config FSCACHE_HISTOGRAM - bool "Gather latency information on local caching" - depends on FSCACHE && PROC_FS - help - This option causes latency information to be gathered on local - caching and exported through file: - - /proc/fs/fscache/histogram - - The generation of this histogram adds a certain amount of overhead to - execution as there are a number of points at which data is gathered, - and on a multi-CPU system these may be on cachelines that keep - bouncing between CPUs. On the other hand, the histogram may be - useful for debugging purposes. Saying 'N' here is recommended. - - See Documentation/filesystems/caching/fscache.rst for more information. - config FSCACHE_DEBUG bool "Debug FS-Cache" depends on FSCACHE diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 3b2ffa93ac184..45d5235a449bb 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -16,7 +16,6 @@ fscache-y := \ fscache-$(CONFIG_PROC_FS) += proc.o fscache-$(CONFIG_FSCACHE_STATS) += stats.o -fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o fscache-$(CONFIG_FSCACHE_OBJECT_LIST) += object-list.o obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c deleted file mode 100644 index 4e5beeaaf4541..0000000000000 --- a/fs/fscache/histogram.c +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* FS-Cache latency histogram - * - * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. 
- * Written by David Howells (dhowells@redhat.com) - */ - -#define FSCACHE_DEBUG_LEVEL THREAD -#include -#include -#include -#include "internal.h" - -atomic_t fscache_obj_instantiate_histogram[HZ]; -atomic_t fscache_objs_histogram[HZ]; -atomic_t fscache_ops_histogram[HZ]; -atomic_t fscache_retrieval_delay_histogram[HZ]; -atomic_t fscache_retrieval_histogram[HZ]; - -/* - * display the time-taken histogram - */ -static int fscache_histogram_show(struct seq_file *m, void *v) -{ - unsigned long index; - unsigned n[5], t; - - switch ((unsigned long) v) { - case 1: - seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS\n"); - return 0; - case 2: - seq_puts(m, "===== ===== ========= ========= ========= ========= =========\n"); - return 0; - default: - index = (unsigned long) v - 3; - n[0] = atomic_read(&fscache_obj_instantiate_histogram[index]); - n[1] = atomic_read(&fscache_ops_histogram[index]); - n[2] = atomic_read(&fscache_objs_histogram[index]); - n[3] = atomic_read(&fscache_retrieval_delay_histogram[index]); - n[4] = atomic_read(&fscache_retrieval_histogram[index]); - if (!(n[0] | n[1] | n[2] | n[3] | n[4])) - return 0; - - t = (index * 1000) / HZ; - - seq_printf(m, "%4lu 0.%03u %9u %9u %9u %9u %9u\n", - index, t, n[0], n[1], n[2], n[3], n[4]); - return 0; - } -} - -/* - * set up the iterator to start reading from the first line - */ -static void *fscache_histogram_start(struct seq_file *m, loff_t *_pos) -{ - if ((unsigned long long)*_pos >= HZ + 2) - return NULL; - if (*_pos == 0) - *_pos = 1; - return (void *)(unsigned long) *_pos; -} - -/* - * move to the next line - */ -static void *fscache_histogram_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return (unsigned long long)*pos > HZ + 2 ? 
- NULL : (void *)(unsigned long) *pos; -} - -/* - * clean up after reading - */ -static void fscache_histogram_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations fscache_histogram_ops = { - .start = fscache_histogram_start, - .stop = fscache_histogram_stop, - .next = fscache_histogram_next, - .show = fscache_histogram_show, -}; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 207a6bc81ca99..796678b2b32a4 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -63,30 +63,6 @@ extern void fscache_cookie_put(struct fscache_cookie *, extern struct fscache_cookie fscache_fsdef_index; extern struct fscache_cookie_def fscache_fsdef_netfs_def; -/* - * histogram.c - */ -#ifdef CONFIG_FSCACHE_HISTOGRAM -extern atomic_t fscache_obj_instantiate_histogram[HZ]; -extern atomic_t fscache_objs_histogram[HZ]; -extern atomic_t fscache_ops_histogram[HZ]; -extern atomic_t fscache_retrieval_delay_histogram[HZ]; -extern atomic_t fscache_retrieval_histogram[HZ]; - -static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif) -{ - unsigned long jif = jiffies - start_jif; - if (jif >= HZ) - jif = HZ - 1; - atomic_inc(&histogram[jif]); -} - -extern const struct seq_operations fscache_histogram_ops; - -#else -#define fscache_hist(hist, start_jif) do {} while (0) -#endif - /* * main.c */ diff --git a/fs/fscache/object.c b/fs/fscache/object.c index cb2146e02cd54..5dbaab2e12628 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -277,13 +277,10 @@ static void fscache_object_work_func(struct work_struct *work) { struct fscache_object *object = container_of(work, struct fscache_object, work); - unsigned long start; _enter("{OBJ%x}", object->debug_id); - start = jiffies; fscache_object_sm_dispatcher(object); - fscache_hist(fscache_objs_histogram, start); fscache_put_object(object, fscache_obj_put_work); } @@ -436,7 +433,6 @@ static const struct fscache_state *fscache_parent_ready(struct fscache_object *o 
spin_lock(&parent->lock); parent->n_ops++; parent->n_obj_ops++; - object->lookup_jif = jiffies; spin_unlock(&parent->lock); _leave(""); @@ -596,7 +592,6 @@ static const struct fscache_state *fscache_object_available(struct fscache_objec object->cache->ops->lookup_complete(object); fscache_stat_d(&fscache_n_cop_lookup_complete); - fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); fscache_stat(&fscache_n_object_avail); _leave(""); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 4a5651d4904e5..4338771077008 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -616,7 +616,6 @@ void fscache_op_work_func(struct work_struct *work) { struct fscache_operation *op = container_of(work, struct fscache_operation, work); - unsigned long start; _enter("{OBJ%x OP%x,%d}", op->object->debug_id, op->debug_id, atomic_read(&op->usage)); @@ -624,9 +623,7 @@ void fscache_op_work_func(struct work_struct *work) trace_fscache_op(op->object->cookie, op, fscache_op_work); ASSERT(op->processor != NULL); - start = jiffies; op->processor(op); - fscache_hist(fscache_ops_histogram, start); fscache_put_operation(op); _leave(""); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 991b0a871744e..27df94ef0e0bd 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -289,7 +289,6 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) ASSERTIFCMP(op->op.state != FSCACHE_OP_ST_INITIALISED, atomic_read(&op->n_pages), ==, 0); - fscache_hist(fscache_retrieval_histogram, op->start_time); if (op->context) fscache_put_context(op->cookie, op->context); @@ -324,7 +323,6 @@ struct fscache_retrieval *fscache_alloc_retrieval( op->mapping = mapping; op->end_io_func = end_io_func; op->context = context; - op->start_time = jiffies; INIT_LIST_HEAD(&op->to_do); /* Pin the netfs read context in case we need to do the actual netfs @@ -340,8 +338,6 @@ struct fscache_retrieval *fscache_alloc_retrieval( */ int 
fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) { - unsigned long jif; - _enter(""); if (!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) { @@ -351,7 +347,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) fscache_stat(&fscache_n_retrievals_wait); - jif = jiffies; if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, TASK_INTERRUPTIBLE) != 0) { fscache_stat(&fscache_n_retrievals_intr); @@ -362,7 +357,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) ASSERT(!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)); smp_rmb(); - fscache_hist(fscache_retrieval_delay_histogram, jif); _leave(" = 0 [dly]"); return 0; } diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c index da51fdfc86413..061df8f61ffc7 100644 --- a/fs/fscache/proc.c +++ b/fs/fscache/proc.c @@ -31,12 +31,6 @@ int __init fscache_proc_init(void) goto error_stats; #endif -#ifdef CONFIG_FSCACHE_HISTOGRAM - if (!proc_create_seq("fs/fscache/histogram", S_IFREG | 0444, NULL, - &fscache_histogram_ops)) - goto error_histogram; -#endif - #ifdef CONFIG_FSCACHE_OBJECT_LIST if (!proc_create("fs/fscache/objects", S_IFREG | 0444, NULL, &fscache_objlist_proc_ops)) @@ -49,10 +43,6 @@ int __init fscache_proc_init(void) #ifdef CONFIG_FSCACHE_OBJECT_LIST error_objects: #endif -#ifdef CONFIG_FSCACHE_HISTOGRAM - remove_proc_entry("fs/fscache/histogram", NULL); -error_histogram: -#endif #ifdef CONFIG_FSCACHE_STATS remove_proc_entry("fs/fscache/stats", NULL); error_stats: @@ -73,9 +63,6 @@ void fscache_proc_cleanup(void) #ifdef CONFIG_FSCACHE_OBJECT_LIST remove_proc_entry("fs/fscache/objects", NULL); #endif -#ifdef CONFIG_FSCACHE_HISTOGRAM - remove_proc_entry("fs/fscache/histogram", NULL); -#endif #ifdef CONFIG_FSCACHE_STATS remove_proc_entry("fs/fscache/stats", NULL); #endif diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 3235ddbdcc09c..fbff0b7e3ef17 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h 
@@ -147,7 +147,6 @@ struct fscache_retrieval { fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ void *context; /* netfs read context (pinned) */ struct list_head to_do; /* list of things to be done by the backend */ - unsigned long start_time; /* time at which retrieval started */ atomic_t n_pages; /* number of pages to be retrieved */ }; From c16794cd40a7f455c43793b3a0ddd24bb76e42e5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 May 2021 09:59:17 +0100 Subject: [PATCH 092/851] fscache: Remove the object list procfile Remove the object list procfile from fscache as objects will become entirely internal to the cache. Signed-off-by: David Howells --- fs/fscache/Kconfig | 7 - fs/fscache/Makefile | 1 - fs/fscache/cache.c | 1 - fs/fscache/cookie.c | 2 - fs/fscache/internal.h | 13 -- fs/fscache/object-list.c | 414 ---------------------------------- fs/fscache/object.c | 2 - include/linux/fscache-cache.h | 3 - 8 files changed, 443 deletions(-) delete mode 100644 fs/fscache/object-list.c diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 5e3a5b3f950da..b313a978ae0a2 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -38,10 +38,3 @@ config FSCACHE_DEBUG enabled by setting bits in /sys/modules/fscache/parameter/debug. See Documentation/filesystems/caching/fscache.rst for more information. 
- -config FSCACHE_OBJECT_LIST - bool "Maintain global object list for debugging purposes" - depends on FSCACHE && PROC_FS - help - Maintain a global list of active fscache objects that can be - retrieved through /proc/fs/fscache/objects for debugging purposes diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 45d5235a449bb..03a871d689bbb 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -16,6 +16,5 @@ fscache-y := \ fscache-$(CONFIG_PROC_FS) += proc.o fscache-$(CONFIG_FSCACHE_STATS) += stats.o -fscache-$(CONFIG_FSCACHE_OBJECT_LIST) += object-list.o obj-$(CONFIG_FSCACHE) := fscache.o diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index fcc136361415e..8a6ffcac867f6 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -261,7 +261,6 @@ int fscache_add_cache(struct fscache_cache *cache, spin_lock(&cache->object_list_lock); list_add_tail(&ifsdef->cache_link, &cache->object_list); spin_unlock(&cache->object_list_lock); - fscache_objlist_add(ifsdef); /* add the cache's netfs definition index object to the top level index * cookie as a known backing object */ diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index c7047544972b1..2f4d5271ad2ed 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -620,8 +620,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie, /* Attach to the cookie. The object already has a ref on it. 
*/ hlist_add_head(&object->cookie_link, &cookie->backing_objects); - - fscache_objlist_add(object); ret = 0; cant_attach_object: diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 796678b2b32a4..200082cafdda5 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -84,19 +84,6 @@ static inline bool fscache_object_congested(void) */ extern void fscache_enqueue_object(struct fscache_object *); -/* - * object-list.c - */ -#ifdef CONFIG_FSCACHE_OBJECT_LIST -extern const struct proc_ops fscache_objlist_proc_ops; - -extern void fscache_objlist_add(struct fscache_object *); -extern void fscache_objlist_remove(struct fscache_object *); -#else -#define fscache_objlist_add(object) do {} while(0) -#define fscache_objlist_remove(object) do {} while(0) -#endif - /* * operation.c */ diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c deleted file mode 100644 index 1a0dc32c0a332..0000000000000 --- a/fs/fscache/object-list.c +++ /dev/null @@ -1,414 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Global fscache object list maintainer and viewer - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. 
- * Written by David Howells (dhowells@redhat.com) - */ - -#define FSCACHE_DEBUG_LEVEL COOKIE -#include -#include -#include -#include -#include -#include -#include "internal.h" - -static struct rb_root fscache_object_list; -static DEFINE_RWLOCK(fscache_object_list_lock); - -struct fscache_objlist_data { - unsigned long config; /* display configuration */ -#define FSCACHE_OBJLIST_CONFIG_KEY 0x00000001 /* show object keys */ -#define FSCACHE_OBJLIST_CONFIG_AUX 0x00000002 /* show object auxdata */ -#define FSCACHE_OBJLIST_CONFIG_COOKIE 0x00000004 /* show objects with cookies */ -#define FSCACHE_OBJLIST_CONFIG_NOCOOKIE 0x00000008 /* show objects without cookies */ -#define FSCACHE_OBJLIST_CONFIG_BUSY 0x00000010 /* show busy objects */ -#define FSCACHE_OBJLIST_CONFIG_IDLE 0x00000020 /* show idle objects */ -#define FSCACHE_OBJLIST_CONFIG_PENDWR 0x00000040 /* show objects with pending writes */ -#define FSCACHE_OBJLIST_CONFIG_NOPENDWR 0x00000080 /* show objects without pending writes */ -#define FSCACHE_OBJLIST_CONFIG_READS 0x00000100 /* show objects with active reads */ -#define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */ -#define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */ -#define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */ -#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with work */ -#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without work */ -}; - -/* - * Add an object to the object list - * - we use the address of the fscache_object structure as the key into the - * tree - */ -void fscache_objlist_add(struct fscache_object *obj) -{ - struct fscache_object *xobj; - struct rb_node **p = &fscache_object_list.rb_node, *parent = NULL; - - ASSERT(RB_EMPTY_NODE(&obj->objlist_link)); - - write_lock(&fscache_object_list_lock); - - while (*p) { - parent = *p; - xobj = rb_entry(parent, struct fscache_object, objlist_link); - - if (obj < 
xobj) - p = &(*p)->rb_left; - else if (obj > xobj) - p = &(*p)->rb_right; - else - BUG(); - } - - rb_link_node(&obj->objlist_link, parent, p); - rb_insert_color(&obj->objlist_link, &fscache_object_list); - - write_unlock(&fscache_object_list_lock); -} - -/* - * Remove an object from the object list. - */ -void fscache_objlist_remove(struct fscache_object *obj) -{ - if (RB_EMPTY_NODE(&obj->objlist_link)) - return; - - write_lock(&fscache_object_list_lock); - - BUG_ON(RB_EMPTY_ROOT(&fscache_object_list)); - rb_erase(&obj->objlist_link, &fscache_object_list); - - write_unlock(&fscache_object_list_lock); -} - -/* - * find the object in the tree on or after the specified index - */ -static struct fscache_object *fscache_objlist_lookup(loff_t *_pos) -{ - struct fscache_object *pobj, *obj = NULL, *minobj = NULL; - struct rb_node *p; - unsigned long pos; - - if (*_pos >= (unsigned long) ERR_PTR(-ENOENT)) - return NULL; - pos = *_pos; - - /* banners (can't represent line 0 by pos 0 as that would involve - * returning a NULL pointer) */ - if (pos == 0) - return (struct fscache_object *)(long)++(*_pos); - if (pos < 3) - return (struct fscache_object *)pos; - - pobj = (struct fscache_object *)pos; - p = fscache_object_list.rb_node; - while (p) { - obj = rb_entry(p, struct fscache_object, objlist_link); - if (pobj < obj) { - if (!minobj || minobj > obj) - minobj = obj; - p = p->rb_left; - } else if (pobj > obj) { - p = p->rb_right; - } else { - minobj = obj; - break; - } - obj = NULL; - } - - if (!minobj) - *_pos = (unsigned long) ERR_PTR(-ENOENT); - else if (minobj != obj) - *_pos = (unsigned long) minobj; - return minobj; -} - -/* - * set up the iterator to start reading from the first line - */ -static void *fscache_objlist_start(struct seq_file *m, loff_t *_pos) - __acquires(&fscache_object_list_lock) -{ - read_lock(&fscache_object_list_lock); - return fscache_objlist_lookup(_pos); -} - -/* - * move to the next line - */ -static void *fscache_objlist_next(struct seq_file 
*m, void *v, loff_t *_pos) -{ - (*_pos)++; - return fscache_objlist_lookup(_pos); -} - -/* - * clean up after reading - */ -static void fscache_objlist_stop(struct seq_file *m, void *v) - __releases(&fscache_object_list_lock) -{ - read_unlock(&fscache_object_list_lock); -} - -/* - * display an object - */ -static int fscache_objlist_show(struct seq_file *m, void *v) -{ - struct fscache_objlist_data *data = m->private; - struct fscache_object *obj = v; - struct fscache_cookie *cookie; - unsigned long config = data->config; - char _type[3], *type; - u8 *p; - - if ((unsigned long) v == 1) { - seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" - " EM EV FL S" - " | COOKIE NETFS_COOKIE_DEF TY FL NETFS_DATA"); - if (config & (FSCACHE_OBJLIST_CONFIG_KEY | - FSCACHE_OBJLIST_CONFIG_AUX)) - seq_puts(m, " "); - if (config & FSCACHE_OBJLIST_CONFIG_KEY) - seq_puts(m, "OBJECT_KEY"); - if ((config & (FSCACHE_OBJLIST_CONFIG_KEY | - FSCACHE_OBJLIST_CONFIG_AUX)) == - (FSCACHE_OBJLIST_CONFIG_KEY | FSCACHE_OBJLIST_CONFIG_AUX)) - seq_puts(m, ", "); - if (config & FSCACHE_OBJLIST_CONFIG_AUX) - seq_puts(m, "AUX_DATA"); - seq_puts(m, "\n"); - return 0; - } - - if ((unsigned long) v == 2) { - seq_puts(m, "======== ======== ==== ===== === === === == =====" - " == == == =" - " | ======== ================ == === ================"); - if (config & (FSCACHE_OBJLIST_CONFIG_KEY | - FSCACHE_OBJLIST_CONFIG_AUX)) - seq_puts(m, " ================"); - seq_puts(m, "\n"); - return 0; - } - - /* filter out any unwanted objects */ -#define FILTER(criterion, _yes, _no) \ - do { \ - unsigned long yes = FSCACHE_OBJLIST_CONFIG_##_yes; \ - unsigned long no = FSCACHE_OBJLIST_CONFIG_##_no; \ - if (criterion) { \ - if (!(config & yes)) \ - return 0; \ - } else { \ - if (!(config & no)) \ - return 0; \ - } \ - } while(0) - - cookie = obj->cookie; - if (~config) { - FILTER(cookie->def, - COOKIE, NOCOOKIE); - FILTER(fscache_object_is_active(obj) || - obj->n_ops != 0 || - obj->n_obj_ops != 0 || - obj->flags 
|| - !list_empty(&obj->dependents), - BUSY, IDLE); - FILTER(test_bit(FSCACHE_OBJECT_PENDING_WRITE, &obj->flags), - PENDWR, NOPENDWR); - FILTER(atomic_read(&obj->n_reads), - READS, NOREADS); - FILTER(obj->events & obj->event_mask, - EVENTS, NOEVENTS); - FILTER(work_busy(&obj->work), WORK, NOWORK); - } - - seq_printf(m, - "%08x %08x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %2lx %1x | ", - obj->debug_id, - obj->parent ? obj->parent->debug_id : UINT_MAX, - obj->state->short_name, - obj->n_children, - obj->n_ops, - obj->n_obj_ops, - obj->n_in_progress, - obj->n_exclusive, - atomic_read(&obj->n_reads), - obj->event_mask, - obj->events, - obj->flags, - work_busy(&obj->work)); - - if (obj->cookie) { - uint16_t keylen = 0, auxlen = 0; - - switch (cookie->type) { - case 0: - type = "IX"; - break; - case 1: - type = "DT"; - break; - default: - snprintf(_type, sizeof(_type), "%02u", - cookie->type); - type = _type; - break; - } - - seq_printf(m, "%08x %-16s %s %3lx %16p", - cookie->debug_id, - cookie->def->name, - type, - cookie->flags, - cookie->netfs_data); - - if (config & FSCACHE_OBJLIST_CONFIG_KEY) - keylen = cookie->key_len; - - if (config & FSCACHE_OBJLIST_CONFIG_AUX) - auxlen = cookie->aux_len; - - if (keylen > 0 || auxlen > 0) { - seq_puts(m, " "); - p = keylen <= sizeof(cookie->inline_key) ? - cookie->inline_key : cookie->key; - for (; keylen > 0; keylen--) - seq_printf(m, "%02x", *p++); - if (auxlen > 0) { - if (config & FSCACHE_OBJLIST_CONFIG_KEY) - seq_puts(m, ", "); - p = auxlen <= sizeof(cookie->inline_aux) ? 
- cookie->inline_aux : cookie->aux; - for (; auxlen > 0; auxlen--) - seq_printf(m, "%02x", *p++); - } - } - - seq_puts(m, "\n"); - } else { - seq_puts(m, "\n"); - } - return 0; -} - -static const struct seq_operations fscache_objlist_ops = { - .start = fscache_objlist_start, - .stop = fscache_objlist_stop, - .next = fscache_objlist_next, - .show = fscache_objlist_show, -}; - -/* - * get the configuration for filtering the list - */ -static void fscache_objlist_config(struct fscache_objlist_data *data) -{ -#ifdef CONFIG_KEYS - const struct user_key_payload *confkey; - unsigned long config; - struct key *key; - const char *buf; - int len; - - key = request_key(&key_type_user, "fscache:objlist", NULL); - if (IS_ERR(key)) - goto no_config; - - config = 0; - rcu_read_lock(); - - confkey = user_key_payload_rcu(key); - if (!confkey) { - /* key was revoked */ - rcu_read_unlock(); - key_put(key); - goto no_config; - } - - buf = confkey->data; - - for (len = confkey->datalen - 1; len >= 0; len--) { - switch (buf[len]) { - case 'K': config |= FSCACHE_OBJLIST_CONFIG_KEY; break; - case 'A': config |= FSCACHE_OBJLIST_CONFIG_AUX; break; - case 'C': config |= FSCACHE_OBJLIST_CONFIG_COOKIE; break; - case 'c': config |= FSCACHE_OBJLIST_CONFIG_NOCOOKIE; break; - case 'B': config |= FSCACHE_OBJLIST_CONFIG_BUSY; break; - case 'b': config |= FSCACHE_OBJLIST_CONFIG_IDLE; break; - case 'W': config |= FSCACHE_OBJLIST_CONFIG_PENDWR; break; - case 'w': config |= FSCACHE_OBJLIST_CONFIG_NOPENDWR; break; - case 'R': config |= FSCACHE_OBJLIST_CONFIG_READS; break; - case 'r': config |= FSCACHE_OBJLIST_CONFIG_NOREADS; break; - case 'S': config |= FSCACHE_OBJLIST_CONFIG_WORK; break; - case 's': config |= FSCACHE_OBJLIST_CONFIG_NOWORK; break; - } - } - - rcu_read_unlock(); - key_put(key); - - if (!(config & (FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE))) - config |= FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE; - if (!(config & (FSCACHE_OBJLIST_CONFIG_BUSY | 
FSCACHE_OBJLIST_CONFIG_IDLE))) - config |= FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE; - if (!(config & (FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR))) - config |= FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR; - if (!(config & (FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS))) - config |= FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS; - if (!(config & (FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS))) - config |= FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS; - if (!(config & (FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK))) - config |= FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK; - - data->config = config; - return; - -no_config: -#endif - data->config = ULONG_MAX; -} - -/* - * open "/proc/fs/fscache/objects" to provide a list of active objects - * - can be configured by a user-defined key added to the caller's keyrings - */ -static int fscache_objlist_open(struct inode *inode, struct file *file) -{ - struct fscache_objlist_data *data; - - data = __seq_open_private(file, &fscache_objlist_ops, sizeof(*data)); - if (!data) - return -ENOMEM; - - /* get the configuration key */ - fscache_objlist_config(data); - - return 0; -} - -/* - * clean up on close - */ -static int fscache_objlist_release(struct inode *inode, struct file *file) -{ - struct seq_file *m = file->private_data; - - kfree(m->private); - m->private = NULL; - return seq_release(inode, file); -} - -const struct proc_ops fscache_objlist_proc_ops = { - .proc_open = fscache_objlist_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = fscache_objlist_release, -}; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 5dbaab2e12628..b3853274733f9 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -794,8 +794,6 @@ static void fscache_put_object(struct fscache_object *object, */ void fscache_object_destroy(struct 
fscache_object *object) { - fscache_objlist_remove(object); - /* We can get rid of the cookie now */ fscache_cookie_put(object->cookie, fscache_cookie_put_object); object->cookie = NULL; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index fbff0b7e3ef17..8d39491c5f9fa 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -384,9 +384,6 @@ struct fscache_object { struct list_head dependents; /* FIFO of dependent objects */ struct list_head dep_link; /* link in parent's dependents list */ struct list_head pending_ops; /* unstarted operations on this object */ -#ifdef CONFIG_FSCACHE_OBJECT_LIST - struct rb_node objlist_link; /* link in global object list */ -#endif pgoff_t store_limit; /* current storage limit */ loff_t store_limit_l; /* current storage limit */ }; From 3a335341dd6e772cc4c337d4f4cd0c65286bd2dc Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Oct 2020 21:32:55 +0100 Subject: [PATCH 093/851] fscache: Change %p in format strings to something else Change plain %p in format strings in fscache code to something more useful, since %p is now hashed before printing and thus no longer matches the contents of an oops register dump. Signed-off-by: David Howells --- fs/fscache/cache.c | 8 ++++---- fs/fscache/cookie.c | 16 +++++++--------- fs/fscache/object.c | 1 - 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 8a6ffcac867f6..e7a5d7ab40853 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -116,7 +116,7 @@ struct fscache_cache *fscache_select_cache_for_object( cache = NULL; spin_unlock(&cookie->lock); - _leave(" = %p [parent]", cache); + _leave(" = %s [parent]", cache ? 
cache->tag->name : "NULL"); return cache; } @@ -152,14 +152,14 @@ struct fscache_cache *fscache_select_cache_for_object( if (test_bit(FSCACHE_IOERROR, &tag->cache->flags)) return NULL; - _leave(" = %p [specific]", tag->cache); + _leave(" = %s [specific]", tag->name); return tag->cache; no_preference: /* netfs has no preference - just select first cache */ cache = list_entry(fscache_cache_list.next, struct fscache_cache, link); - _leave(" = %p [first]", cache); + _leave(" = %s [first]", cache->tag->name); return cache; } @@ -334,7 +334,7 @@ static void fscache_withdraw_all_objects(struct fscache_cache *cache, struct fscache_object, cache_link); list_move_tail(&object->cache_link, dying_objects); - _debug("withdraw %p", object->cookie); + _debug("withdraw %x", object->cookie->debug_id); /* This must be done under object_list_lock to prevent * a race with fscache_drop_object(). diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 2f4d5271ad2ed..ec9bce33085f7 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -375,7 +375,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie, bool (*can_enable)(void *data), void *data) { - _enter("%p", cookie); + _enter("%x", cookie->debug_id); trace_fscache_enable(cookie); @@ -472,10 +472,8 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie, /* we may be required to wait for lookup to complete at this point */ if (!fscache_defer_lookup) { - _debug("non-deferred lookup %p", &cookie->flags); wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, TASK_UNINTERRUPTIBLE); - _debug("complete"); if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags)) goto unavailable; } @@ -500,7 +498,7 @@ static int fscache_alloc_object(struct fscache_cache *cache, struct fscache_object *object; int ret; - _enter("%p,%p{%s}", cache, cookie, cookie->def->name); + _enter("%s,%x{%s}", cache->tag->name, cookie->debug_id, cookie->def->name); spin_lock(&cookie->lock); hlist_for_each_entry(object, 
&cookie->backing_objects, @@ -676,7 +674,7 @@ EXPORT_SYMBOL(__fscache_invalidate); */ void __fscache_wait_on_invalidate(struct fscache_cookie *cookie) { - _enter("%p", cookie); + _enter("%x", cookie->debug_id); wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, TASK_UNINTERRUPTIBLE); @@ -731,7 +729,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, struct fscache_object *object; bool awaken = false; - _enter("%p,%u", cookie, invalidate); + _enter("%x,%u", cookie->debug_id, invalidate); trace_fscache_disable(cookie); @@ -821,8 +819,8 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, return; } - _enter("%p{%s,%p,%d},%d", - cookie, cookie->def->name, cookie->netfs_data, + _enter("%x{%s,%d},%d", + cookie->debug_id, cookie->def->name, atomic_read(&cookie->n_active), retire); trace_fscache_relinquish(cookie, retire); @@ -877,7 +875,7 @@ void fscache_cookie_put(struct fscache_cookie *cookie, struct fscache_cookie *parent; int usage; - _enter("%p", cookie); + _enter("%x", cookie->debug_id); do { usage = atomic_dec_return(&cookie->usage); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index b3853274733f9..f346a78f4bd67 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -518,7 +518,6 @@ void fscache_object_lookup_negative(struct fscache_object *object) set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); - _debug("wake up lookup %p", &cookie->flags); clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); } From 3ac8853c33137e3848d2af6328d7a8ba8c2b77ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Oct 2020 21:40:32 +0100 Subject: [PATCH 094/851] cachefiles: Change %p in format strings to something else Change plain %p in format strings in cachefiles code to something more useful, since %p is now hashed before printing and thus no longer matches the contents of an oops register dump. 
Signed-off-by: David Howells --- fs/cachefiles/bind.c | 2 -- fs/cachefiles/interface.c | 6 ++--- fs/cachefiles/key.c | 2 +- fs/cachefiles/namei.c | 48 ++++++++++++++++++--------------------- fs/cachefiles/xattr.c | 4 ++-- 5 files changed, 28 insertions(+), 34 deletions(-) diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index 38bb7764b4540..d463d89f5db8c 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -108,8 +108,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) atomic_set(&fsdef->usage, 1); fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; - _debug("- fsdef %p", fsdef); - /* look up the directory at the root of the cache */ ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path); if (ret < 0) diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index da3948fdb6152..da28ac1fa225d 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -33,7 +33,7 @@ static struct fscache_object *cachefiles_alloc_object( cache = container_of(_cache, struct cachefiles_cache, cache); - _enter("{%s},%p,", cache->cache.identifier, cookie); + _enter("{%s},%x,", cache->cache.identifier, cookie->debug_id); lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp); if (!lookup_data) @@ -96,7 +96,7 @@ static struct fscache_object *cachefiles_alloc_object( lookup_data->key = key; object->lookup_data = lookup_data; - _leave(" = %p [%p]", &object->fscache, lookup_data); + _leave(" = %x [%p]", object->fscache.debug_id, lookup_data); return &object->fscache; nomem_key: @@ -379,7 +379,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache) const struct cred *saved_cred; int ret; - _enter("%p", _cache); + _enter("%s", _cache->tag->name); cache = container_of(_cache, struct cachefiles_cache, cache); diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c index be96f5fc5caca..7f94efc97e23e 100644 --- a/fs/cachefiles/key.c +++ b/fs/cachefiles/key.c @@ -150,6 +150,6 @@ char *cachefiles_cook_key(const u8 *raw, 
int keylen, uint8_t type) key[len++] = 0; key[len] = 0; - _leave(" = %p %d", key, len); + _leave(" = %s %d", key, len); return key; } diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 92aa550dae7e8..a9aca5ab59708 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -39,18 +39,18 @@ void __cachefiles_printk_object(struct cachefiles_object *object, pr_err("%sops=%u inp=%u exc=%u\n", prefix, object->fscache.n_ops, object->fscache.n_in_progress, object->fscache.n_exclusive); - pr_err("%sparent=%p\n", - prefix, object->fscache.parent); + pr_err("%sparent=%x\n", + prefix, object->fscache.parent ? object->fscache.parent->debug_id : 0); spin_lock(&object->fscache.lock); cookie = object->fscache.cookie; if (cookie) { - pr_err("%scookie=%p [pr=%p nd=%p fl=%lx]\n", + pr_err("%scookie=%x [pr=%x nd=%p fl=%lx]\n", prefix, - object->fscache.cookie, - object->fscache.cookie->parent, - object->fscache.cookie->netfs_data, - object->fscache.cookie->flags); + cookie->debug_id, + cookie->parent ? cookie->parent->debug_id : 0, + cookie->netfs_data, + cookie->flags); pr_err("%skey=[%u] '", prefix, cookie->key_len); k = (cookie->key_len <= sizeof(cookie->inline_key)) ? 
cookie->inline_key : cookie->key; @@ -110,7 +110,7 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, /* found the dentry for */ found_dentry: - kdebug("preemptive burial: OBJ%x [%s] %p", + kdebug("preemptive burial: OBJ%x [%s] %pd", object->fscache.debug_id, object->fscache.state->name, dentry); @@ -140,7 +140,7 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache, struct rb_node **_p, *_parent = NULL; struct dentry *dentry; - _enter(",%p", object); + _enter(",%x", object->fscache.debug_id); try_again: write_lock(&cache->active_lock); @@ -298,8 +298,6 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, _enter(",'%pd','%pd'", dir, rep); - _debug("remove %p from %p", rep, dir); - /* non-directories can just be unlinked */ if (!d_is_dir(rep)) { _debug("unlink stale object"); @@ -446,7 +444,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, struct dentry *dir; int ret; - _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry); + _enter(",OBJ%x{%pd}", object->fscache.debug_id, object->dentry); ASSERT(object->dentry); ASSERT(d_backing_inode(object->dentry)); @@ -499,7 +497,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, const char *name; int ret, nlen; - _enter("OBJ%x{%p},OBJ%x,%s,", + _enter("OBJ%x{%pd},OBJ%x,%s,", parent->fscache.debug_id, parent->dentry, object->fscache.debug_id, key); @@ -542,7 +540,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, inode = d_backing_inode(next); trace_cachefiles_lookup(object, next, inode); - _debug("next -> %p %s", next, inode ? "positive" : "negative"); + _debug("next -> %pd %s", next, inode ? 
"positive" : "negative"); if (!key) object->new = !inode; @@ -578,8 +576,8 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, } ASSERT(d_backing_inode(next)); - _debug("mkdir -> %p{%p{ino=%lu}}", - next, d_backing_inode(next), d_backing_inode(next)->i_ino); + _debug("mkdir -> %pd{ino=%lu}", + next, d_backing_inode(next)->i_ino); } else if (!d_can_lookup(next)) { pr_err("inode %lu is not a directory\n", @@ -607,8 +605,8 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, ASSERT(d_backing_inode(next)); - _debug("create -> %p{%p{ino=%lu}}", - next, d_backing_inode(next), d_backing_inode(next)->i_ino); + _debug("create -> %pd{ino=%lu}", + next, d_backing_inode(next)->i_ino); } else if (!d_can_lookup(next) && !d_is_reg(next) @@ -774,7 +772,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, goto lookup_error; } - _debug("subdir -> %p %s", + _debug("subdir -> %pd %s", subdir, d_backing_inode(subdir) ? "positive" : "negative"); /* we need to create the subdir if it doesn't exist yet */ @@ -800,10 +798,8 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, } ASSERT(d_backing_inode(subdir)); - _debug("mkdir -> %p{%p{ino=%lu}}", - subdir, - d_backing_inode(subdir), - d_backing_inode(subdir)->i_ino); + _debug("mkdir -> %pd{ino=%lu}", + subdir, d_backing_inode(subdir)->i_ino); } inode_unlock(d_inode(dir)); @@ -878,7 +874,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, if (IS_ERR(victim)) goto lookup_error; - //_debug("victim -> %p %s", + //_debug("victim -> %pd %s", // victim, d_backing_inode(victim) ? 
"positive" : "negative"); /* if the object is no longer there then we probably retired the object @@ -909,7 +905,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, read_unlock(&cache->active_lock); - //_leave(" = %p", victim); + //_leave(" = %pd", victim); return victim; object_in_use: @@ -955,7 +951,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, if (IS_ERR(victim)) return PTR_ERR(victim); - _debug("victim -> %p %s", + _debug("victim -> %pd %s", victim, d_backing_inode(victim) ? "positive" : "negative"); /* okay... the victim is not being used so we can cull it diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index a591b5e096373..9e82de6685951 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -36,7 +36,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object) else snprintf(type, 3, "%02x", object->fscache.cookie->def->type); - _enter("%p{%s}", object, type); + _enter("%x{%s}", object->fscache.debug_id, type); /* attempt to install a type label directly */ ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, type, @@ -134,7 +134,7 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object, if (!dentry) return -ESTALE; - _enter("%p,#%d", object, auxdata->len); + _enter("%x,#%d", object->fscache.debug_id, auxdata->len); /* attempt to install the cache metadata directly */ _debug("SET #%u", auxdata->len); From 15b44291eb6162265b54dfefbc9ad2d41d007bf7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 17 Jun 2021 14:21:00 +0100 Subject: [PATCH 095/851] fscache: Fix cookie key hashing The current hash algorithm used for hashing cookie keys is really bad, producing almost no dispersion (after a test kernel build, ~30000 files were split over just 18 out of the 32768 hash buckets). Borrow the full_name_hash() hash function into fscache to do the hashing for cookie keys and, in the future, volume keys. 
I don't want to use full_name_hash() as-is because I want the hash value to be consistent across arches and over time as the hash value produced may get used on disk. I can also optimise parts of it away as the key will always be a padded array of aligned 32-bit words. Signed-off-by: David Howells --- fs/fscache/cookie.c | 14 +------------- fs/fscache/internal.h | 2 ++ fs/fscache/main.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index ec9bce33085f7..2558814193e9b 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -87,10 +87,8 @@ void fscache_free_cookie(struct fscache_cookie *cookie) static int fscache_set_key(struct fscache_cookie *cookie, const void *index_key, size_t index_key_len) { - unsigned long long h; u32 *buf; int bufs; - int i; bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf)); @@ -104,17 +102,7 @@ static int fscache_set_key(struct fscache_cookie *cookie, } memcpy(buf, index_key, index_key_len); - - /* Calculate a hash and combine this with the length in the first word - * or first half word - */ - h = (unsigned long)cookie->parent; - h += index_key_len + cookie->type; - - for (i = 0; i < bufs; i++) - h += buf[i]; - - cookie->key_hash = h ^ (h >> 32); + cookie->key_hash = fscache_hash(0, buf, bufs); return 0; } diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 200082cafdda5..a49136c63e4b0 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -74,6 +74,8 @@ extern struct workqueue_struct *fscache_object_wq; extern struct workqueue_struct *fscache_op_wq; DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); +extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n); + static inline bool fscache_object_congested(void) { return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); diff --git a/fs/fscache/main.c b/fs/fscache/main.c index c1e6cc9091aac..4207f98e405fd 100644 
--- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -93,6 +93,45 @@ static struct ctl_table fscache_sysctls_root[] = { }; #endif +/* + * Mixing scores (in bits) for (7,20): + * Input delta: 1-bit 2-bit + * 1 round: 330.3 9201.6 + * 2 rounds: 1246.4 25475.4 + * 3 rounds: 1907.1 31295.1 + * 4 rounds: 2042.3 31718.6 + * Perfect: 2048 31744 + * (32*64) (32*31/2 * 64) + */ +#define HASH_MIX(x, y, a) \ + ( x ^= (a), \ + y ^= x, x = rol32(x, 7),\ + x += y, y = rol32(y,20),\ + y *= 9 ) + +static inline unsigned int fold_hash(unsigned long x, unsigned long y) +{ + /* Use arch-optimized multiply if one exists */ + return __hash_32(y ^ __hash_32(x)); +} + +/* + * Generate a hash. This is derived from full_name_hash(), but we want to be + * sure it is arch independent and that it doesn't change as bits of the + * computed hash value might appear on disk. The caller also guarantees that + * the hashed data will be a series of aligned 32-bit words. + */ +unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n) +{ + unsigned int a, x = 0, y = salt; + + for (; n; n--) { + a = *data++; + HASH_MIX(x, y, a); + } + return fold_hash(x, y); +} + /* * initialise the fs caching module */ From 65d52516947ecacd2e0a5692fbb522d45cfc06bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Jun 2021 11:19:49 +0100 Subject: [PATCH 096/851] fscache: Fix fscache_cookie_put() to not deref after dec fscache_cookie_put() accesses the cookie it has just put inside the tracepoint that monitors the change - but this is something it's not allowed to do if we didn't reduce the count to zero. Fix this by dropping most of those values from the tracepoint and grabbing the cookie debug ID before doing the dec. Also take the opportunity to switch over the usage and where arguments on the tracepoint to put the reason last. 
Signed-off-by: David Howells --- fs/fscache/cookie.c | 10 ++++++---- fs/fscache/internal.h | 2 +- fs/fscache/netfs.c | 2 +- include/trace/events/fscache.h | 24 +++++++----------------- 4 files changed, 15 insertions(+), 23 deletions(-) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 2558814193e9b..6df3732cf1b46 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -225,8 +225,8 @@ struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *candidate) collision: if (test_and_set_bit(FSCACHE_COOKIE_ACQUIRED, &cursor->flags)) { - trace_fscache_cookie(cursor, fscache_cookie_collision, - atomic_read(&cursor->usage)); + trace_fscache_cookie(cursor->debug_id, atomic_read(&cursor->usage), + fscache_cookie_collision); pr_err("Duplicate cookie detected\n"); fscache_print_cookie(cursor, 'O'); fscache_print_cookie(candidate, 'N'); @@ -305,7 +305,8 @@ struct fscache_cookie *__fscache_acquire_cookie( cookie = fscache_hash_cookie(candidate); if (!cookie) { - trace_fscache_cookie(candidate, fscache_cookie_discard, 1); + trace_fscache_cookie(candidate->debug_id, 1, + fscache_cookie_discard); goto out; } @@ -866,8 +867,9 @@ void fscache_cookie_put(struct fscache_cookie *cookie, _enter("%x", cookie->debug_id); do { + unsigned int cookie_debug_id = cookie->debug_id; usage = atomic_dec_return(&cookie->usage); - trace_fscache_cookie(cookie, where, usage); + trace_fscache_cookie(cookie_debug_id, usage, where); if (usage > 0) return; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index a49136c63e4b0..345105dbbfd1b 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -291,7 +291,7 @@ static inline void fscache_cookie_get(struct fscache_cookie *cookie, { int usage = atomic_inc_return(&cookie->usage); - trace_fscache_cookie(cookie, where, usage); + trace_fscache_cookie(cookie->debug_id, usage, where); } /* diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index cce92216fa28b..d6bdb7b5e7232 100644 --- a/fs/fscache/netfs.c +++ 
b/fs/fscache/netfs.c @@ -37,7 +37,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) if (!cookie) goto already_registered; if (cookie != candidate) { - trace_fscache_cookie(candidate, fscache_cookie_discard, 1); + trace_fscache_cookie(candidate->debug_id, 1, fscache_cookie_discard); fscache_free_cookie(candidate); } diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 0b9e058aba4d6..55b8802740fae 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -160,37 +160,27 @@ fscache_cookie_traces; TRACE_EVENT(fscache_cookie, - TP_PROTO(struct fscache_cookie *cookie, - enum fscache_cookie_trace where, - int usage), + TP_PROTO(unsigned int cookie_debug_id, + int usage, + enum fscache_cookie_trace where), - TP_ARGS(cookie, where, usage), + TP_ARGS(cookie_debug_id, usage, where), TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(unsigned int, parent ) __field(enum fscache_cookie_trace, where ) __field(int, usage ) - __field(int, n_children ) - __field(int, n_active ) - __field(u8, flags ) ), TP_fast_assign( - __entry->cookie = cookie->debug_id; - __entry->parent = cookie->parent ? 
cookie->parent->debug_id : 0; + __entry->cookie = cookie_debug_id; __entry->where = where; __entry->usage = usage; - __entry->n_children = atomic_read(&cookie->n_children); - __entry->n_active = atomic_read(&cookie->n_active); - __entry->flags = cookie->flags; ), - TP_printk("%s c=%08x u=%d p=%08x Nc=%d Na=%d f=%02x", + TP_printk("%s c=%08x u=%d", __print_symbolic(__entry->where, fscache_cookie_traces), - __entry->cookie, __entry->usage, - __entry->parent, __entry->n_children, __entry->n_active, - __entry->flags) + __entry->cookie, __entry->usage) ); TRACE_EVENT(fscache_netfs, From 5193f26aef591d289b74fcf2055fef002bd25432 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Mar 2021 13:53:50 +0100 Subject: [PATCH 097/851] fscache: Use refcount_t for the cookie refcount instead of atomic_t Use refcount_t for the fscache_cookie refcount instead of atomic_t and rename the 'usage' member to 'ref' in such cases. The tracepoints that reference it change from showing "u=%d" to "r=%d". Signed-off-by: David Howells --- fs/fscache/cache.c | 2 +- fs/fscache/cookie.c | 31 ++++++++++++++++-------- fs/fscache/fsdef.c | 2 +- fs/fscache/internal.h | 17 ++++++------- include/linux/fscache.h | 2 +- include/trace/events/fscache.h | 44 +++++++++++++++++----------------- 6 files changed, 55 insertions(+), 43 deletions(-) diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index e7a5d7ab40853..bd4f44c1cce03 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -269,7 +269,7 @@ int fscache_add_cache(struct fscache_cache *cache, hlist_add_head(&ifsdef->cookie_link, &fscache_fsdef_index.backing_objects); - atomic_inc(&fscache_fsdef_index.usage); + refcount_inc(&fscache_fsdef_index.ref); /* done */ spin_unlock(&fscache_fsdef_index.lock); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 6df3732cf1b46..cd42be646ed3b 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -164,7 +164,7 @@ struct fscache_cookie *fscache_alloc_cookie( goto nomem; } - 
atomic_set(&cookie->usage, 1); + refcount_set(&cookie->ref, 1); atomic_set(&cookie->n_children, 0); cookie->debug_id = atomic_inc_return(&fscache_cookie_debug_id); @@ -225,7 +225,7 @@ struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *candidate) collision: if (test_and_set_bit(FSCACHE_COOKIE_ACQUIRED, &cursor->flags)) { - trace_fscache_cookie(cursor->debug_id, atomic_read(&cursor->usage), + trace_fscache_cookie(cursor->debug_id, refcount_read(&cursor->ref), fscache_cookie_collision); pr_err("Duplicate cookie detected\n"); fscache_print_cookie(cursor, 'O'); @@ -826,13 +826,12 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, BUG_ON(!radix_tree_empty(&cookie->stores)); if (cookie->parent) { - ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); + ASSERTCMP(refcount_read(&cookie->parent->ref), >, 0); ASSERTCMP(atomic_read(&cookie->parent->n_children), >, 0); atomic_dec(&cookie->parent->n_children); } /* Dispose of the netfs's link to the cookie */ - ASSERTCMP(atomic_read(&cookie->usage), >, 0); fscache_cookie_put(cookie, fscache_cookie_put_relinquish); _leave(""); @@ -862,18 +861,17 @@ void fscache_cookie_put(struct fscache_cookie *cookie, enum fscache_cookie_trace where) { struct fscache_cookie *parent; - int usage; + int ref; _enter("%x", cookie->debug_id); do { unsigned int cookie_debug_id = cookie->debug_id; - usage = atomic_dec_return(&cookie->usage); - trace_fscache_cookie(cookie_debug_id, usage, where); + bool zero = __refcount_dec_and_test(&cookie->ref, &ref); - if (usage > 0) + trace_fscache_cookie(cookie_debug_id, ref - 1, where); + if (!zero) return; - BUG_ON(usage < 0); parent = cookie->parent; fscache_unhash_cookie(cookie); @@ -886,6 +884,19 @@ void fscache_cookie_put(struct fscache_cookie *cookie, _leave(""); } +/* + * Get a reference to a cookie. 
+ */ +struct fscache_cookie *fscache_cookie_get(struct fscache_cookie *cookie, + enum fscache_cookie_trace where) +{ + int ref; + + __refcount_inc(&cookie->ref, &ref); + trace_fscache_cookie(cookie->debug_id, ref + 1, where); + return cookie; +} + /* * check the consistency between the netfs inode and the backing cache * @@ -1003,7 +1014,7 @@ static int fscache_cookies_seq_show(struct seq_file *m, void *v) "%08x %08x %5u %5u %3u %s %03lx %-16s %px", cookie->debug_id, cookie->parent ? cookie->parent->debug_id : 0, - atomic_read(&cookie->usage), + refcount_read(&cookie->ref), atomic_read(&cookie->n_children), atomic_read(&cookie->n_active), type, diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c index 5f8f6fe243fe2..0402673c680e1 100644 --- a/fs/fscache/fsdef.c +++ b/fs/fscache/fsdef.c @@ -46,7 +46,7 @@ static struct fscache_cookie_def fscache_fsdef_index_def = { struct fscache_cookie fscache_fsdef_index = { .debug_id = 1, - .usage = ATOMIC_INIT(1), + .ref = REFCOUNT_INIT(1), .n_active = ATOMIC_INIT(1), .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), .backing_objects = HLIST_HEAD_INIT, diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 345105dbbfd1b..c3e4804b8fcbf 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -54,9 +54,18 @@ extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *, const void *, size_t, void *, loff_t); extern struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *); +extern struct fscache_cookie *fscache_cookie_get(struct fscache_cookie *, + enum fscache_cookie_trace); extern void fscache_cookie_put(struct fscache_cookie *, enum fscache_cookie_trace); +static inline void fscache_cookie_see(struct fscache_cookie *cookie, + enum fscache_cookie_trace where) +{ + trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref), + where); +} + /* * fsdef.c */ @@ -286,14 +295,6 @@ static inline void fscache_raise_event(struct fscache_object *object, fscache_enqueue_object(object); 
} -static inline void fscache_cookie_get(struct fscache_cookie *cookie, - enum fscache_cookie_trace where) -{ - int usage = atomic_inc_return(&cookie->usage); - - trace_fscache_cookie(cookie->debug_id, usage, where); -} - /* * get an extra reference to a netfs retrieval context */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index ea61e54a6bc57..a4dab59986137 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -123,7 +123,7 @@ struct fscache_netfs { * - indices are created on disk just-in-time */ struct fscache_cookie { - atomic_t usage; /* number of users of this cookie */ + refcount_t ref; /* number of users of this cookie */ atomic_t n_children; /* number of children of this cookie */ atomic_t n_active; /* number of active users of netfs ptrs */ unsigned int debug_id; diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index 55b8802740fae..51f2b492b9eb8 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -161,26 +161,26 @@ fscache_cookie_traces; TRACE_EVENT(fscache_cookie, TP_PROTO(unsigned int cookie_debug_id, - int usage, + int ref, enum fscache_cookie_trace where), - TP_ARGS(cookie_debug_id, usage, where), + TP_ARGS(cookie_debug_id, ref, where), TP_STRUCT__entry( __field(unsigned int, cookie ) __field(enum fscache_cookie_trace, where ) - __field(int, usage ) + __field(int, ref ) ), TP_fast_assign( __entry->cookie = cookie_debug_id; __entry->where = where; - __entry->usage = usage; + __entry->ref = ref; ), - TP_printk("%s c=%08x u=%d", + TP_printk("%s c=%08x r=%d", __print_symbolic(__entry->where, fscache_cookie_traces), - __entry->cookie, __entry->usage) + __entry->cookie, __entry->ref) ); TRACE_EVENT(fscache_netfs, @@ -212,7 +212,7 @@ TRACE_EVENT(fscache_acquire, __field(unsigned int, cookie ) __field(unsigned int, parent ) __array(char, name, 8 ) - __field(int, p_usage ) + __field(int, p_ref ) __field(int, p_n_children ) __field(u8, p_flags ) ), @@ -220,15 +220,15 
@@ TRACE_EVENT(fscache_acquire, TP_fast_assign( __entry->cookie = cookie->debug_id; __entry->parent = cookie->parent->debug_id; - __entry->p_usage = atomic_read(&cookie->parent->usage); + __entry->p_ref = refcount_read(&cookie->parent->ref); __entry->p_n_children = atomic_read(&cookie->parent->n_children); __entry->p_flags = cookie->parent->flags; memcpy(__entry->name, cookie->def->name, 8); __entry->name[7] = 0; ), - TP_printk("c=%08x p=%08x pu=%d pc=%d pf=%02x n=%s", - __entry->cookie, __entry->parent, __entry->p_usage, + TP_printk("c=%08x p=%08x pr=%d pc=%d pf=%02x n=%s", + __entry->cookie, __entry->parent, __entry->p_ref, __entry->p_n_children, __entry->p_flags, __entry->name) ); @@ -240,7 +240,7 @@ TRACE_EVENT(fscache_relinquish, TP_STRUCT__entry( __field(unsigned int, cookie ) __field(unsigned int, parent ) - __field(int, usage ) + __field(int, ref ) __field(int, n_children ) __field(int, n_active ) __field(u8, flags ) @@ -250,15 +250,15 @@ TRACE_EVENT(fscache_relinquish, TP_fast_assign( __entry->cookie = cookie->debug_id; __entry->parent = cookie->parent->debug_id; - __entry->usage = atomic_read(&cookie->usage); + __entry->ref = refcount_read(&cookie->ref); __entry->n_children = atomic_read(&cookie->n_children); __entry->n_active = atomic_read(&cookie->n_active); __entry->flags = cookie->flags; __entry->retire = retire; ), - TP_printk("c=%08x u=%d p=%08x Nc=%d Na=%d f=%02x r=%u", - __entry->cookie, __entry->usage, + TP_printk("c=%08x r=%d p=%08x Nc=%d Na=%d f=%02x r=%u", + __entry->cookie, __entry->ref, __entry->parent, __entry->n_children, __entry->n_active, __entry->flags, __entry->retire) ); @@ -270,7 +270,7 @@ TRACE_EVENT(fscache_enable, TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(int, usage ) + __field(int, ref ) __field(int, n_children ) __field(int, n_active ) __field(u8, flags ) @@ -278,14 +278,14 @@ TRACE_EVENT(fscache_enable, TP_fast_assign( __entry->cookie = cookie->debug_id; - __entry->usage = atomic_read(&cookie->usage); + 
__entry->ref = refcount_read(&cookie->ref); __entry->n_children = atomic_read(&cookie->n_children); __entry->n_active = atomic_read(&cookie->n_active); __entry->flags = cookie->flags; ), - TP_printk("c=%08x u=%d Nc=%d Na=%d f=%02x", - __entry->cookie, __entry->usage, + TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x", + __entry->cookie, __entry->ref, __entry->n_children, __entry->n_active, __entry->flags) ); @@ -296,7 +296,7 @@ TRACE_EVENT(fscache_disable, TP_STRUCT__entry( __field(unsigned int, cookie ) - __field(int, usage ) + __field(int, ref ) __field(int, n_children ) __field(int, n_active ) __field(u8, flags ) @@ -304,14 +304,14 @@ TRACE_EVENT(fscache_disable, TP_fast_assign( __entry->cookie = cookie->debug_id; - __entry->usage = atomic_read(&cookie->usage); + __entry->ref = refcount_read(&cookie->ref); __entry->n_children = atomic_read(&cookie->n_children); __entry->n_active = atomic_read(&cookie->n_active); __entry->flags = cookie->flags; ), - TP_printk("c=%08x u=%d Nc=%d Na=%d f=%02x", - __entry->cookie, __entry->usage, + TP_printk("c=%08x r=%d Nc=%d Na=%d f=%02x", + __entry->cookie, __entry->ref, __entry->n_children, __entry->n_active, __entry->flags) ); From 2e1cbaba3f3f6657d814de03ce4296141bce8954 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 6 Jul 2021 23:40:34 +0900 Subject: [PATCH 098/851] loop: reintroduce global lock for safe loop_validate_file() traversal Commit 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") re-opened a race window for NULL pointer dereference at loop_validate_file() where commit 310ca162d779efee ("block/loop: Use global lock for ioctl() operation.") has closed. Although we need to guarantee that other loop devices will not change during traversal, we can't take remote "struct loop_device"->lo_mutex inside loop_validate_file() in order to avoid AB-BA deadlock. 
Therefore, introduce a global lock dedicated for loop_validate_file() which is conditionally taken before local "struct loop_device"->lo_mutex is taken. Signed-off-by: Tetsuo Handa Fixes: 6cc8e7430801fa23 ("loop: scale loop device by introducing per device lock") --- drivers/block/loop.c | 128 ++++++++++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 31 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f37b9e3d833c2..f0cdff0c5fbf4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,6 +88,47 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_MUTEX(loop_validate_mutex); + +/** + * loop_global_lock_killable() - take locks for safe loop_validate_file() test + * + * @lo: struct loop_device + * @global: true if @lo is about to bind another "struct loop_device", false otherwise + * + * Returns 0 on success, -EINTR otherwise. + * + * Since loop_validate_file() traverses on other "struct loop_device" if + * is_loop_device() is true, we need a global lock for serializing concurrent + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. 
+ */ +static int loop_global_lock_killable(struct loop_device *lo, bool global) +{ + int err; + + if (global) { + err = mutex_lock_killable(&loop_validate_mutex); + if (err) + return err; + } + err = mutex_lock_killable(&lo->lo_mutex); + if (err && global) + mutex_unlock(&loop_validate_mutex); + return err; +} + +/** + * loop_global_unlock() - release locks taken by loop_global_lock_killable() + * + * @lo: struct loop_device + * @global: true if @lo was about to bind another "struct loop_device", false otherwise + */ +static void loop_global_unlock(struct loop_device *lo, bool global) +{ + mutex_unlock(&lo->lo_mutex); + if (global) + mutex_unlock(&loop_validate_mutex); +} static int max_part; static int part_shift; @@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; + lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; - if (l->lo_state != Lo_bound) { + if (l->lo_state != Lo_bound) return -EINVAL; - } + /* Order wrt setting lo->lo_backing_file in loop_configure(). 
*/ + rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) @@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file = NULL, *old_file; - int error; - bool partscan; + struct file *file = fget(arg); + struct file *old_file; + int error; + bool partscan; + bool is_loop; - error = mutex_lock_killable(&lo->lo_mutex); + if (!file) + return -EBADF; + is_loop = is_loop_device(file); + error = loop_global_lock_killable(lo, is_loop); if (error) - return error; + goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; @@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; - error = -EBADF; - file = fget(arg); - if (!file) - goto out_err; - error = loop_validate_file(file, bdev); if (error) goto out_err; @@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); + + /* + * Flush loop_validate_file() before fput(), for l->lo_backing_file + * might be pointing at old_file which might be the last reference. 
+ */ + if (!is_loop) { + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + } /* * We must drop file reference outside of lo_mutex as dropping * the file ref can take open_mutex which creates circular locking @@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, return 0; out_err: - mutex_unlock(&lo->lo_mutex); - if (file) - fput(file); + loop_global_unlock(lo, is_loop); +out_putf: + fput(file); return error; } @@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct block_device *bdev, const struct loop_config *config) { - struct file *file; - struct inode *inode; + struct file *file = fget(config->fd); + struct inode *inode; struct address_space *mapping; - int error; - loff_t size; - bool partscan; - unsigned short bsize; + int error; + loff_t size; + bool partscan; + unsigned short bsize; + bool is_loop; + + if (!file) + return -EBADF; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - error = -EBADF; - file = fget(config->fd); - if (!file) - goto out; - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&lo->lo_mutex); + error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; @@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); + /* Order wrt reading lo_state in loop_validate_file(). */ + wmb(); + lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). 
*/ bdgrab(bdev); - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); if (!(mode & FMODE_EXCL)) @@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, return 0; out_unlock: - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); -out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) int lo_number; struct loop_worker *pos, *worker; + /* + * Flush loop_configure() and loop_change_fd(). It is acceptable for + * loop_validate_file() to succeed, for actual clear operation has not + * started yet. + */ + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + /* + * loop_validate_file() now fails because l->lo_state != Lo_bound + * became visible. + */ + mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { err = -ENXIO; From 4fb9c588398fde1536b219a229e231a9f501c168 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 13 Jul 2021 20:27:49 +0900 Subject: [PATCH 099/851] Bluetooth: call lock_sock() outside of spinlock section syzbot is hitting might_sleep() warning at hci_sock_dev_event() due to calling lock_sock() with rw spinlock held [1]. Among three possible approaches [2], this patch chose holding a refcount via sock_hold() and revalidating the element via sk_hashed(). 
Link: https://syzkaller.appspot.com/bug?extid=a5df189917e79d5e59c9 [1] Link: https://lkml.kernel.org/r/05535d35-30d6-28b6-067e-272d01679d24@i-love.sakura.ne.jp [2] Reported-by: syzbot Signed-off-by: Tetsuo Handa Tested-by: syzbot Fixes: e305509e678b3a4a ("Bluetooth: use correct lock to prevent UAF of hdev object") --- net/bluetooth/hci_sock.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b04a5a02ecf31..786a06a232fd9 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -760,10 +760,18 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) struct sock *sk; /* Detach sockets from device */ +restart: read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { + /* This sock_hold(sk) is safe, for bt_sock_unlink(sk) + * is not called yet. + */ + sock_hold(sk); + read_unlock(&hci_sk_list.lock); lock_sock(sk); - if (hci_pi(sk)->hdev == hdev) { + write_lock(&hci_sk_list.lock); + /* Check that bt_sock_unlink(sk) is not called yet. */ + if (sk_hashed(sk) && hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; sk->sk_state = BT_OPEN; @@ -771,7 +779,27 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) hci_dev_put(hdev); } + write_unlock(&hci_sk_list.lock); release_sock(sk); + read_lock(&hci_sk_list.lock); + /* If bt_sock_unlink(sk) is not called yet, we can + * continue iteration. We can use __sock_put(sk) here + * because hci_sock_release() will call sock_put(sk) + * after bt_sock_unlink(sk). + */ + if (sk_hashed(sk)) { + __sock_put(sk); + continue; + } + /* Otherwise, we need to restart iteration, for the + * next socket pointed by sk->next might be already + * gone. We can't use __sock_put(sk) here because + * hci_sock_release() might have already called + * sock_put(sk) after bt_sock_unlink(sk). 
+ */ + read_unlock(&hci_sk_list.lock); + sock_put(sk); + goto restart; } read_unlock(&hci_sk_list.lock); } From 0f253e3827986e089e9af9f8ffdb6c96177c1c60 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 15 Jun 2021 21:16:05 +0800 Subject: [PATCH 100/851] arm64: dts: lx2160a: Fix the compatible string of LX2160A UART Mark Kettenis told us that: According to the NXP documentation, the LX2160A has a real PL011 UART. Therefore, rewrite it to the compatible string of pl011. The property "current-speed" specific to "arm,sbsa-uart" is also deleted. Suggested-by: Shawn Guo Suggested-by: Mark Kettenis Signed-off-by: Zhen Lei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index c4b1a59ba424b..d2e6f7285674e 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -920,34 +920,30 @@ }; uart0: serial@21c0000 { - compatible = "arm,sbsa-uart","arm,pl011"; + compatible = "arm,pl011", "arm,primecell"; reg = <0x0 0x21c0000 0x0 0x1000>; interrupts = ; - current-speed = <115200>; status = "disabled"; }; uart1: serial@21d0000 { - compatible = "arm,sbsa-uart","arm,pl011"; + compatible = "arm,pl011", "arm,primecell"; reg = <0x0 0x21d0000 0x0 0x1000>; interrupts = ; - current-speed = <115200>; status = "disabled"; }; uart2: serial@21e0000 { - compatible = "arm,sbsa-uart","arm,pl011"; + compatible = "arm,pl011", "arm,primecell"; reg = <0x0 0x21e0000 0x0 0x1000>; interrupts = ; - current-speed = <115200>; status = "disabled"; }; uart3: serial@21f0000 { - compatible = "arm,sbsa-uart","arm,pl011"; + compatible = "arm,pl011", "arm,primecell"; reg = <0x0 0x21f0000 0x0 0x1000>; interrupts = ; - current-speed = <115200>; status = "disabled"; }; From 20072409e3b8aa798f749c9571b71c78009138de Mon Sep 17 00:00:00 2001 From: Ioana Ciornei 
Date: Tue, 15 Jun 2021 19:03:35 +0300 Subject: [PATCH 101/851] arm64: dts: lx2160ardb: update PHY nodes with IRQ information Update the DTS nodes for both the AR8035 and the AQR107 PHYs in order to describe their IRQ lines. Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-lx2160a-rdb.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-lx2160a-rdb.dts index 5dbf27493e8b2..028ff8074b9d0 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a-rdb.dts @@ -65,6 +65,7 @@ rgmii_phy1: ethernet-phy@1 { /* AR8035 PHY */ compatible = "ethernet-phy-id004d.d072"; + interrupts-extended = <&extirq 4 IRQ_TYPE_LEVEL_LOW>; reg = <0x1>; eee-broken-1000t; }; @@ -72,6 +73,7 @@ rgmii_phy2: ethernet-phy@2 { /* AR8035 PHY */ compatible = "ethernet-phy-id004d.d072"; + interrupts-extended = <&extirq 5 IRQ_TYPE_LEVEL_LOW>; reg = <0x2>; eee-broken-1000t; }; @@ -79,12 +81,14 @@ aquantia_phy1: ethernet-phy@4 { /* AQR107 PHY */ compatible = "ethernet-phy-ieee802.3-c45"; + interrupts-extended = <&extirq 2 IRQ_TYPE_LEVEL_LOW>; reg = <0x4>; }; aquantia_phy2: ethernet-phy@5 { /* AQR107 PHY */ compatible = "ethernet-phy-ieee802.3-c45"; + interrupts-extended = <&extirq 3 IRQ_TYPE_LEVEL_LOW>; reg = <0x5>; }; }; From 16058f50b2cd51790fa947006df1c93147aa8b56 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 15 Jun 2021 19:03:36 +0300 Subject: [PATCH 102/851] arm64: dts: ls2088ardb: update PHY nodes with IRQ information Update the DTS nodes corresponding to the 4 10GBASE-R PHYs to describe their IRQ lines. 
Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls2088a-rdb.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2088a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls2088a-rdb.dts index 60563917be444..3e4e857db13fa 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2088a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls2088a-rdb.dts @@ -92,21 +92,25 @@ mdio2_phy1: ethernet-phy@0 { compatible = "ethernet-phy-id03a1.b4b0", "ethernet-phy-ieee802.3-c45"; + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0x0>; }; mdio2_phy2: ethernet-phy@1 { compatible = "ethernet-phy-id03a1.b4b0", "ethernet-phy-ieee802.3-c45"; + interrupts-extended = <&extirq 2 IRQ_TYPE_LEVEL_LOW>; reg = <0x1>; }; mdio2_phy3: ethernet-phy@2 { compatible = "ethernet-phy-id03a1.b4b0", "ethernet-phy-ieee802.3-c45"; + interrupts-extended = <&extirq 4 IRQ_TYPE_LEVEL_LOW>; reg = <0x2>; }; mdio2_phy4: ethernet-phy@3 { compatible = "ethernet-phy-id03a1.b4b0", "ethernet-phy-ieee802.3-c45"; + interrupts-extended = <&extirq 5 IRQ_TYPE_LEVEL_LOW>; reg = <0x3>; }; }; From b7df2058406635cf99695efdb68b325788456247 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 15 Jun 2021 19:03:37 +0300 Subject: [PATCH 103/851] arm64: dts: ls1088ardb: update PHY nodes with IRQ information Describe the IRQs for both the QSGMII PHYs and the 10GBASE-R PHY found on the LS1088ARDB board. 
Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts index bf7b43ab12932..1bfbce69cc8b7 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts @@ -83,34 +83,42 @@ status = "okay"; mdio1_phy5: ethernet-phy@c { + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0xc>; }; mdio1_phy6: ethernet-phy@d { + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0xd>; }; mdio1_phy7: ethernet-phy@e { + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0xe>; }; mdio1_phy8: ethernet-phy@f { + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0xf>; }; mdio1_phy1: ethernet-phy@1c { + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0x1c>; }; mdio1_phy2: ethernet-phy@1d { + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0x1d>; }; mdio1_phy3: ethernet-phy@1e { + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0x1e>; }; mdio1_phy4: ethernet-phy@1f { + interrupts-extended = <&extirq 1 IRQ_TYPE_LEVEL_LOW>; reg = <0x1f>; }; }; @@ -120,6 +128,7 @@ mdio2_aquantia_phy: ethernet-phy@0 { compatible = "ethernet-phy-ieee802.3-c45"; + interrupts-extended = <&extirq 2 IRQ_TYPE_LEVEL_LOW>; reg = <0x0>; }; }; From 5e610bf0049ef1b80c81cc137a8967726da5e2a7 Mon Sep 17 00:00:00 2001 From: Kwon Tae-young Date: Fri, 18 Jun 2021 16:27:33 +0900 Subject: [PATCH 104/851] arm64: dts: imx8qxp-ai_ml: Fix checkpatch warnings Fix the following warnings reported by checkpatch: arch/..../imx8qxp-ai_ml.dts:198: WARNING: please, no space before tabs Signed-off-by: Kwon Tae-young Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts b/arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts index 47bb68823b240..7d00e17f04474 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts +++ b/arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts @@ -195,7 +195,7 @@ fsl,pins = < IMX8QXP_UART0_RX_ADMA_UART0_RX 0X06000020 IMX8QXP_UART0_TX_ADMA_UART0_TX 0X06000020 - IMX8QXP_FLEXCAN0_TX_ADMA_UART0_CTS_B 0x06000020 + IMX8QXP_FLEXCAN0_TX_ADMA_UART0_CTS_B 0x06000020 IMX8QXP_FLEXCAN0_RX_ADMA_UART0_RTS_B 0x06000020 >; }; From 6fac89785ac8854271c2302287cca38618332b19 Mon Sep 17 00:00:00 2001 From: Mirela Rabulea Date: Sat, 19 Jun 2021 17:36:11 +0300 Subject: [PATCH 105/851] arm64: dts: imx8: Add jpeg encoder/decoder nodes Add dts for imaging subsytem, include jpeg nodes here. Tested on imx8qxp/qm. Signed-off-by: Mirela Rabulea Reviewed-by: Dong Aisheng Signed-off-by: Shawn Guo --- .../arm64/boot/dts/freescale/imx8-ss-img.dtsi | 80 +++++++++++++++++++ .../boot/dts/freescale/imx8qm-ss-img.dtsi | 12 +++ arch/arm64/boot/dts/freescale/imx8qm.dtsi | 2 + .../boot/dts/freescale/imx8qxp-ss-img.dtsi | 13 +++ arch/arm64/boot/dts/freescale/imx8qxp.dtsi | 2 + 5 files changed, 109 insertions(+) create mode 100644 arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi create mode 100644 arch/arm64/boot/dts/freescale/imx8qm-ss-img.dtsi create mode 100644 arch/arm64/boot/dts/freescale/imx8qxp-ss-img.dtsi diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi new file mode 100644 index 0000000000000..a90654155a88b --- /dev/null +++ b/arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2019-2021 NXP + * Zhou Guoniu + */ +img_subsys: bus@58000000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x58000000 0x0 0x58000000 0x1000000>; + + img_ipg_clk: clock-img-ipg { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = 
<200000000>; + clock-output-names = "img_ipg_clk"; + }; + + jpegdec: jpegdec@58400000 { + reg = <0x58400000 0x00050000>; + interrupts = , + , + , + ; + clocks = <&img_jpeg_dec_lpcg IMX_LPCG_CLK_0>, + <&img_jpeg_dec_lpcg IMX_LPCG_CLK_4>; + clock-names = "per", "ipg"; + assigned-clocks = <&img_jpeg_dec_lpcg IMX_LPCG_CLK_0>, + <&img_jpeg_dec_lpcg IMX_LPCG_CLK_4>; + assigned-clock-rates = <200000000>, <200000000>; + power-domains = <&pd IMX_SC_R_MJPEG_DEC_MP>, + <&pd IMX_SC_R_MJPEG_DEC_S0>, + <&pd IMX_SC_R_MJPEG_DEC_S1>, + <&pd IMX_SC_R_MJPEG_DEC_S2>, + <&pd IMX_SC_R_MJPEG_DEC_S3>; + }; + + jpegenc: jpegenc@58450000 { + reg = <0x58450000 0x00050000>; + interrupts = , + , + , + ; + clocks = <&img_jpeg_enc_lpcg IMX_LPCG_CLK_0>, + <&img_jpeg_enc_lpcg IMX_LPCG_CLK_4>; + clock-names = "per", "ipg"; + assigned-clocks = <&img_jpeg_enc_lpcg IMX_LPCG_CLK_0>, + <&img_jpeg_enc_lpcg IMX_LPCG_CLK_4>; + assigned-clock-rates = <200000000>, <200000000>; + power-domains = <&pd IMX_SC_R_MJPEG_ENC_MP>, + <&pd IMX_SC_R_MJPEG_ENC_S0>, + <&pd IMX_SC_R_MJPEG_ENC_S1>, + <&pd IMX_SC_R_MJPEG_ENC_S2>, + <&pd IMX_SC_R_MJPEG_ENC_S3>; + }; + + img_jpeg_dec_lpcg: clock-controller@585d0000 { + compatible = "fsl,imx8qxp-lpcg"; + reg = <0x585d0000 0x10000>; + #clock-cells = <1>; + clocks = <&img_ipg_clk>, <&img_ipg_clk>; + clock-indices = , + ; + clock-output-names = "img_jpeg_dec_lpcg_clk", + "img_jpeg_dec_lpcg_ipg_clk"; + power-domains = <&pd IMX_SC_R_MJPEG_DEC_MP>; + }; + + img_jpeg_enc_lpcg: clock-controller@585f0000 { + compatible = "fsl,imx8qxp-lpcg"; + reg = <0x585f0000 0x10000>; + #clock-cells = <1>; + clocks = <&img_ipg_clk>, <&img_ipg_clk>; + clock-indices = , + ; + clock-output-names = "img_jpeg_enc_lpcg_clk", + "img_jpeg_enc_lpcg_ipg_clk"; + power-domains = <&pd IMX_SC_R_MJPEG_ENC_MP>; + }; +}; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-img.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-img.dtsi new file mode 100644 index 0000000000000..7764b4146e0ab --- /dev/null +++ 
b/arch/arm64/boot/dts/freescale/imx8qm-ss-img.dtsi @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2021 NXP + */ + +&jpegdec { + compatible = "nxp,imx8qm-jpgdec", "nxp,imx8qxp-jpgdec"; +}; + +&jpegenc { + compatible = "nxp,imx8qm-jpgenc", "nxp,imx8qxp-jpgenc"; +}; diff --git a/arch/arm64/boot/dts/freescale/imx8qm.dtsi b/arch/arm64/boot/dts/freescale/imx8qm.dtsi index 12cd059b339b7..aebbe2b84aa13 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm.dtsi @@ -166,11 +166,13 @@ }; /* sorted in register address */ + #include "imx8-ss-img.dtsi" #include "imx8-ss-dma.dtsi" #include "imx8-ss-conn.dtsi" #include "imx8-ss-lsio.dtsi" }; +#include "imx8qm-ss-img.dtsi" #include "imx8qm-ss-dma.dtsi" #include "imx8qm-ss-conn.dtsi" #include "imx8qm-ss-lsio.dtsi" diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-ss-img.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp-ss-img.dtsi new file mode 100644 index 0000000000000..3a087317591d8 --- /dev/null +++ b/arch/arm64/boot/dts/freescale/imx8qxp-ss-img.dtsi @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2021 NXP + * Dong Aisheng + */ + +&jpegdec { + compatible = "nxp,imx8qxp-jpgdec"; +}; + +&jpegenc { + compatible = "nxp,imx8qxp-jpgenc"; +}; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi index 1e6b4995091e0..a625fb6bdc621 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qxp.dtsi @@ -258,12 +258,14 @@ }; /* sorted in register address */ + #include "imx8-ss-img.dtsi" #include "imx8-ss-adma.dtsi" #include "imx8-ss-conn.dtsi" #include "imx8-ss-ddr.dtsi" #include "imx8-ss-lsio.dtsi" }; +#include "imx8qxp-ss-img.dtsi" #include "imx8qxp-ss-adma.dtsi" #include "imx8qxp-ss-conn.dtsi" #include "imx8qxp-ss-lsio.dtsi" From ad0529424defbbe0b6a154cc100e82c1a9f91400 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:33 +0200 Subject: 
[PATCH 106/851] gpu: host1x: Add DMA fence implementation Add an implementation of dma_fences based on syncpoints. Syncpoint interrupts are used to signal fences. Additionally, after software signaling has been enabled, a 30 second timeout is started. If the syncpoint threshold is not reached within this period, the fence is signalled with an -ETIMEDOUT error code. This is to allow fences that would never reach their syncpoint threshold to be cleaned up. The timeout can potentially be removed in the future after job tracking code has been refactored. Signed-off-by: Mikko Perttunen Reported-by: kernel test robot Signed-off-by: Thierry Reding --- drivers/gpu/host1x/Makefile | 1 + drivers/gpu/host1x/fence.c | 209 ++++++++++++++++++++++++++++++++++++ drivers/gpu/host1x/fence.h | 13 +++ drivers/gpu/host1x/intr.c | 9 ++ drivers/gpu/host1x/intr.h | 2 + include/linux/host1x.h | 3 + 6 files changed, 237 insertions(+) create mode 100644 drivers/gpu/host1x/fence.c create mode 100644 drivers/gpu/host1x/fence.h diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index 096017b8789d2..d2b6f7de04989 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -9,6 +9,7 @@ host1x-y = \ job.o \ debug.o \ mipi.o \ + fence.o \ hw/host1x01.o \ hw/host1x02.o \ hw/host1x04.o \ diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c new file mode 100644 index 0000000000000..06c6b86237bde --- /dev/null +++ b/drivers/gpu/host1x/fence.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Syncpoint dma_fence implementation + * + * Copyright (c) 2020, NVIDIA Corporation. 
+ */ + +#include +#include +#include +#include +#include + +#include "fence.h" +#include "intr.h" +#include "syncpt.h" + +DEFINE_SPINLOCK(lock); + +struct host1x_syncpt_fence { + struct dma_fence base; + + atomic_t signaling; + + struct host1x_syncpt *sp; + u32 threshold; + + struct host1x_waitlist *waiter; + void *waiter_ref; + + struct delayed_work timeout_work; +}; + +static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f) +{ + return "host1x"; +} + +static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f) +{ + return "syncpoint"; +} + +static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f) +{ + return container_of(f, struct host1x_syncpt_fence, base); +} + +static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + int err; + + if (host1x_syncpt_is_expired(sf->sp, sf->threshold)) + return false; + + dma_fence_get(f); + + /* + * The dma_fence framework requires the fence driver to keep a + * reference to any fences for which 'enable_signaling' has been + * called (and that have not been signalled). + * + * We provide a userspace API to create arbitrary syncpoint fences, + * so we cannot normally guarantee that all fences get signalled. + * As such, setup a timeout, so that long-lasting fences will get + * reaped eventually. + */ + schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000)); + + err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold, + HOST1X_INTR_ACTION_SIGNAL_FENCE, f, + sf->waiter, &sf->waiter_ref); + if (err) { + cancel_delayed_work_sync(&sf->timeout_work); + dma_fence_put(f); + return false; + } + + /* intr framework takes ownership of waiter */ + sf->waiter = NULL; + + /* + * The fence may get signalled at any time after the above call, + * so we need to initialize all state used by signalling + * before it. 
+ */ + + return true; +} + +static void host1x_syncpt_fence_release(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + kfree(sf->waiter); + dma_fence_free(f); +} + +const struct dma_fence_ops host1x_syncpt_fence_ops = { + .get_driver_name = host1x_syncpt_fence_get_driver_name, + .get_timeline_name = host1x_syncpt_fence_get_timeline_name, + .enable_signaling = host1x_syncpt_fence_enable_signaling, + .release = host1x_syncpt_fence_release, +}; + +void host1x_fence_signal(struct host1x_syncpt_fence *f) +{ + if (atomic_xchg(&f->signaling, 1)) + return; + + /* + * Cancel pending timeout work - if it races, it will + * not get 'f->signaling' and return. + */ + cancel_delayed_work_sync(&f->timeout_work); + + host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false); + + dma_fence_signal(&f->base); + dma_fence_put(&f->base); +} + +static void do_fence_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = (struct delayed_work *)work; + struct host1x_syncpt_fence *f = + container_of(dwork, struct host1x_syncpt_fence, timeout_work); + + if (atomic_xchg(&f->signaling, 1)) + return; + + /* + * Cancel pending timeout work - if it races, it will + * not get 'f->signaling' and return. 
+ */ + host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true); + + dma_fence_set_error(&f->base, -ETIMEDOUT); + dma_fence_signal(&f->base); + dma_fence_put(&f->base); +} + +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold) +{ + struct host1x_syncpt_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL); + if (!fence->waiter) + return ERR_PTR(-ENOMEM); + + fence->sp = sp; + fence->threshold = threshold; + + dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock, + dma_fence_context_alloc(1), 0); + + INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout); + + return &fence->base; +} +EXPORT_SYMBOL(host1x_fence_create); + +int host1x_fence_create_fd(struct host1x_syncpt *sp, u32 threshold) +{ + struct sync_file *file; + struct dma_fence *f; + int fd; + + f = host1x_fence_create(sp, threshold); + if (IS_ERR(f)) + return PTR_ERR(f); + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + dma_fence_put(f); + return fd; + } + + file = sync_file_create(f); + dma_fence_put(f); + if (!file) + return -ENOMEM; + + fd_install(fd, file->file); + + return fd; +} +EXPORT_SYMBOL(host1x_fence_create_fd); + +int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold) +{ + struct host1x_syncpt_fence *f; + + if (fence->ops != &host1x_syncpt_fence_ops) + return -EINVAL; + + f = container_of(fence, struct host1x_syncpt_fence, base); + + *id = f->sp->id; + *threshold = f->threshold; + + return 0; +} +EXPORT_SYMBOL(host1x_fence_extract); diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h new file mode 100644 index 0000000000000..70c91de82f146 --- /dev/null +++ b/drivers/gpu/host1x/fence.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, NVIDIA Corporation. 
+ */ + +#ifndef HOST1X_FENCE_H +#define HOST1X_FENCE_H + +struct host1x_syncpt_fence; + +void host1x_fence_signal(struct host1x_syncpt_fence *fence); + +#endif diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 6d1f3c0fdbe77..45b6be927ec4d 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -13,6 +13,7 @@ #include #include "channel.h" #include "dev.h" +#include "fence.h" #include "intr.h" /* Wait list management */ @@ -121,12 +122,20 @@ static void action_wakeup_interruptible(struct host1x_waitlist *waiter) wake_up_interruptible(wq); } +static void action_signal_fence(struct host1x_waitlist *waiter) +{ + struct host1x_syncpt_fence *f = waiter->data; + + host1x_fence_signal(f); +} + typedef void (*action_handler)(struct host1x_waitlist *waiter); static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { action_submit_complete, action_wakeup, action_wakeup_interruptible, + action_signal_fence, }; static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT]) diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index 6ea55e615e3ae..e4c3460992735 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -33,6 +33,8 @@ enum host1x_intr_action { */ HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, + HOST1X_INTR_ACTION_SIGNAL_FENCE, + HOST1X_INTR_ACTION_COUNT }; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9b0487c885719..59e4a3bea0b0e 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -170,6 +170,9 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, u32 syncpt_id); +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold); +int host1x_fence_extract(struct dma_fence *fence, u32 *id, u32 *threshold); + /* * host1x channel */ From 684e366e17947e4446d16b606a5f671289246e1f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 
2021 21:31:34 +0200 Subject: [PATCH 107/851] gpu: host1x: Add no-recovery mode Add a new property for jobs to enable or disable recovery, i.e. CPU increments of syncpoints to max value on job timeout. This allows for a more solid model for hung jobs, where userspace doesn't need to guess if a syncpoint increment happened because the job completed, or because job timeout was triggered. On job timeout, we stop the channel, NOP all future jobs on the channel using the same syncpoint, mark the syncpoint as locked and resume the channel from the next job, if any. The future jobs are NOPed because, since we don't do the CPU increments, the value of the syncpoint is no longer synchronized, and any waiters would become confused if a future job incremented the syncpoint. The syncpoint is marked locked to ensure that any future jobs cannot increment the syncpoint either, until the application has recognized the situation and reallocated the syncpoint. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 1 + drivers/gpu/host1x/cdma.c | 58 ++++++++++++++++++++++++++---- drivers/gpu/host1x/hw/channel_hw.c | 2 +- drivers/gpu/host1x/job.c | 4 +++ drivers/gpu/host1x/syncpt.c | 2 ++ drivers/gpu/host1x/syncpt.h | 12 +++++++ include/linux/host1x.h | 9 +++++ 7 files changed, 81 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index f96c237b22426..739250acd498d 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -201,6 +201,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, job->client = client; job->class = client->class; job->serialize = true; + job->syncpt_recovery = true; /* * Track referenced BOs so that they can be unreferenced after the diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 6e6ca774f68d0..765e5aa64eb68 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -312,10 +312,6 @@ static void
update_cdma_locked(struct host1x_cdma *cdma) bool signal = false; struct host1x_job *job, *n; - /* If CDMA is stopped, queue is cleared and we can return */ - if (!cdma->running) - return; - /* * Walk the sync queue, reading the sync point registers as necessary, * to consume as many sync queue entries as possible without blocking @@ -324,7 +320,8 @@ static void update_cdma_locked(struct host1x_cdma *cdma) struct host1x_syncpt *sp = job->syncpt; /* Check whether this syncpt has completed, and bail if not */ - if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { + if (!host1x_syncpt_is_expired(sp, job->syncpt_end) && + !job->cancelled) { /* Start timer on next pending syncpt */ if (job->timeout) cdma_start_timer_locked(cdma, job); @@ -413,8 +410,11 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, else restart_addr = cdma->last_pos; + if (!job) + goto resume; + /* do CPU increments for the remaining syncpts */ - if (job) { + if (job->syncpt_recovery) { dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", __func__); @@ -433,8 +433,44 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); + } else { + struct host1x_job *failed_job = job; + + host1x_job_dump(dev, job); + + host1x_syncpt_set_locked(job->syncpt); + failed_job->cancelled = true; + + list_for_each_entry_continue(job, &cdma->sync_queue, list) { + unsigned int i; + + if (job->syncpt != failed_job->syncpt) + continue; + + for (i = 0; i < job->num_slots; i++) { + unsigned int slot = (job->first_get/8 + i) % + HOST1X_PUSHBUFFER_SLOTS; + u32 *mapped = cdma->push_buffer.mapped; + + /* + * Overwrite opcodes with 0 word writes + * to offset 0xbad. This does nothing but + * has a easily detected signature in debug + * traces. 
+ */ + mapped[2*slot+0] = 0x1bad0000; + mapped[2*slot+1] = 0x1bad0000; + } + + job->cancelled = true; + } + + wmb(); + + update_cdma_locked(cdma); } +resume: /* roll back DMAGET and start up channel again */ host1x_hw_cdma_resume(host1x, cdma, restart_addr); } @@ -490,6 +526,16 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) mutex_lock(&cdma->lock); + /* + * Check if syncpoint was locked due to previous job timeout. + * This needs to be done within the cdma lock to avoid a race + * with the timeout handler. + */ + if (job->syncpt->locked) { + mutex_unlock(&cdma->lock); + return -EPERM; + } + if (job->timeout) { /* init state on first submit with timeout value */ if (!cdma->timeout.initialized) { diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index d4c28faf27d18..bf21512e5078f 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -191,7 +191,7 @@ static int channel_submit(struct host1x_job *job) /* schedule a submit complete interrupt */ err = host1x_intr_add_action(host, sp, syncval, HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, - completed_waiter, NULL); + completed_waiter, &job->waiter); completed_waiter = NULL; WARN(err, "Failed to set submit complete interrupt"); diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index adbdc225de8d8..8f59b34672c22 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -79,6 +79,10 @@ static void job_free(struct kref *ref) { struct host1x_job *job = container_of(ref, struct host1x_job, ref); + if (job->waiter) + host1x_intr_put_ref(job->syncpt->host, job->syncpt->id, + job->waiter, false); + if (job->syncpt) host1x_syncpt_put(job->syncpt); diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index e648ebbb2027b..d198a10848c6b 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -407,6 +407,8 @@ static void syncpt_release(struct kref *ref) 
atomic_set(&sp->max_val, host1x_syncpt_read(sp)); + sp->locked = false; + mutex_lock(&sp->host->syncpt_mutex); host1x_syncpt_base_free(sp->base); diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index a6766f8d55ee6..95cd29b79d6dc 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -40,6 +40,13 @@ struct host1x_syncpt { /* interrupt data */ struct host1x_syncpt_intr intr; + + /* + * If a submission incrementing this syncpoint fails, lock it so that + * further submission cannot be made until application has handled the + * failure. + */ + bool locked; }; /* Initialize sync point array */ @@ -115,4 +122,9 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) return sp->id < host1x_syncpt_nb_pts(sp->host); } +static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp) +{ + sp->locked = true; +} + #endif diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 59e4a3bea0b0e..5fc12db94ca14 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -237,9 +237,15 @@ struct host1x_job { u32 syncpt_incrs; u32 syncpt_end; + /* Completion waiter ref */ + void *waiter; + /* Maximum time to wait for this job */ unsigned int timeout; + /* Job has timed out and should be released */ + bool cancelled; + /* Index and number of slots used in the push buffer */ unsigned int first_get; unsigned int num_slots; @@ -260,6 +266,9 @@ struct host1x_job { /* Add a channel wait for previous ops to complete */ bool serialize; + + /* Fast-forward syncpoint increments on job timeout */ + bool syncpt_recovery; }; struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, From a8cdf8c779e187ef8085e4f711f93489822ea6fd Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:35 +0200 Subject: [PATCH 108/851] gpu: host1x: Add job release callback Add a callback field to the job structure, to be called just before the job is to be freed. 
This allows the job's submitter to clean up any of its own state, such as decrementing runtime PM refcounts. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/job.c | 3 +++ include/linux/host1x.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 8f59b34672c22..09097e19c0d09 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -79,6 +79,9 @@ static void job_free(struct kref *ref) { struct host1x_job *job = container_of(ref, struct host1x_job, ref); + if (job->release) + job->release(job); + if (job->waiter) host1x_intr_put_ref(job->syncpt->host, job->syncpt->id, job->waiter, false); diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 5fc12db94ca14..7eecb0aa2519d 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -269,6 +269,10 @@ struct host1x_job { /* Fast-forward syncpoint increments on job timeout */ bool syncpt_recovery; + + /* Callback called when job is freed */ + void (*release)(struct host1x_job *job); + void *user_data; }; struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, From 579042ae0dc7c9a075ca386536195a86e597c98b Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:36 +0200 Subject: [PATCH 109/851] gpu: host1x: Add support for syncpoint waits in CDMA pushbuffer Add support for inserting syncpoint waits in the CDMA pushbuffer. These waits need to be done in HOST1X class, while gathers submitted by the application execute in engine class. Support is added by converting the gather list of the job into a command list that can include both gathers and waits. When the job is submitted, these commands are pushed as the appropriate opcodes on the CDMA pushbuffer. Also supported are waits relative to the start of the job, which are useful for jobs doing multiple things with an engine that doesn't natively support pipelining. While at it, use 32-bit waits on chips that support them.
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/channel_hw.c | 85 +++++++++++++++++----- drivers/gpu/host1x/hw/debug_hw.c | 9 ++- drivers/gpu/host1x/hw/hw_host1x02_uclass.h | 12 +++ drivers/gpu/host1x/hw/hw_host1x04_uclass.h | 12 +++ drivers/gpu/host1x/hw/hw_host1x05_uclass.h | 12 +++ drivers/gpu/host1x/hw/hw_host1x06_uclass.h | 12 +++ drivers/gpu/host1x/hw/hw_host1x07_uclass.h | 12 +++ drivers/gpu/host1x/job.c | 70 +++++++++++++----- drivers/gpu/host1x/job.h | 16 ++++ include/linux/host1x.h | 6 +- 10 files changed, 203 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index bf21512e5078f..1999780a7203a 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -47,39 +47,84 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, } } -static void submit_gathers(struct host1x_job *job) +static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold, + u32 next_class) +{ +#if HOST1X_HW >= 2 + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ + BIT(0) | BIT(2) + ), + threshold, + id, + host1x_opcode_setclass(next_class, 0, 0) + ); +#else + /* TODO add waitchk or use waitbases or other mitigation */ + host1x_cdma_push(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), + BIT(0) + ), + host1x_class_host_wait_syncpt(id, threshold) + ); + host1x_cdma_push(cdma, + host1x_opcode_setclass(next_class, 0, 0), + HOST1X_OPCODE_NOP + ); +#endif +} + +static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) { struct host1x_cdma *cdma = &job->channel->cdma; #if HOST1X_HW < 6 struct device *dev = job->channel->dev; #endif unsigned int i; + u32 threshold; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; - 
dma_addr_t addr = g->base + g->offset; - u32 op2, op3; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_cmd *cmd = &job->cmds[i]; - op2 = lower_32_bits(addr); - op3 = upper_32_bits(addr); + if (cmd->is_wait) { + if (cmd->wait.relative) + threshold = job_syncpt_base + cmd->wait.threshold; + else + threshold = cmd->wait.threshold; - trace_write_gather(cdma, g->bo, g->offset, g->words); + submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class); + } else { + struct host1x_job_gather *g = &cmd->gather; + + dma_addr_t addr = g->base + g->offset; + u32 op2, op3; + + op2 = lower_32_bits(addr); + op3 = upper_32_bits(addr); - if (op3 != 0) { + trace_write_gather(cdma, g->bo, g->offset, g->words); + + if (op3 != 0) { #if HOST1X_HW >= 6 - u32 op1 = host1x_opcode_gather_wide(g->words); - u32 op4 = HOST1X_OPCODE_NOP; + u32 op1 = host1x_opcode_gather_wide(g->words); + u32 op4 = HOST1X_OPCODE_NOP; - host1x_cdma_push_wide(cdma, op1, op2, op3, op4); + host1x_cdma_push_wide(cdma, op1, op2, op3, op4); #else - dev_err(dev, "invalid gather for push buffer %pad\n", - &addr); - continue; + dev_err(dev, "invalid gather for push buffer %pad\n", + &addr); + continue; #endif - } else { - u32 op1 = host1x_opcode_gather(g->words); + } else { + u32 op1 = host1x_opcode_gather(g->words); - host1x_cdma_push(cdma, op1, op2); + host1x_cdma_push(cdma, op1, op2); + } } } } @@ -126,7 +171,7 @@ static int channel_submit(struct host1x_job *job) struct host1x *host = dev_get_drvdata(ch->dev->parent); trace_host1x_channel_submit(dev_name(ch->dev), - job->num_gathers, job->num_relocs, + job->num_cmds, job->num_relocs, job->syncpt->id, job->syncpt_incrs); /* before error checks, return current max */ @@ -181,7 +226,7 @@ static int channel_submit(struct host1x_job *job) host1x_opcode_setclass(job->class, 0, 0), HOST1X_OPCODE_NOP); - submit_gathers(job); + submit_gathers(job, syncval - user_syncpt_incrs); /* end CDMA submit & stash pinned hMems into sync queue */ host1x_cdma_end(&ch->cdma, 
job); diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c index ceb48229d14b3..35952fd5597ea 100644 --- a/drivers/gpu/host1x/hw/debug_hw.c +++ b/drivers/gpu/host1x/hw/debug_hw.c @@ -208,10 +208,15 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) job->first_get, job->timeout, job->num_slots, job->num_unpins); - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; u32 *mapped; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + if (job->gather_copy_mapped) mapped = (u32 *)job->gather_copy_mapped; else diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h index 4fc51f70496ba..0a2ab8f1da6f6 100644 --- a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h index 9e84a4adca9fe..60c692b92955d 100644 --- a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + 
host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h index aee5a4e32877c..2fcc9a2ad3ef1 100644 --- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h index c4bacdb7155fe..5f831438d19bb 100644 --- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h index c74070f3f2031..8cd2ef087d5d0 100644 --- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } 
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 09097e19c0d09..32619b73a2fc2 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -38,7 +38,7 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, total = sizeof(struct host1x_job) + (u64)num_relocs * sizeof(struct host1x_reloc) + (u64)num_unpins * sizeof(struct host1x_job_unpin_data) + - (u64)num_cmdbufs * sizeof(struct host1x_job_gather) + + (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) + (u64)num_unpins * sizeof(dma_addr_t) + (u64)num_unpins * sizeof(u32 *); if (total > ULONG_MAX) @@ -57,8 +57,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, mem += num_relocs * sizeof(struct host1x_reloc); job->unpins = num_unpins ? mem : NULL; mem += num_unpins * sizeof(struct host1x_job_unpin_data); - job->gathers = num_cmdbufs ? mem : NULL; - mem += num_cmdbufs * sizeof(struct host1x_job_gather); + job->cmds = num_cmdbufs ? mem : NULL; + mem += num_cmdbufs * sizeof(struct host1x_job_cmd); job->addr_phys = num_unpins ? 
mem : NULL; job->reloc_addr_phys = job->addr_phys; @@ -101,22 +101,38 @@ EXPORT_SYMBOL(host1x_job_put); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, unsigned int words, unsigned int offset) { - struct host1x_job_gather *gather = &job->gathers[job->num_gathers]; + struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather; gather->words = words; gather->bo = bo; gather->offset = offset; - job->num_gathers++; + job->num_cmds++; } EXPORT_SYMBOL(host1x_job_add_gather); +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class) +{ + struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds]; + + cmd->is_wait = true; + cmd->wait.id = id; + cmd->wait.threshold = thresh; + cmd->wait.next_class = next_class; + cmd->wait.relative = relative; + + job->num_cmds++; +} +EXPORT_SYMBOL(host1x_job_add_wait); + static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { struct host1x_client *client = job->client; struct device *dev = client->dev; struct host1x_job_gather *g; struct iommu_domain *domain; + struct sg_table *sgt; unsigned int i; int err; @@ -126,7 +142,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; dma_addr_t phys_addr, *phys; - struct sg_table *sgt; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -202,17 +217,20 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) return 0; - for (i = 0; i < job->num_gathers; i++) { + for (i = 0; i < job->num_cmds; i++) { size_t gather_size = 0; struct scatterlist *sg; - struct sg_table *sgt; dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; dma_addr_t *phys; unsigned int j; - g = &job->gathers[i]; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + g->bo = host1x_bo_get(g->bo); if (!g->bo) { err = 
-EINVAL; @@ -545,8 +563,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, fw.num_relocs = job->num_relocs; fw.class = job->class; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; size += g->words * sizeof(u32); } @@ -568,10 +591,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, job->gather_copy_size = size; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; void *gather; + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; + /* Copy the gather */ gather = host1x_bo_mmap(g->bo); memcpy(job->gather_copy_mapped + offset, gather + g->offset, @@ -614,8 +641,12 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) } /* patch gathers */ - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; /* process each gather mem only once */ if (g->handled) @@ -625,10 +656,11 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) g->base = job->gather_addr_phys[i]; - for (j = i + 1; j < job->num_gathers; j++) { - if (job->gathers[j].bo == g->bo) { - job->gathers[j].handled = true; - job->gathers[j].base = g->base; + for (j = i + 1; j < job->num_cmds; j++) { + if (!job->cmds[j].is_wait && + job->cmds[j].gather.bo == g->bo) { + job->cmds[j].gather.handled = true; + job->cmds[j].gather.base = g->base; } } diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 94bc2e4ae241d..b4428c5495c93 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h 
@@ -18,6 +18,22 @@ struct host1x_job_gather { bool handled; }; +struct host1x_job_wait { + u32 id; + u32 threshold; + u32 next_class; + bool relative; +}; + +struct host1x_job_cmd { + bool is_wait; + + union { + struct host1x_job_gather gather; + struct host1x_job_wait wait; + }; +}; + struct host1x_job_unpin_data { struct host1x_bo *bo; struct sg_table *sgt; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 7eecb0aa2519d..9b6784708f2e6 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -219,8 +219,8 @@ struct host1x_job { struct host1x_client *client; /* Gathers and their memory */ - struct host1x_job_gather *gathers; - unsigned int num_gathers; + struct host1x_job_cmd *cmds; + unsigned int num_cmds; /* Array of handles to be pinned & unpinned */ struct host1x_reloc *relocs; @@ -279,6 +279,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, u32 num_cmdbufs, u32 num_relocs); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, unsigned int words, unsigned int offset); +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class); struct host1x_job *host1x_job_get(struct host1x_job *job); void host1x_job_put(struct host1x_job *job); int host1x_job_pin(struct host1x_job *job, struct device *dev); From 67fe57e9bedaec2f5943782ee29e5e86309ccb2f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:37 +0200 Subject: [PATCH 110/851] drm/tegra: Extract tegra_gem_lookup The static function host1x_bo_lookup in drm.c is also useful elsewhere. Extract it as tegra_gem_lookup in gem.c. 
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 20 +++----------------- drivers/gpu/drm/tegra/gem.c | 13 +++++++++++++ drivers/gpu/drm/tegra/gem.h | 2 ++ 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 739250acd498d..87954e69ab6c0 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -107,20 +107,6 @@ static void tegra_drm_context_free(struct tegra_drm_context *context) kfree(context); } -static struct host1x_bo * -host1x_bo_lookup(struct drm_file *file, u32 handle) -{ - struct drm_gem_object *gem; - struct tegra_bo *bo; - - gem = drm_gem_object_lookup(file, handle); - if (!gem) - return NULL; - - bo = to_tegra_bo(gem); - return &bo->base; -} - static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, struct drm_tegra_reloc __user *src, struct drm_device *drm, @@ -151,11 +137,11 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, dest->flags = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; - dest->cmdbuf.bo = host1x_bo_lookup(file, cmdbuf); + dest->cmdbuf.bo = tegra_gem_lookup(file, cmdbuf); if (!dest->cmdbuf.bo) return -ENOENT; - dest->target.bo = host1x_bo_lookup(file, target); + dest->target.bo = tegra_gem_lookup(file, target); if (!dest->target.bo) return -ENOENT; @@ -238,7 +224,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, goto fail; } - bo = host1x_bo_lookup(file, cmdbuf.handle); + bo = tegra_gem_lookup(file, cmdbuf.handle); if (!bo) { err = -ENOENT; goto fail; diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 26af8daa9a168..6ec598f5d5b3e 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -707,3 +707,16 @@ struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm, return &bo->gem; } + +struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle) +{ + struct drm_gem_object *gem; + struct tegra_bo *bo; + + 
gem = drm_gem_object_lookup(file, handle); + if (!gem) + return NULL; + + bo = to_tegra_bo(gem); + return &bo->base; +} diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h index c15fd99d6cb2b..cb5146a676683 100644 --- a/drivers/gpu/drm/tegra/gem.h +++ b/drivers/gpu/drm/tegra/gem.h @@ -80,4 +80,6 @@ struct dma_buf *tegra_gem_prime_export(struct drm_gem_object *gem, struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm, struct dma_buf *buf); +struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle); + #endif From 75ffbc5acf0f4853f478b41d002255b91abb4bc9 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:38 +0200 Subject: [PATCH 111/851] drm/tegra: Add new UAPI to header Update the tegra_drm.h UAPI header, adding the new proposed UAPI. The old staging UAPI is left in for now, with minor modification to avoid name collisions. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/uapi/drm/tegra_drm.h | 425 +++++++++++++++++++++++++++++++++-- 1 file changed, 402 insertions(+), 23 deletions(-) diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h index c4df3c3668b37..94cfc306d50ab 100644 --- a/include/uapi/drm/tegra_drm.h +++ b/include/uapi/drm/tegra_drm.h @@ -1,24 +1,5 @@ -/* - * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) 2012-2020 NVIDIA Corporation */ #ifndef _UAPI_TEGRA_DRM_H_ #define _UAPI_TEGRA_DRM_H_ @@ -29,6 +10,8 @@ extern "C" { #endif +/* Tegra DRM legacy UAPI. Only enabled with STAGING */ + #define DRM_TEGRA_GEM_CREATE_TILED (1 << 0) #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1) @@ -649,8 +632,8 @@ struct drm_tegra_gem_get_flags { #define DRM_TEGRA_SYNCPT_READ 0x02 #define DRM_TEGRA_SYNCPT_INCR 0x03 #define DRM_TEGRA_SYNCPT_WAIT 0x04 -#define DRM_TEGRA_OPEN_CHANNEL 0x05 -#define DRM_TEGRA_CLOSE_CHANNEL 0x06 +#define DRM_TEGRA_OPEN_CHANNEL 0x05 +#define DRM_TEGRA_CLOSE_CHANNEL 0x06 #define DRM_TEGRA_GET_SYNCPT 0x07 #define DRM_TEGRA_SUBMIT 0x08 #define DRM_TEGRA_GET_SYNCPT_BASE 0x09 @@ -674,6 +657,402 @@ struct drm_tegra_gem_get_flags { #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags) #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags) +/* New Tegra DRM UAPI */ + +/* + * Reported by the driver in the `capabilities` field. + * + * DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT: If set, the engine is cache coherent + * with regard to the system memory. + */ +#define DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT (1 << 0) + +struct drm_tegra_channel_open { + /** + * @host1x_class: [in] + * + * Host1x class of the engine that will be programmed using this + * channel. 
+ */ + __u32 host1x_class; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @context: [out] + * + * Opaque identifier corresponding to the opened channel. + */ + __u32 context; + + /** + * @version: [out] + * + * Version of the engine hardware. This can be used by userspace + * to determine how the engine needs to be programmed. + */ + __u32 version; + + /** + * @capabilities: [out] + * + * Flags describing the hardware capabilities. + */ + __u32 capabilities; + __u32 padding; +}; + +struct drm_tegra_channel_close { + /** + * @context: [in] + * + * Identifier of the channel to close. + */ + __u32 context; + __u32 padding; +}; + +/* + * Mapping flags that can be used to influence how the mapping is created. + * + * DRM_TEGRA_CHANNEL_MAP_READ: create mapping that allows HW read access + * DRM_TEGRA_CHANNEL_MAP_WRITE: create mapping that allows HW write access + */ +#define DRM_TEGRA_CHANNEL_MAP_READ (1 << 0) +#define DRM_TEGRA_CHANNEL_MAP_WRITE (1 << 1) +#define DRM_TEGRA_CHANNEL_MAP_READ_WRITE (DRM_TEGRA_CHANNEL_MAP_READ | \ + DRM_TEGRA_CHANNEL_MAP_WRITE) + +struct drm_tegra_channel_map { + /** + * @context: [in] + * + * Identifier of the channel to make the memory available to. + */ + __u32 context; + + /** + * @handle: [in] + * + * GEM handle of the memory to map. + */ + __u32 handle; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @mapping: [out] + * + * Identifier corresponding to the mapping, to be used for + * relocations or unmapping later. + */ + __u32 mapping; +}; + +struct drm_tegra_channel_unmap { + /** + * @context: [in] + * + * Channel identifier of the channel to unmap memory from. + */ + __u32 context; + + /** + * @mapping: [in] + * + * Mapping identifier of the memory mapping to unmap.
+ */ + __u32 mapping; +}; + +/* Submission */ + +/** + * Specify that bit 39 of the patched-in address should be set to switch + * swizzling between Tegra and non-Tegra sector layout on systems that store + * surfaces in system memory in non-Tegra sector layout. + */ +#define DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT (1 << 0) + +struct drm_tegra_submit_buf { + /** + * @mapping: [in] + * + * Identifier of the mapping to use in the submission. + */ + __u32 mapping; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * Information for relocation patching. + */ + struct { + /** + * @target_offset: [in] + * + * Offset from the start of the mapping of the data whose + * address is to be patched into the gather. + */ + __u64 target_offset; + + /** + * @gather_offset_words: [in] + * + * Offset in words from the start of the gather data to + * where the address should be patched into. + */ + __u32 gather_offset_words; + + /** + * @shift: [in] + * + * Number of bits the address should be shifted right before + * patching in. + */ + __u32 shift; + } reloc; +}; + +/** + * Execute `words` words of Host1x opcodes specified in the `gather_data_ptr` + * buffer. Each GATHER_UPTR command uses successive words from the buffer. + */ +#define DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR 0 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT 1 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. The threshold is calculated relative to the start of the job. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE 2 + +struct drm_tegra_submit_cmd_gather_uptr { + __u32 words; + __u32 reserved[3]; +}; + +struct drm_tegra_submit_cmd_wait_syncpt { + __u32 id; + __u32 value; + __u32 reserved[2]; +}; + +struct drm_tegra_submit_cmd { + /** + * @type: [in] + * + * Command type to execute. One of the DRM_TEGRA_SUBMIT_CMD* + * defines. 
+ */ + __u32 type; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + union { + struct drm_tegra_submit_cmd_gather_uptr gather_uptr; + struct drm_tegra_submit_cmd_wait_syncpt wait_syncpt; + __u32 reserved[4]; + }; +}; + +struct drm_tegra_submit_syncpt { + /** + * @id: [in] + * + * ID of the syncpoint that the job will increment. + */ + __u32 id; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @increments: [in] + * + * Number of times the job will increment this syncpoint. + */ + __u32 increments; + + /** + * @value: [out] + * + * Value the syncpoint will have once the job has completed all + * its specified syncpoint increments. + * + * Note that the kernel may increment the syncpoint before or after + * the job. These increments are not reflected in this field. + * + * If the job hangs or times out, not all of the increments may + * get executed. + */ + __u32 value; +}; + +struct drm_tegra_channel_submit { + /** + * @context: [in] + * + * Identifier of the channel to submit this job to. + */ + __u32 context; + + /** + * @num_bufs: [in] + * + * Number of elements in the `bufs_ptr` array. + */ + __u32 num_bufs; + + /** + * @num_cmds: [in] + * + * Number of elements in the `cmds_ptr` array. + */ + __u32 num_cmds; + + /** + * @gather_data_words: [in] + * + * Number of 32-bit words in the `gather_data_ptr` array. + */ + __u32 gather_data_words; + + /** + * @bufs_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_buf structures. + */ + __u64 bufs_ptr; + + /** + * @cmds_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_cmd structures. + */ + __u64 cmds_ptr; + + /** + * @gather_data_ptr: [in] + * + * Pointer to an array of Host1x opcodes to be used by GATHER_UPTR + * commands. + */ + __u64 gather_data_ptr; + + /** + * @syncobj_in: [in] + * + * Handle for DRM syncobj that will be waited on before submission. + * Ignored if zero.
+ */ + __u32 syncobj_in; + + /** + * @syncobj_out: [in] + * + * Handle for DRM syncobj that will have its fence replaced with + * the job's completion fence. Ignored if zero. + */ + __u32 syncobj_out; + + /** + * @syncpt: [in,out] + * + * Information about the syncpoint the job will increment. + */ + struct drm_tegra_submit_syncpt syncpt; +}; + +struct drm_tegra_syncpoint_allocate { + /** + * @id: [out] + * + * ID of allocated syncpoint. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_free { + /** + * @id: [in] + * + * ID of syncpoint to free. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_wait { + /** + * @timeout_ns: [in] + * + * Absolute timestamp at which the wait will time out. + */ + __s64 timeout_ns; + + /** + * @id: [in] + * + * ID of syncpoint to wait on. + */ + __u32 id; + + /** + * @threshold: [in] + * + * Threshold to wait for. + */ + __u32 threshold; + + /** + * @value: [out] + * + * Value of the syncpoint upon wait completion. + */ + __u32 value; + + __u32 padding; +}; + +#define DRM_IOCTL_TEGRA_CHANNEL_OPEN DRM_IOWR(DRM_COMMAND_BASE + 0x10, struct drm_tegra_channel_open) +#define DRM_IOCTL_TEGRA_CHANNEL_CLOSE DRM_IOWR(DRM_COMMAND_BASE + 0x11, struct drm_tegra_channel_close) +#define DRM_IOCTL_TEGRA_CHANNEL_MAP DRM_IOWR(DRM_COMMAND_BASE + 0x12, struct drm_tegra_channel_map) +#define DRM_IOCTL_TEGRA_CHANNEL_UNMAP DRM_IOWR(DRM_COMMAND_BASE + 0x13, struct drm_tegra_channel_unmap) +#define DRM_IOCTL_TEGRA_CHANNEL_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + 0x14, struct drm_tegra_channel_submit) + +#define DRM_IOCTL_TEGRA_SYNCPOINT_ALLOCATE DRM_IOWR(DRM_COMMAND_BASE + 0x20, struct drm_tegra_syncpoint_allocate) +#define DRM_IOCTL_TEGRA_SYNCPOINT_FREE DRM_IOWR(DRM_COMMAND_BASE + 0x21, struct drm_tegra_syncpoint_free) +#define DRM_IOCTL_TEGRA_SYNCPOINT_WAIT DRM_IOWR(DRM_COMMAND_BASE + 0x22, struct drm_tegra_syncpoint_wait) + #if defined(__cplusplus) } #endif From 0debc5abc08a6f4c41642004a60115bbe3930bc4 Mon Sep 17 00:00:00
2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:39 +0200 Subject: [PATCH 112/851] drm/tegra: Boot VIC during runtime PM resume With the new UAPI implementation, engines are powered on and off when there are active jobs, and the core code handles channel allocation. To accommodate that, boot the engine as part of runtime PM instead of using the open_channel callback, which is not used by the new submit path. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/vic.c | 112 ++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index c9d55a9a31806..c02010ff2b7f2 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -29,7 +29,6 @@ struct vic_config { struct vic { struct falcon falcon; - bool booted; void __iomem *regs; struct tegra_drm_client client; @@ -52,48 +51,6 @@ static void vic_writel(struct vic *vic, u32 value, unsigned int offset) writel(value, vic->regs + offset); } -static int vic_runtime_resume(struct device *dev) -{ - struct vic *vic = dev_get_drvdata(dev); - int err; - - err = clk_prepare_enable(vic->clk); - if (err < 0) - return err; - - usleep_range(10, 20); - - err = reset_control_deassert(vic->rst); - if (err < 0) - goto disable; - - usleep_range(10, 20); - - return 0; - -disable: - clk_disable_unprepare(vic->clk); - return err; -} - -static int vic_runtime_suspend(struct device *dev) -{ - struct vic *vic = dev_get_drvdata(dev); - int err; - - err = reset_control_assert(vic->rst); - if (err < 0) - return err; - - usleep_range(2000, 4000); - - clk_disable_unprepare(vic->clk); - - vic->booted = false; - - return 0; -} - static int vic_boot(struct vic *vic) { #ifdef CONFIG_IOMMU_API @@ -103,9 +60,6 @@ static int vic_boot(struct vic *vic) void *hdr; int err = 0; - if (vic->booted) - return 0; - #ifdef CONFIG_IOMMU_API if (vic->config->supports_sid && spec) { u32 value; @@ -168,8 +122,6 @@ 
static int vic_boot(struct vic *vic) return err; } - vic->booted = true; - return 0; } @@ -323,35 +275,74 @@ static int vic_load_firmware(struct vic *vic) return err; } -static int vic_open_channel(struct tegra_drm_client *client, - struct tegra_drm_context *context) + +static int vic_runtime_resume(struct device *dev) { - struct vic *vic = to_vic(client); + struct vic *vic = dev_get_drvdata(dev); int err; - err = pm_runtime_resume_and_get(vic->dev); + err = clk_prepare_enable(vic->clk); if (err < 0) return err; + usleep_range(10, 20); + + err = reset_control_deassert(vic->rst); + if (err < 0) + goto disable; + + usleep_range(10, 20); + err = vic_load_firmware(vic); if (err < 0) - goto rpm_put; + goto assert; err = vic_boot(vic); if (err < 0) - goto rpm_put; + goto assert; + + return 0; + +assert: + reset_control_assert(vic->rst); +disable: + clk_disable_unprepare(vic->clk); + return err; +} + +static int vic_runtime_suspend(struct device *dev) +{ + struct vic *vic = dev_get_drvdata(dev); + int err; + + err = reset_control_assert(vic->rst); + if (err < 0) + return err; + + usleep_range(2000, 4000); + + clk_disable_unprepare(vic->clk); + + return 0; +} + +static int vic_open_channel(struct tegra_drm_client *client, + struct tegra_drm_context *context) +{ + struct vic *vic = to_vic(client); + int err; + + err = pm_runtime_resume_and_get(vic->dev); + if (err < 0) + return err; context->channel = host1x_channel_get(vic->channel); if (!context->channel) { - err = -ENOMEM; - goto rpm_put; + pm_runtime_put(vic->dev); + return -ENOMEM; } return 0; - -rpm_put: - pm_runtime_put(vic->dev); - return err; } static void vic_close_channel(struct tegra_drm_context *context) @@ -359,7 +350,6 @@ static void vic_close_channel(struct tegra_drm_context *context) struct vic *vic = to_vic(context->client); host1x_channel_put(context->channel); - pm_runtime_put(vic->dev); } From 69d77868b1adedf5d001db29b9e0c10422c3a9db Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 
21:31:40 +0200 Subject: [PATCH 113/851] drm/tegra: Allocate per-engine channel in core code To avoid code duplication, allocate the per-engine shared channel in the core code instead. This is the usual channel that all jobs are submitted to when MLOCKing is not in use. Once MLOCKs are implemented on Host1x side, we can also update this to avoid allocating a shared channel when MLOCKs are enabled. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 11 +++++++++++ drivers/gpu/drm/tegra/drm.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 87954e69ab6c0..cddee6425461e 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -870,6 +870,14 @@ static const struct drm_driver tegra_drm_driver = { int tegra_drm_register_client(struct tegra_drm *tegra, struct tegra_drm_client *client) { + /* + * When MLOCKs are implemented, change to allocate a shared channel + * only when MLOCKs are disabled. 
+ */ + client->shared_channel = host1x_channel_request(&client->base); + if (!client->shared_channel) + return -EBUSY; + mutex_lock(&tegra->clients_lock); list_add_tail(&client->list, &tegra->clients); client->drm = tegra; @@ -886,6 +894,9 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra, client->drm = NULL; mutex_unlock(&tegra->clients_lock); + if (client->shared_channel) + host1x_channel_put(client->shared_channel); + return 0; } diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 0cb8680653483..536861017d242 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -91,7 +91,9 @@ struct tegra_drm_client { struct host1x_client base; struct list_head list; struct tegra_drm *drm; + struct host1x_channel *shared_channel; + /* Set by driver */ unsigned int version; const struct tegra_drm_client_ops *ops; }; From cdf631031f3e574b76afed51bda0ccc9d71d4a4e Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:41 +0200 Subject: [PATCH 114/851] drm/tegra: Implement new UAPI Implement the non-submission parts of the new UAPI, including channel management and memory mapping. The UAPI is under the CONFIG_DRM_TEGRA_STAGING config flag for now. 
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/Makefile | 1 + drivers/gpu/drm/tegra/drm.c | 37 ++-- drivers/gpu/drm/tegra/drm.h | 10 ++ drivers/gpu/drm/tegra/uapi.c | 313 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/tegra/uapi.h | 51 ++++++ 5 files changed, 396 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/tegra/uapi.c create mode 100644 drivers/gpu/drm/tegra/uapi.h diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index d6cf202414f0a..783475ffd9434 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -3,6 +3,7 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG tegra-drm-y := \ drm.o \ + uapi.o \ gem.o \ fb.o \ dp.o \ diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index cddee6425461e..6ee08e49ec57a 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -21,6 +21,7 @@ #include #include +#include "uapi.h" #include "drm.h" #include "gem.h" @@ -34,11 +35,6 @@ #define CARVEOUT_SZ SZ_64M #define CDMA_GATHER_FETCHES_MAX_NB 16383 -struct tegra_drm_file { - struct idr contexts; - struct mutex lock; -}; - static int tegra_atomic_check(struct drm_device *drm, struct drm_atomic_state *state) { @@ -94,7 +90,8 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp) if (!fpriv) return -ENOMEM; - idr_init_base(&fpriv->contexts, 1); + idr_init_base(&fpriv->legacy_contexts, 1); + xa_init_flags(&fpriv->contexts, XA_FLAGS_ALLOC1); mutex_init(&fpriv->lock); filp->driver_priv = fpriv; @@ -419,7 +416,7 @@ static int tegra_client_open(struct tegra_drm_file *fpriv, if (err < 0) return err; - err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL); + err = idr_alloc(&fpriv->legacy_contexts, context, 1, 0, GFP_KERNEL); if (err < 0) { client->ops->close_channel(context); return err; @@ -474,13 +471,13 @@ static int tegra_close_channel(struct drm_device *drm, void *data, mutex_lock(&fpriv->lock); - 
context = idr_find(&fpriv->contexts, args->context); + context = idr_find(&fpriv->legacy_contexts, args->context); if (!context) { err = -EINVAL; goto unlock; } - idr_remove(&fpriv->contexts, context->id); + idr_remove(&fpriv->legacy_contexts, context->id); tegra_drm_context_free(context); unlock: @@ -499,7 +496,7 @@ static int tegra_get_syncpt(struct drm_device *drm, void *data, mutex_lock(&fpriv->lock); - context = idr_find(&fpriv->contexts, args->context); + context = idr_find(&fpriv->legacy_contexts, args->context); if (!context) { err = -ENODEV; goto unlock; @@ -528,7 +525,7 @@ static int tegra_submit(struct drm_device *drm, void *data, mutex_lock(&fpriv->lock); - context = idr_find(&fpriv->contexts, args->context); + context = idr_find(&fpriv->legacy_contexts, args->context); if (!context) { err = -ENODEV; goto unlock; @@ -553,7 +550,7 @@ static int tegra_get_syncpt_base(struct drm_device *drm, void *data, mutex_lock(&fpriv->lock); - context = idr_find(&fpriv->contexts, args->context); + context = idr_find(&fpriv->legacy_contexts, args->context); if (!context) { err = -ENODEV; goto unlock; @@ -722,10 +719,17 @@ static int tegra_gem_get_flags(struct drm_device *drm, void *data, static const struct drm_ioctl_desc tegra_drm_ioctls[] = { #ifdef CONFIG_DRM_TEGRA_STAGING - DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_OPEN, tegra_drm_ioctl_channel_open, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_CLOSE, tegra_drm_ioctl_channel_close, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_MAP, tegra_drm_ioctl_channel_map, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap, + DRM_RENDER_ALLOW), + + DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_syncpt_read, DRM_RENDER_ALLOW), 
DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_syncpt_incr, @@ -779,10 +783,11 @@ static void tegra_drm_postclose(struct drm_device *drm, struct drm_file *file) struct tegra_drm_file *fpriv = file->driver_priv; mutex_lock(&fpriv->lock); - idr_for_each(&fpriv->contexts, tegra_drm_context_cleanup, NULL); + idr_for_each(&fpriv->legacy_contexts, tegra_drm_context_cleanup, NULL); + tegra_drm_uapi_close_file(fpriv); mutex_unlock(&fpriv->lock); - idr_destroy(&fpriv->contexts); + idr_destroy(&fpriv->legacy_contexts); mutex_destroy(&fpriv->lock); kfree(fpriv); } diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 536861017d242..8b28327c931c1 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -64,12 +64,22 @@ struct tegra_drm { struct tegra_display_hub *hub; }; +static inline struct host1x *tegra_drm_to_host1x(struct tegra_drm *tegra) +{ + return dev_get_drvdata(tegra->drm->dev->parent); +} + struct tegra_drm_client; struct tegra_drm_context { struct tegra_drm_client *client; struct host1x_channel *channel; + + /* Only used by legacy UAPI. */ unsigned int id; + + /* Only used by new UAPI. 
*/ + struct xarray mappings; }; struct tegra_drm_client_ops { diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c new file mode 100644 index 0000000000000..48e872c768a96 --- /dev/null +++ b/drivers/gpu/drm/tegra/uapi.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 NVIDIA Corporation */ + +#include +#include +#include + +#include +#include + +#include "drm.h" +#include "uapi.h" + +static void tegra_drm_mapping_release(struct kref *ref) +{ + struct tegra_drm_mapping *mapping = + container_of(ref, struct tegra_drm_mapping, ref); + + if (mapping->sgt) + dma_unmap_sgtable(mapping->dev, mapping->sgt, + mapping->direction, DMA_ATTR_SKIP_CPU_SYNC); + + host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); + host1x_bo_put(mapping->bo); + + kfree(mapping); +} + +void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping) +{ + kref_put(&mapping->ref, tegra_drm_mapping_release); +} + +static void tegra_drm_channel_context_close(struct tegra_drm_context *context) +{ + struct tegra_drm_mapping *mapping; + unsigned long id; + + xa_for_each(&context->mappings, id, mapping) + tegra_drm_mapping_put(mapping); + + xa_destroy(&context->mappings); + + host1x_channel_put(context->channel); + + kfree(context); +} + +void tegra_drm_uapi_close_file(struct tegra_drm_file *file) +{ + struct tegra_drm_context *context; + unsigned long id; + + xa_for_each(&file->contexts, id, context) + tegra_drm_channel_context_close(context); + + xa_destroy(&file->contexts); +} + +static struct tegra_drm_client *tegra_drm_find_client(struct tegra_drm *tegra, + u32 class) +{ + struct tegra_drm_client *client; + + list_for_each_entry(client, &tegra->clients, list) + if (client->base.class == class) + return client; + + return NULL; +} + +int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct tegra_drm *tegra = drm->dev_private; + struct 
drm_tegra_channel_open *args = data; + struct tegra_drm_client *client = NULL; + struct tegra_drm_context *context; + int err; + + if (args->flags) + return -EINVAL; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + client = tegra_drm_find_client(tegra, args->host1x_class); + if (!client) { + err = -ENODEV; + goto free; + } + + if (client->shared_channel) { + context->channel = host1x_channel_get(client->shared_channel); + } else { + context->channel = host1x_channel_request(&client->base); + if (!context->channel) { + err = -EBUSY; + goto free; + } + } + + err = xa_alloc(&fpriv->contexts, &args->context, context, + XA_LIMIT(1, U32_MAX), GFP_KERNEL); + if (err < 0) + goto put_channel; + + context->client = client; + xa_init_flags(&context->mappings, XA_FLAGS_ALLOC1); + + args->version = client->version; + args->capabilities = 0; + + if (device_get_dma_attr(client->base.dev) == DEV_DMA_COHERENT) + args->capabilities |= DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT; + + return 0; + +put_channel: + host1x_channel_put(context->channel); +free: + kfree(context); + + return err; +} + +int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_channel_close *args = data; + struct tegra_drm_context *context; + + mutex_lock(&fpriv->lock); + + context = xa_load(&fpriv->contexts, args->context); + if (!context) { + mutex_unlock(&fpriv->lock); + return -EINVAL; + } + + xa_erase(&fpriv->contexts, args->context); + + mutex_unlock(&fpriv->lock); + + tegra_drm_channel_context_close(context); + + return 0; +} + +int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_channel_map *args = data; + struct tegra_drm_mapping *mapping; + struct tegra_drm_context *context; + int err = 0; + + if (args->flags & 
~DRM_TEGRA_CHANNEL_MAP_READ_WRITE) + return -EINVAL; + + mutex_lock(&fpriv->lock); + + context = xa_load(&fpriv->contexts, args->context); + if (!context) { + mutex_unlock(&fpriv->lock); + return -EINVAL; + } + + mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) { + err = -ENOMEM; + goto unlock; + } + + kref_init(&mapping->ref); + + mapping->dev = context->client->base.dev; + mapping->bo = tegra_gem_lookup(file, args->handle); + if (!mapping->bo) { + err = -EINVAL; + goto unlock; + } + + if (context->client->base.group) { + /* IOMMU domain managed directly using IOMMU API */ + host1x_bo_pin(mapping->dev, mapping->bo, &mapping->iova); + } else { + switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) { + case DRM_TEGRA_CHANNEL_MAP_READ_WRITE: + mapping->direction = DMA_BIDIRECTIONAL; + break; + + case DRM_TEGRA_CHANNEL_MAP_WRITE: + mapping->direction = DMA_FROM_DEVICE; + break; + + case DRM_TEGRA_CHANNEL_MAP_READ: + mapping->direction = DMA_TO_DEVICE; + break; + + default: + return -EINVAL; + } + + mapping->sgt = host1x_bo_pin(mapping->dev, mapping->bo, NULL); + if (IS_ERR(mapping->sgt)) { + err = PTR_ERR(mapping->sgt); + goto put_gem; + } + + err = dma_map_sgtable(mapping->dev, mapping->sgt, + mapping->direction, + DMA_ATTR_SKIP_CPU_SYNC); + if (err) + goto unpin; + + mapping->iova = sg_dma_address(mapping->sgt->sgl); + } + + mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->size; + + err = xa_alloc(&context->mappings, &args->mapping, mapping, + XA_LIMIT(1, U32_MAX), GFP_KERNEL); + if (err < 0) + goto unmap; + + mutex_unlock(&fpriv->lock); + + return 0; + +unmap: + if (mapping->sgt) { + dma_unmap_sgtable(mapping->dev, mapping->sgt, + mapping->direction, DMA_ATTR_SKIP_CPU_SYNC); + } +unpin: + host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); +put_gem: + host1x_bo_put(mapping->bo); + kfree(mapping); +unlock: + mutex_unlock(&fpriv->lock); + return err; +} + +int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void 
*data, + struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_channel_unmap *args = data; + struct tegra_drm_mapping *mapping; + struct tegra_drm_context *context; + + mutex_lock(&fpriv->lock); + + context = xa_load(&fpriv->contexts, args->context); + if (!context) { + mutex_unlock(&fpriv->lock); + return -EINVAL; + } + + mapping = xa_erase(&context->mappings, args->mapping); + + mutex_unlock(&fpriv->lock); + + if (mapping) { + tegra_drm_mapping_put(mapping); + return 0; + } else { + return -EINVAL; + } +} + +int tegra_drm_ioctl_gem_create(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct drm_tegra_gem_create *args = data; + struct tegra_bo *bo; + + if (args->flags) + return -EINVAL; + + bo = tegra_bo_create_with_handle(file, drm, args->size, args->flags, + &args->handle); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + return 0; +} + +int tegra_drm_ioctl_gem_mmap(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct drm_tegra_gem_mmap *args = data; + struct drm_gem_object *gem; + struct tegra_bo *bo; + + gem = drm_gem_object_lookup(file, args->handle); + if (!gem) + return -EINVAL; + + bo = to_tegra_bo(gem); + + args->offset = drm_vma_node_offset_addr(&bo->gem.vma_node); + + drm_gem_object_put(gem); + + return 0; +} diff --git a/drivers/gpu/drm/tegra/uapi.h b/drivers/gpu/drm/tegra/uapi.h new file mode 100644 index 0000000000000..fbef39726c296 --- /dev/null +++ b/drivers/gpu/drm/tegra/uapi.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020 NVIDIA Corporation */ + +#ifndef _TEGRA_DRM_UAPI_H +#define _TEGRA_DRM_UAPI_H + +#include +#include +#include +#include + +#include + +struct drm_file; +struct drm_device; + +struct tegra_drm_file { + /* Legacy UAPI state */ + struct idr legacy_contexts; + struct mutex lock; + + /* New UAPI state */ + struct xarray contexts; +}; + +struct tegra_drm_mapping { + struct kref ref; + + struct device *dev; + struct 
host1x_bo *bo; + struct sg_table *sgt; + enum dma_data_direction direction; + dma_addr_t iova; + dma_addr_t iova_end; +}; + +int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, + struct drm_file *file); + +void tegra_drm_uapi_close_file(struct tegra_drm_file *file); +void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping); + +#endif From 46a26a95e8e304201291a47ec7e993d32ecff121 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:42 +0200 Subject: [PATCH 115/851] drm/tegra: Implement syncpoint management UAPI Implement TegraDRM IOCTLs for allocating and freeing syncpoints. 
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 5 ++++ drivers/gpu/drm/tegra/uapi.c | 52 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/tegra/uapi.h | 5 ++++ 3 files changed, 62 insertions(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 6ee08e49ec57a..18aee825a1ff3 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -92,6 +92,7 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp) idr_init_base(&fpriv->legacy_contexts, 1); xa_init_flags(&fpriv->contexts, XA_FLAGS_ALLOC1); + xa_init(&fpriv->syncpoints); mutex_init(&fpriv->lock); filp->driver_priv = fpriv; @@ -727,6 +728,10 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_ALLOCATE, tegra_drm_ioctl_syncpoint_allocate, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free, + DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c index 48e872c768a96..e91394e7264e0 100644 --- a/drivers/gpu/drm/tegra/uapi.c +++ b/drivers/gpu/drm/tegra/uapi.c @@ -49,12 +49,17 @@ static void tegra_drm_channel_context_close(struct tegra_drm_context *context) void tegra_drm_uapi_close_file(struct tegra_drm_file *file) { struct tegra_drm_context *context; + struct host1x_syncpt *sp; unsigned long id; xa_for_each(&file->contexts, id, context) tegra_drm_channel_context_close(context); + xa_for_each(&file->syncpoints, id, sp) + host1x_syncpt_put(sp); + xa_destroy(&file->contexts); + xa_destroy(&file->syncpoints); } static struct tegra_drm_client *tegra_drm_find_client(struct tegra_drm *tegra, @@ -311,3 +316,50 @@ int tegra_drm_ioctl_gem_mmap(struct 
drm_device *drm, void *data, return 0; } + +int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct host1x *host1x = tegra_drm_to_host1x(drm->dev_private); + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_syncpoint_allocate *args = data; + struct host1x_syncpt *sp; + int err; + + if (args->id) + return -EINVAL; + + sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED, + current->comm); + if (!sp) + return -EBUSY; + + args->id = host1x_syncpt_id(sp); + + err = xa_insert(&fpriv->syncpoints, args->id, sp, GFP_KERNEL); + if (err) { + host1x_syncpt_put(sp); + return err; + } + + return 0; +} + +int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_syncpoint_allocate *args = data; + struct host1x_syncpt *sp; + + mutex_lock(&fpriv->lock); + sp = xa_erase(&fpriv->syncpoints, args->id); + mutex_unlock(&fpriv->lock); + + if (!sp) + return -EINVAL; + + host1x_syncpt_put(sp); + + return 0; +} diff --git a/drivers/gpu/drm/tegra/uapi.h b/drivers/gpu/drm/tegra/uapi.h index fbef39726c296..e4e498facf616 100644 --- a/drivers/gpu/drm/tegra/uapi.h +++ b/drivers/gpu/drm/tegra/uapi.h @@ -21,6 +21,7 @@ struct tegra_drm_file { /* New UAPI state */ struct xarray contexts; + struct xarray syncpoints; }; struct tegra_drm_mapping { @@ -44,6 +45,10 @@ int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data, struct drm_file *file); int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, struct drm_file *file); +int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, + struct drm_file *file); void tegra_drm_uapi_close_file(struct tegra_drm_file *file); void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping); From 
6f9eb895f189e15ee94775d0ce8c27c20f119e5e Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:43 +0200 Subject: [PATCH 116/851] drm/tegra: Implement syncpoint wait UAPI Implement new syncpoint wait UAPI. This is different from the legacy one in taking an absolute timestamp in line with modern DRM conventions. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 2 ++ drivers/gpu/drm/tegra/uapi.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/tegra/uapi.h | 2 ++ 3 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 18aee825a1ff3..082d520bdffce 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -732,6 +732,8 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_WAIT, tegra_drm_ioctl_syncpoint_wait, + DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c index e91394e7264e0..0ba57697706b0 100644 --- a/drivers/gpu/drm/tegra/uapi.c +++ b/drivers/gpu/drm/tegra/uapi.c @@ -7,6 +7,7 @@ #include #include +#include #include "drm.h" #include "uapi.h" @@ -363,3 +364,24 @@ int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, return 0; } + +int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct host1x *host1x = tegra_drm_to_host1x(drm->dev_private); + struct drm_tegra_syncpoint_wait *args = data; + signed long timeout_jiffies; + struct host1x_syncpt *sp; + + if (args->padding != 0) + return -EINVAL; + + sp = host1x_syncpt_get_by_id_noref(host1x, args->id); + if (!sp) + return -EINVAL; + + timeout_jiffies = 
drm_timeout_abs_to_jiffies(args->timeout_ns); + + return host1x_syncpt_wait(sp, args->threshold, timeout_jiffies, + &args->value); +} diff --git a/drivers/gpu/drm/tegra/uapi.h b/drivers/gpu/drm/tegra/uapi.h index e4e498facf616..12adad770ad3f 100644 --- a/drivers/gpu/drm/tegra/uapi.h +++ b/drivers/gpu/drm/tegra/uapi.h @@ -49,6 +49,8 @@ int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, struct drm_file *file); int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, struct drm_file *file); +int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data, + struct drm_file *file); void tegra_drm_uapi_close_file(struct tegra_drm_file *file); void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping); From 43636451db8c7f0a058da3d543e71f803a697589 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:44 +0200 Subject: [PATCH 117/851] drm/tegra: Implement job submission part of new UAPI Implement the job submission IOCTL with a minimum feature set. 
Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/Makefile | 2 + drivers/gpu/drm/tegra/drm.c | 4 +- drivers/gpu/drm/tegra/gather_bo.c | 81 +++++ drivers/gpu/drm/tegra/gather_bo.h | 22 ++ drivers/gpu/drm/tegra/submit.c | 521 ++++++++++++++++++++++++++++++ drivers/gpu/drm/tegra/submit.h | 17 + 6 files changed, 646 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/tegra/gather_bo.c create mode 100644 drivers/gpu/drm/tegra/gather_bo.h create mode 100644 drivers/gpu/drm/tegra/submit.c create mode 100644 drivers/gpu/drm/tegra/submit.h diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index 783475ffd9434..ab4289d1c991f 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -4,6 +4,8 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG tegra-drm-y := \ drm.o \ uapi.o \ + submit.o \ + gather_bo.o \ gem.o \ fb.o \ dp.o \ diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 082d520bdffce..ae9dafc32c2b9 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -728,6 +728,8 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit, + DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_ALLOCATE, tegra_drm_ioctl_syncpoint_allocate, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free, @@ -852,7 +854,7 @@ static void tegra_debugfs_init(struct drm_minor *minor) static const struct drm_driver tegra_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | - DRIVER_ATOMIC | DRIVER_RENDER, + DRIVER_ATOMIC | DRIVER_RENDER | DRIVER_SYNCOBJ, .open = tegra_drm_open, .postclose = tegra_drm_postclose, .lastclose = drm_fb_helper_lastclose, diff --git a/drivers/gpu/drm/tegra/gather_bo.c b/drivers/gpu/drm/tegra/gather_bo.c 
new file mode 100644 index 0000000000000..755c2da623615 --- /dev/null +++ b/drivers/gpu/drm/tegra/gather_bo.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 NVIDIA Corporation */ + +#include +#include + +#include "gather_bo.h" + +static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo) +{ + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); + + kref_get(&bo->ref); + + return host_bo; +} + +static void gather_bo_release(struct kref *ref) +{ + struct gather_bo *bo = container_of(ref, struct gather_bo, ref); + + kfree(bo->gather_data); + kfree(bo); +} + +void gather_bo_put(struct host1x_bo *host_bo) +{ + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); + + kref_put(&bo->ref, gather_bo_release); +} + +static struct sg_table * +gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys) +{ + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); + struct sg_table *sgt; + int err; + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) + return ERR_PTR(-ENOMEM); + + err = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (err) { + kfree(sgt); + return ERR_PTR(err); + } + + sg_init_one(sgt->sgl, bo->gather_data, bo->gather_data_words*4); + + return sgt; +} + +static void gather_bo_unpin(struct device *dev, struct sg_table *sgt) +{ + if (sgt) { + sg_free_table(sgt); + kfree(sgt); + } +} + +static void *gather_bo_mmap(struct host1x_bo *host_bo) +{ + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); + + return bo->gather_data; +} + +static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr) +{ +} + +const struct host1x_bo_ops gather_bo_ops = { + .get = gather_bo_get, + .put = gather_bo_put, + .pin = gather_bo_pin, + .unpin = gather_bo_unpin, + .mmap = gather_bo_mmap, + .munmap = gather_bo_munmap, +}; diff --git a/drivers/gpu/drm/tegra/gather_bo.h b/drivers/gpu/drm/tegra/gather_bo.h new file mode 100644 index 0000000000000..6b4c9d83ac912 
--- /dev/null +++ b/drivers/gpu/drm/tegra/gather_bo.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020 NVIDIA Corporation */ + +#ifndef _TEGRA_DRM_SUBMIT_GATHER_BO_H +#define _TEGRA_DRM_SUBMIT_GATHER_BO_H + +#include +#include + +struct gather_bo { + struct host1x_bo base; + + struct kref ref; + + u32 *gather_data; + size_t gather_data_words; +}; + +extern const struct host1x_bo_ops gather_bo_ops; +void gather_bo_put(struct host1x_bo *host_bo); + +#endif diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c new file mode 100644 index 0000000000000..0225706e31638 --- /dev/null +++ b/drivers/gpu/drm/tegra/submit.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 NVIDIA Corporation */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "drm.h" +#include "gather_bo.h" +#include "gem.h" +#include "submit.h" +#include "uapi.h" + +#define SUBMIT_ERR(context, fmt, ...) 
\ + dev_err_ratelimited(context->client->base.dev, \ + "%s: job submission failed: " fmt "\n", \ + current->comm __VA_OPT__(,) __VA_ARGS__) + +static struct tegra_drm_mapping * +tegra_drm_mapping_get(struct tegra_drm_context *context, u32 id) +{ + struct tegra_drm_mapping *mapping; + + xa_lock(&context->mappings); + mapping = xa_load(&context->mappings, id); + if (mapping) + kref_get(&mapping->ref); + xa_unlock(&context->mappings); + + return mapping; +} + +static void *alloc_copy_user_array(void __user *from, size_t count, size_t size) +{ + size_t copy_len; + void *data; + + if (check_mul_overflow(count, size, &copy_len)) + return ERR_PTR(-EINVAL); + + if (copy_len > 0x4000) + return ERR_PTR(-E2BIG); + + data = kvmalloc(copy_len, GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(data, from, copy_len)) { + kvfree(data); + return ERR_PTR(-EFAULT); + } + + return data; +} + +static int submit_copy_gather_data(struct gather_bo **pbo, struct tegra_drm_context *context, + struct drm_tegra_channel_submit *args) +{ + struct gather_bo *bo; + size_t copy_len; + + if (args->gather_data_words == 0) { + SUBMIT_ERR(context, "gather_data_words cannot be zero"); + return -EINVAL; + } + + if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, &copy_len)) { + SUBMIT_ERR(context, "gather_data_words is too large"); + return -EINVAL; + } + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) { + SUBMIT_ERR(context, "failed to allocate memory for bo info"); + return -ENOMEM; + } + + kref_init(&bo->ref); + host1x_bo_init(&bo->base, &gather_bo_ops); + + bo->gather_data = kmalloc(copy_len, GFP_KERNEL | __GFP_NOWARN); + if (!bo->gather_data) { + SUBMIT_ERR(context, "failed to allocate memory for gather data"); + kfree(bo); + return -ENOMEM; + } + + if (copy_from_user(bo->gather_data, u64_to_user_ptr(args->gather_data_ptr), copy_len)) { + SUBMIT_ERR(context, "failed to copy gather data from userspace"); + kfree(bo->gather_data); + kfree(bo); + return -EFAULT; +
} + + bo->gather_data_words = args->gather_data_words; + + *pbo = bo; + + return 0; +} + +static int submit_write_reloc(struct tegra_drm_context *context, struct gather_bo *bo, + struct drm_tegra_submit_buf *buf, struct tegra_drm_mapping *mapping) +{ + /* TODO check that target_offset is within bounds */ + dma_addr_t iova = mapping->iova + buf->reloc.target_offset; + u32 written_ptr; + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + if (buf->flags & DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT) + iova |= BIT_ULL(39); +#endif + + written_ptr = iova >> buf->reloc.shift; + + if (buf->reloc.gather_offset_words >= bo->gather_data_words) { + SUBMIT_ERR(context, "relocation has too large gather offset (%u vs gather length %zu)", + buf->reloc.gather_offset_words, bo->gather_data_words); + return -EINVAL; + } + + buf->reloc.gather_offset_words = array_index_nospec(buf->reloc.gather_offset_words, + bo->gather_data_words); + + bo->gather_data[buf->reloc.gather_offset_words] = written_ptr; + + return 0; +} + +static int submit_process_bufs(struct tegra_drm_context *context, struct gather_bo *bo, + struct drm_tegra_channel_submit *args, + struct tegra_drm_submit_data *job_data) +{ + struct tegra_drm_used_mapping *mappings; + struct drm_tegra_submit_buf *bufs; + int err; + u32 i; + + bufs = alloc_copy_user_array(u64_to_user_ptr(args->bufs_ptr), args->num_bufs, + sizeof(*bufs)); + if (IS_ERR(bufs)) { + SUBMIT_ERR(context, "failed to copy bufs array from userspace"); + return PTR_ERR(bufs); + } + + mappings = kcalloc(args->num_bufs, sizeof(*mappings), GFP_KERNEL); + if (!mappings) { + SUBMIT_ERR(context, "failed to allocate memory for mapping info"); + err = -ENOMEM; + goto done; + } + + for (i = 0; i < args->num_bufs; i++) { + struct drm_tegra_submit_buf *buf = &bufs[i]; + struct tegra_drm_mapping *mapping; + + if (buf->flags & ~DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT) { + SUBMIT_ERR(context, "invalid flag specified for buf"); + err = -EINVAL; + goto drop_refs; + } + + mapping = 
tegra_drm_mapping_get(context, buf->mapping); + if (!mapping) { + SUBMIT_ERR(context, "invalid mapping ID '%u' for buffer", buf->mapping); + err = -EINVAL; + goto drop_refs; + } + + err = submit_write_reloc(context, bo, buf, mapping); + if (err) { + tegra_drm_mapping_put(mapping); + goto drop_refs; + } + + mappings[i].mapping = mapping; + mappings[i].flags = buf->flags; + } + + job_data->used_mappings = mappings; + job_data->num_used_mappings = i; + + err = 0; + + goto done; + +drop_refs: + while (i--) + tegra_drm_mapping_put(mappings[i].mapping); + + kfree(mappings); + job_data->used_mappings = NULL; + +done: + kvfree(bufs); + + return err; +} + +static int submit_get_syncpt(struct tegra_drm_context *context, struct host1x_job *job, + struct xarray *syncpoints, struct drm_tegra_channel_submit *args) +{ + struct host1x_syncpt *sp; + + if (args->syncpt.flags) { + SUBMIT_ERR(context, "invalid flag specified for syncpt"); + return -EINVAL; + } + + /* Syncpt ref will be dropped on job release */ + sp = xa_load(syncpoints, args->syncpt.id); + if (!sp) { + SUBMIT_ERR(context, "syncpoint specified in syncpt was not allocated"); + return -EINVAL; + } + + job->syncpt = host1x_syncpt_get(sp); + job->syncpt_incrs = args->syncpt.increments; + + return 0; +} + +static int submit_job_add_gather(struct host1x_job *job, struct tegra_drm_context *context, + struct drm_tegra_submit_cmd_gather_uptr *cmd, + struct gather_bo *bo, u32 *offset, + struct tegra_drm_submit_data *job_data) +{ + u32 next_offset; + + if (cmd->reserved[0] || cmd->reserved[1] || cmd->reserved[2]) { + SUBMIT_ERR(context, "non-zero reserved field in GATHER_UPTR command"); + return -EINVAL; + } + + /* Check for maximum gather size */ + if (cmd->words > 16383) { + SUBMIT_ERR(context, "too many words in GATHER_UPTR command"); + return -EINVAL; + } + + if (check_add_overflow(*offset, cmd->words, &next_offset)) { + SUBMIT_ERR(context, "too many total words in job"); + return -EINVAL; + } + + if (next_offset > 
bo->gather_data_words) { + SUBMIT_ERR(context, "GATHER_UPTR command overflows gather data"); + return -EINVAL; + } + + host1x_job_add_gather(job, &bo->base, cmd->words, *offset * 4); + + *offset = next_offset; + + return 0; +} + +static struct host1x_job * +submit_create_job(struct tegra_drm_context *context, struct gather_bo *bo, + struct drm_tegra_channel_submit *args, struct tegra_drm_submit_data *job_data, + struct xarray *syncpoints) +{ + struct drm_tegra_submit_cmd *cmds; + u32 i, gather_offset = 0, class; + struct host1x_job *job; + int err; + + /* Set initial class for firewall. */ + class = context->client->base.class; + + cmds = alloc_copy_user_array(u64_to_user_ptr(args->cmds_ptr), args->num_cmds, + sizeof(*cmds)); + if (IS_ERR(cmds)) { + SUBMIT_ERR(context, "failed to copy cmds array from userspace"); + return ERR_CAST(cmds); + } + + job = host1x_job_alloc(context->channel, args->num_cmds, 0); + if (!job) { + SUBMIT_ERR(context, "failed to allocate memory for job"); + job = ERR_PTR(-ENOMEM); + goto done; + } + + err = submit_get_syncpt(context, job, syncpoints, args); + if (err < 0) + goto free_job; + + job->client = &context->client->base; + job->class = context->client->base.class; + job->serialize = true; + + for (i = 0; i < args->num_cmds; i++) { + struct drm_tegra_submit_cmd *cmd = &cmds[i]; + + if (cmd->flags) { + SUBMIT_ERR(context, "unknown flags given for cmd"); + err = -EINVAL; + goto free_job; + } + + if (cmd->type == DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR) { + err = submit_job_add_gather(job, context, &cmd->gather_uptr, bo, + &gather_offset, job_data); + if (err) + goto free_job; + } else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT) { + if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) { + SUBMIT_ERR(context, "non-zero reserved value"); + err = -EINVAL; + goto free_job; + } + + host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value, + false, class); + } else if (cmd->type == 
DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE) { + if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) { + SUBMIT_ERR(context, "non-zero reserved value"); + err = -EINVAL; + goto free_job; + } + + if (cmd->wait_syncpt.id != args->syncpt.id) { + SUBMIT_ERR(context, "syncpoint ID in CMD_WAIT_SYNCPT_RELATIVE is not used by the job"); + err = -EINVAL; + goto free_job; + } + + host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value, + true, class); + } else { + SUBMIT_ERR(context, "unknown cmd type"); + err = -EINVAL; + goto free_job; + } + } + + if (gather_offset == 0) { + SUBMIT_ERR(context, "job must have at least one gather"); + err = -EINVAL; + goto free_job; + } + + goto done; + +free_job: + host1x_job_put(job); + job = ERR_PTR(err); + +done: + kvfree(cmds); + + return job; +} + +static void release_job(struct host1x_job *job) +{ + struct tegra_drm_client *client = container_of(job->client, struct tegra_drm_client, base); + struct tegra_drm_submit_data *job_data = job->user_data; + u32 i; + + for (i = 0; i < job_data->num_used_mappings; i++) + tegra_drm_mapping_put(job_data->used_mappings[i].mapping); + + kfree(job_data->used_mappings); + kfree(job_data); + + if (pm_runtime_enabled(client->base.dev)) + pm_runtime_put_autosuspend(client->base.dev); +} + +int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_channel_submit *args = data; + struct tegra_drm_submit_data *job_data; + struct drm_syncobj *syncobj = NULL; + struct tegra_drm_context *context; + struct host1x_job *job; + struct gather_bo *bo; + u32 i; + int err; + + mutex_lock(&fpriv->lock); + + context = xa_load(&fpriv->contexts, args->context); + if (!context) { + mutex_unlock(&fpriv->lock); + pr_err_ratelimited("%s: %s: invalid channel context '%#x'", __func__, + current->comm, args->context); + return -EINVAL; + } + + if (args->syncobj_in) { + struct dma_fence *fence; 
+ + err = drm_syncobj_find_fence(file, args->syncobj_in, 0, 0, &fence); + if (err) { + SUBMIT_ERR(context, "invalid syncobj_in '%#x'", args->syncobj_in); + goto unlock; + } + + err = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(10000)); + dma_fence_put(fence); + if (err) { + SUBMIT_ERR(context, "wait for syncobj_in timed out"); + goto unlock; + } + } + + if (args->syncobj_out) { + syncobj = drm_syncobj_find(file, args->syncobj_out); + if (!syncobj) { + SUBMIT_ERR(context, "invalid syncobj_out '%#x'", args->syncobj_out); + err = -ENOENT; + goto unlock; + } + } + + /* Allocate gather BO and copy gather words in. */ + err = submit_copy_gather_data(&bo, context, args); + if (err) + goto unlock; + + job_data = kzalloc(sizeof(*job_data), GFP_KERNEL); + if (!job_data) { + SUBMIT_ERR(context, "failed to allocate memory for job data"); + err = -ENOMEM; + goto put_bo; + } + + /* Get data buffer mappings and do relocation patching. */ + err = submit_process_bufs(context, bo, args, job_data); + if (err) + goto free_job_data; + + /* Allocate host1x_job and add gathers and waits to it. */ + job = submit_create_job(context, bo, args, job_data, &fpriv->syncpoints); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto free_job_data; + } + + /* Map gather data for Host1x. */ + err = host1x_job_pin(job, context->client->base.dev); + if (err) + goto put_job; + + /* Boot engine. */ + if (pm_runtime_enabled(context->client->base.dev)) { + err = pm_runtime_resume_and_get(context->client->base.dev); + if (err < 0) { + SUBMIT_ERR(context, "could not power up engine: %d", err); + goto unpin_job; + } + } + + job->user_data = job_data; + job->release = release_job; + job->timeout = 10000; + + /* + * job_data is now part of job reference counting, so don't release + * it from here. + */ + job_data = NULL; + + /* Submit job to hardware. 
*/ + err = host1x_job_submit(job); + if (err) { + SUBMIT_ERR(context, "host1x job submission failed: %d", err); + goto unpin_job; + } + + /* Return postfences to userspace and add fences to DMA reservations. */ + args->syncpt.value = job->syncpt_end; + + if (syncobj) { + struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end); + if (IS_ERR(fence)) + err = PTR_ERR(fence); + + drm_syncobj_replace_fence(syncobj, fence); + } + + goto put_job; + +unpin_job: + host1x_job_unpin(job); +put_job: + host1x_job_put(job); +free_job_data: + if (job_data && job_data->used_mappings) { + for (i = 0; i < job_data->num_used_mappings; i++) + tegra_drm_mapping_put(job_data->used_mappings[i].mapping); + + kfree(job_data->used_mappings); + } + + if (job_data) + kfree(job_data); +put_bo: + gather_bo_put(&bo->base); +unlock: + if (syncobj) + drm_syncobj_put(syncobj); + + mutex_unlock(&fpriv->lock); + return err; +} diff --git a/drivers/gpu/drm/tegra/submit.h b/drivers/gpu/drm/tegra/submit.h new file mode 100644 index 0000000000000..0a165e9e4bda2 --- /dev/null +++ b/drivers/gpu/drm/tegra/submit.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020 NVIDIA Corporation */ + +#ifndef _TEGRA_DRM_UAPI_SUBMIT_H +#define _TEGRA_DRM_UAPI_SUBMIT_H + +struct tegra_drm_used_mapping { + struct tegra_drm_mapping *mapping; + u32 flags; +}; + +struct tegra_drm_submit_data { + struct tegra_drm_used_mapping *used_mappings; + u32 num_used_mappings; +}; + +#endif From 5925961174ce42f8d4e36477164bccae54a7680d Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:45 +0200 Subject: [PATCH 118/851] drm/tegra: Add job firewall Add a firewall that validates jobs before submission to ensure they don't do anything they aren't allowed to do, like accessing memory they should not access. The firewall is functionality-wise a copy of the firewall already implemented in gpu/host1x. 
It is copied here as it makes more sense for it to live on the DRM side, as it is only needed for userspace job submissions, and generally the data it needs to do its job is easier to access here. In the future, the other implementation will be removed. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/Makefile | 1 + drivers/gpu/drm/tegra/firewall.c | 254 +++++++++++++++++++++++++++++++ drivers/gpu/drm/tegra/submit.c | 10 +- drivers/gpu/drm/tegra/submit.h | 4 + 4 files changed, 267 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/tegra/firewall.c diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index ab4289d1c991f..5d2039f0c7345 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -6,6 +6,7 @@ tegra-drm-y := \ uapi.o \ submit.o \ gather_bo.o \ + firewall.o \ gem.o \ fb.o \ dp.o \ diff --git a/drivers/gpu/drm/tegra/firewall.c b/drivers/gpu/drm/tegra/firewall.c new file mode 100644 index 0000000000000..1824d2db0e2ce --- /dev/null +++ b/drivers/gpu/drm/tegra/firewall.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2010-2020 NVIDIA Corporation */ + +#include "drm.h" +#include "submit.h" +#include "uapi.h" + +struct tegra_drm_firewall { + struct tegra_drm_submit_data *submit; + struct tegra_drm_client *client; + u32 *data; + u32 pos; + u32 end; + u32 class; +}; + +static int fw_next(struct tegra_drm_firewall *fw, u32 *word) +{ + if (fw->pos == fw->end) + return -EINVAL; + + *word = fw->data[fw->pos++]; + + return 0; +} + +static bool fw_check_addr_valid(struct tegra_drm_firewall *fw, u32 offset) +{ + u32 i; + + for (i = 0; i < fw->submit->num_used_mappings; i++) { + struct tegra_drm_mapping *m = fw->submit->used_mappings[i].mapping; + + if (offset >= m->iova && offset <= m->iova_end) + return true; + } + + return false; +} + +static int fw_check_reg(struct tegra_drm_firewall *fw, u32 offset) +{ + bool is_addr; + u32 word; + int 
err; + + err = fw_next(fw, &word); + if (err) + return err; + + if (!fw->client->ops->is_addr_reg) + return 0; + + is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class, + offset); + + if (!is_addr) + return 0; + + if (!fw_check_addr_valid(fw, word)) + return -EINVAL; + + return 0; +} + +static int fw_check_regs_seq(struct tegra_drm_firewall *fw, u32 offset, + u32 count, bool incr) +{ + u32 i; + + for (i = 0; i < count; i++) { + if (fw_check_reg(fw, offset)) + return -EINVAL; + + if (incr) + offset++; + } + + return 0; +} + +static int fw_check_regs_mask(struct tegra_drm_firewall *fw, u32 offset, + u16 mask) +{ + unsigned long bmask = mask; + unsigned int bit; + + for_each_set_bit(bit, &bmask, 16) { + if (fw_check_reg(fw, offset+bit)) + return -EINVAL; + } + + return 0; +} + +static int fw_check_regs_imm(struct tegra_drm_firewall *fw, u32 offset) +{ + bool is_addr; + + is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class, + offset); + if (is_addr) + return -EINVAL; + + return 0; +} + +static int fw_check_class(struct tegra_drm_firewall *fw, u32 class) +{ + if (!fw->client->ops->is_valid_class) { + if (class == fw->client->base.class) + return 0; + else + return -EINVAL; + } + + if (!fw->client->ops->is_valid_class(class)) + return -EINVAL; + + return 0; +} + +enum { + HOST1X_OPCODE_SETCLASS = 0x00, + HOST1X_OPCODE_INCR = 0x01, + HOST1X_OPCODE_NONINCR = 0x02, + HOST1X_OPCODE_MASK = 0x03, + HOST1X_OPCODE_IMM = 0x04, + HOST1X_OPCODE_RESTART = 0x05, + HOST1X_OPCODE_GATHER = 0x06, + HOST1X_OPCODE_SETSTRMID = 0x07, + HOST1X_OPCODE_SETAPPID = 0x08, + HOST1X_OPCODE_SETPYLD = 0x09, + HOST1X_OPCODE_INCR_W = 0x0a, + HOST1X_OPCODE_NONINCR_W = 0x0b, + HOST1X_OPCODE_GATHER_W = 0x0c, + HOST1X_OPCODE_RESTART_W = 0x0d, + HOST1X_OPCODE_EXTEND = 0x0e, +}; + +int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start, + u32 words, struct tegra_drm_submit_data *submit, + u32 *job_class) +{ + struct tegra_drm_firewall fw = { + 
.submit = submit, + .client = client, + .data = data, + .pos = start, + .end = start+words, + .class = *job_class, + }; + bool payload_valid = false; + u32 payload; + int err; + + while (fw.pos != fw.end) { + u32 word, opcode, offset, count, mask, class; + + err = fw_next(&fw, &word); + if (err) + return err; + + opcode = (word & 0xf0000000) >> 28; + + switch (opcode) { + case HOST1X_OPCODE_SETCLASS: + offset = word >> 16 & 0xfff; + mask = word & 0x3f; + class = (word >> 6) & 0x3ff; + err = fw_check_class(&fw, class); + fw.class = class; + *job_class = class; + if (!err) + err = fw_check_regs_mask(&fw, offset, mask); + if (err) + dev_warn(client->base.dev, + "illegal SETCLASS(offset=0x%x, mask=0x%x, class=0x%x) at word %u", + offset, mask, class, fw.pos-1); + break; + case HOST1X_OPCODE_INCR: + offset = (word >> 16) & 0xfff; + count = word & 0xffff; + err = fw_check_regs_seq(&fw, offset, count, true); + if (err) + dev_warn(client->base.dev, + "illegal INCR(offset=0x%x, count=%u) in class 0x%x at word %u", + offset, count, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_NONINCR: + offset = (word >> 16) & 0xfff; + count = word & 0xffff; + err = fw_check_regs_seq(&fw, offset, count, false); + if (err) + dev_warn(client->base.dev, + "illegal NONINCR(offset=0x%x, count=%u) in class 0x%x at word %u", + offset, count, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_MASK: + offset = (word >> 16) & 0xfff; + mask = word & 0xffff; + err = fw_check_regs_mask(&fw, offset, mask); + if (err) + dev_warn(client->base.dev, + "illegal MASK(offset=0x%x, mask=0x%x) in class 0x%x at word %u", + offset, mask, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_IMM: + /* IMM cannot reasonably be used to write a pointer */ + offset = (word >> 16) & 0xfff; + err = fw_check_regs_imm(&fw, offset); + if (err) + dev_warn(client->base.dev, + "illegal IMM(offset=0x%x) in class 0x%x at word %u", + offset, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_SETPYLD: + payload = word & 0xffff; + 
payload_valid = true; + break; + case HOST1X_OPCODE_INCR_W: + if (!payload_valid) + return -EINVAL; + + offset = word & 0x3fffff; + err = fw_check_regs_seq(&fw, offset, payload, true); + if (err) + dev_warn(client->base.dev, + "illegal INCR_W(offset=0x%x) in class 0x%x at word %u", + offset, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_NONINCR_W: + if (!payload_valid) + return -EINVAL; + + offset = word & 0x3fffff; + err = fw_check_regs_seq(&fw, offset, payload, false); + if (err) + dev_warn(client->base.dev, + "illegal NONINCR(offset=0x%x) in class 0x%x at word %u", + offset, fw.class, fw.pos-1); + break; + default: + dev_warn(client->base.dev, "illegal opcode at word %u", + fw.pos-1); + return -EINVAL; + } + + if (err) + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index 0225706e31638..c53b7207c4786 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -233,7 +233,7 @@ static int submit_get_syncpt(struct tegra_drm_context *context, struct host1x_jo static int submit_job_add_gather(struct host1x_job *job, struct tegra_drm_context *context, struct drm_tegra_submit_cmd_gather_uptr *cmd, struct gather_bo *bo, u32 *offset, - struct tegra_drm_submit_data *job_data) + struct tegra_drm_submit_data *job_data, u32 *class) { u32 next_offset; @@ -258,6 +258,12 @@ static int submit_job_add_gather(struct host1x_job *job, struct tegra_drm_contex return -EINVAL; } + if (tegra_drm_fw_validate(context->client, bo->gather_data, *offset, + cmd->words, job_data, class)) { + SUBMIT_ERR(context, "job was rejected by firewall"); + return -EINVAL; + } + host1x_job_add_gather(job, &bo->base, cmd->words, *offset * 4); *offset = next_offset; @@ -311,7 +317,7 @@ submit_create_job(struct tegra_drm_context *context, struct gather_bo *bo, if (cmd->type == DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR) { err = submit_job_add_gather(job, context, &cmd->gather_uptr, bo, - &gather_offset, job_data); + 
&gather_offset, job_data, &class); if (err) goto free_job; } else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT) { diff --git a/drivers/gpu/drm/tegra/submit.h b/drivers/gpu/drm/tegra/submit.h index 0a165e9e4bda2..cf6a2f0a29fc6 100644 --- a/drivers/gpu/drm/tegra/submit.h +++ b/drivers/gpu/drm/tegra/submit.h @@ -14,4 +14,8 @@ struct tegra_drm_submit_data { u32 num_used_mappings; }; +int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start, + u32 words, struct tegra_drm_submit_data *submit, + u32 *job_class); + #endif From b19502d1a683c11f6f2c92ad63c61288b0fbe1a1 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Fri, 9 Jul 2021 21:31:46 +0200 Subject: [PATCH 119/851] drm/tegra: Bump driver version Bump driver version to 1.0.0 to allow userspace to detect availability of new interfaces. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ae9dafc32c2b9..b20fd08336616 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -28,7 +28,7 @@ #define DRIVER_NAME "tegra" #define DRIVER_DESC "NVIDIA Tegra graphics" #define DRIVER_DATE "20120330" -#define DRIVER_MAJOR 0 +#define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 From 85fd4a8a84316166640102676a356755ddec80e0 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 12 Jul 2021 18:06:36 -0400 Subject: [PATCH 120/851] drm/ttm: Fix COW check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KFD Thunk maps invisible VRAM BOs with PROT_NONE, MAP_PRIVATE. is_cow_mapping returns true for these mappings. Add a check for vm_flags & VM_WRITE to avoid mmap failures on private read-only or PROT_NONE mappings. 
v2: protect against mprotect making a mapping writable after the fact v3: update driver-specific vm_operations_structs Fixes: f91142c62161 ("drm/ttm: nuke VM_MIXEDMAP on BO mappings v3") Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210712220636.475675-1-Felix.Kuehling@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 ++- drivers/gpu/drm/nouveau/nouveau_gem.c | 3 ++- drivers/gpu/drm/radeon/radeon_gem.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo_vm.c | 14 +++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 1 + include/drm/ttm/ttm_bo_api.h | 4 ++++ 6 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 9cf4beaf646c7..0d65c9d2144e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -79,7 +79,8 @@ static const struct vm_operations_struct amdgpu_gem_vm_ops = { .fault = amdgpu_gem_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, - .access = ttm_bo_vm_access + .access = ttm_bo_vm_access, + .mprotect = ttm_bo_vm_mprotect }; static void amdgpu_gem_object_free(struct drm_gem_object *gobj) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 5b27845075a1c..164ea564bb7ac 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -70,7 +70,8 @@ static const struct vm_operations_struct nouveau_ttm_vm_ops = { .fault = nouveau_ttm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, - .access = ttm_bo_vm_access + .access = ttm_bo_vm_access, + .mprotect = ttm_bo_vm_mprotect }; void diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 458f92a708879..c19ad07eb7b50 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -77,7 +77,8 @@ static 
const struct vm_operations_struct radeon_gem_vm_ops = { .fault = radeon_gem_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, - .access = ttm_bo_vm_access + .access = ttm_bo_vm_access, + .mprotect = ttm_bo_vm_mprotect }; static void radeon_gem_object_free(struct drm_gem_object *gobj) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index f56be5bc0861e..fb325bad5db6c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -542,17 +542,29 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(ttm_bo_vm_access); +int ttm_bo_vm_mprotect(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags) +{ + /* Enforce no COW since would have really strange behavior with it. */ + if (is_cow_mapping(newflags) && (newflags & VM_WRITE)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(ttm_bo_vm_mprotect); + static const struct vm_operations_struct ttm_bo_vm_ops = { .fault = ttm_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, .access = ttm_bo_vm_access, + .mprotect = ttm_bo_vm_mprotect, }; int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo) { /* Enforce no COW since would have really strange behavior with it. 
*/ - if (is_cow_mapping(vma->vm_flags)) + if (is_cow_mapping(vma->vm_flags) && (vma->vm_flags & VM_WRITE)) return -EINVAL; ttm_bo_get(bo); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c index e6b1f98ec99f0..e4bf7dc993208 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c @@ -61,6 +61,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma) .fault = vmw_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, + .mprotect = ttm_bo_vm_mprotect, #ifdef CONFIG_TRANSPARENT_HUGEPAGE .huge_fault = vmw_bo_vm_huge_fault, #endif diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index f681bbdbc6982..40eb958753557 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -605,6 +605,10 @@ void ttm_bo_vm_close(struct vm_area_struct *vma); int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); + +int ttm_bo_vm_mprotect(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags); + bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all); vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot); From 6d805368f6729c331b138919ee9f81f7c2fbf459 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 14 Jul 2021 21:56:55 +0200 Subject: [PATCH 121/851] ioprio: move user space relevant ioprio bits to UAPI includes systemd added a modified copy of include/linux/ioprio.h into its code to get the relevant content definitions for the exposed ioprio_[get|set] system calls. Move the user space relevant ioprio bits to the UAPI includes to be able to use the ioprio_[get|set] syscalls as intended. 
Cc: Kay Sievers Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20210714195655.181943-1-socketcan@hartkopp.net Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 41 +-------------------------------- include/uapi/linux/ioprio.h | 46 +++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 40 deletions(-) create mode 100644 include/uapi/linux/ioprio.h diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index e9bfe6972aed9..ef9ad4fb245fe 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -6,46 +6,7 @@ #include #include -/* - * Gives us 8 prio classes with 13-bits of data for each class - */ -#define IOPRIO_CLASS_SHIFT (13) -#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) - -#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) -#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) -#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) - -#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) - -/* - * These are the io priority groups as implemented by CFQ. RT is the realtime - * class, it always gets premium service. BE is the best-effort scheduling - * class, the default for any process. IDLE is the idle scheduling class, it - * is only served when no one else is using the disk. - */ -enum { - IOPRIO_CLASS_NONE, - IOPRIO_CLASS_RT, - IOPRIO_CLASS_BE, - IOPRIO_CLASS_IDLE, -}; - -/* - * 8 best effort priority levels are supported - */ -#define IOPRIO_BE_NR (8) - -enum { - IOPRIO_WHO_PROCESS = 1, - IOPRIO_WHO_PGRP, - IOPRIO_WHO_USER, -}; - -/* - * Fallback BE priority - */ -#define IOPRIO_NORM (4) +#include /* * if process has set io priority explicitly, use that. 
if not, convert diff --git a/include/uapi/linux/ioprio.h b/include/uapi/linux/ioprio.h new file mode 100644 index 0000000000000..77b17e08b0da1 --- /dev/null +++ b/include/uapi/linux/ioprio.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_IOPRIO_H +#define _UAPI_LINUX_IOPRIO_H + +/* + * Gives us 8 prio classes with 13-bits of data for each class + */ +#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) + +#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) +#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + +/* + * These are the io priority groups as implemented by CFQ. RT is the realtime + * class, it always gets premium service. BE is the best-effort scheduling + * class, the default for any process. IDLE is the idle scheduling class, it + * is only served when no one else is using the disk. + */ +enum { + IOPRIO_CLASS_NONE, + IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE, + IOPRIO_CLASS_IDLE, +}; + +#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) + +/* + * 8 best effort priority levels are supported + */ +#define IOPRIO_BE_NR (8) + +enum { + IOPRIO_WHO_PROCESS = 1, + IOPRIO_WHO_PGRP, + IOPRIO_WHO_USER, +}; + +/* + * Fallback BE priority + */ +#define IOPRIO_NORM (4) + +#endif /* _UAPI_LINUX_IOPRIO_H */ From f78cad9b114bf901b7f3ffd7d70960eb12d3eef5 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Sat, 26 Jun 2021 13:58:28 +0800 Subject: [PATCH 122/851] arc: Prefer unsigned int to bare use of unsigned Fix checkpatch warnings: WARNING: Prefer 'unsigned int' to bare use of 'unsigned' Signed-off-by: Jinchao Wang Signed-off-by: Vineet Gupta --- arch/arc/include/asm/checksum.h | 2 +- arch/arc/include/asm/perf_event.h | 2 +- arch/arc/kernel/unwind.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arc/include/asm/checksum.h 
b/arch/arc/include/asm/checksum.h index 69debd77cd046..0b485800a392e 100644 --- a/arch/arc/include/asm/checksum.h +++ b/arch/arc/include/asm/checksum.h @@ -24,7 +24,7 @@ */ static inline __sum16 csum_fold(__wsum s) { - unsigned r = s << 16 | s >> 16; /* ror */ + unsigned int r = s << 16 | s >> 16; /* ror */ s = ~s; s -= r; return s >> 16; diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 30b9ae511ea9e..e1971d34ef30e 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -123,7 +123,7 @@ static const char * const arc_pmu_ev_hw_map[] = { #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xffff -static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 47bab67f8649b..9e28058cdba85 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -260,7 +260,7 @@ static void init_unwind_hdr(struct unwind_table *table, { const u8 *ptr; unsigned long tableSize = table->size, hdrSize; - unsigned n; + unsigned int n; const u32 *fde; struct { u8 version; @@ -462,7 +462,7 @@ static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; uleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -483,7 +483,7 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; sleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -609,7 +609,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end, static signed fde_pointer_type(const u32 *cie) { const u8 *ptr = (const u8 *)(cie + 2); - unsigned version = 
*ptr; + unsigned int version = *ptr; if (*++ptr) { const char *aug; @@ -904,7 +904,7 @@ int arc_unwind(struct unwind_frame_info *frame) const u8 *ptr = NULL, *end = NULL; unsigned long pc = UNW_PC(frame) - frame->call_frame; unsigned long startLoc = 0, endLoc = 0, cfa; - unsigned i; + unsigned int i; signed ptrType = -1; uleb128_t retAddrReg = 0; const struct unwind_table *table; From f9341d0e8b1477d67715df25612ef5525ecc2311 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 4 Jul 2021 10:28:24 +0100 Subject: [PATCH 123/851] arc: Fix spelling mistake and grammar in Kconfig There is a spelling mistake and incorrect grammar in the Kconfig text. Fix them. Signed-off-by: Colin Ian King Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d8f51eb8963bb..b5bf68e747320 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -409,7 +409,7 @@ choice help Depending on the configuration, CPU can contain DSP registers (ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL). - Bellow is options describing how to handle these registers in + Below are options describing how to handle these registers in interrupt entry / exit and in context switch. config ARC_DSP_NONE From 7e6730ba68c0025c37ff1fa72666e59df90e2a24 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 10 Jul 2021 07:50:33 -0700 Subject: [PATCH 124/851] ARC: Fix CONFIG_STACKDEPOT Enabling CONFIG_STACKDEPOT results in the following build error. 
arc-elf-ld: lib/stackdepot.o: in function `filter_irq_stacks': stackdepot.c:(.text+0x456): undefined reference to `__irqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x456): undefined reference to `__irqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x476): undefined reference to `__irqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x476): undefined reference to `__irqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x484): undefined reference to `__softirqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x484): undefined reference to `__softirqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x48c): undefined reference to `__softirqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x48c): undefined reference to `__softirqentry_text_end' Other architectures address this problem by adding IRQENTRY_TEXT and SOFTIRQENTRY_TEXT to the text segment, so do the same here. Signed-off-by: Guenter Roeck Signed-off-by: Vineet Gupta --- arch/arc/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index e2146a8da1953..529ae50f9fe23 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -88,6 +88,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } From 3eada7b4bd85d0f73b34108db74d212fc8ad1e8c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 8 Jul 2021 17:13:48 -0700 Subject: [PATCH 125/851] ARC: fp: set FPU_STATUS.FWE to enable FPU_STATUS update on context switch FPU_STATUS register contains FP exception flags bits which are updated by core as side-effect of FP instructions but can also be manually wiggled such as by glibc C99 functions fe{raise,clear,test}except() etc. To effect the update, the programming model requires OR'ing FWE bit (31). This bit is write-only and RAZ, meaning it is effectively auto-cleared after write and thus needs to be set everytime: which is how glibc implements this. 
However there's another usecase of FPU_STATUS update, at the time of Linux task switch when incoming task value needs to be programmed into the register. This was added as part of f45ba2bd6da0dc ("ARCv2: fpu: preserve userspace fpu state") which missed OR'ing FWE bit, meaning the new value is effectively not being written at all. This patch remedies that. Interestingly, this snafu was not caught in interim glibc testing as the race window which relies on a specific exception bit to be set/clear is really small especially when it involves context switch. Fortunately this was caught by glibc's math/test-fenv-tls test which repeatedly set/clear exception flags in a big loop, concurrently in main program and also in a thread. Fixes: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/54 Fixes: f45ba2bd6da0dc ("ARCv2: fpu: preserve userspace fpu state") Cc: stable@vger.kernel.org #5.6+ Signed-off-by: Vineet Gupta --- arch/arc/kernel/fpu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c index c67c0f0f5f778..ec640219d989f 100644 --- a/arch/arc/kernel/fpu.c +++ b/arch/arc/kernel/fpu.c @@ -57,23 +57,26 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next) void fpu_init_task(struct pt_regs *regs) { + const unsigned int fwe = 0x80000000; + /* default rounding mode */ write_aux_reg(ARC_REG_FPU_CTRL, 0x100); - /* set "Write enable" to allow explicit write to exception flags */ - write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000); + /* Initialize to zero: setting requires FWE be set */ + write_aux_reg(ARC_REG_FPU_STATUS, fwe); } void fpu_save_restore(struct task_struct *prev, struct task_struct *next) { struct arc_fpu *save = &prev->thread.fpu; struct arc_fpu *restore = &next->thread.fpu; + const unsigned int fwe = 0x80000000; save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL); save->status = read_aux_reg(ARC_REG_FPU_STATUS); write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl); -
write_aux_reg(ARC_REG_FPU_STATUS, restore->status); + write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status)); } #endif From b727acc8662057b722dbcd24d25cc480b44fc28c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 10 Jun 2021 17:02:41 +0200 Subject: [PATCH 126/851] ARM: dts: stm32: Set {bitclock,frame}-master phandles on DHCOM SoM Fix the following dtbs_check warning: arch/arm/boot/dts/stm32mp157c-dhcom-pdk2.dt.yaml: codec@a: port:endpoint@0:frame-master: True is not of type 'array' arch/arm/boot/dts/stm32mp157c-dhcom-pdk2.dt.yaml: codec@a: port:endpoint@0:bitclock-master: True is not of type 'array' Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: kernel@dh-electronics.com Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index c5ea08fec535f..d9316701fd03e 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -168,15 +168,15 @@ sgtl5000_tx_endpoint: endpoint@0 { reg = <0>; remote-endpoint = <&sai2a_endpoint>; - frame-master; - bitclock-master; + frame-master = <&sgtl5000_tx_endpoint>; + bitclock-master = <&sgtl5000_tx_endpoint>; }; sgtl5000_rx_endpoint: endpoint@1 { reg = <1>; remote-endpoint = <&sai2b_endpoint>; - frame-master; - bitclock-master; + frame-master = <&sgtl5000_rx_endpoint>; + bitclock-master = <&sgtl5000_rx_endpoint>; }; }; From e26d30c8df86994b926fec3514af162b36616f55 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 10 Jun 2021 17:02:42 +0200 Subject: [PATCH 127/851] ARM: dts: stm32: Add backlight and panel supply on DHCOM SoM Fix the following dtbs_check warning: arch/arm/boot/dts/stm32mp157c-dhcom-pdk2.dt.yaml: display-bl: 'power-supply' is a 
required property arch/arm/boot/dts/stm32mp157c-dhcom-pdk2.dt.yaml: panel: 'power-supply' is a required property Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: kernel@dh-electronics.com Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index d9316701fd03e..4f6b94f10f4a9 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -29,6 +29,7 @@ brightness-levels = <0 16 22 30 40 55 75 102 138 188 255>; default-brightness-level = <8>; enable-gpios = <&gpioi 0 GPIO_ACTIVE_HIGH>; + power-supply = <&reg_panel_bl>; status = "okay"; }; @@ -106,6 +107,7 @@ panel { compatible = "edt,etm0700g0edh6"; backlight = <&display_bl>; + power-supply = <&reg_panel_bl>; port { lcd_panel_in: endpoint { @@ -114,6 +116,21 @@ }; }; + reg_panel_bl: regulator-panel-bl { + compatible = "regulator-fixed"; + regulator-name = "panel_backlight"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + vin-supply = <&reg_panel_supply>; + }; + + reg_panel_supply: regulator-panel-supply { + compatible = "regulator-fixed"; + regulator-name = "panel_supply"; + regulator-min-microvolt = <24000000>; + regulator-max-microvolt = <24000000>; + }; + sound { compatible = "audio-graph-card"; routing = From a9877360b05f7f8369e0501525e9c73900a60f73 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 10 Jun 2021 17:02:43 +0200 Subject: [PATCH 128/851] ARM: dts: stm32: Add usbphyc_port1 supply on DHCOM SoM The port is unused, but shares the same supply with port0, so fill the DT property in.
This fixes the following dtbs_check warning: arch/arm/boot/dts/stm32mp153c-dhcom-drc02.dt.yaml: usbphyc@5a006000: usb-phy@1: 'phy-supply' is a required property Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: kernel@dh-electronics.com Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi index fb45c5aa878da..4b10b013ffd52 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi @@ -159,3 +159,7 @@ &usbphyc_port0 { phy-supply = <&vdd_usb>; }; + +&usbphyc_port1 { + phy-supply = <&vdd_usb>; +}; From 49b3d89ab4214b59931dda65a2dae8e237bf90b9 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 14 Jun 2021 18:49:39 +0200 Subject: [PATCH 129/851] ARM: dts: stm32: Add coprocessor detach mbox on stm32mp157c-ed1 board To support the detach feature, add a new mailbox channel to inform the remote processor on a detach. This signal allows the remote processor firmware to stop IPC communication and to reinitialize the resources for a re-attach. 
Signed-off-by: Arnaud Pouliquen Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp157c-ed1.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp157c-ed1.dts b/arch/arm/boot/dts/stm32mp157c-ed1.dts index 95b08876b2b3b..46b471d09c50c 100644 --- a/arch/arm/boot/dts/stm32mp157c-ed1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ed1.dts @@ -313,8 +313,8 @@ &m4_rproc { memory-region = <&retram>, <&mcuram>, <&mcuram2>, <&vdev0vring0>, <&vdev0vring1>, <&vdev0buffer>; - mboxes = <&ipcc 0>, <&ipcc 1>, <&ipcc 2>; - mbox-names = "vq0", "vq1", "shutdown"; + mboxes = <&ipcc 0>, <&ipcc 1>, <&ipcc 2>, <&ipcc 3>; + mbox-names = "vq0", "vq1", "shutdown", "detach"; interrupt-parent = <&exti>; interrupts = <68 1>; status = "okay"; From fc5045ea6bdfd5be4522d8e2bee6ba8edc6b7201 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 14 Jun 2021 18:49:40 +0200 Subject: [PATCH 130/851] ARM: dts: stm32: Add coprocessor detach mbox on stm32mp15x-dkx boards To support the detach feature, add a new mailbox channel to inform the remote processor on a detach. This signal allows the remote processor firmware to stop IPC communication and to reinitialize the resources for a re-attach. 
Signed-off-by: Arnaud Pouliquen Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dkx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi index 59f18846cf5d0..06c11bad882a2 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi @@ -470,8 +470,8 @@ &m4_rproc { memory-region = <&retram>, <&mcuram>, <&mcuram2>, <&vdev0vring0>, <&vdev0vring1>, <&vdev0buffer>; - mboxes = <&ipcc 0>, <&ipcc 1>, <&ipcc 2>; - mbox-names = "vq0", "vq1", "shutdown"; + mboxes = <&ipcc 0>, <&ipcc 1>, <&ipcc 2>, <&ipcc 3>; + mbox-names = "vq0", "vq1", "shutdown", "detach"; interrupt-parent = <&exti>; interrupts = <68 1>; status = "okay"; From e505fdce70816efed994b8c21a6201c49af3c093 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 10 Jun 2021 17:03:06 +0200 Subject: [PATCH 131/851] ARM: dts: stm32: Set {bitclock,frame}-master phandles on ST DKx Fix the following dtbs_check warning: cs42l51@4a: port:endpoint@0:frame-master: True is not of type 'array' cs42l51@4a: port:endpoint@0:bitclock-master: True is not of type 'array' Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dkx.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi index 06c11bad882a2..899bfe04aeb91 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi @@ -220,15 +220,15 @@ cs42l51_tx_endpoint: endpoint@0 { reg = <0>; remote-endpoint = <&sai2a_endpoint>; - frame-master; - bitclock-master; + frame-master = <&cs42l51_tx_endpoint>; + bitclock-master = <&cs42l51_tx_endpoint>; }; cs42l51_rx_endpoint: endpoint@1 { reg = <1>; remote-endpoint = 
<&sai2b_endpoint>; - frame-master; - bitclock-master; + frame-master = <&cs42l51_rx_endpoint>; + bitclock-master = <&cs42l51_rx_endpoint>; }; }; }; From 1631761dc7fb3752f5801c5766ee435f96b86158 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 20 Jun 2021 20:54:21 +0200 Subject: [PATCH 132/851] ARM: dts: stm32: Prefer HW RTC on DHCOM SoM The DHCOM SoM has two RTCs, one is the STM32 RTC built into the SoC and another is Microcrystal RV RTC. By default, only the latter has battery backup, the former does not. The order in which the RTCs are probed on boot is random, which means the kernel might pick up system time from the STM32 RTC which has no battery backup. This then leads to incorrect initial system time setup, even though the HW RTC has correct time configured in it. Add DT alias entries, so that the RTCs get assigned fixed IDs and the HW RTC is always picked by the kernel as the default RTC, thus resulting in correct system time in early userspace. Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index 2af0a67526747..8349c9099e301 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -12,6 +12,8 @@ aliases { ethernet0 = &ethernet0; ethernet1 = &ksz8851; + rtc0 = &hwrtc; + rtc1 = &rtc; }; memory@c0000000 { @@ -248,7 +250,7 @@ /delete-property/dmas; /delete-property/dma-names; - rtc@32 { + hwrtc: rtc@32 { compatible = "microcrystal,rv8803"; reg = <0x32>; }; From b462e2fc78f098ab732c7d1aca6ecab879a1d103 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date:
Sun, 20 Jun 2021 23:24:31 +0200 Subject: [PATCH 133/851] ARM: dts: stm32: Update AV96 adv7513 node per dtbs_check Swap reg and reg-names order and drop adi,input-justification and adi,input-style to fix the following dtbs_check warnings: arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dt.yaml: hdmi-transmitter@3d: adi,input-justification: False schema does not allow ['evenly'] arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dt.yaml: hdmi-transmitter@3d: adi,input-style: False schema does not allow [[1]] arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dt.yaml: hdmi-transmitter@3d: reg-names:1: 'edid' was expected arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dt.yaml: hdmi-transmitter@3d: reg-names:2: 'cec' was expected Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi index 64dca5b7f748d..6885948f3024e 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi @@ -220,8 +220,8 @@ &i2c4 { hdmi-transmitter@3d { compatible = "adi,adv7513"; - reg = <0x3d>, <0x2d>, <0x4d>, <0x5d>; - reg-names = "main", "cec", "edid", "packet"; + reg = <0x3d>, <0x4d>, <0x2d>, <0x5d>; + reg-names = "main", "edid", "cec", "packet"; clocks = <&cec_clock>; clock-names = "cec"; @@ -239,8 +239,6 @@ adi,input-depth = <8>; adi,input-colorspace = "rgb"; adi,input-clock = "1x"; - adi,input-style = <1>; - adi,input-justification = "evenly"; ports { #address-cells = <1>; From 15934f515911e9bac86981eebe6667ad20286bc8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 14 Jul 2021 18:22:38 +0100 Subject: [PATCH 134/851] arm64: dts: fvp: Remove panel timings The 
simple-panel driver already has hard-coded timings for "arm,rtsm-display", and as such screams at us for trying to override a fixed mode from DT. Since the exact values probably don't matter all that much anyway, just remove the DT node to keep boot quiet. Link: https://lore.kernel.org/r/2701c187cf8e0762df38f68cc069ec2c29a3b5a9.1626283322.git.robin.murphy@arm.com Reviewed-by: Linus Walleij Signed-off-by: Robin Murphy Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/fvp-base-revc.dts | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index b8a21092db4d3..2732d8e5bb5da 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -192,18 +192,6 @@ remote-endpoint = <&clcd_pads>; }; }; - - panel-timing { - clock-frequency = <63500127>; - hactive = <1024>; - hback-porch = <152>; - hfront-porch = <48>; - hsync-len = <104>; - vactive = <768>; - vback-porch = <23>; - vfront-porch = <3>; - vsync-len = <4>; - }; }; bus@8000000 { From b1c64924df5d2caedb8714148ce177d3384dfafd Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Thu, 24 Jun 2021 19:47:17 +0800 Subject: [PATCH 135/851] dt-bindings: arm: rockchip: add rk3568 compatible string to pmu.yaml Add "rockchip,rk3568-pmu", "syscon", "simple-mfd" for pmu nodes on a rk3568 platform to pmu.yaml.
Signed-off-by: Liang Chen Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210624114719.1685-2-cl@rock-chips.com Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/arm/rockchip/pmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml b/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml index 53115b92d17ff..ceb15cea77e22 100644 --- a/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml @@ -23,6 +23,7 @@ select: - rockchip,rk3066-pmu - rockchip,rk3288-pmu - rockchip,rk3399-pmu + - rockchip,rk3568-pmu required: - compatible @@ -35,6 +36,7 @@ properties: - rockchip,rk3066-pmu - rockchip,rk3288-pmu - rockchip,rk3399-pmu + - rockchip,rk3568-pmu - const: syscon - const: simple-mfd From d178bed180ef7e7ac16d3c586fb7330b3cb8fc4d Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Thu, 24 Jun 2021 21:10:27 +0800 Subject: [PATCH 136/851] arm64: dts: rockchip: add pmu and qos nodes for rk3568 Add the power-management and QoS nodes to the core rk3568 dtsi. 
Signed-off-by: Liang Chen Link: https://lore.kernel.org/r/20210624131027.3719-1-cl@rock-chips.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568.dtsi | 229 +++++++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index d225e6a45d5cb..618849186c39a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -257,6 +258,99 @@ status = "disabled"; }; + pmu: power-management@fdd90000 { + compatible = "rockchip,rk3568-pmu", "syscon", "simple-mfd"; + reg = <0x0 0xfdd90000 0x0 0x1000>; + + power: power-controller { + compatible = "rockchip,rk3568-power-controller"; + #power-domain-cells = <1>; + #address-cells = <1>; + #size-cells = <0>; + + /* These power domains are grouped by VD_GPU */ + power-domain@RK3568_PD_GPU { + reg = <RK3568_PD_GPU>; + clocks = <&cru ACLK_GPU_PRE>, + <&cru PCLK_GPU_PRE>; + pm_qos = <&qos_gpu>; + #power-domain-cells = <0>; + }; + + /* These power domains are grouped by VD_LOGIC */ + power-domain@RK3568_PD_VI { + reg = <RK3568_PD_VI>; + clocks = <&cru HCLK_VI>, + <&cru PCLK_VI>; + pm_qos = <&qos_isp>, + <&qos_vicap0>, + <&qos_vicap1>; + #power-domain-cells = <0>; + }; + + power-domain@RK3568_PD_VO { + reg = <RK3568_PD_VO>; + clocks = <&cru HCLK_VO>, + <&cru PCLK_VO>, + <&cru ACLK_VOP_PRE>; + pm_qos = <&qos_hdcp>, + <&qos_vop_m0>, + <&qos_vop_m1>; + #power-domain-cells = <0>; + }; + + power-domain@RK3568_PD_RGA { + reg = <RK3568_PD_RGA>; + clocks = <&cru HCLK_RGA_PRE>, + <&cru PCLK_RGA_PRE>; + pm_qos = <&qos_ebc>, + <&qos_iep>, + <&qos_jpeg_dec>, + <&qos_jpeg_enc>, + <&qos_rga_rd>, + <&qos_rga_wr>; + #power-domain-cells = <0>; + }; + + power-domain@RK3568_PD_VPU { + reg = <RK3568_PD_VPU>; + clocks = <&cru HCLK_VPU_PRE>; + pm_qos = <&qos_vpu>; + #power-domain-cells = <0>; + }; + + power-domain@RK3568_PD_RKVDEC { + clocks = <&cru HCLK_RKVDEC_PRE>; + reg = <RK3568_PD_RKVDEC>; + pm_qos = <&qos_rkvdec>; +
#power-domain-cells = <0>; + }; + + power-domain@RK3568_PD_RKVENC { + reg = <RK3568_PD_RKVENC>; + clocks = <&cru HCLK_RKVENC_PRE>; + pm_qos = <&qos_rkvenc_rd_m0>, + <&qos_rkvenc_rd_m1>, + <&qos_rkvenc_wr_m0>; + #power-domain-cells = <0>; + }; + + power-domain@RK3568_PD_PIPE { + reg = <RK3568_PD_PIPE>; + clocks = <&cru PCLK_PIPE>; + pm_qos = <&qos_pcie2x1>, + <&qos_pcie3x1>, + <&qos_pcie3x2>, + <&qos_sata0>, + <&qos_sata1>, + <&qos_sata2>, + <&qos_usb3_0>, + <&qos_usb3_1>; + #power-domain-cells = <0>; + }; + }; + }; + sdmmc2: mmc@fe000000 { compatible = "rockchip,rk3568-dw-mshc", "rockchip,rk3288-dw-mshc"; reg = <0x0 0xfe000000 0x0 0x4000>; @@ -271,6 +365,141 @@ status = "disabled"; }; + qos_gpu: qos@fe128000 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe128000 0x0 0x20>; + }; + + qos_rkvenc_rd_m0: qos@fe138080 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe138080 0x0 0x20>; + }; + + qos_rkvenc_rd_m1: qos@fe138100 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe138100 0x0 0x20>; + }; + + qos_rkvenc_wr_m0: qos@fe138180 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe138180 0x0 0x20>; + }; + + qos_isp: qos@fe148000 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe148000 0x0 0x20>; + }; + + qos_vicap0: qos@fe148080 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe148080 0x0 0x20>; + }; + + qos_vicap1: qos@fe148100 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe148100 0x0 0x20>; + }; + + qos_vpu: qos@fe150000 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe150000 0x0 0x20>; + }; + + qos_ebc: qos@fe158000 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe158000 0x0 0x20>; + }; + + qos_iep: qos@fe158100 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe158100 0x0 0x20>; + }; + + qos_jpeg_dec: qos@fe158180 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe158180 0x0 0x20>; + }; + + qos_jpeg_enc:
qos@fe158200 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe158200 0x0 0x20>; + }; + + qos_rga_rd: qos@fe158280 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe158280 0x0 0x20>; + }; + + qos_rga_wr: qos@fe158300 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe158300 0x0 0x20>; + }; + + qos_npu: qos@fe180000 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe180000 0x0 0x20>; + }; + + qos_pcie2x1: qos@fe190000 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe190000 0x0 0x20>; + }; + + qos_pcie3x1: qos@fe190080 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe190080 0x0 0x20>; + }; + + qos_pcie3x2: qos@fe190100 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe190100 0x0 0x20>; + }; + + qos_sata0: qos@fe190200 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe190200 0x0 0x20>; + }; + + qos_sata1: qos@fe190280 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe190280 0x0 0x20>; + }; + + qos_sata2: qos@fe190300 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe190300 0x0 0x20>; + }; + + qos_usb3_0: qos@fe190380 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe190380 0x0 0x20>; + }; + + qos_usb3_1: qos@fe190400 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe190400 0x0 0x20>; + }; + + qos_rkvdec: qos@fe198000 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe198000 0x0 0x20>; + }; + + qos_hdcp: qos@fe1a8000 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe1a8000 0x0 0x20>; + }; + + qos_vop_m0: qos@fe1a8080 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe1a8080 0x0 0x20>; + }; + + qos_vop_m1: qos@fe1a8100 { + compatible = "rockchip,rk3568-qos", "syscon"; + reg = <0x0 0xfe1a8100 0x0 0x20>; + }; + sdmmc0: mmc@fe2b0000 { compatible = "rockchip,rk3568-dw-mshc", "rockchip,rk3288-dw-mshc"; reg = <0x0 0xfe2b0000 
0x0 0x4000>; From 773b358cb274a6cededc1f597ba70e75c0676752 Mon Sep 17 00:00:00 2001 From: Levin Du Date: Fri, 9 Jul 2021 16:01:25 +0800 Subject: [PATCH 137/851] dt-bindings: add doc for Firefly ROC-RK3328-PC Add devicetree binding documentation for the Firefly ROC-RK3328-PC. Signed-off-by: Levin Du Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210709080126.17045-2-djw@t-chip.com.cn Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/arm/rockchip.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml index 6546b015fc623..7ef902f45b382 100644 --- a/Documentation/devicetree/bindings/arm/rockchip.yaml +++ b/Documentation/devicetree/bindings/arm/rockchip.yaml @@ -115,6 +115,11 @@ properties: - const: firefly,roc-rk3328-cc - const: rockchip,rk3328 + - description: Firefly ROC-RK3328-PC + items: + - const: firefly,roc-rk3328-pc + - const: rockchip,rk3328 + - description: Firefly ROC-RK3399-PC items: - enum: From ffe7ee186baafe3855d117a5d6e7183e8e77795d Mon Sep 17 00:00:00 2001 From: Levin Du Date: Fri, 9 Jul 2021 16:01:26 +0800 Subject: [PATCH 138/851] arm64: dts: rockchip: add support for Firefly ROC-RK3328-PC ROC-RK3328-PC is the board inside the portable Firefly Station M1 Geek PC. As a redesign after the ROC-RK3328-CC, it uses TypeC as power input and OTG port, embedded with eMMC 5.1 storage and a SDIO WiFi/BT chip (RTL8723DS). 
- Rockchip RK3328 SoC - 2/4GB LPDDR3 RAM - 16/32/64/128GB eMMC 5.1 - TF card slot - USB 3.0 Port x 1, USB 2.0 Port x 1, TypeC Port x 1 (Power/OTG) - HDMI - Gigabit Ethernet - WiFi: RTL8723DS - Audio: RK3328 - Key: Power, Reset, Recovery - LED: POWER, USER - IR Signed-off-by: Levin Du Link: https://lore.kernel.org/r/20210709080126.17045-3-djw@t-chip.com.cn Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/Makefile | 1 + .../arm64/boot/dts/rockchip/rk3328-roc-pc.dts | 110 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 arch/arm64/boot/dts/rockchip/rk3328-roc-pc.dts diff --git a/arch/arm64/boot/dts/rockchip/Makefile b/arch/arm64/boot/dts/rockchip/Makefile index 7fdb41de01ec9..46652b6d7c4d8 100644 --- a/arch/arm64/boot/dts/rockchip/Makefile +++ b/arch/arm64/boot/dts/rockchip/Makefile @@ -13,6 +13,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-nanopi-r2s.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-rock64.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-rock-pi-e.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-roc-cc.dtb +dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3328-roc-pc.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3368-evb-act8846.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3368-geekbox.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3368-lion-haikou.dtb diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-pc.dts new file mode 100644 index 0000000000000..e3e3984d01d40 --- /dev/null +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-pc.dts @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +// Copyright (c) 2021 T-Chip Intelligent Technology Co., Ltd + +/dts-v1/; + +#include + +#include "rk3328-roc-cc.dts" + +/ { + model = "Firefly ROC-RK3328-PC"; + compatible = "firefly,roc-rk3328-pc", "rockchip,rk3328"; + + adc-keys { + compatible = "adc-keys"; + io-channels = <&saradc 0>; + io-channel-names = "buttons"; + keyup-threshold-microvolt = <1750000>; + + /* This button is unpopulated out of the factory. 
*/ + button-recovery { + label = "Recovery"; + linux,code = ; + press-threshold-microvolt = <10000>; + }; + }; + + ir-receiver { + compatible = "gpio-ir-receiver"; + gpios = <&gpio2 RK_PA2 GPIO_ACTIVE_LOW>; + linux,rc-map-name = "rc-khadas"; + pinctrl-names = "default"; + pinctrl-0 = <&ir_int>; + }; + + sdio_pwrseq: sdio-pwrseq { + compatible = "mmc-pwrseq-simple"; + pinctrl-names = "default"; + pinctrl-0 = <&wifi_en>, <&wifi_host_wake>; + reset-gpios = <&gpio3 RK_PB0 GPIO_ACTIVE_LOW>; + }; +}; + +&codec { + mute-gpios = <&grf_gpio 0 GPIO_ACTIVE_LOW>; +}; + +&gpu { + mali-supply = <&vdd_logic>; +}; + +&pinctrl { + ir { + ir_int: ir-int { + rockchip,pins = <2 RK_PA2 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; + + sdmmcio { + sdio_per_pin: sdio-per-pin { + rockchip,pins = <0 RK_PD3 RK_FUNC_GPIO &pcfg_pull_down>; + }; + }; + + wifi { + wifi_en: wifi-en { + rockchip,pins = <3 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>; + }; + + wifi_host_wake: wifi-host-wake { + rockchip,pins = <3 RK_PA1 RK_FUNC_GPIO &pcfg_pull_none_4ma>; + }; + + bt_rst: bt-rst { + rockchip,pins = <1 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>; + }; + + bt_en: bt-en { + rockchip,pins = <1 RK_PD2 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; +}; + +&pmic_int_l { + rockchip,pins = <0 RK_PA2 RK_FUNC_GPIO &pcfg_pull_up>; +}; + +&rk805 { + interrupt-parent = <&gpio0>; + interrupts = ; +}; + +&saradc { + vref-supply = <&vcc_18>; + status = "okay"; +}; + +&usb20_host_drv { + rockchip,pins = <0 RK_PA0 RK_FUNC_GPIO &pcfg_pull_up>; +}; + +&vcc_host1_5v { + gpio = <&gpio0 RK_PA0 GPIO_ACTIVE_HIGH>; +}; + +&vcc_sdio { + gpios = <&gpio0 RK_PD3 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&sdio_per_pin>; +}; From 4011a797dac7703ecda083b3be25d891f34e9040 Mon Sep 17 00:00:00 2001 From: Levin Du Date: Mon, 28 Jun 2021 11:54:01 +0800 Subject: [PATCH 139/851] dt-bindings: add doc for Firefly ROC-RK3399-PC-PLUS Add devicetree binding documentation for the Firefly ROC-RK3399-PC-PLUS. 
Signed-off-by: Levin Du Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210628035402.16812-2-djw@t-chip.com.cn Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/arm/rockchip.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml index 7ef902f45b382..ce7785fe3598a 100644 --- a/Documentation/devicetree/bindings/arm/rockchip.yaml +++ b/Documentation/devicetree/bindings/arm/rockchip.yaml @@ -127,6 +127,12 @@ properties: - firefly,roc-rk3399-pc-mezzanine - const: rockchip,rk3399 + - description: Firefly ROC-RK3399-PC-PLUS + items: + - enum: + - firefly,roc-rk3399-pc-plus + - const: rockchip,rk3399 + - description: FriendlyElec NanoPi R2S items: - const: friendlyarm,nanopi-r2s From 95e18f24ab427401b7286d6d24b5a8b0621fab2e Mon Sep 17 00:00:00 2001 From: Levin Du Date: Mon, 28 Jun 2021 11:54:02 +0800 Subject: [PATCH 140/851] arm64: dts: rockchip: add support for Firefly ROC-RK3399-PC-PLUS ROC-RK3399-PC-PLUS is the board inside the portable Firefly Station P1 Geek PC. As a redesign after the ROC-RK3399-PC, it uses DC-12V as power input and spares a USB 3 host port. It is also equipped with a USB WiFi chip and audio codec without the mezzanine board. 
- Rockchip RK3399 SoC - 4GB LPDDR4 RAM - 16MB SPI-Flash - eMMC slot - TF card slot - USB 3.0 Port x 1, USB 2.0 Port x 1, TypeC Port x 1 - HDMI - Gigabit Ethernet - WiFi: RTL8723DU - Audio: ES8388 - Key: Recovery - LED: WORK, DIY - IR Signed-off-by: Kongxin Deng Signed-off-by: Levin Du Link: https://lore.kernel.org/r/20210628035402.16812-3-djw@t-chip.com.cn Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/Makefile | 1 + .../boot/dts/rockchip/rk3399-roc-pc-plus.dts | 218 ++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts diff --git a/arch/arm64/boot/dts/rockchip/Makefile b/arch/arm64/boot/dts/rockchip/Makefile index 46652b6d7c4d8..2890756c294c8 100644 --- a/arch/arm64/boot/dts/rockchip/Makefile +++ b/arch/arm64/boot/dts/rockchip/Makefile @@ -43,6 +43,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-pinebook-pro.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-puma-haikou.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc-mezzanine.dtb +dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc-plus.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4a.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4b.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4c.dtb diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts new file mode 100644 index 0000000000000..5a2661ae0131c --- /dev/null +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2017 T-Chip Intelligent Technology Co., Ltd + */ + +/dts-v1/; +#include "rk3399-roc-pc.dtsi" + +/* + * Notice: + * 1. rk3399-roc-pc-plus is powered by dc_12v directly. + * 2. rk3399-roc-pc-plus has only vcc_bus_typec0 in schematic, which corresponds + * to vcc_vbus_typec1 in rk3399-roc-pc. + * For simplicity, keep the node name of vcc_vbus_typec1. + * 3. 
vcc5v0_host is actually 2 regulators (host0, 1) controlled by the same gpio. + */ + +/delete-node/ &fusb1; +/delete-node/ &hub_rst; +/delete-node/ &mp8859; +/delete-node/ &vcc_sys_en; +/delete-node/ &vcc_vbus_typec0; +/delete-node/ &yellow_led; + +/ { + model = "Firefly ROC-RK3399-PC-PLUS Board"; + compatible = "firefly,roc-rk3399-pc-plus", "rockchip,rk3399"; + + dc_12v: dc-12v { + compatible = "regulator-fixed"; + regulator-name = "dc_12v"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; + }; + + es8388-sound { + compatible = "simple-audio-card"; + pinctrl-names = "default"; + pinctrl-0 = <&hp_det_pin>; + simple-audio-card,name = "rockchip,es8388-codec"; + simple-audio-card,format = "i2s"; + simple-audio-card,mclk-fs = <256>; + simple-audio-card,widgets = + "Microphone", "Mic Jack", + "Headphone", "Headphones"; + simple-audio-card,routing = + "LINPUT1", "Mic Jack", + "Headphone Amp INL", "LOUT2", + "Headphone Amp INR", "ROUT2", + "Headphones", "Headphone Amp OUTL", + "Headphones", "Headphone Amp OUTR"; + simple-audio-card,hp-det-gpio = <&gpio2 RK_PA6 GPIO_ACTIVE_HIGH>; + simple-audio-card,aux-devs = <&headphones_amp>; + simple-audio-card,pin-switches = "Headphones"; + + simple-audio-card,codec { + sound-dai = <&es8388>; + }; + + simple-audio-card,cpu { + sound-dai = <&i2s1>; + }; + }; + + gpio-fan { + #cooling-cells = <2>; + compatible = "gpio-fan"; + gpio-fan,speed-map = <0 0 3000 1>; + gpios = <&gpio1 RK_PA1 GPIO_ACTIVE_HIGH>; + }; + + /delete-node/ gpio-keys; + + /* not amplifier, used as switcher only */ + headphones_amp: headphones-amp { + compatible = "simple-audio-amplifier"; + pinctrl-names = "default"; + pinctrl-0 = <&ear_ctl_pin>; + enable-gpios = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>; + sound-name-prefix = "Headphone Amp"; + VCC-supply = <&vcca3v0_codec>; + }; + + ir-receiver { + linux,rc-map-name = "rc-khadas"; + }; + + leds { + pinctrl-names = "default"; + pinctrl-0 = 
<&work_led_pin>, <&diy_led_pin>; + }; +}; + +&fusb0 { + vbus-supply = <&vcc_vbus_typec1>; +}; + +&i2c0 { + hym8563: hym8563@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + interrupt-parent = <&gpio0>; + interrupts = ; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "xin32k"; + pinctrl-names = "default"; + pinctrl-0 = <&hym8563_int>; + }; +}; + +&i2c1 { + es8388: es8388@11 { + compatible = "everest,es8388"; + reg = <0x11>; + clock-names = "mclk"; + clocks = <&cru SCLK_I2S_8CH_OUT>; + #sound-dai-cells = <0>; + }; +}; + +/* <4 RK_PA0 1 &pcfg_pull_none> is used as i2s_8ch_mclk_pin */ +&i2s0_8ch_bus { + rockchip,pins = + <3 RK_PD0 1 &pcfg_pull_none>, + <3 RK_PD1 1 &pcfg_pull_none>, + <3 RK_PD2 1 &pcfg_pull_none>, + <3 RK_PD3 1 &pcfg_pull_none>, + <3 RK_PD4 1 &pcfg_pull_none>, + <3 RK_PD5 1 &pcfg_pull_none>, + <3 RK_PD6 1 &pcfg_pull_none>, + <3 RK_PD7 1 &pcfg_pull_none>; +}; + +&i2s1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2s_8ch_mclk_pin>, <&i2s1_2ch_bus>; + rockchip,playback-channels = <2>; + rockchip,capture-channels = <2>; + status = "okay"; +}; + +&pinctrl { + es8388 { + ear_ctl_pin: ear-ctl-pin { + rockchip,pins = <0 RK_PA2 RK_FUNC_GPIO &pcfg_output_high>; + }; + + hp_det_pin: hp-det-pin { + rockchip,pins = <2 RK_PA6 RK_FUNC_GPIO &pcfg_pull_down>; + }; + }; + + hym8563 { + hym8563_int: hym8563-int { + rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_up>; + }; + }; + + i2s1 { + i2s_8ch_mclk_pin: i2s-8ch-mclk-pin { + rockchip,pins = <4 RK_PA0 1 &pcfg_pull_none>; + }; + }; +}; + +&u2phy0 { + status = "okay"; + + u2phy0_otg: otg-port { + phy-supply = <&vcc_vbus_typec1>; + status = "okay"; + }; + + u2phy0_host: host-port { + phy-supply = <&vcc5v0_host>; + status = "okay"; + }; +}; + +&u2phy1 { + status = "okay"; + + u2phy1_otg: otg-port { + phy-supply = <&vcc5v0_host>; + status = "okay"; + }; + + u2phy1_host: host-port { + phy-supply = <&vcc5v0_host>; + status = "okay"; + }; +}; + +&uart0 { + pinctrl-names = "default"; + 
pinctrl-0 = <&uart0_xfer &uart0_cts &uart0_rts>; + status = "okay"; +}; + +&usbdrd_dwc3_0 { + dr_mode = "host"; + status = "okay"; +}; + +&vcc_sys { + /* vcc_sys is fixed, not controlled by any gpio */ + /delete-property/ gpio; + /delete-property/ pinctrl-names; + /delete-property/ pinctrl-0; +}; + +&vcc5v0_host { + pinctrl-names = "default"; + pinctrl-0 = <&vcc5v0_host_en>; +}; From a1536b7fd2d75a6836094cfce513807f0b6f8b2c Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Wed, 14 Jul 2021 21:56:29 -0500 Subject: [PATCH 141/851] arm64: dts: rockchip: set stdout-path on helios64 set the default output path to uart2 Signed-off-by: Dennis Gilmore Link: https://lore.kernel.org/r/20210715025635.70452-2-dgilmore@redhat.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts index 738cfd21df3ef..d911a9a4f0f0f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts @@ -43,6 +43,10 @@ vin-supply = <&vcc3v3_sys_s3>; }; + chosen { + stdout-path = "serial2:1500000n8"; + }; + clkin_gmac: external-gmac-clock { compatible = "fixed-clock"; clock-frequency = <125000000>; From 0ead44ed102d6391c9535395a99164fe44038b21 Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Wed, 14 Jul 2021 21:56:30 -0500 Subject: [PATCH 142/851] arm64: dts: rockchip: add SPI support to helios64 add SPI support for the helios64, u-boot can live in spi1, spi2 is user accessible, spi5 is for the sata controller rom. 
https://wiki.kobol.io/helios64/spi/ Signed-off-by: Dennis Gilmore Link: https://lore.kernel.org/r/20210715025635.70452-3-dgilmore@redhat.com Signed-off-by: Heiko Stuebner --- .../dts/rockchip/rk3399-kobol-helios64.dts | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts index d911a9a4f0f0f..b275b47902115 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts @@ -21,6 +21,9 @@ aliases { mmc0 = &sdmmc; mmc1 = &sdhci; + spi1 = &spi1; + spi2 = &spi2; + spi5 = &spi5; }; avdd_0v9_s0: avdd-0v9-s0 { @@ -473,6 +476,27 @@ status = "okay"; }; +&spi1 { + status = "okay"; + + spiflash: flash@0 { + compatible = "jedec,spi-nor"; + reg = <0x0>; + spi-max-frequency = <25000000>; + status = "okay"; + m25p,fast-read; + }; +}; + +/* UEXT connector */ +&spi2 { + status = "okay"; +}; + +&spi5 { + status = "okay"; +}; + &tcphy1 { /* phy for &usbdrd_dwc3_1 */ status = "okay"; From ae2a8a1ed3283ebb5abe21be04773cebada16b9c Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Wed, 14 Jul 2021 21:56:31 -0500 Subject: [PATCH 143/851] arm64: dts: rockchip: enable tsadc on helios64 Enable the tsadc thermal controller on the helios64 Signed-off-by: Dennis Gilmore Link: https://lore.kernel.org/r/20210715025635.70452-4-dgilmore@redhat.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts index b275b47902115..63c7681843daa 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts @@ -502,6 +502,14 @@ status = "okay"; }; +&tsadc { + /* tshut mode 0:CRU 1:GPIO */ + rockchip,hw-tshut-mode = <1>; + /* tshut polarity 0:LOW 1:HIGH 
*/ + rockchip,hw-tshut-polarity = <1>; + status = "okay"; +}; + &u2phy1 { status = "okay"; From ba356289261975ff061580ff30d1026448b9ffd2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 12 Jul 2021 15:52:14 +0100 Subject: [PATCH 144/851] mtd: devices: mchp48l640: Fix memory leak on cmd The allocation for cmd is not being kfree'd on the return leading to a memory leak. Fix this by kfree'ing it. Addresses-Coverity: ("Resource leak") Fixes: 88d125026753 ("mtd: devices: add support for microchip 48l640 EERAM") Signed-off-by: Colin Ian King Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210712145214.101377-1-colin.king@canonical.com --- drivers/mtd/devices/mchp48l640.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c index ad66b5aaf4e9b..99400d0fb8c1e 100644 --- a/drivers/mtd/devices/mchp48l640.c +++ b/drivers/mtd/devices/mchp48l640.c @@ -255,6 +255,7 @@ static int mchp48l640_read_page(struct mtd_info *mtd, loff_t from, size_t len, if (!ret) *retlen += len; + kfree(cmd); return ret; fail: From b9ed06f4c665e3074dfca0a238d581a20d52baa7 Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Wed, 14 Jul 2021 10:38:31 +0700 Subject: [PATCH 145/851] i2c: aspeed: Add slave_enable() to toggle slave mode The Slave needs time to prepare the response data before the Master can request it via a read transaction. However, there is no mechanism for the i2c-aspeed Slave to notify the Master that it needs more time to process, and this makes the Master side time out when trying to get the response. This commit introduces the slave_enable() callback in struct i2c_algorithm so that the Slave can temporarily stop Slave mode while working on the response and re-enable it when the response data is ready. 
Signed-off-by: Quan Nguyen Message-Id: <20210714033833.11640-2-quan@os.amperecomputing.com> Signed-off-by: Corey Minyard --- drivers/i2c/busses/i2c-aspeed.c | 20 ++++++++++++++++++++ include/linux/i2c.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 67e8b97c0c950..a6a19dc8a5016 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -785,6 +785,25 @@ static int aspeed_i2c_unreg_slave(struct i2c_client *client) return 0; } + +static int aspeed_i2c_slave_enable(struct i2c_client *client, bool enable) +{ + struct aspeed_i2c_bus *bus = i2c_get_adapdata(client->adapter); + u32 func_ctrl_reg_val; + + if (!bus->slave) + return -EINVAL; + + /* Toggle slave mode. */ + func_ctrl_reg_val = readl(bus->base + ASPEED_I2C_FUN_CTRL_REG); + if (enable) + func_ctrl_reg_val |= ASPEED_I2CD_SLAVE_EN; + else + func_ctrl_reg_val &= ~ASPEED_I2CD_SLAVE_EN; + writel(func_ctrl_reg_val, bus->base + ASPEED_I2C_FUN_CTRL_REG); + + return 0; +} #endif /* CONFIG_I2C_SLAVE */ static const struct i2c_algorithm aspeed_i2c_algo = { @@ -793,6 +812,7 @@ static const struct i2c_algorithm aspeed_i2c_algo = { #if IS_ENABLED(CONFIG_I2C_SLAVE) .reg_slave = aspeed_i2c_reg_slave, .unreg_slave = aspeed_i2c_unreg_slave, + .slave_enable = aspeed_i2c_slave_enable, #endif /* CONFIG_I2C_SLAVE */ }; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3eb60a2e9e618..8c1765aa7e3f9 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -520,6 +520,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info, * from the ``I2C_FUNC_*`` flags. 
* @reg_slave: Register given client to I2C slave mode of this adapter * @unreg_slave: Unregister given client from I2C slave mode of this adapter + * @slave_enable: Toggle enable slave mode for given client of this adapter * * The following structs are for those who like to implement new bus drivers: * i2c_algorithm is the interface to a class of hardware solutions which can @@ -557,6 +558,7 @@ struct i2c_algorithm { #if IS_ENABLED(CONFIG_I2C_SLAVE) int (*reg_slave)(struct i2c_client *client); int (*unreg_slave)(struct i2c_client *client); + int (*slave_enable)(struct i2c_client *client, bool enable); #endif }; From 007888f365c93283181691c29ef2693616df884e Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Wed, 14 Jul 2021 10:38:32 +0700 Subject: [PATCH 146/851] ipmi: ssif_bmc: Add SSIF BMC driver The SMBus system interface (SSIF) IPMI BMC driver can be used on the management (BMC) side to perform in-band IPMI communication with the host. Signed-off-by: Quan Nguyen Message-Id: <20210714033833.11640-3-quan@os.amperecomputing.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/Kconfig | 11 + drivers/char/ipmi/Makefile | 1 + drivers/char/ipmi/ssif_bmc.c | 781 +++++++++++++++++++++++++++++++++++ drivers/char/ipmi/ssif_bmc.h | 106 +++++ 4 files changed, 899 insertions(+) create mode 100644 drivers/char/ipmi/ssif_bmc.c create mode 100644 drivers/char/ipmi/ssif_bmc.h diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig index 249b31197eeae..e09a470ab2daa 100644 --- a/drivers/char/ipmi/Kconfig +++ b/drivers/char/ipmi/Kconfig @@ -160,6 +160,17 @@ config ASPEED_BT_IPMI_BMC found on Aspeed SOCs (AST2400 and AST2500). The driver implements the BMC side of the BT interface. +config SSIF_IPMI_BMC + tristate "SSIF IPMI BMC driver" + select I2C + select I2C_SLAVE + help + This enables the IPMI SMBus system interface (SSIF) at the + management (BMC) side. + + The driver implements the BMC side of the SMBus system + interface (SSIF). 
+ config IPMB_DEVICE_INTERFACE tristate 'IPMB Interface handler' depends on I2C diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile index 84f47d18007fb..a93c09dad22a0 100644 --- a/drivers/char/ipmi/Makefile +++ b/drivers/char/ipmi/Makefile @@ -29,3 +29,4 @@ obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o obj-$(CONFIG_ASPEED_KCS_IPMI_BMC) += kcs_bmc_aspeed.o obj-$(CONFIG_NPCM7XX_KCS_IPMI_BMC) += kcs_bmc_npcm7xx.o obj-$(CONFIG_IPMB_DEVICE_INTERFACE) += ipmb_dev_int.o +obj-$(CONFIG_SSIF_IPMI_BMC) += ssif_bmc.o diff --git a/drivers/char/ipmi/ssif_bmc.c b/drivers/char/ipmi/ssif_bmc.c new file mode 100644 index 0000000000000..b15c05622e721 --- /dev/null +++ b/drivers/char/ipmi/ssif_bmc.c @@ -0,0 +1,781 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * The driver for BMC side of SSIF interface + * + * Copyright (c) 2021, Ampere Computing LLC + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ssif_bmc.h" + +static const char *state_to_string(enum ssif_state state) +{ + switch (state) { + case SSIF_READY: + return "SSIF_READY"; + case SSIF_START: + return "SSIF_START"; + case SSIF_SMBUS_CMD: + return "SSIF_SMBUS_CMD"; + case SSIF_REQ_RECVING: + return "SSIF_REQ_RECVING"; + case SSIF_RES_SENDING: + return "SSIF_RES_SENDING"; + case SSIF_BAD_SMBUS: + return "SSIF_BAD_SMBUS"; + default: + return "SSIF_STATE_UNKNOWN"; + } +} + +/* Handle SSIF message that will be sent to user */ +static ssize_t ssif_bmc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + struct ssif_bmc_ctx *ssif_bmc = to_ssif_bmc(file); + struct ssif_msg msg; + unsigned long flags; + ssize_t ret; + + spin_lock_irqsave(&ssif_bmc->lock, flags); + while (!ssif_bmc->request_available) { + spin_unlock_irqrestore(&ssif_bmc->lock, flags); + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + ret = wait_event_interruptible(ssif_bmc->wait_queue, + ssif_bmc->request_available); + if (ret) + return ret; + 
spin_lock_irqsave(&ssif_bmc->lock, flags); + } + + if (count < min_t(ssize_t, ssif_msg_len(&ssif_bmc->request), sizeof(struct ssif_msg))) { + spin_unlock_irqrestore(&ssif_bmc->lock, flags); + ret = -EINVAL; + } else { + count = min_t(ssize_t, ssif_msg_len(&ssif_bmc->request), sizeof(struct ssif_msg)); + memcpy(&msg, &ssif_bmc->request, count); + ssif_bmc->request_available = false; + spin_unlock_irqrestore(&ssif_bmc->lock, flags); + + ret = copy_to_user(buf, &msg, count); + } + + return (ret < 0) ? ret : count; +} + +/* Handle SSIF message that is written by user */ +static ssize_t ssif_bmc_write(struct file *file, const char __user *buf, size_t count, + loff_t *ppos) +{ + struct ssif_bmc_ctx *ssif_bmc = to_ssif_bmc(file); + struct ssif_msg msg; + unsigned long flags; + ssize_t ret; + + if (count > sizeof(struct ssif_msg)) + return -EINVAL; + + ret = copy_from_user(&msg, buf, count); + if (ret) + return ret; + + if (!msg.len || count < ssif_msg_len(&msg)) + return -EINVAL; + + spin_lock_irqsave(&ssif_bmc->lock, flags); + while (ssif_bmc->response_in_progress) { + spin_unlock_irqrestore(&ssif_bmc->lock, flags); + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + ret = wait_event_interruptible(ssif_bmc->wait_queue, + !ssif_bmc->response_in_progress); + if (ret) + return ret; + spin_lock_irqsave(&ssif_bmc->lock, flags); + } + + memcpy(&ssif_bmc->response, &msg, count); + ssif_bmc->is_singlepart_read = (ssif_msg_len(&msg) <= MAX_PAYLOAD_PER_TRANSACTION + 1); + ssif_bmc->response_in_progress = true; + + if (ssif_bmc->client->adapter->algo->slave_enable) + ret = ssif_bmc->client->adapter->algo->slave_enable(ssif_bmc->client, true); + + spin_unlock_irqrestore(&ssif_bmc->lock, flags); + + return (ret < 0) ? 
ret : count; +} + +static int ssif_bmc_open(struct inode *inode, struct file *file) +{ + struct ssif_bmc_ctx *ssif_bmc = to_ssif_bmc(file); + int ret = 0; + + spin_lock_irq(&ssif_bmc->lock); + if (!ssif_bmc->running) + ssif_bmc->running = 1; + else + ret = -EBUSY; + spin_unlock_irq(&ssif_bmc->lock); + + return ret; +} + +static unsigned int ssif_bmc_poll(struct file *file, poll_table *wait) +{ + struct ssif_bmc_ctx *ssif_bmc = to_ssif_bmc(file); + unsigned int mask = 0; + + poll_wait(file, &ssif_bmc->wait_queue, wait); + + spin_lock_irq(&ssif_bmc->lock); + /* The request is available, userspace application can get the request */ + if (ssif_bmc->request_available) + mask |= POLLIN; + + spin_unlock_irq(&ssif_bmc->lock); + + return mask; +} + +static int ssif_bmc_release(struct inode *inode, struct file *file) +{ + struct ssif_bmc_ctx *ssif_bmc = to_ssif_bmc(file); + + spin_lock_irq(&ssif_bmc->lock); + ssif_bmc->running = 0; + spin_unlock_irq(&ssif_bmc->lock); + + return 0; +} + +/* + * System calls to device interface for user apps + */ +static const struct file_operations ssif_bmc_fops = { + .owner = THIS_MODULE, + .open = ssif_bmc_open, + .read = ssif_bmc_read, + .write = ssif_bmc_write, + .release = ssif_bmc_release, + .poll = ssif_bmc_poll, +}; + +/* Called with ssif_bmc->lock held. */ +static void complete_response(struct ssif_bmc_ctx *ssif_bmc) +{ + /* Invalidate response in buffer to denote it having been sent. */ + ssif_bmc->response.len = 0; + ssif_bmc->response_in_progress = false; + ssif_bmc->nbytes_processed = 0; + ssif_bmc->remain_len = 0; + wake_up_all(&ssif_bmc->wait_queue); +} + +/* Called with ssif_bmc->lock held. */ +static void handle_request(struct ssif_bmc_ctx *ssif_bmc) +{ + if (ssif_bmc->client->adapter->algo->slave_enable) + ssif_bmc->client->adapter->algo->slave_enable(ssif_bmc->client, false); + + /* Request message is available to process */ + ssif_bmc->request_available = true; + /* + * This is the new READ request. 
+ */ + wake_up_all(&ssif_bmc->wait_queue); +} + +static void set_multipart_response_buffer(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + u8 response_len = 0; + int idx = 0; + u8 data_len; + + data_len = ssif_bmc->response.len; + switch (ssif_bmc->smbus_cmd) { + case SSIF_IPMI_MULTIPART_READ_START: + /* + * Read Start length is 32 bytes. + * Read Start transfer first 30 bytes of IPMI response + * and 2 special code 0x00, 0x01. + */ + *val = MAX_PAYLOAD_PER_TRANSACTION; + ssif_bmc->remain_len = data_len - MAX_IPMI_DATA_PER_START_TRANSACTION; + ssif_bmc->block_num = 0; + + ssif_bmc->response_buf[idx++] = 0x00; /* Start Flag */ + ssif_bmc->response_buf[idx++] = 0x01; /* Start Flag */ + ssif_bmc->response_buf[idx++] = ssif_bmc->response.netfn_lun; + ssif_bmc->response_buf[idx++] = ssif_bmc->response.cmd; + ssif_bmc->response_buf[idx++] = ssif_bmc->response.payload[0]; + + response_len = MAX_PAYLOAD_PER_TRANSACTION - idx; + + memcpy(&ssif_bmc->response_buf[idx], &ssif_bmc->response.payload[1], + response_len); + break; + + case SSIF_IPMI_MULTIPART_READ_MIDDLE: + /* + * IPMI READ Middle or READ End messages can carry up to 31 bytes + * IPMI data plus block number byte. 
+ */ + if (ssif_bmc->remain_len < MAX_IPMI_DATA_PER_MIDDLE_TRANSACTION) { + /* + * This is READ End message + * Return length is the remaining response data length + * plus block number + * Block number 0xFF is to indicate this is last message + * + */ + *val = ssif_bmc->remain_len + 1; + ssif_bmc->block_num = 0xFF; + ssif_bmc->response_buf[idx++] = ssif_bmc->block_num; + response_len = ssif_bmc->remain_len; + /* Clean the buffer */ + memset(&ssif_bmc->response_buf[idx], 0, MAX_PAYLOAD_PER_TRANSACTION - idx); + } else { + /* + * This is READ Middle message + * Response length is the maximum SMBUS transfer length + * Block number byte is incremented + * Return length is maximum SMBUS transfer length + */ + *val = MAX_PAYLOAD_PER_TRANSACTION; + ssif_bmc->remain_len -= MAX_IPMI_DATA_PER_MIDDLE_TRANSACTION; + response_len = MAX_IPMI_DATA_PER_MIDDLE_TRANSACTION; + ssif_bmc->response_buf[idx++] = ssif_bmc->block_num; + ssif_bmc->block_num++; + } + + memcpy(&ssif_bmc->response_buf[idx], + ssif_bmc->response.payload + 1 + ssif_bmc->nbytes_processed, + response_len); + break; + + default: + /* Do not expect to go to this case */ + dev_err(&ssif_bmc->client->dev, + "%s: Unexpected SMBus command 0x%x, aborting ...\n", + __func__, ssif_bmc->smbus_cmd); + ssif_bmc->aborting = true; + break; + } + + ssif_bmc->nbytes_processed += response_len; +} + +/* Process the IPMI response that will be read by master */ +static void handle_read_processed(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + u8 *buf; + u8 pec_len, addr, len; + u8 pec = 0; + + pec_len = ssif_bmc->pec_support ? 
1 : 0; + /* PEC - Start Read Address */ + addr = GET_8BIT_ADDR(ssif_bmc->client->addr); + pec = i2c_smbus_pec(pec, &addr, 1); + /* PEC - SSIF Command */ + pec = i2c_smbus_pec(pec, &ssif_bmc->smbus_cmd, 1); + /* PEC - Restart Write Address */ + addr = addr | 0x01; + pec = i2c_smbus_pec(pec, &addr, 1); + + if (ssif_bmc->is_singlepart_read) { + /* Single-part Read processing */ + buf = (u8 *)&ssif_bmc->response; + + if (ssif_bmc->response.len && ssif_bmc->msg_idx < ssif_bmc->response.len) { + ssif_bmc->msg_idx++; + *val = buf[ssif_bmc->msg_idx]; + } else if (ssif_bmc->response.len && ssif_bmc->msg_idx == ssif_bmc->response.len) { + ssif_bmc->msg_idx++; + *val = i2c_smbus_pec(pec, buf, ssif_msg_len(&ssif_bmc->response)); + } else { + *val = 0; + } + /* Invalidate response buffer to denote it is sent */ + if (ssif_bmc->msg_idx + 1 >= (ssif_msg_len(&ssif_bmc->response) + pec_len)) + complete_response(ssif_bmc); + } else { + /* Multi-part Read processing */ + switch (ssif_bmc->smbus_cmd) { + case SSIF_IPMI_MULTIPART_READ_START: + case SSIF_IPMI_MULTIPART_READ_MIDDLE: + buf = (u8 *)&ssif_bmc->response_buf; + *val = buf[ssif_bmc->msg_idx]; + ssif_bmc->msg_idx++; + break; + default: + /* Do not expect to go to this case */ + dev_err(&ssif_bmc->client->dev, + "%s: Unexpected SMBus command 0x%x, aborting ...\n", + __func__, ssif_bmc->smbus_cmd); + ssif_bmc->aborting = true; + break; + } + + len = (ssif_bmc->block_num == 0xFF) ? 
+ ssif_bmc->remain_len + 1 : MAX_PAYLOAD_PER_TRANSACTION; + if (ssif_bmc->msg_idx == (len + 1)) { + pec = i2c_smbus_pec(pec, &len, 1); + *val = i2c_smbus_pec(pec, ssif_bmc->response_buf, len); + } + /* Invalidate response buffer to denote last response is sent */ + if (ssif_bmc->block_num == 0xFF && + ssif_bmc->msg_idx > (ssif_bmc->remain_len + pec_len)) { + complete_response(ssif_bmc); + } + } +} + +static void handle_write_received(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + u8 *buf = (u8 *)&ssif_bmc->request; + + if (ssif_bmc->msg_idx >= sizeof(struct ssif_msg)) + return; + + switch (ssif_bmc->smbus_cmd) { + case SSIF_IPMI_SINGLEPART_WRITE: + buf[ssif_bmc->msg_idx - 1] = *val; + ssif_bmc->msg_idx++; + + break; + case SSIF_IPMI_MULTIPART_WRITE_START: + if (ssif_bmc->msg_idx == 1) + ssif_bmc->request.len = 0; + + fallthrough; + case SSIF_IPMI_MULTIPART_WRITE_MIDDLE: + /* The len should always be 32 */ + if (ssif_bmc->msg_idx == 1 && *val != MAX_PAYLOAD_PER_TRANSACTION) { + dev_warn(&ssif_bmc->client->dev, + "Warn: Invalid Multipart Write len, aborting ..."); + ssif_bmc->aborting = true; + } + + fallthrough; + case SSIF_IPMI_MULTIPART_WRITE_END: + /* Multi-part write, 2nd byte received is length */ + if (ssif_bmc->msg_idx == 1) { + if (*val > MAX_PAYLOAD_PER_TRANSACTION) { + dev_warn(&ssif_bmc->client->dev, + "Warn: Invalid Multipart Write End len, aborting ..."); + ssif_bmc->aborting = true; + } + + ssif_bmc->request.len += *val; + ssif_bmc->recv_len = *val; + + /* request len should never exceeded 255 bytes */ + if (ssif_bmc->request.len > 255) { + dev_warn(&ssif_bmc->client->dev, + "Warn: Invalid request len, aborting ..."); + ssif_bmc->aborting = true; + } + + } else { + buf[ssif_bmc->msg_idx - 1 + + ssif_bmc->request.len - ssif_bmc->recv_len] = *val; + } + + ssif_bmc->msg_idx++; + + break; + default: + /* Do not expect to go to this case */ + dev_err(&ssif_bmc->client->dev, + "%s: Unexpected SMBus command 0x%x, aborting ...\n", + __func__, 
ssif_bmc->smbus_cmd); + ssif_bmc->aborting = true; + break; + } +} + +static bool validate_request(struct ssif_bmc_ctx *ssif_bmc) +{ + u8 rpec = 0, cpec = 0; + bool ret = true; + u8 addr, index; + u8 *buf; + + buf = (u8 *)&ssif_bmc->request; + switch (ssif_bmc->smbus_cmd) { + case SSIF_IPMI_SINGLEPART_WRITE: + if ((ssif_bmc->msg_idx - 1) == ssif_msg_len(&ssif_bmc->request)) { + /* PEC is not included */ + ssif_bmc->pec_support = false; + ret = true; + goto exit; + } + + if ((ssif_bmc->msg_idx - 1) != (ssif_msg_len(&ssif_bmc->request) + 1)) { + dev_err(&ssif_bmc->client->dev, "Error: Unexpected length received %d\n", + ssif_msg_len(&ssif_bmc->request)); + ret = false; + goto exit; + } + + /* PEC is included */ + ssif_bmc->pec_support = true; + rpec = buf[ssif_bmc->msg_idx - 2]; + addr = GET_8BIT_ADDR(ssif_bmc->client->addr); + cpec = i2c_smbus_pec(cpec, &addr, 1); + cpec = i2c_smbus_pec(cpec, &ssif_bmc->smbus_cmd, 1); + cpec = i2c_smbus_pec(cpec, buf, ssif_msg_len(&ssif_bmc->request)); + if (rpec != cpec) { + dev_err(&ssif_bmc->client->dev, "Bad PEC 0x%02x vs. 
0x%02x\n", rpec, cpec); + ret = false; + } + + break; + case SSIF_IPMI_MULTIPART_WRITE_START: + case SSIF_IPMI_MULTIPART_WRITE_MIDDLE: + case SSIF_IPMI_MULTIPART_WRITE_END: + index = ssif_bmc->request.len - ssif_bmc->recv_len; + if ((ssif_bmc->msg_idx - 1 + index) == ssif_msg_len(&ssif_bmc->request)) { + /* PEC is not included */ + ssif_bmc->pec_support = false; + ret = true; + goto exit; + } + + if ((ssif_bmc->msg_idx - 1 + index) != (ssif_msg_len(&ssif_bmc->request) + 1)) { + dev_err(&ssif_bmc->client->dev, "Error: Unexpected length received %d\n", + ssif_msg_len(&ssif_bmc->request)); + ret = false; + goto exit; + } + + /* PEC is included */ + ssif_bmc->pec_support = true; + rpec = buf[ssif_bmc->msg_idx - 2 + index]; + addr = GET_8BIT_ADDR(ssif_bmc->client->addr); + cpec = i2c_smbus_pec(cpec, &addr, 1); + cpec = i2c_smbus_pec(cpec, &ssif_bmc->smbus_cmd, 1); + cpec = i2c_smbus_pec(cpec, &ssif_bmc->recv_len, 1); + /* As SMBus specification does not allow the length + * (byte count) in the Write-Block protocol to be zero. + * Therefore, it is illegal to have the last Middle + * transaction in the sequence carry 32-byte and have + * a length of ‘0’ in the End transaction. + * But some users may try to use this way and we should + * prevent ssif_bmc driver broken in this case. + */ + if (ssif_bmc->recv_len != 0) + cpec = i2c_smbus_pec(cpec, buf + 1 + index, ssif_bmc->recv_len); + + if (rpec != cpec) { + dev_err(&ssif_bmc->client->dev, "Bad PEC 0x%02x vs. 
0x%02x\n", rpec, cpec); + ret = false; + } + + break; + default: + /* Do not expect to go to this case */ + dev_err(&ssif_bmc->client->dev, "%s: Unexpected SMBus command 0x%x, aborting ...\n", + __func__, ssif_bmc->smbus_cmd); + ret = false; + break; + } + +exit: + return ret; +} + +static bool unsupported_smbus_cmd(u8 cmd) +{ + if (cmd == SSIF_IPMI_SINGLEPART_READ || + cmd == SSIF_IPMI_SINGLEPART_WRITE || + cmd == SSIF_IPMI_MULTIPART_WRITE_START || + cmd == SSIF_IPMI_MULTIPART_WRITE_MIDDLE || + cmd == SSIF_IPMI_MULTIPART_WRITE_END || + cmd == SSIF_IPMI_MULTIPART_READ_START || + cmd == SSIF_IPMI_MULTIPART_READ_MIDDLE) + return false; + + return true; +} + +static void process_smbus_cmd(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + /* SMBUS command can vary (single or multi-part) */ + ssif_bmc->smbus_cmd = *val; + ssif_bmc->msg_idx++; + + if (unsupported_smbus_cmd(*val)) { + dev_warn(&ssif_bmc->client->dev, "Warn: Unknown SMBus command, aborting ..."); + ssif_bmc->aborting = true; + } else if (ssif_bmc->aborting && + (*val == SSIF_IPMI_SINGLEPART_WRITE || + *val == SSIF_IPMI_MULTIPART_WRITE_START)) { + /* New request */ + dev_warn(&ssif_bmc->client->dev, "Warn: New request found, stop aborting ..."); + ssif_bmc->aborting = false; + } +} + +static void on_read_requested_event(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + if (ssif_bmc->state == SSIF_READY || + ssif_bmc->state == SSIF_START || + ssif_bmc->state == SSIF_REQ_RECVING || + ssif_bmc->state == SSIF_RES_SENDING) { + ssif_bmc->state = SSIF_BAD_SMBUS; + dev_warn(&ssif_bmc->client->dev, + "Warn: %s unexpected READ REQUESTED in state=%s, aborting ...\n", + __func__, state_to_string(ssif_bmc->state)); + ssif_bmc->aborting = true; + + } else if (ssif_bmc->state == SSIF_SMBUS_CMD) { + ssif_bmc->state = SSIF_RES_SENDING; + } + + if (ssif_bmc->aborting || ssif_bmc->state != SSIF_RES_SENDING) { + /* Abort by returning the last request with 0xFF as completion code */ + ssif_bmc->is_singlepart_read = true; + 
ssif_bmc->response.len = 0x03; + ssif_bmc->response.netfn_lun = ssif_bmc->request.netfn_lun | 4; + ssif_bmc->response.cmd = ssif_bmc->request.cmd; + memset(&ssif_bmc->response.payload[0], 0xFF, MAX_PAYLOAD_PER_TRANSACTION); + } + + ssif_bmc->msg_idx = 0; + if (ssif_bmc->is_singlepart_read) + *val = ssif_bmc->response.len; + else + set_multipart_response_buffer(ssif_bmc, val); +} + +static void on_read_processed_event(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + if (ssif_bmc->state == SSIF_READY || + ssif_bmc->state == SSIF_START || + ssif_bmc->state == SSIF_REQ_RECVING || + ssif_bmc->state == SSIF_SMBUS_CMD) { + dev_warn(&ssif_bmc->client->dev, + "Warn: %s unexpected READ PROCESSED in state=%s\n", + __func__, state_to_string(ssif_bmc->state)); + ssif_bmc->state = SSIF_BAD_SMBUS; + } + + handle_read_processed(ssif_bmc, val); +} + +static void on_write_requested_event(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + ssif_bmc->msg_idx = 0; + + if (ssif_bmc->state == SSIF_READY || ssif_bmc->state == SSIF_SMBUS_CMD) { + ssif_bmc->state = SSIF_START; + + } else if (ssif_bmc->state == SSIF_START || + ssif_bmc->state == SSIF_REQ_RECVING || + ssif_bmc->state == SSIF_RES_SENDING) { + dev_warn(&ssif_bmc->client->dev, + "Warn: %s unexpected WRITE REQUEST in state=%s\n", + __func__, state_to_string(ssif_bmc->state)); + ssif_bmc->state = SSIF_BAD_SMBUS; + } +} + +static void on_write_received_event(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + if (ssif_bmc->state == SSIF_READY || ssif_bmc->state == SSIF_RES_SENDING) { + dev_warn(&ssif_bmc->client->dev, + "Warn: %s unexpected WRITE RECEIVED in state=%s\n", + __func__, state_to_string(ssif_bmc->state)); + ssif_bmc->state = SSIF_BAD_SMBUS; + } else if (ssif_bmc->state == SSIF_START) { + ssif_bmc->state = SSIF_SMBUS_CMD; + } else if (ssif_bmc->state == SSIF_SMBUS_CMD) { + ssif_bmc->state = SSIF_REQ_RECVING; + } + + /* This is response sending state */ + if (ssif_bmc->state == SSIF_REQ_RECVING) { + if (ssif_bmc->response_in_progress) { + 
/* + * As per spec, it is generic management software or SSIF drivers to take + * care of issuing new request before the prior requests completed. + * So just abort everything here and wait for next new request + */ + dev_warn(&ssif_bmc->client->dev, + "Warn: SSIF new request with pending response, aborting ..."); + ssif_bmc->aborting = true; + complete_response(ssif_bmc); + } + + handle_write_received(ssif_bmc, val); + } else if (ssif_bmc->state == SSIF_SMBUS_CMD) { + process_smbus_cmd(ssif_bmc, val); + } +} + +static void on_stop_event(struct ssif_bmc_ctx *ssif_bmc, u8 *val) +{ + if (ssif_bmc->state == SSIF_READY || + ssif_bmc->state == SSIF_START || + ssif_bmc->state == SSIF_SMBUS_CMD) { + dev_warn(&ssif_bmc->client->dev, + "Warn: %s unexpected SLAVE STOP in state=%s\n", + __func__, state_to_string(ssif_bmc->state)); + + } else if (ssif_bmc->state == SSIF_BAD_SMBUS) { + dev_warn(&ssif_bmc->client->dev, + "Warn: %s received SLAVE STOP from bad state=%s\n", + __func__, state_to_string(ssif_bmc->state)); + + } else if (ssif_bmc->state == SSIF_REQ_RECVING) { + /* A BMC that receives an invalid request drop the data for the write + * transaction and any further transactions (read or write) until + * the next valid read or write Start transaction is received + */ + if (!validate_request(ssif_bmc)) + ssif_bmc->aborting = true; + + if (!ssif_bmc->aborting && + (ssif_bmc->smbus_cmd == SSIF_IPMI_SINGLEPART_WRITE || + ssif_bmc->smbus_cmd == SSIF_IPMI_MULTIPART_WRITE_END)) + handle_request(ssif_bmc); + } + + ssif_bmc->state = SSIF_READY; + /* Reset message index */ + ssif_bmc->msg_idx = 0; +} + +/* + * Callback function to handle I2C slave events + */ +static int ssif_bmc_cb(struct i2c_client *client, enum i2c_slave_event event, u8 *val) +{ + unsigned long flags; + struct ssif_bmc_ctx *ssif_bmc = i2c_get_clientdata(client); + + spin_lock_irqsave(&ssif_bmc->lock, flags); + + switch (event) { + case I2C_SLAVE_READ_REQUESTED: + on_read_requested_event(ssif_bmc, val); + break; 
+ + case I2C_SLAVE_WRITE_REQUESTED: + on_write_requested_event(ssif_bmc, val); + break; + + case I2C_SLAVE_READ_PROCESSED: + on_read_processed_event(ssif_bmc, val); + break; + + case I2C_SLAVE_WRITE_RECEIVED: + on_write_received_event(ssif_bmc, val); + break; + + case I2C_SLAVE_STOP: + on_stop_event(ssif_bmc, val); + break; + + default: + dev_warn(&ssif_bmc->client->dev, "Warn: Unknown i2c slave event, aborting ...\n"); + ssif_bmc->aborting = true; + break; + } + + spin_unlock_irqrestore(&ssif_bmc->lock, flags); + + return 0; +} + +static int ssif_bmc_probe(struct i2c_client *client, const struct i2c_device_id *id) +{ + struct ssif_bmc_ctx *ssif_bmc; + int ret; + + ssif_bmc = devm_kzalloc(&client->dev, sizeof(*ssif_bmc), GFP_KERNEL); + if (!ssif_bmc) + return -ENOMEM; + + spin_lock_init(&ssif_bmc->lock); + + init_waitqueue_head(&ssif_bmc->wait_queue); + ssif_bmc->request_available = false; + ssif_bmc->response_in_progress = false; + + /* Register misc device interface */ + ssif_bmc->miscdev.minor = MISC_DYNAMIC_MINOR; + ssif_bmc->miscdev.name = DEVICE_NAME; + ssif_bmc->miscdev.fops = &ssif_bmc_fops; + ssif_bmc->miscdev.parent = &client->dev; + ret = misc_register(&ssif_bmc->miscdev); + if (ret) + goto out; + + ssif_bmc->client = client; + ssif_bmc->client->flags |= I2C_CLIENT_SLAVE; + + /* Register I2C slave */ + i2c_set_clientdata(client, ssif_bmc); + ret = i2c_slave_register(client, ssif_bmc_cb); + if (ret) { + misc_deregister(&ssif_bmc->miscdev); + goto out; + } + + return 0; +out: + devm_kfree(&client->dev, ssif_bmc); + return ret; +} + +static int ssif_bmc_remove(struct i2c_client *client) +{ + struct ssif_bmc_ctx *ssif_bmc = i2c_get_clientdata(client); + + i2c_slave_unregister(client); + misc_deregister(&ssif_bmc->miscdev); + + return 0; +} + +static const struct of_device_id ssif_bmc_match[] = { + { .compatible = "ampere,ssif-bmc" }, + { }, +}; + +static const struct i2c_device_id ssif_bmc_id[] = { + { DEVICE_NAME, 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(i2c, 
ssif_bmc_id); + +static struct i2c_driver ssif_bmc_driver = { + .driver = { + .name = DEVICE_NAME, + .of_match_table = ssif_bmc_match, + }, + .probe = ssif_bmc_probe, + .remove = ssif_bmc_remove, + .id_table = ssif_bmc_id, +}; + +module_i2c_driver(ssif_bmc_driver); + +MODULE_AUTHOR("Quan Nguyen "); +MODULE_AUTHOR("Chuong Tran "); +MODULE_DESCRIPTION("Linux device driver of the BMC IPMI SSIF interface."); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/char/ipmi/ssif_bmc.h b/drivers/char/ipmi/ssif_bmc.h new file mode 100644 index 0000000000000..b63e40a4b9000 --- /dev/null +++ b/drivers/char/ipmi/ssif_bmc.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * The driver for BMC side of SSIF interface + * + * Copyright (c) 2021, Ampere Computing LLC + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#ifndef __SSIF_BMC_H__ +#define __SSIF_BMC_H__ + +#define DEVICE_NAME "ipmi-ssif-host" + +#define GET_8BIT_ADDR(addr_7bit) (((addr_7bit) << 1) & 0xff) + +/* A standard SMBus Transaction is limited to 32 data bytes */ +#define MAX_PAYLOAD_PER_TRANSACTION 32 + +#define MAX_IPMI_DATA_PER_START_TRANSACTION 30 +#define MAX_IPMI_DATA_PER_MIDDLE_TRANSACTION 31 + +#define SSIF_IPMI_SINGLEPART_WRITE 0x2 +#define SSIF_IPMI_SINGLEPART_READ 0x3 +#define SSIF_IPMI_MULTIPART_WRITE_START 0x6 +#define SSIF_IPMI_MULTIPART_WRITE_MIDDLE 0x7 +#define SSIF_IPMI_MULTIPART_WRITE_END 0x8 +#define SSIF_IPMI_MULTIPART_READ_START 0x3 +#define SSIF_IPMI_MULTIPART_READ_MIDDLE 0x9 + +#define MSG_PAYLOAD_LEN_MAX 252 + +struct ssif_msg { + u8 len; + u8 netfn_lun; + u8 cmd; + u8 payload[MSG_PAYLOAD_LEN_MAX]; +} __packed; + +static inline u32 ssif_msg_len(struct ssif_msg *ssif_msg) +{ + return ssif_msg->len + 1; +} + +/* + * SSIF internal states: + * SSIF_READY 0x00 : Ready state + * SSIF_START 0x01 : Start smbus transaction + * SSIF_SMBUS_CMD 0x02 : Received SMBus command + * SSIF_REQ_RECVING 0x03 : Receiving request + * SSIF_RES_SENDING 0x04 : Sending response + * SSIF_BAD_SMBUS 0x05 : Bad SMbus transaction + */ +enum ssif_state { + SSIF_READY, + SSIF_START, + SSIF_SMBUS_CMD, + SSIF_REQ_RECVING, + SSIF_RES_SENDING, + SSIF_BAD_SMBUS, + SSIF_STATE_MAX +}; + +struct ssif_bmc_ctx { + struct i2c_client *client; + struct miscdevice miscdev; + size_t msg_idx; + bool pec_support; + /* ssif bmc spinlock */ + spinlock_t lock; + wait_queue_head_t wait_queue; + u8 running; + enum ssif_state state; + u8 smbus_cmd; + /* Flag to abort current process */ + bool aborting; + /* Flag to identify a Multi-part Read Transaction */ + bool is_singlepart_read; + u8 nbytes_processed; + u8 remain_len; + u8 recv_len; + /* Block Number of a Multi-part Read Transaction */ + u8 block_num; + bool request_available; + bool response_in_progress; + /* Response buffer for Multi-part Read Transaction */ + u8 
response_buf[MAX_PAYLOAD_PER_TRANSACTION]; + struct ssif_msg response; + struct ssif_msg request; +}; + +static inline struct ssif_bmc_ctx *to_ssif_bmc(struct file *file) +{ + return container_of(file->private_data, struct ssif_bmc_ctx, miscdev); +} +#endif /* __SSIF_BMC_H__ */ From 1a2055cd5091f814444a1128df96fb190a5c98be Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Wed, 14 Jul 2021 10:38:33 +0700 Subject: [PATCH 147/851] bindings: ipmi: Add binding for SSIF BMC driver Add device tree binding document for the SSIF BMC driver. Signed-off-by: Quan Nguyen Message-Id: <20210714033833.11640-4-quan@os.amperecomputing.com> Reviewed-by: Rob Herring Signed-off-by: Corey Minyard --- .../devicetree/bindings/ipmi/ssif-bmc.yaml | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 Documentation/devicetree/bindings/ipmi/ssif-bmc.yaml diff --git a/Documentation/devicetree/bindings/ipmi/ssif-bmc.yaml b/Documentation/devicetree/bindings/ipmi/ssif-bmc.yaml new file mode 100644 index 0000000000000..917a577c2f292 --- /dev/null +++ b/Documentation/devicetree/bindings/ipmi/ssif-bmc.yaml @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ipmi/ssif-bmc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SSIF IPMI BMC interface + +description: SSIF IPMI BMC device bindings + +maintainers: + - Quan Nguyen + +properties: + compatible: + enum: + - ampere,ssif-bmc + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + ssif-bmc@10 { + compatible = "ampere,ssif-bmc"; + reg = <0x10>; + }; + }; From 51caa4ed854269780b1ea63c2b912b505fe66f46 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 24 Jun 2021 22:28:40 +0200 Subject: [PATCH 148/851] bus: mhi: Add inbound buffers allocation flag Currently, the MHI controller driver defines which channels should have their 
inbound buffers allocated and queued. But ideally, this is something that should be decided by the MHI device driver instead, which actually deals with that buffers. Add a flag parameter to mhi_prepare_for_transfer allowing to specify if buffers have to be allocated and queued by the MHI stack. Keep auto_queue flag for now, but should be removed at some point. Signed-off-by: Loic Poulain Tested-by: Bhaumik Bhatt Reviewed-by: Bhaumik Bhatt Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/1624566520-20406-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/core/internal.h | 2 +- drivers/bus/mhi/core/main.c | 9 ++++++--- drivers/net/mhi/net.c | 2 +- drivers/net/wwan/mhi_wwan_ctrl.c | 2 +- include/linux/mhi.h | 7 ++++++- net/qrtr/mhi.c | 2 +- 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index 5b9ea66b92dc3..bc239a11aa698 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl, struct image_info *img_info); void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl); int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan); + struct mhi_chan *mhi_chan, unsigned int flags); int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan); void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 22acde118bc35..69cd9dcde6d8d 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -1423,7 +1423,7 @@ static void mhi_unprepare_channel(struct mhi_controller *mhi_cntrl, } int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan) + struct mhi_chan *mhi_chan, unsigned int flags) { int ret = 0; struct device 
*dev = &mhi_chan->mhi_dev->dev; @@ -1448,6 +1448,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, if (ret) goto error_pm_state; + if (mhi_chan->dir == DMA_FROM_DEVICE) + mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS); + /* Pre-allocate buffer for xfer ring */ if (mhi_chan->pre_alloc) { int nr_el = get_nr_avail_ring_elements(mhi_cntrl, @@ -1603,7 +1606,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan) } /* Move channel to start state */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags) { int ret, dir; struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; @@ -1614,7 +1617,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) if (!mhi_chan) continue; - ret = mhi_prepare_channel(mhi_cntrl, mhi_chan); + ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags); if (ret) goto error_open_chan; } diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c index e60e38c1f09d3..11be6bcdd551a 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi/net.c @@ -335,7 +335,7 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, u64_stats_init(&mhi_netdev->stats.tx_syncp); /* Start MHI channels */ - err = mhi_prepare_for_transfer(mhi_dev); + err = mhi_prepare_for_transfer(mhi_dev, 0); if (err) goto out_err; diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c index 1bc6b69aa5302..1e18420ce4045 100644 --- a/drivers/net/wwan/mhi_wwan_ctrl.c +++ b/drivers/net/wwan/mhi_wwan_ctrl.c @@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port) int ret; /* Start mhi device's channel(s) */ - ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev); + ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0); if (ret) return ret; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 944aa3aa30355..5e08468854db4 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -719,8 
+719,13 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, + unsigned int flags); + +/* Automatically allocate and queue inbound buffers */ +#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index fa611678af052..29b4fa3b72abf 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -79,7 +79,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev); + rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS); if (rc) return rc; From 610207f7d03afec8a7a17ebd223718770bc57a4f Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 24 Jun 2021 14:18:12 +0100 Subject: [PATCH 149/851] slimbus: messaging: start transaction ids from 1 instead of zero As tid is unsigned its hard to figure out if the tid is valid or invalid. So Start the transaction ids from 1 instead of zero so that we could differentiate between a valid tid and invalid tids This is useful in cases where controller would add a tid for controller specific transfers. 
Fixes: d3062a210930 ("slimbus: messaging: add slim_alloc/free_txn_tid()") Signed-off-by: Srinivas Kandagatla --- drivers/slimbus/messaging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index f2b5d347d227b..6097ddc43a35c 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -66,7 +66,7 @@ int slim_alloc_txn_tid(struct slim_controller *ctrl, struct slim_msg_txn *txn) int ret = 0; spin_lock_irqsave(&ctrl->txn_lock, flags); - ret = idr_alloc_cyclic(&ctrl->tid_idr, txn, 0, + ret = idr_alloc_cyclic(&ctrl->tid_idr, txn, 1, SLIM_MAX_TIDS, GFP_ATOMIC); if (ret < 0) { spin_unlock_irqrestore(&ctrl->txn_lock, flags); From c24e70aec9adfe53e8e5a4a8a3261af3fd6f7ec1 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 24 Jun 2021 14:20:24 +0100 Subject: [PATCH 150/851] slimbus: messaging: check for valid transaction id In some usecases transaction ids are dynamically allocated inside the controller driver after sending the messages which have generic acknowledge responses. So check for this before refcounting pm_runtime. 
Without this we would end up imbalancing runtime pm count by doing pm_runtime_put() in both slim_do_transfer() and slim_msg_response() for a single pm_runtime_get() in slim_do_transfer() Fixes: d3062a210930 ("slimbus: messaging: add slim_alloc/free_txn_tid()") Signed-off-by: Srinivas Kandagatla --- drivers/slimbus/messaging.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index 6097ddc43a35c..e5ae26227bdbf 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -131,7 +131,8 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) goto slim_xfer_err; } } - + /* Initialize tid to invalid value */ + txn->tid = 0; need_tid = slim_tid_txn(txn->mt, txn->mc); if (need_tid) { @@ -163,7 +164,7 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) txn->mt, txn->mc, txn->la, ret); slim_xfer_err: - if (!clk_pause_msg && (!need_tid || ret == -ETIMEDOUT)) { + if (!clk_pause_msg && (txn->tid == 0 || ret == -ETIMEDOUT)) { /* * remove runtime-pm vote if this was TX only, or * if there was error during this transaction From f58e0e4413187896e90538ba192acf7d77823d54 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 24 Jun 2021 15:31:35 +0100 Subject: [PATCH 151/851] slimbus: ngd: set correct device for pm For some reason we ended up using wrong device in some places for pm_runtime calls. Fix this so that NGG driver can do runtime pm correctly. 
Fixes: 917809e2280b ("slimbus: ngd: Add qcom SLIMBus NGD driver") Signed-off-by: Srinivas Kandagatla --- drivers/slimbus/qcom-ngd-ctrl.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index c054e83ab6361..f3ee8e0363721 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -618,7 +618,7 @@ static void qcom_slim_ngd_rx(struct qcom_slim_ngd_ctrl *ctrl, u8 *buf) (mc == SLIM_USR_MC_GENERIC_ACK && mt == SLIM_MSG_MT_SRC_REFERRED_USER)) { slim_msg_response(&ctrl->ctrl, &buf[4], buf[3], len - 4); - pm_runtime_mark_last_busy(ctrl->dev); + pm_runtime_mark_last_busy(ctrl->ctrl.dev); } } @@ -1257,13 +1257,14 @@ static int qcom_slim_ngd_enable(struct qcom_slim_ngd_ctrl *ctrl, bool enable) } /* controller state should be in sync with framework state */ complete(&ctrl->qmi.qmi_comp); - if (!pm_runtime_enabled(ctrl->dev) || - !pm_runtime_suspended(ctrl->dev)) - qcom_slim_ngd_runtime_resume(ctrl->dev); + if (!pm_runtime_enabled(ctrl->ctrl.dev) || + !pm_runtime_suspended(ctrl->ctrl.dev)) + qcom_slim_ngd_runtime_resume(ctrl->ctrl.dev); else - pm_runtime_resume(ctrl->dev); - pm_runtime_mark_last_busy(ctrl->dev); - pm_runtime_put(ctrl->dev); + pm_runtime_resume(ctrl->ctrl.dev); + + pm_runtime_mark_last_busy(ctrl->ctrl.dev); + pm_runtime_put(ctrl->ctrl.dev); ret = slim_register_controller(&ctrl->ctrl); if (ret) { @@ -1389,7 +1390,7 @@ static int qcom_slim_ngd_ssr_pdr_notify(struct qcom_slim_ngd_ctrl *ctrl, /* Make sure the last dma xfer is finished */ mutex_lock(&ctrl->tx_lock); if (ctrl->state != QCOM_SLIM_NGD_CTRL_DOWN) { - pm_runtime_get_noresume(ctrl->dev); + pm_runtime_get_noresume(ctrl->ctrl.dev); ctrl->state = QCOM_SLIM_NGD_CTRL_DOWN; qcom_slim_ngd_down(ctrl); qcom_slim_ngd_exit_dma(ctrl); From e5c578adcdd974f73380f0e5a5595354fcab0d60 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 5 Jul 2021 13:50:05 +0100 Subject: [PATCH 152/851] 
slimbus: ngd: reset dma setup during runtime pm During suspend/resume NGD remote instance is power cycled along with remotely controlled bam dma engine. So Reset the dma configuration during this suspend resume path so that we are not dealing with any stale dma setup. Without this transactions timeout after first suspend resume path. Fixes: 917809e2280b ("slimbus: ngd: Add qcom SLIMBus NGD driver") Signed-off-by: Srinivas Kandagatla --- drivers/slimbus/qcom-ngd-ctrl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index f3ee8e0363721..7040293c2ee8f 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1080,7 +1080,8 @@ static void qcom_slim_ngd_setup(struct qcom_slim_ngd_ctrl *ctrl) { u32 cfg = readl_relaxed(ctrl->ngd->base); - if (ctrl->state == QCOM_SLIM_NGD_CTRL_DOWN) + if (ctrl->state == QCOM_SLIM_NGD_CTRL_DOWN || + ctrl->state == QCOM_SLIM_NGD_CTRL_ASLEEP) qcom_slim_ngd_init_dma(ctrl); /* By default enable message queues */ @@ -1131,6 +1132,7 @@ static int qcom_slim_ngd_power_up(struct qcom_slim_ngd_ctrl *ctrl) dev_info(ctrl->dev, "Subsys restart: ADSP active framer\n"); return 0; } + qcom_slim_ngd_setup(ctrl); return 0; } @@ -1618,6 +1620,7 @@ static int __maybe_unused qcom_slim_ngd_runtime_suspend(struct device *dev) struct qcom_slim_ngd_ctrl *ctrl = dev_get_drvdata(dev); int ret = 0; + qcom_slim_ngd_exit_dma(ctrl); if (!ctrl->qmi.handle) return 0; From 6a68cd546c367a4601518676e07228ee2cf55b6d Mon Sep 17 00:00:00 2001 From: ULRICH Thomas Date: Mon, 21 Jun 2021 09:04:17 +0000 Subject: [PATCH 153/851] bus: mhi: pci_generic: Add Cinterion MV31-W PCIe to MHI This patch adds VendorID/ProductID and MBIM Channel Definitions for M.2 Modem Card (PCIe Variant) to MHI PCI generic controller driver. 
Cinterion MV31-W (by Thales) Additional information on such Modem Card (USB or PCIe variant) is available at: https://www.thalesgroup.com/en/markets/digital-identity-and-security/iot/iot-connectivity/products/iot-products/mv31-w-ultra-high Signed-off-by: ULRICH Thomas Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/PAZP264MB284690134DA010698E6B3BDDE60A9@PAZP264MB2846.FRAP264.PROD.OUTLOOK.COM [mani: fixed the subject, whitespace, and added sideband_wake field] Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/pci_generic.c | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index ca3bc40427f85..6e1a86021b754 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -342,6 +342,40 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = { .dma_data_width = 32 }; +static const struct mhi_channel_config mhi_mv31_channels[] = { + MHI_CHANNEL_CONFIG_UL(0, "LOOPBACK", 64, 0), + MHI_CHANNEL_CONFIG_DL(1, "LOOPBACK", 64, 0), + /* MBIM Control Channel */ + MHI_CHANNEL_CONFIG_UL(12, "MBIM", 64, 0), + MHI_CHANNEL_CONFIG_DL(13, "MBIM", 64, 0), + /* MBIM Data Channel */ + MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 512, 2), + MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 512, 3), +}; + +static struct mhi_event_config mhi_mv31_events[] = { + MHI_EVENT_CONFIG_CTRL(0, 256), + MHI_EVENT_CONFIG_DATA(1, 256), + MHI_EVENT_CONFIG_HW_DATA(2, 1024, 100), + MHI_EVENT_CONFIG_HW_DATA(3, 1024, 101), +}; + +static const struct mhi_controller_config modem_mv31_config = { + .max_channels = 128, + .timeout_ms = 20000, + .num_channels = ARRAY_SIZE(mhi_mv31_channels), + .ch_cfg = mhi_mv31_channels, + .num_events = ARRAY_SIZE(mhi_mv31_events), + .event_cfg = mhi_mv31_events, +}; + +static const struct mhi_pci_dev_info mhi_mv31_info = { + .name = "cinterion-mv31", + .config = &modem_mv31_config, + .bar_num = MHI_PCI_DEFAULT_BAR_NUM, + 
.dma_data_width = 32, +}; + static const struct pci_device_id mhi_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0306), .driver_data = (kernel_ulong_t) &mhi_qcom_sdx55_info }, @@ -362,6 +396,9 @@ static const struct pci_device_id mhi_pci_id_table[] = { /* DW5930e (sdx55), Non-eSIM, It's also T99W175 */ { PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0b1), .driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info }, + /* MV31-W (Cinterion) */ + { PCI_DEVICE(0x1269, 0x00b3), + .driver_data = (kernel_ulong_t) &mhi_mv31_info }, { } }; MODULE_DEVICE_TABLE(pci, mhi_pci_id_table); From 7f81b917db7288729a10f1bcf364f04edf6f3fe0 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Thu, 6 May 2021 12:51:40 -0700 Subject: [PATCH 154/851] bus: mhi: core: Set BHI/BHIe offsets on power up preparation Set the BHI and/or BHIe offsets in mhi_prepare_for_power_up(), rearrange the function, and remove the equivalent from mhi_async_power_up(). This helps consolidate multiple checks in different parts of the driver and can help MHI fail early on before power up begins if the offsets are not read correctly. 
Signed-off-by: Bhaumik Bhatt Reviewed-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1620330705-40192-2-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/core/init.c | 42 ++++++++++++++++++++----------------- drivers/bus/mhi/core/pm.c | 28 ++++--------------------- 2 files changed, 27 insertions(+), 43 deletions(-) diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index c81b377fca8f7..11c7a3d3c9bf4 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -1063,7 +1063,7 @@ EXPORT_SYMBOL_GPL(mhi_free_controller); int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl) { struct device *dev = &mhi_cntrl->mhi_dev->dev; - u32 bhie_off; + u32 bhi_off, bhie_off; int ret; mutex_lock(&mhi_cntrl->pm_mutex); @@ -1072,29 +1072,36 @@ int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl) if (ret) goto error_dev_ctxt; - /* - * Allocate RDDM table if specified, this table is for debugging purpose - */ - if (mhi_cntrl->rddm_size) { - mhi_alloc_bhie_table(mhi_cntrl, &mhi_cntrl->rddm_image, - mhi_cntrl->rddm_size); + ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIOFF, &bhi_off); + if (ret) { + dev_err(dev, "Error getting BHI offset\n"); + goto error_reg_offset; + } + mhi_cntrl->bhi = mhi_cntrl->regs + bhi_off; - /* - * This controller supports RDDM, so we need to manually clear - * BHIE RX registers since POR values are undefined. - */ + if (mhi_cntrl->fbc_download || mhi_cntrl->rddm_size) { ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIEOFF, &bhie_off); if (ret) { dev_err(dev, "Error getting BHIE offset\n"); - goto bhie_error; + goto error_reg_offset; } - mhi_cntrl->bhie = mhi_cntrl->regs + bhie_off; + } + + if (mhi_cntrl->rddm_size) { + /* + * This controller supports RDDM, so we need to manually clear + * BHIE RX registers since POR values are undefined. 
+ */ memset_io(mhi_cntrl->bhie + BHIE_RXVECADDR_LOW_OFFS, 0, BHIE_RXVECSTATUS_OFFS - BHIE_RXVECADDR_LOW_OFFS + 4); - + /* + * Allocate RDDM table for debugging purpose if specified + */ + mhi_alloc_bhie_table(mhi_cntrl, &mhi_cntrl->rddm_image, + mhi_cntrl->rddm_size); if (mhi_cntrl->rddm_image) mhi_rddm_prepare(mhi_cntrl, mhi_cntrl->rddm_image); } @@ -1103,11 +1110,8 @@ int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl) return 0; -bhie_error: - if (mhi_cntrl->rddm_image) { - mhi_free_bhie_table(mhi_cntrl, mhi_cntrl->rddm_image); - mhi_cntrl->rddm_image = NULL; - } +error_reg_offset: + mhi_deinit_dev_ctxt(mhi_cntrl); error_dev_ctxt: mutex_unlock(&mhi_cntrl->pm_mutex); diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c index bbf6cd04861eb..ff7cdc8653ef3 100644 --- a/drivers/bus/mhi/core/pm.c +++ b/drivers/bus/mhi/core/pm.c @@ -1059,28 +1059,8 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) if (ret) goto error_setup_irq; - /* Setup BHI offset & INTVEC */ + /* Setup BHI INTVEC */ write_lock_irq(&mhi_cntrl->pm_lock); - ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIOFF, &val); - if (ret) { - write_unlock_irq(&mhi_cntrl->pm_lock); - goto error_bhi_offset; - } - - mhi_cntrl->bhi = mhi_cntrl->regs + val; - - /* Setup BHIE offset */ - if (mhi_cntrl->fbc_download) { - ret = mhi_read_reg(mhi_cntrl, mhi_cntrl->regs, BHIEOFF, &val); - if (ret) { - write_unlock_irq(&mhi_cntrl->pm_lock); - dev_err(dev, "Error reading BHIE offset\n"); - goto error_bhi_offset; - } - - mhi_cntrl->bhie = mhi_cntrl->regs + val; - } - mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0); mhi_cntrl->pm_state = MHI_PM_POR; mhi_cntrl->ee = MHI_EE_MAX; @@ -1091,7 +1071,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) if (!MHI_IN_PBL(current_ee) && current_ee != MHI_EE_AMSS) { dev_err(dev, "Not a valid EE for power on\n"); ret = -EIO; - goto error_bhi_offset; + goto error_async_power_up; } state = mhi_get_mhi_state(mhi_cntrl); @@ -1110,7 +1090,7 @@ 
int mhi_async_power_up(struct mhi_controller *mhi_cntrl) if (!ret) { ret = -EIO; dev_info(dev, "Failed to reset MHI due to syserr state\n"); - goto error_bhi_offset; + goto error_async_power_up; } /* @@ -1132,7 +1112,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) return 0; -error_bhi_offset: +error_async_power_up: mhi_deinit_free_irq(mhi_cntrl); error_setup_irq: From cc9aaa88751c7a832a542177cde994d9962d8394 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Thu, 6 May 2021 12:51:41 -0700 Subject: [PATCH 155/851] bus: mhi: core: Set BHI and BHIe pointers to NULL in clean-up Set the BHI and BHIe pointers to NULL as part of clean-up. This makes sure that stale pointers are not accessed after powering MHI down. Suggested-by: Hemant Kumar Signed-off-by: Bhaumik Bhatt Reviewed-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1620330705-40192-3-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/core/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index 11c7a3d3c9bf4..1cc2f225d3d1f 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -1132,6 +1132,9 @@ void mhi_unprepare_after_power_down(struct mhi_controller *mhi_cntrl) mhi_cntrl->rddm_image = NULL; } + mhi_cntrl->bhi = NULL; + mhi_cntrl->bhie = NULL; + mhi_deinit_dev_ctxt(mhi_cntrl); } EXPORT_SYMBOL_GPL(mhi_unprepare_after_power_down); From a735a900cff28fc4a76b036ff449c38550b7e802 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Thu, 6 May 2021 12:51:42 -0700 Subject: [PATCH 156/851] bus: mhi: Add MMIO region length to controller structure Make controller driver specify the MMIO register region length for range checking of BHI or BHIe space. This can help validate that offsets are in acceptable memory region or not and avoid any boot-up issues due to BHI or BHIe memory accesses. 
Signed-off-by: Bhaumik Bhatt Reviewed-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1620330705-40192-4-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 5e08468854db4..b8ca6943f0b70 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -303,6 +303,7 @@ struct mhi_controller_config { * @rddm_size: RAM dump size that host should allocate for debugging purpose * @sbl_size: SBL image size downloaded through BHIe (optional) * @seg_len: BHIe vector size (optional) + * @reg_len: Length of the MHI MMIO region (required) * @fbc_image: Points to firmware image buffer * @rddm_image: Points to RAM dump buffer * @mhi_chan: Points to the channel configuration table @@ -386,6 +387,7 @@ struct mhi_controller { size_t rddm_size; size_t sbl_size; size_t seg_len; + size_t reg_len; struct image_info *fbc_image; struct image_info *rddm_image; struct mhi_chan *mhi_chan; From be49067c86ead93f03a7b8110c2ff58452a83158 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Thu, 6 May 2021 12:51:43 -0700 Subject: [PATCH 157/851] ath11k: set register access length for MHI driver MHI driver requires register space length to add range checks and prevent memory region accesses outside of that for MMIO space. Set it before registering the MHI controller. 
Signed-off-by: Bhaumik Bhatt Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Acked-by: Kalle Valo Link: https://lore.kernel.org/r/1620330705-40192-5-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- drivers/net/wireless/ath/ath11k/mhi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index 75cc2d80fde8d..26c7ae242db67 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -330,6 +330,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci) mhi_ctrl->cntrl_dev = ab->dev; mhi_ctrl->fw_image = ab_pci->amss_path; mhi_ctrl->regs = ab->mem; + mhi_ctrl->reg_len = ab->mem_len; ret = ath11k_mhi_get_msi(ab_pci); if (ret) { From 00f29cd72a64c7b57f970f155276d73df71262ca Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Thu, 6 May 2021 12:51:44 -0700 Subject: [PATCH 158/851] bus: mhi: pci_generic: Set register access length for MHI driver MHI driver requires register space length to add range checks and prevent memory region accesses outside of that for MMIO space. Set it from the PCI generic controller driver before registering the MHI controller. 
Signed-off-by: Bhaumik Bhatt Reviewed-by: Hemant Kumar Reviewed-by: Loic Poulain Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1620330705-40192-6-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/pci_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 6e1a86021b754..c772d94025fee 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -500,6 +500,7 @@ static int mhi_pci_claim(struct mhi_controller *mhi_cntrl, return err; } mhi_cntrl->regs = pcim_iomap_table(pdev)[bar_num]; + mhi_cntrl->reg_len = pci_resource_len(pdev, bar_num); err = pci_set_dma_mask(pdev, dma_mask); if (err) { From 554492868314c973f1df5f84bd44c93e58ca8635 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Thu, 6 May 2021 12:51:45 -0700 Subject: [PATCH 159/851] bus: mhi: core: Add range checks for BHI and BHIe When obtaining the BHI or BHIe offsets during the power up preparation phase, range checks are missing. These can help controller drivers avoid accessing any address outside of the MMIO region. Ensure that mhi_cntrl->reg_len is set before MHI registration as it is a required field and range checks will fail without it. 
Signed-off-by: Bhaumik Bhatt Reviewed-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1620330705-40192-7-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/core/init.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index 1cc2f225d3d1f..aeb1e3c2cdc4f 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -885,7 +885,8 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl, if (!mhi_cntrl || !mhi_cntrl->cntrl_dev || !mhi_cntrl->regs || !mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put || !mhi_cntrl->status_cb || !mhi_cntrl->read_reg || - !mhi_cntrl->write_reg || !mhi_cntrl->nr_irqs || !mhi_cntrl->irq) + !mhi_cntrl->write_reg || !mhi_cntrl->nr_irqs || + !mhi_cntrl->irq || !mhi_cntrl->reg_len) return -EINVAL; ret = parse_config(mhi_cntrl, config); @@ -1077,6 +1078,13 @@ int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl) dev_err(dev, "Error getting BHI offset\n"); goto error_reg_offset; } + + if (bhi_off >= mhi_cntrl->reg_len) { + dev_err(dev, "BHI offset: 0x%x is out of range: 0x%zx\n", + bhi_off, mhi_cntrl->reg_len); + ret = -EINVAL; + goto error_reg_offset; + } mhi_cntrl->bhi = mhi_cntrl->regs + bhi_off; if (mhi_cntrl->fbc_download || mhi_cntrl->rddm_size) { @@ -1086,6 +1094,14 @@ int mhi_prepare_for_power_up(struct mhi_controller *mhi_cntrl) dev_err(dev, "Error getting BHIE offset\n"); goto error_reg_offset; } + + if (bhie_off >= mhi_cntrl->reg_len) { + dev_err(dev, + "BHIe offset: 0x%x is out of range: 0x%zx\n", + bhie_off, mhi_cntrl->reg_len); + ret = -EINVAL; + goto error_reg_offset; + } mhi_cntrl->bhie = mhi_cntrl->regs + bhie_off; } From e1e82defeabdde90db2f9aa61769ddbf201c2f08 Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Tue, 22 Jun 2021 13:07:08 -0700 Subject: [PATCH 160/851] bus: mhi: core: Replace DMA 
allocation wrappers with original APIs There is nothing special done within the mhi_alloc_coherent() and the mhi_free_coherent() wrapper functions. They only directly call the equivalent DMA allocation functions. Replace them with the original function calls such that the implementation is clear and direct. Signed-off-by: Bhaumik Bhatt Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1624392428-9328-1-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/core/boot.c | 17 +++++++++-------- drivers/bus/mhi/core/init.c | 32 ++++++++++++++++---------------- drivers/bus/mhi/core/internal.h | 20 -------------------- drivers/bus/mhi/core/main.c | 6 +++--- 4 files changed, 28 insertions(+), 47 deletions(-) diff --git a/drivers/bus/mhi/core/boot.c b/drivers/bus/mhi/core/boot.c index 8100cf51cd09e..0a972620a4030 100644 --- a/drivers/bus/mhi/core/boot.c +++ b/drivers/bus/mhi/core/boot.c @@ -302,8 +302,8 @@ void mhi_free_bhie_table(struct mhi_controller *mhi_cntrl, struct mhi_buf *mhi_buf = image_info->mhi_buf; for (i = 0; i < image_info->entries; i++, mhi_buf++) - mhi_free_coherent(mhi_cntrl, mhi_buf->len, mhi_buf->buf, - mhi_buf->dma_addr); + dma_free_coherent(mhi_cntrl->cntrl_dev, mhi_buf->len, + mhi_buf->buf, mhi_buf->dma_addr); kfree(image_info->mhi_buf); kfree(image_info); @@ -339,8 +339,8 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl, vec_size = sizeof(struct bhi_vec_entry) * i; mhi_buf->len = vec_size; - mhi_buf->buf = mhi_alloc_coherent(mhi_cntrl, vec_size, - &mhi_buf->dma_addr, + mhi_buf->buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, + vec_size, &mhi_buf->dma_addr, GFP_KERNEL); if (!mhi_buf->buf) goto error_alloc_segment; @@ -354,8 +354,8 @@ int mhi_alloc_bhie_table(struct mhi_controller *mhi_cntrl, error_alloc_segment: for (--i, --mhi_buf; i >= 0; i--, mhi_buf--) - mhi_free_coherent(mhi_cntrl, mhi_buf->len, mhi_buf->buf, - mhi_buf->dma_addr); + dma_free_coherent(mhi_cntrl->cntrl_dev, 
mhi_buf->len, + mhi_buf->buf, mhi_buf->dma_addr); error_alloc_mhi_buf: kfree(img_info); @@ -442,7 +442,8 @@ void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl) if (size > firmware->size) size = firmware->size; - buf = mhi_alloc_coherent(mhi_cntrl, size, &dma_addr, GFP_KERNEL); + buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, size, &dma_addr, + GFP_KERNEL); if (!buf) { release_firmware(firmware); goto error_fw_load; @@ -451,7 +452,7 @@ void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl) /* Download image using BHI */ memcpy(buf, firmware->data, size); ret = mhi_fw_load_bhi(mhi_cntrl, dma_addr, size); - mhi_free_coherent(mhi_cntrl, size, buf, dma_addr); + dma_free_coherent(mhi_cntrl->cntrl_dev, size, buf, dma_addr); /* Error or in EDL mode, we're done */ if (ret) { diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index aeb1e3c2cdc4f..5aaca6d0f52b2 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -129,7 +129,7 @@ static int mhi_alloc_aligned_ring(struct mhi_controller *mhi_cntrl, u64 len) { ring->alloc_size = len + (len - 1); - ring->pre_aligned = mhi_alloc_coherent(mhi_cntrl, ring->alloc_size, + ring->pre_aligned = dma_alloc_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size, &ring->dma_handle, GFP_KERNEL); if (!ring->pre_aligned) return -ENOMEM; @@ -221,13 +221,13 @@ void mhi_deinit_dev_ctxt(struct mhi_controller *mhi_cntrl) mhi_cmd = mhi_cntrl->mhi_cmd; for (i = 0; i < NR_OF_CMD_RINGS; i++, mhi_cmd++) { ring = &mhi_cmd->ring; - mhi_free_coherent(mhi_cntrl, ring->alloc_size, + dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size, ring->pre_aligned, ring->dma_handle); ring->base = NULL; ring->iommu_base = 0; } - mhi_free_coherent(mhi_cntrl, + dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS, mhi_ctxt->cmd_ctxt, mhi_ctxt->cmd_ctxt_addr); @@ -237,17 +237,17 @@ void mhi_deinit_dev_ctxt(struct mhi_controller *mhi_cntrl) continue; ring = &mhi_event->ring; - 
mhi_free_coherent(mhi_cntrl, ring->alloc_size, + dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size, ring->pre_aligned, ring->dma_handle); ring->base = NULL; ring->iommu_base = 0; } - mhi_free_coherent(mhi_cntrl, sizeof(*mhi_ctxt->er_ctxt) * + dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) * mhi_cntrl->total_ev_rings, mhi_ctxt->er_ctxt, mhi_ctxt->er_ctxt_addr); - mhi_free_coherent(mhi_cntrl, sizeof(*mhi_ctxt->chan_ctxt) * + dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) * mhi_cntrl->max_chan, mhi_ctxt->chan_ctxt, mhi_ctxt->chan_ctxt_addr); @@ -275,7 +275,7 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) return -ENOMEM; /* Setup channel ctxt */ - mhi_ctxt->chan_ctxt = mhi_alloc_coherent(mhi_cntrl, + mhi_ctxt->chan_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) * mhi_cntrl->max_chan, &mhi_ctxt->chan_ctxt_addr, @@ -307,7 +307,7 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) } /* Setup event context */ - mhi_ctxt->er_ctxt = mhi_alloc_coherent(mhi_cntrl, + mhi_ctxt->er_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) * mhi_cntrl->total_ev_rings, &mhi_ctxt->er_ctxt_addr, @@ -354,7 +354,7 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) /* Setup cmd context */ ret = -ENOMEM; - mhi_ctxt->cmd_ctxt = mhi_alloc_coherent(mhi_cntrl, + mhi_ctxt->cmd_ctxt = dma_alloc_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS, &mhi_ctxt->cmd_ctxt_addr, @@ -389,10 +389,10 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) for (--i, --mhi_cmd; i >= 0; i--, mhi_cmd--) { struct mhi_ring *ring = &mhi_cmd->ring; - mhi_free_coherent(mhi_cntrl, ring->alloc_size, + dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size, ring->pre_aligned, ring->dma_handle); } - mhi_free_coherent(mhi_cntrl, + dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->cmd_ctxt) * NR_OF_CMD_RINGS, mhi_ctxt->cmd_ctxt, mhi_ctxt->cmd_ctxt_addr); i = 
mhi_cntrl->total_ev_rings; @@ -405,15 +405,15 @@ int mhi_init_dev_ctxt(struct mhi_controller *mhi_cntrl) if (mhi_event->offload_ev) continue; - mhi_free_coherent(mhi_cntrl, ring->alloc_size, + dma_free_coherent(mhi_cntrl->cntrl_dev, ring->alloc_size, ring->pre_aligned, ring->dma_handle); } - mhi_free_coherent(mhi_cntrl, sizeof(*mhi_ctxt->er_ctxt) * + dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->er_ctxt) * mhi_cntrl->total_ev_rings, mhi_ctxt->er_ctxt, mhi_ctxt->er_ctxt_addr); error_alloc_er_ctxt: - mhi_free_coherent(mhi_cntrl, sizeof(*mhi_ctxt->chan_ctxt) * + dma_free_coherent(mhi_cntrl->cntrl_dev, sizeof(*mhi_ctxt->chan_ctxt) * mhi_cntrl->max_chan, mhi_ctxt->chan_ctxt, mhi_ctxt->chan_ctxt_addr); @@ -567,7 +567,7 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, if (!chan_ctxt->rbase) /* Already uninitialized */ return; - mhi_free_coherent(mhi_cntrl, tre_ring->alloc_size, + dma_free_coherent(mhi_cntrl->cntrl_dev, tre_ring->alloc_size, tre_ring->pre_aligned, tre_ring->dma_handle); vfree(buf_ring->base); @@ -610,7 +610,7 @@ int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, buf_ring->base = vzalloc(buf_ring->len); if (!buf_ring->base) { - mhi_free_coherent(mhi_cntrl, tre_ring->alloc_size, + dma_free_coherent(mhi_cntrl->cntrl_dev, tre_ring->alloc_size, tre_ring->pre_aligned, tre_ring->dma_handle); return -ENOMEM; } diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index bc239a11aa698..721739c5e0d57 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -690,26 +690,6 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan); -/* Memory allocation methods */ -static inline void *mhi_alloc_coherent(struct mhi_controller *mhi_cntrl, - size_t size, - dma_addr_t *dma_handle, - gfp_t gfp) -{ - void *buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, size, dma_handle, - gfp); - - return buf; -} - -static 
inline void mhi_free_coherent(struct mhi_controller *mhi_cntrl, - size_t size, - void *vaddr, - dma_addr_t dma_handle) -{ - dma_free_coherent(mhi_cntrl->cntrl_dev, size, vaddr, dma_handle); -} - /* Event processing methods */ void mhi_ctrl_ev_task(unsigned long data); void mhi_ev_task(unsigned long data); diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 69cd9dcde6d8d..c67fd001ded15 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -193,7 +193,7 @@ int mhi_map_single_no_bb(struct mhi_controller *mhi_cntrl, int mhi_map_single_use_bb(struct mhi_controller *mhi_cntrl, struct mhi_buf_info *buf_info) { - void *buf = mhi_alloc_coherent(mhi_cntrl, buf_info->len, + void *buf = dma_alloc_coherent(mhi_cntrl->cntrl_dev, buf_info->len, &buf_info->p_addr, GFP_ATOMIC); if (!buf) @@ -220,8 +220,8 @@ void mhi_unmap_single_use_bb(struct mhi_controller *mhi_cntrl, if (buf_info->dir == DMA_FROM_DEVICE) memcpy(buf_info->v_addr, buf_info->bb_addr, buf_info->len); - mhi_free_coherent(mhi_cntrl, buf_info->len, buf_info->bb_addr, - buf_info->p_addr); + dma_free_coherent(mhi_cntrl->cntrl_dev, buf_info->len, + buf_info->bb_addr, buf_info->p_addr); } static int get_nr_avail_ring_elements(struct mhi_controller *mhi_cntrl, From b8c95616d154830be38482557d8eef6100f3c3dd Mon Sep 17 00:00:00 2001 From: Bhaumik Bhatt Date: Mon, 3 May 2021 13:00:38 -0700 Subject: [PATCH 161/851] bus: mhi: core: Improve debug messages for power up Improve error message to be more descriptive if a failure occurs with an invalid power up execution environment. Additionally, add a debug log to print the execution environment and MHI state before a power up is attempted to confirm if the device is in an expected state. This helps clarify reasons for power up failures such as the device being found in a PBL or Emergency Download Mode execution environment and the host expected a full power up with Pass-Through and no image loading involved. 
Signed-off-by: Bhaumik Bhatt Reviewed-by: Hemant Kumar Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1620072038-36160-1-git-send-email-bbhatt@codeaurora.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/core/pm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c index ff7cdc8653ef3..fb99e3727155b 100644 --- a/drivers/bus/mhi/core/pm.c +++ b/drivers/bus/mhi/core/pm.c @@ -1069,12 +1069,16 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) /* Confirm that the device is in valid exec env */ if (!MHI_IN_PBL(current_ee) && current_ee != MHI_EE_AMSS) { - dev_err(dev, "Not a valid EE for power on\n"); + dev_err(dev, "%s is not a valid EE for power on\n", + TO_MHI_EXEC_STR(current_ee)); ret = -EIO; goto error_async_power_up; } state = mhi_get_mhi_state(mhi_cntrl); + dev_dbg(dev, "Attempting power on with EE: %s, state: %s\n", + TO_MHI_EXEC_STR(current_ee), TO_MHI_STATE_STR(state)); + if (state == MHI_STATE_SYS_ERR) { mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET); ret = wait_event_timeout(mhi_cntrl->state_event, From 8874e3a7baec1ee0dc5c20b8e0bf1cade3512b24 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 16 Jul 2021 13:46:00 +0100 Subject: [PATCH 162/851] drm/tegra: Fix compilation of variadic macro Commit 43636451db8c ("drm/tegra: Implement job submission part of new UAPI") added the macro 'SUBMIT_ERR' that in turn makes use of the macro '__VA_OPT__'. The '__VA_OPT__' macro is not supported by older versions of GCC and so causes build failures when using older versions of GCC. Fix this by using the '##__VA_ARGS__' macro instead. 
Fixes: 43636451db8c ("drm/tegra: Implement job submission part of new UAPI") Reported-by: Linux Kernel Functional Testing Signed-off-by: Jon Hunter Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index c53b7207c4786..e496300891493 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -24,7 +24,7 @@ #define SUBMIT_ERR(context, fmt, ...) \ dev_err_ratelimited(context->client->base.dev, \ "%s: job submission failed: " fmt "\n", \ - current->comm __VA_OPT__(,) __VA_ARGS__) + current->comm, ##__VA_ARGS__) static struct tegra_drm_mapping * tegra_drm_mapping_get(struct tegra_drm_context *context, u32 id) From 83eea1033d1c369001a90cf82d0e8f61b615a023 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 5 Jul 2021 00:20:14 +0200 Subject: [PATCH 163/851] hwmon: (ntc_thermistor) Use library interpolation The kernel has a helper function for linear interpolation so use it. It incidentally makes the code easier to read as well. Tested on the ST-Ericsson HREFv60plus hardware reference design with two thermistors forming a thermal zone. 
Cc: Peter Rosin Cc: Chris Lesiak Cc: linux-iio@vger.kernel.org Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20210704222014.12058-1-linus.walleij@linaro.org Signed-off-by: Guenter Roeck --- drivers/hwmon/ntc_thermistor.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 18fd6f12ca162..cf26c44f2b880 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -549,15 +550,16 @@ static int get_temp_mc(struct ntc_data *data, unsigned int ohm) int temp; lookup_comp(data, ohm, &low, &high); - if (low == high) { - /* Unable to use linear approximation */ - temp = data->comp[low].temp_c * 1000; - } else { - temp = data->comp[low].temp_c * 1000 + - ((data->comp[high].temp_c - data->comp[low].temp_c) * - 1000 * ((int)ohm - (int)data->comp[low].ohm)) / - ((int)data->comp[high].ohm - (int)data->comp[low].ohm); - } + /* + * First multiplying the table temperatures with 1000 to get to + * millicentigrades (which is what we want) and then interpolating + * will give the best precision. + */ + temp = fixp_linear_interpolate(data->comp[low].ohm, + data->comp[low].temp_c * 1000, + data->comp[high].ohm, + data->comp[high].temp_c * 1000, + ohm); return temp; } From ddde0ab0a2cafd5ec964727fd64c8fca928416fa Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 9 Jul 2021 10:06:18 +1200 Subject: [PATCH 164/851] hwmon: (pmbus/bpa-rs600) Support BPD-RS600 The BPD-RS600 is the DC version of the BPA-RS600. The PMBUS interface is the same between the two models. Keep the same compatible string but accept either BPA-RS600 or BPD-RS600 in the PMBUS_MFR_MODEL. 
Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20210708220618.23576-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/bpa-rs600.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/hwmon/pmbus/bpa-rs600.c b/drivers/hwmon/pmbus/bpa-rs600.c index 2be69fedfa361..d205b41540ced 100644 --- a/drivers/hwmon/pmbus/bpa-rs600.c +++ b/drivers/hwmon/pmbus/bpa-rs600.c @@ -21,6 +21,8 @@ #define BPARS600_MFR_IOUT_MAX 0xa6 #define BPARS600_MFR_POUT_MAX 0xa7 +enum chips { bpa_rs600, bpd_rs600 }; + static int bpa_rs600_read_byte_data(struct i2c_client *client, int page, int reg) { int ret; @@ -146,11 +148,19 @@ static struct pmbus_driver_info bpa_rs600_info = { .read_word_data = bpa_rs600_read_word_data, }; +static const struct i2c_device_id bpa_rs600_id[] = { + { "bpa-rs600", bpa_rs600 }, + { "bpd-rs600", bpd_rs600 }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, bpa_rs600_id); + static int bpa_rs600_probe(struct i2c_client *client) { struct device *dev = &client->dev; u8 buf[I2C_SMBUS_BLOCK_MAX + 1]; int ret; + const struct i2c_device_id *mid; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA @@ -164,7 +174,11 @@ static int bpa_rs600_probe(struct i2c_client *client) return ret; } - if (strncmp(buf, "BPA-RS600", 8)) { + for (mid = bpa_rs600_id; mid->name[0]; mid++) { + if (!strncasecmp(buf, mid->name, strlen(mid->name))) + break; + } + if (!mid->name[0]) { buf[ret] = '\0'; dev_err(dev, "Unsupported Manufacturer Model '%s'\n", buf); return -ENODEV; @@ -173,12 +187,6 @@ static int bpa_rs600_probe(struct i2c_client *client) return pmbus_do_probe(client, &bpa_rs600_info); } -static const struct i2c_device_id bpa_rs600_id[] = { - { "bpars600", 0 }, - {}, -}; -MODULE_DEVICE_TABLE(i2c, bpa_rs600_id); - static const struct of_device_id __maybe_unused bpa_rs600_of_match[] = { { .compatible = "blutek,bpa-rs600" }, {}, From a5ed7262cdf8bc1602d0ea801ce76142fdd7c3aa Mon Sep 17 
00:00:00 2001 From: Armin Wolf Date: Fri, 9 Jul 2021 20:44:59 +0200 Subject: [PATCH 165/851] hwmon: (w83627ehf) Use platform_create_bundle Using platform_create_bundle() simplifies the module init code and allows w83627ehf_probe() to be marked as __init, lowering the runtime memory footprint. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20210709184501.6546-2-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/w83627ehf.c | 57 +++++++-------------------------------- 1 file changed, 10 insertions(+), 47 deletions(-) diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index 8618aaf32350e..16aed90ca2ec6 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -1694,7 +1694,7 @@ static const struct hwmon_chip_info w83627ehf_chip_info = { .info = w83627ehf_info, }; -static int w83627ehf_probe(struct platform_device *pdev) +static int __init w83627ehf_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct w83627ehf_sio_data *sio_data = dev_get_platdata(dev); @@ -2057,7 +2057,6 @@ static struct platform_driver w83627ehf_driver = { .name = DRVNAME, .pm = W83627EHF_DEV_PM_OPS, }, - .probe = w83627ehf_probe, .remove = w83627ehf_remove, }; @@ -2150,8 +2149,7 @@ static int __init w83627ehf_find(int sioaddr, unsigned short *addr, /* * when Super-I/O functions move to a separate file, the Super-I/O * bus will manage the lifetime of the device and this module will only keep - * track of the w83627ehf driver. But since we platform_device_alloc(), we - * must keep track of the device + * track of the w83627ehf driver. 
*/ static struct platform_device *pdev; @@ -2159,7 +2157,10 @@ static int __init sensors_w83627ehf_init(void) { int err; unsigned short address; - struct resource res; + struct resource res = { + .name = DRVNAME, + .flags = IORESOURCE_IO, + }; struct w83627ehf_sio_data sio_data; /* @@ -2173,55 +2174,17 @@ static int __init sensors_w83627ehf_init(void) w83627ehf_find(0x4e, &address, &sio_data)) return -ENODEV; - err = platform_driver_register(&w83627ehf_driver); - if (err) - goto exit; - - pdev = platform_device_alloc(DRVNAME, address); - if (!pdev) { - err = -ENOMEM; - pr_err("Device allocation failed\n"); - goto exit_unregister; - } - - err = platform_device_add_data(pdev, &sio_data, - sizeof(struct w83627ehf_sio_data)); - if (err) { - pr_err("Platform data allocation failed\n"); - goto exit_device_put; - } - - memset(&res, 0, sizeof(res)); - res.name = DRVNAME; res.start = address + IOREGION_OFFSET; res.end = address + IOREGION_OFFSET + IOREGION_LENGTH - 1; - res.flags = IORESOURCE_IO; err = acpi_check_resource_conflict(&res); if (err) - goto exit_device_put; + return err; - err = platform_device_add_resources(pdev, &res, 1); - if (err) { - pr_err("Device resource addition failed (%d)\n", err); - goto exit_device_put; - } + pdev = platform_create_bundle(&w83627ehf_driver, w83627ehf_probe, &res, 1, &sio_data, + sizeof(struct w83627ehf_sio_data)); - /* platform_device_add calls probe() */ - err = platform_device_add(pdev); - if (err) { - pr_err("Device addition failed (%d)\n", err); - goto exit_device_put; - } - - return 0; - -exit_device_put: - platform_device_put(pdev); -exit_unregister: - platform_driver_unregister(&w83627ehf_driver); -exit: - return err; + return PTR_ERR_OR_ZERO(pdev); } static void __exit sensors_w83627ehf_exit(void) From b8f3d51e9b9b22e096b6dce03971d5fce73f4b3f Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 9 Jul 2021 20:45:00 +0200 Subject: [PATCH 166/851] hwmon: (w83627ehf) Remove w83627ehf_remove() Using devm_request_region() allows 
us to omit w83627ehf_remove() and also simplifies error handling during probe. Also fixed a checkpatch issue. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20210709184501.6546-3-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/w83627ehf.c | 42 +++++++-------------------------------- 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index 16aed90ca2ec6..19af845743241 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -1705,20 +1705,12 @@ static int __init w83627ehf_probe(struct platform_device *pdev) struct device *hwmon_dev; res = platform_get_resource(pdev, IORESOURCE_IO, 0); - if (!request_region(res->start, IOREGION_LENGTH, DRVNAME)) { - err = -EBUSY; - dev_err(dev, "Failed to request region 0x%lx-0x%lx\n", - (unsigned long)res->start, - (unsigned long)res->start + IOREGION_LENGTH - 1); - goto exit; - } + if (!devm_request_region(dev, res->start, IOREGION_LENGTH, DRVNAME)) + return -EBUSY; - data = devm_kzalloc(&pdev->dev, sizeof(struct w83627ehf_data), - GFP_KERNEL); - if (!data) { - err = -ENOMEM; - goto exit_release; - } + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; data->addr = res->start; mutex_init(&data->lock); @@ -1882,7 +1874,7 @@ static int __init w83627ehf_probe(struct platform_device *pdev) err = superio_enter(sio_data->sioreg); if (err) - goto exit_release; + return err; /* Read VID value */ if (sio_data->kind == w83667hg || sio_data->kind == w83667hg_b) { @@ -1951,26 +1943,7 @@ static int __init w83627ehf_probe(struct platform_device *pdev) data, &w83627ehf_chip_info, w83627ehf_groups); - if (IS_ERR(hwmon_dev)) { - err = PTR_ERR(hwmon_dev); - goto exit_release; - } - - return 0; - -exit_release: - release_region(res->start, IOREGION_LENGTH); -exit: - return err; -} - -static int w83627ehf_remove(struct platform_device *pdev) -{ - struct w83627ehf_data *data = platform_get_drvdata(pdev); - - 
release_region(data->addr, IOREGION_LENGTH); - - return 0; + return PTR_ERR_OR_ZERO(hwmon_dev); } #ifdef CONFIG_PM @@ -2057,7 +2030,6 @@ static struct platform_driver w83627ehf_driver = { .name = DRVNAME, .pm = W83627EHF_DEV_PM_OPS, }, - .remove = w83627ehf_remove, }; /* w83627ehf_find() looks for a '627 in the Super-I/O config space */ From 47f53972ca1553c9ef6d6e422a0ab61824bbcb40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Fri, 16 Jul 2021 15:54:41 +0200 Subject: [PATCH 167/851] hwmon: intel-m10-bmc-hwmon: add n5010 sensors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the list of sensors supported by the Silicom n5010 PAC, and enable the drivers as a subtype of the intel-m10-bmc multi-function driver. Signed-off-by: Martin Hundebøll Reviewed-by: Guenter Roeck Reviewed-by: Moritz Fischer Reviewed-by: Xu Yilun Link: https://lore.kernel.org/r/20210716135441.3235863-4-martin@geanix.com Signed-off-by: Guenter Roeck --- drivers/hwmon/intel-m10-bmc-hwmon.c | 116 ++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/drivers/hwmon/intel-m10-bmc-hwmon.c b/drivers/hwmon/intel-m10-bmc-hwmon.c index bd7ed2ed3a1e6..7a08e4c44a4b4 100644 --- a/drivers/hwmon/intel-m10-bmc-hwmon.c +++ b/drivers/hwmon/intel-m10-bmc-hwmon.c @@ -228,6 +228,118 @@ static const struct m10bmc_hwmon_board_data d5005bmc_hwmon_bdata = { .hinfo = d5005bmc_hinfo, }; +static const struct m10bmc_sdata n5010bmc_temp_tbl[] = { + { 0x100, 0x0, 0x104, 0x0, 0x0, 1000, "Board Local Temperature" }, + { 0x108, 0x0, 0x10c, 0x0, 0x0, 1000, "FPGA 1 Temperature" }, + { 0x110, 0x0, 0x114, 0x0, 0x0, 1000, "FPGA 2 Temperature" }, + { 0x118, 0x0, 0x0, 0x0, 0x0, 1000, "Card Top Temperature" }, + { 0x11c, 0x0, 0x0, 0x0, 0x0, 1000, "Card Bottom Temperature" }, + { 0x128, 0x0, 0x0, 0x0, 0x0, 1000, "FPGA 1.2V Temperature" }, + { 0x134, 0x0, 0x0, 0x0, 0x0, 1000, "FPGA 5V Temperature" }, + { 0x140, 0x0, 0x0, 0x0, 0x0, 1000, "FPGA 0.9V 
Temperature" }, + { 0x14c, 0x0, 0x0, 0x0, 0x0, 1000, "FPGA 0.85V Temperature" }, + { 0x158, 0x0, 0x0, 0x0, 0x0, 1000, "AUX 12V Temperature" }, + { 0x164, 0x0, 0x0, 0x0, 0x0, 1000, "Backplane 12V Temperature" }, + { 0x1a8, 0x0, 0x0, 0x0, 0x0, 1000, "QSFP28-1 Temperature" }, + { 0x1ac, 0x0, 0x0, 0x0, 0x0, 1000, "QSFP28-2 Temperature" }, + { 0x1b0, 0x0, 0x0, 0x0, 0x0, 1000, "QSFP28-3 Temperature" }, + { 0x1b4, 0x0, 0x0, 0x0, 0x0, 1000, "QSFP28-4 Temperature" }, + { 0x1b8, 0x0, 0x0, 0x0, 0x0, 1000, "CVL1 Internal Temperature" }, + { 0x1bc, 0x0, 0x0, 0x0, 0x0, 1000, "CVL2 Internal Temperature" }, +}; + +static const struct m10bmc_sdata n5010bmc_in_tbl[] = { + { 0x120, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 1.2V Voltage" }, + { 0x12c, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 5V Voltage" }, + { 0x138, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 0.9V Voltage" }, + { 0x144, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 0.85V Voltage" }, + { 0x150, 0x0, 0x0, 0x0, 0x0, 1, "AUX 12V Voltage" }, + { 0x15c, 0x0, 0x0, 0x0, 0x0, 1, "Backplane 12V Voltage" }, + { 0x16c, 0x0, 0x0, 0x0, 0x0, 1, "DDR4 1.2V Voltage" }, + { 0x17c, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 1.8V Voltage" }, + { 0x184, 0x0, 0x0, 0x0, 0x0, 1, "QDR 1.3V Voltage" }, + { 0x18c, 0x0, 0x0, 0x0, 0x0, 1, "CVL1 0.8V Voltage" }, + { 0x194, 0x0, 0x0, 0x0, 0x0, 1, "CVL1 1.05V Voltage" }, + { 0x19c, 0x0, 0x0, 0x0, 0x0, 1, "CVL2 1.05V Voltage" }, + { 0x1a4, 0x0, 0x0, 0x0, 0x0, 1, "CVL2 0.8V Voltage" }, +}; + +static const struct m10bmc_sdata n5010bmc_curr_tbl[] = { + { 0x124, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 1.2V Current" }, + { 0x130, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 5V Current" }, + { 0x13c, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 0.9V Current" }, + { 0x148, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 0.85V Current" }, + { 0x154, 0x0, 0x0, 0x0, 0x0, 1, "AUX 12V Current" }, + { 0x160, 0x0, 0x0, 0x0, 0x0, 1, "Backplane 12V Current" }, + { 0x168, 0x0, 0x0, 0x0, 0x0, 1, "DDR4 1.2V Current" }, + { 0x178, 0x0, 0x0, 0x0, 0x0, 1, "FPGA 1.8V Current" }, + { 0x180, 0x0, 0x0, 0x0, 0x0, 1, "QDR 1.3V Current" }, + { 0x188, 0x0, 
0x0, 0x0, 0x0, 1, "CVL1 0.8V Current" }, + { 0x190, 0x0, 0x0, 0x0, 0x0, 1, "CVL1 1.05V Current" }, + { 0x198, 0x0, 0x0, 0x0, 0x0, 1, "CVL2 1.05V Current" }, + { 0x1a0, 0x0, 0x0, 0x0, 0x0, 1, "CVL2 0.8V Current" }, +}; + +static const struct hwmon_channel_info *n5010bmc_hinfo[] = { + HWMON_CHANNEL_INFO(temp, + HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL), + HWMON_CHANNEL_INFO(in, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL, + HWMON_I_INPUT | HWMON_I_LABEL), + HWMON_CHANNEL_INFO(curr, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL, + HWMON_C_INPUT | HWMON_C_LABEL), + NULL +}; + +static const struct m10bmc_hwmon_board_data n5010bmc_hwmon_bdata = { + .tables = { + [hwmon_temp] = n5010bmc_temp_tbl, + [hwmon_in] = n5010bmc_in_tbl, + 
[hwmon_curr] = n5010bmc_curr_tbl, + }, + + .hinfo = n5010bmc_hinfo, +}; + static umode_t m10bmc_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) @@ -438,6 +550,10 @@ static const struct platform_device_id intel_m10bmc_hwmon_ids[] = { .name = "d5005bmc-hwmon", .driver_data = (unsigned long)&d5005bmc_hwmon_bdata, }, + { + .name = "n5010bmc-hwmon", + .driver_data = (unsigned long)&n5010bmc_hwmon_bdata, + }, { } }; From 3d3377027027b3f5a7447bc147fb9dfabfad9369 Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Mon, 5 Jul 2021 09:26:10 +0800 Subject: [PATCH 168/851] arm64: dts: rockchip: add saradc node for rk3568 Add the core dt-node for the rk3568's saradc. Signed-off-by: Simon Xue Link: https://lore.kernel.org/r/20210705012610.3831-1-xxm@rock-chips.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index 618849186c39a..11825909c5dbf 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -754,6 +754,18 @@ status = "disabled"; }; + saradc: saradc@fe720000 { + compatible = "rockchip,rk3568-saradc", "rockchip,rk3399-saradc"; + reg = <0x0 0xfe720000 0x0 0x100>; + interrupts = ; + clocks = <&cru CLK_SARADC>, <&cru PCLK_SARADC>; + clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_P_SARADC>; + reset-names = "saradc-apb"; + #io-channel-cells = <1>; + status = "disabled"; + }; + pinctrl: pinctrl { compatible = "rockchip,rk3568-pinctrl"; rockchip,grf = <&grf>; From 714b35da2b6eace13bfe23ae02330e3b6a2ea4e7 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Thu, 15 Jul 2021 18:41:01 +0200 Subject: [PATCH 169/851] arm64: dts: rockchip: Disable CDN DP on Pinebook Pro The CDN DP needs a PHY and a extcon to work correctly. 
But no extcon is provided by the device-tree, which leads to an error: cdn-dp fec00000.dp: [drm:cdn_dp_probe [rockchipdrm]] *ERROR* missing extcon or phy cdn-dp: probe of fec00000.dp failed with error -22 Disable the CDN DP to make graphics work on the Pinebook Pro. Reported-by: Guillaume Gardet Signed-off-by: Matthias Brugger Link: https://lore.kernel.org/r/20210715164101.11486-1-matthias.bgg@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 2b5f001ff4a61..9e5d07f5712e6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -385,10 +385,6 @@ }; }; -&cdn_dp { - status = "okay"; -}; - &cpu_b0 { cpu-supply = <&vdd_cpu_b>; }; From 0a777f84fd94aba8bd7282d9616e9160ac8ba757 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Fri, 18 Jun 2021 20:12:52 +0200 Subject: [PATCH 170/851] dt-bindings: Add doc for ROCK Pi 4 A+ and B+ ROCK Pi 4 got 2 more variants called A+ and B+. Add the dt-bindings documentation for them.
Signed-off-by: Alex Bee Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210618181256.27992-2-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/arm/rockchip.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml index ce7785fe3598a..f051e3330302c 100644 --- a/Documentation/devicetree/bindings/arm/rockchip.yaml +++ b/Documentation/devicetree/bindings/arm/rockchip.yaml @@ -471,11 +471,13 @@ properties: - const: radxa,rock - const: rockchip,rk3188 - - description: Radxa ROCK Pi 4A/B/C + - description: Radxa ROCK Pi 4A/A+/B/B+/C items: - enum: - radxa,rockpi4a + - radxa,rockpi4a-plus - radxa,rockpi4b + - radxa,rockpi4b-plus - radxa,rockpi4c - const: radxa,rockpi4 - const: rockchip,rk3399 From 7ce2bc0f8295afd794264e503a4c8cabd09e8b38 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Fri, 18 Jun 2021 20:12:53 +0200 Subject: [PATCH 171/851] arm64: dts: rockchip: Add RK3399 ROCK Pi 4A+ board ROCK Pi 4A+ board is the successor of ROCK Pi 4A board. 
Differences to the original version are - has RK3399 OP1 SoC revision - has eMMC (16 or 32 GB) soldered on board (no changes required, since it is enabled in rk3399-rock-pi-4.dtsi) - dev boards have SPI flash soldered, but as per manufacturer response, this won't be the case for mass production boards Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20210618181256.27992-3-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/Makefile | 1 + .../boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts diff --git a/arch/arm64/boot/dts/rockchip/Makefile b/arch/arm64/boot/dts/rockchip/Makefile index 2890756c294c8..5e2e852c5f691 100644 --- a/arch/arm64/boot/dts/rockchip/Makefile +++ b/arch/arm64/boot/dts/rockchip/Makefile @@ -45,6 +45,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc-mezzanine.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc-plus.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4a.dtb +dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4a-plus.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4b.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4c.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock960.dtb diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts new file mode 100644 index 0000000000000..281a04b2f5e92 --- /dev/null +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2019 Akash Gajjar + * Copyright (c) 2019 Pragnesh Patel + */ + +/dts-v1/; +#include "rk3399-rock-pi-4.dtsi" +#include "rk3399-op1-opp.dtsi" + +/ { + model = "Radxa ROCK Pi 4A+"; + compatible = "radxa,rockpi4a-plus", "radxa,rockpi4", "rockchip,rk3399"; +}; From fb094a9957b4500b6988b0927ef408636bff966b Mon Sep 17 00:00:00 2001 
From: Alex Bee Date: Fri, 18 Jun 2021 20:12:54 +0200 Subject: [PATCH 172/851] arm64: dts: rockchip: Add RK3399 ROCK Pi 4B+ board ROCK Pi 4B+ board is the successor of ROCK Pi 4B board. Differences to the original version are - has RK3399 OP1 SoC revision - has eMMC (16 or 32 GB) soldered on board (no changes required, since it is enabled in rk3399-rock-pi-4.dtsi) - dev boards have SPI flash soldered, but as per manufacturer response, this won't be the case for mass production boards Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20210618181256.27992-4-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/Makefile | 1 + .../dts/rockchip/rk3399-rock-pi-4b-plus.dts | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts diff --git a/arch/arm64/boot/dts/rockchip/Makefile b/arch/arm64/boot/dts/rockchip/Makefile index 5e2e852c5f691..b1c3f32ac11a1 100644 --- a/arch/arm64/boot/dts/rockchip/Makefile +++ b/arch/arm64/boot/dts/rockchip/Makefile @@ -47,6 +47,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-roc-pc-plus.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4a.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4a-plus.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4b.dtb +dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4b-plus.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock-pi-4c.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rock960.dtb dtb-$(CONFIG_ARCH_ROCKCHIP) += rk3399-rockpro64-v2.dtb diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts new file mode 100644 index 0000000000000..dfad13d2ab249 --- /dev/null +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2019 Akash Gajjar + * Copyright (c) 2019 Pragnesh Patel + */ + +/dts-v1/; +#include "rk3399-rock-pi-4.dtsi" +#include 
"rk3399-op1-opp.dtsi" + +/ { + model = "Radxa ROCK Pi 4B+"; + compatible = "radxa,rockpi4b-plus", "radxa,rockpi4", "rockchip,rk3399"; + + aliases { + mmc2 = &sdio0; + }; +}; + +&sdio0 { + status = "okay"; + + brcmf: wifi@1 { + compatible = "brcm,bcm4329-fmac"; + reg = <1>; + interrupt-parent = <&gpio0>; + interrupts = ; + interrupt-names = "host-wake"; + pinctrl-names = "default"; + pinctrl-0 = <&wifi_host_wake_l>; + }; +}; + +&uart0 { + status = "okay"; + + bluetooth { + compatible = "brcm,bcm43438-bt"; + clocks = <&rk808 1>; + clock-names = "ext_clock"; + device-wakeup-gpios = <&gpio2 RK_PD3 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpio0 RK_PB1 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&bt_host_wake_l &bt_wake_l &bt_enable_h>; + }; +}; From 9e595375ddf8c71bdd23df2a6769518116dbc6bf Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Fri, 18 Jun 2021 20:12:55 +0200 Subject: [PATCH 173/851] arm64: dts: rockchip: add ES8316 codec for ROCK Pi 4 ROCK Pi 4 boards have the codec connected to i2s0 and it is accessible via i2c1 address 0x11. Add an audio-graph-card for it. 
Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20210618181256.27992-5-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index b28888ea9262e..b49072af4014b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -36,6 +36,12 @@ reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; }; + sound { + compatible = "audio-graph-card"; + label = "Analog"; + dais = <&i2s0_p0>; + }; + vcc12v_dcin: dc-12v { compatible = "regulator-fixed"; regulator-name = "vcc12v_dcin"; @@ -422,6 +428,20 @@ i2c-scl-rising-time-ns = <300>; i2c-scl-falling-time-ns = <15>; status = "okay"; + + es8316: codec@11 { + compatible = "everest,es8316"; + reg = <0x11>; + clocks = <&cru SCLK_I2S_8CH_OUT>; + clock-names = "mclk"; + #sound-dai-cells = <0>; + + port { + es8316_p0_0: endpoint { + remote-endpoint = <&i2s0_p0_0>; + }; + }; + }; }; &i2c3 { @@ -441,6 +461,14 @@ rockchip,capture-channels = <2>; rockchip,playback-channels = <2>; status = "okay"; + + i2s0_p0: port { + i2s0_p0_0: endpoint { + dai-format = "i2s"; + mclk-fs = <256>; + remote-endpoint = <&es8316_p0_0>; + }; + }; }; &i2s1 { From 30910d85c20d929ee1e618bfd86e0fc2f510739f Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Fri, 18 Jun 2021 20:12:56 +0200 Subject: [PATCH 174/851] arm64: dts: rockchip: add SPDIF node for ROCK Pi 4 Add a SPDIF audio-graph-card to ROCK Pi 4 device tree. It's not enabled by default since all dma channels are used by the (already) enabled i2s0/1/2 and the pin is muxed with GPIO4_C5 which might be in use already. If enabled SPDIF_TX will be available at pin #15. 
Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20210618181256.27992-6-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index b49072af4014b..98136c88fa497 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -42,6 +42,23 @@ dais = <&i2s0_p0>; }; + sound-dit { + compatible = "audio-graph-card"; + label = "SPDIF"; + dais = <&spdif_p0>; + }; + + spdif-dit { + compatible = "linux,spdif-dit"; + #sound-dai-cells = <0>; + + port { + dit_p0_0: endpoint { + remote-endpoint = <&spdif_p0_0>; + }; + }; + }; + vcc12v_dcin: dc-12v { compatible = "regulator-fixed"; regulator-name = "vcc12v_dcin"; @@ -631,6 +648,15 @@ status = "okay"; }; +&spdif { + + spdif_p0: port { + spdif_p0_0: endpoint { + remote-endpoint = <&dit_p0_0>; + }; + }; +}; + &tcphy0 { status = "okay"; }; From b46e9b299bd95499bdd08424c22aa848191c24c7 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 1 Jul 2021 20:54:01 -0700 Subject: [PATCH 175/851] fpga: altera-freeze-bridge: Address warning about unused variable warning: unused variable 'altera_freeze_br_of_match' [-Wunused-const-variable] static const struct of_device_id altera_freeze_br_of_match[] = { Fixes: ca24a648f535 ("fpga: add altera freeze bridge support") Cc: Tom Rix Reported-by: kernel test robot Signed-off-by: Moritz Fischer --- drivers/fpga/altera-freeze-bridge.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/fpga/altera-freeze-bridge.c b/drivers/fpga/altera-freeze-bridge.c index dd58c4aea92ea..7d22a44d652e2 100644 --- a/drivers/fpga/altera-freeze-bridge.c +++ b/drivers/fpga/altera-freeze-bridge.c @@ -198,11 +198,13 @@ static const struct fpga_bridge_ops altera_freeze_br_br_ops = { .enable_show = altera_freeze_br_enable_show, }; 
+#ifdef CONFIG_OF static const struct of_device_id altera_freeze_br_of_match[] = { { .compatible = "altr,freeze-bridge-controller", }, {}, }; MODULE_DEVICE_TABLE(of, altera_freeze_br_of_match); +#endif static int altera_freeze_br_probe(struct platform_device *pdev) { From 7f6296289cd163d188d3b3883d25ca1a77bdb870 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 1 Jul 2021 20:54:02 -0700 Subject: [PATCH 176/851] fpga: xiilnx-spi: Address warning about unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit warning: ‘xlnx_spi_of_match’ defined but not used [-Wunused-const-variable] static const struct of_device_id xlnx_spi_of_match[] = { Fixes: 061c97d13f1a ("fpga manager: Add Xilinx slave serial SPI driver") Cc: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/xilinx-spi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/fpga/xilinx-spi.c b/drivers/fpga/xilinx-spi.c index fee4d0abf6bfe..b6bcf1d9233d2 100644 --- a/drivers/fpga/xilinx-spi.c +++ b/drivers/fpga/xilinx-spi.c @@ -256,11 +256,13 @@ static int xilinx_spi_probe(struct spi_device *spi) return devm_fpga_mgr_register(&spi->dev, mgr); } +#ifdef CONFIG_OF static const struct of_device_id xlnx_spi_of_match[] = { { .compatible = "xlnx,fpga-slave-serial", }, {} }; MODULE_DEVICE_TABLE(of, xlnx_spi_of_match); +#endif static struct spi_driver xilinx_slave_spi_driver = { .driver = { From 310a0b2dae99417ce118b9b7e6a79f13abcd8a96 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 1 Jul 2021 20:54:03 -0700 Subject: [PATCH 177/851] fpga: xilinx-pr-decoupler: Address warning about unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit warning: ‘xlnx_pr_decoupler_of_match’ defined but not used [-Wunused-const-variable=] static const struct of_device_id xlnx_pr_decoupler_of_match[] = { Fixes: 7e961c12be42 ("fpga: Add support for Xilinx LogiCORE PR Decoupler") Cc: Tom Rix Signed-off-by: Moritz 
Fischer --- drivers/fpga/xilinx-pr-decoupler.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/fpga/xilinx-pr-decoupler.c b/drivers/fpga/xilinx-pr-decoupler.c index ea2bde6e5bc4e..e986ed47c4ed7 100644 --- a/drivers/fpga/xilinx-pr-decoupler.c +++ b/drivers/fpga/xilinx-pr-decoupler.c @@ -81,6 +81,7 @@ static const struct fpga_bridge_ops xlnx_pr_decoupler_br_ops = { .enable_show = xlnx_pr_decoupler_enable_show, }; +#ifdef CONFIG_OF static const struct xlnx_config_data decoupler_config = { .name = "Xilinx PR Decoupler", }; @@ -99,6 +100,7 @@ static const struct of_device_id xlnx_pr_decoupler_of_match[] = { {}, }; MODULE_DEVICE_TABLE(of, xlnx_pr_decoupler_of_match); +#endif static int xlnx_pr_decoupler_probe(struct platform_device *pdev) { From 6c17b7ff1d11437733fa3215b45b050f65b677f6 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 1 Jul 2021 20:54:04 -0700 Subject: [PATCH 178/851] fpga: zynqmp-fpga: Address warning about unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit warning: ‘zynqmp_fpga_of_match’ defined but not used [-Wunused-const-variable=] static const struct of_device_id zynqmp_fpga_of_match[] = { Fixes: c09f7471127e ("fpga manager: Adding FPGA Manager support for Xilinx zynqmp") Cc: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/zynqmp-fpga.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/fpga/zynqmp-fpga.c b/drivers/fpga/zynqmp-fpga.c index 125743c9797ff..b3240f75f0c70 100644 --- a/drivers/fpga/zynqmp-fpga.c +++ b/drivers/fpga/zynqmp-fpga.c @@ -110,12 +110,13 @@ static int zynqmp_fpga_probe(struct platform_device *pdev) return devm_fpga_mgr_register(dev, mgr); } +#ifdef CONFIG_OF static const struct of_device_id zynqmp_fpga_of_match[] = { { .compatible = "xlnx,zynqmp-pcap-fpga", }, {}, }; - MODULE_DEVICE_TABLE(of, zynqmp_fpga_of_match); +#endif static struct platform_driver zynqmp_fpga_driver = { .probe = zynqmp_fpga_probe, From 
a11fe89db6288ae6f596a3d299a253e781da6d1c Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 25 Jun 2021 12:51:42 -0700 Subject: [PATCH 179/851] fpga: fpga-mgr: wrap the write_init() op An FPGA manager should not be required to provide a write_init() op if there is nothing for it to do. So add a wrapper and move the op checking. Default to success. [mdf@kernel.org: Reworded first line] Signed-off-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/fpga-mgr.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index ecb4c3c795fa5..c047de8a059b7 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -25,6 +25,15 @@ struct fpga_mgr_devres { struct fpga_manager *mgr; }; +static inline int fpga_mgr_write_init(struct fpga_manager *mgr, + struct fpga_image_info *info, + const char *buf, size_t count) +{ + if (mgr->mops->write_init) + return mgr->mops->write_init(mgr, info, buf, count); + return 0; +} + /** * fpga_image_info_alloc - Allocate an FPGA image info struct * @dev: owning device @@ -83,9 +92,9 @@ static int fpga_mgr_write_init_buf(struct fpga_manager *mgr, mgr->state = FPGA_MGR_STATE_WRITE_INIT; if (!mgr->mops->initial_header_size) - ret = mgr->mops->write_init(mgr, info, NULL, 0); + ret = fpga_mgr_write_init(mgr, info, NULL, 0); else - ret = mgr->mops->write_init( + ret = fpga_mgr_write_init( mgr, info, buf, min(mgr->mops->initial_header_size, count)); if (ret) { @@ -569,7 +578,7 @@ struct fpga_manager *fpga_mgr_create(struct device *parent, const char *name, int id, ret; if (!mops || !mops->write_complete || !mops->state || - !mops->write_init || (!mops->write && !mops->write_sg) || + (!mops->write && !mops->write_sg) || (mops->write && mops->write_sg)) { dev_err(parent, "Attempt to register without fpga_manager_ops\n"); return NULL; From cdc2160df447e39a24c3b48f8dc217ebffce5b7a Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 25 Jun 2021 12:51:43 -0700 Subject:
[PATCH 180/851] fpga: fpga-mgr: make write_complete() op optional An FPGA manager should not be required to provide a write_complete function if there is nothing. Move the op check to the existing wrapper. Default to success and remove noop function. [mdf@kernel.org: Reworded message] Signed-off-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/fpga-mgr.c | 45 +++++++++++++++++++------------------- drivers/fpga/zynqmp-fpga.c | 7 ------ 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index c047de8a059b7..05a69ab3ecb9e 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -25,6 +25,28 @@ struct fpga_mgr_devres { struct fpga_manager *mgr; }; +/* + * After all the FPGA image has been written, do the device specific steps to + * finish and set the FPGA into operating mode. + */ +static inline int fpga_mgr_write_complete(struct fpga_manager *mgr, + struct fpga_image_info *info) +{ + int ret = 0; + + mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE; + if (mgr->mops->write_complete) + ret = mgr->mops->write_complete(mgr, info); + if (ret) { + dev_err(&mgr->dev, "Error after writing image data to FPGA\n"); + mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR; + return ret; + } + mgr->state = FPGA_MGR_STATE_OPERATING; + + return 0; +} + static inline int fpga_mgr_write_init(struct fpga_manager *mgr, struct fpga_image_info *info, const char *buf, size_t count) @@ -146,27 +168,6 @@ static int fpga_mgr_write_init_sg(struct fpga_manager *mgr, return ret; } -/* - * After all the FPGA image has been written, do the device specific steps to - * finish and set the FPGA into operating mode. 
- */ -static int fpga_mgr_write_complete(struct fpga_manager *mgr, - struct fpga_image_info *info) -{ - int ret; - - mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE; - ret = mgr->mops->write_complete(mgr, info); - if (ret) { - dev_err(&mgr->dev, "Error after writing image data to FPGA\n"); - mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR; - return ret; - } - mgr->state = FPGA_MGR_STATE_OPERATING; - - return 0; -} - /** * fpga_mgr_buf_load_sg - load fpga from image in buffer from a scatter list * @mgr: fpga manager @@ -577,7 +578,7 @@ struct fpga_manager *fpga_mgr_create(struct device *parent, const char *name, struct fpga_manager *mgr; int id, ret; - if (!mops || !mops->write_complete || !mops->state || + if (!mops || !mops->state || (!mops->write && !mops->write_sg) || (mops->write && mops->write_sg)) { dev_err(parent, "Attempt to register without fpga_manager_ops\n"); diff --git a/drivers/fpga/zynqmp-fpga.c b/drivers/fpga/zynqmp-fpga.c index b3240f75f0c70..7d3d5650c3220 100644 --- a/drivers/fpga/zynqmp-fpga.c +++ b/drivers/fpga/zynqmp-fpga.c @@ -66,12 +66,6 @@ static int zynqmp_fpga_ops_write(struct fpga_manager *mgr, return ret; } -static int zynqmp_fpga_ops_write_complete(struct fpga_manager *mgr, - struct fpga_image_info *info) -{ - return 0; -} - static enum fpga_mgr_states zynqmp_fpga_ops_state(struct fpga_manager *mgr) { u32 status = 0; @@ -87,7 +81,6 @@ static const struct fpga_manager_ops zynqmp_fpga_ops = { .state = zynqmp_fpga_ops_state, .write_init = zynqmp_fpga_ops_write_init, .write = zynqmp_fpga_ops_write, - .write_complete = zynqmp_fpga_ops_write_complete, }; static int zynqmp_fpga_probe(struct platform_device *pdev) From 9590d5375cd4e3dc4ce90128bed8bb9d0accdc52 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 25 Jun 2021 12:51:44 -0700 Subject: [PATCH 181/851] fpga: fpga-mgr: wrap the write() op An FPGA manager should not be required to provide a write function. Move the op check to the wrapper. Default to -EOPNOTSUPP so its users will fail gracefully.
[mdf@kernel.org: Reworded message] Signed-off-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/fpga-mgr.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index 05a69ab3ecb9e..8d5536d748081 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -25,6 +25,13 @@ struct fpga_mgr_devres { struct fpga_manager *mgr; }; +static inline int fpga_mgr_write(struct fpga_manager *mgr, const char *buf, size_t count) +{ + if (mgr->mops->write) + return mgr->mops->write(mgr, buf, count); + return -EOPNOTSUPP; +} + /* * After all the FPGA image has been written, do the device specific steps to * finish and set the FPGA into operating mode. @@ -204,7 +211,7 @@ static int fpga_mgr_buf_load_sg(struct fpga_manager *mgr, sg_miter_start(&miter, sgt->sgl, sgt->nents, SG_MITER_FROM_SG); while (sg_miter_next(&miter)) { - ret = mgr->mops->write(mgr, miter.addr, miter.length); + ret = fpga_mgr_write(mgr, miter.addr, miter.length); if (ret) break; } @@ -234,7 +241,7 @@ static int fpga_mgr_buf_load_mapped(struct fpga_manager *mgr, * Write the FPGA image to the FPGA. */ mgr->state = FPGA_MGR_STATE_WRITE; - ret = mgr->mops->write(mgr, buf, count); + ret = fpga_mgr_write(mgr, buf, count); if (ret) { dev_err(&mgr->dev, "Error while writing image data to FPGA\n"); mgr->state = FPGA_MGR_STATE_WRITE_ERR; @@ -578,9 +585,7 @@ struct fpga_manager *fpga_mgr_create(struct device *parent, const char *name, struct fpga_manager *mgr; int id, ret; - if (!mops || !mops->state || - (!mops->write && !mops->write_sg) || - (mops->write && mops->write_sg)) { + if (!mops || !mops->state) { dev_err(parent, "Attempt to register without fpga_manager_ops\n"); return NULL; } From 43a0e0528d6d7f37c4917c5cf8ee835ebad76c4d Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 25 Jun 2021 12:51:45 -0700 Subject: [PATCH 182/851] fpga: fpga-mgr: wrap the status() op An FPGA manager is not required to provide a status() op. 
Add a wrapper consistent with the other op wrappers. Move the op check to the wrapper. Default to 0, no errors to report. [mdf@kernel.org: Reworded first line] Signed-off-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/fpga-mgr.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index 8d5536d748081..43518b6eed21e 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -25,6 +25,13 @@ struct fpga_mgr_devres { struct fpga_manager *mgr; }; +static inline u64 fpga_mgr_status(struct fpga_manager *mgr) +{ + if (mgr->mops->status) + return mgr->mops->status(mgr); + return 0; +} + static inline int fpga_mgr_write(struct fpga_manager *mgr, const char *buf, size_t count) { if (mgr->mops->write) @@ -434,10 +441,7 @@ static ssize_t status_show(struct device *dev, u64 status; int len = 0; - if (!mgr->mops->status) - return -ENOENT; - - status = mgr->mops->status(mgr); + status = fpga_mgr_status(mgr); if (status & FPGA_MGR_STATUS_OPERATION_ERR) len += sprintf(buf + len, "reconfig operation error\n"); From 6814101ee1b2ad45318f5288c18904b2e22edeb1 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 25 Jun 2021 12:51:46 -0700 Subject: [PATCH 183/851] fpga: fpga-mgr: wrap the state() op An FPGA manager should not be required to provide a state() op. Add a wrapper consistent with the other op wrappers. Move op check to wrapper. Default to FPGA_MGR_STATE_UNKNOWN, what noop state() ops use. 
Remove unneeded noop state() ops [mdf@kernel.org: Reworded first line] Signed-off-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/dfl-fme-mgr.c | 6 ------ drivers/fpga/fpga-mgr.c | 11 +++++++++-- drivers/fpga/stratix10-soc.c | 6 ------ drivers/fpga/ts73xx-fpga.c | 6 ------ 4 files changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/fpga/dfl-fme-mgr.c b/drivers/fpga/dfl-fme-mgr.c index d5861d13b3069..313420405d5e8 100644 --- a/drivers/fpga/dfl-fme-mgr.c +++ b/drivers/fpga/dfl-fme-mgr.c @@ -252,11 +252,6 @@ static int fme_mgr_write_complete(struct fpga_manager *mgr, return 0; } -static enum fpga_mgr_states fme_mgr_state(struct fpga_manager *mgr) -{ - return FPGA_MGR_STATE_UNKNOWN; -} - static u64 fme_mgr_status(struct fpga_manager *mgr) { struct fme_mgr_priv *priv = mgr->priv; @@ -268,7 +263,6 @@ static const struct fpga_manager_ops fme_mgr_ops = { .write_init = fme_mgr_write_init, .write = fme_mgr_write, .write_complete = fme_mgr_write_complete, - .state = fme_mgr_state, .status = fme_mgr_status, }; diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index 43518b6eed21e..b3380ad341d22 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -25,6 +25,13 @@ struct fpga_mgr_devres { struct fpga_manager *mgr; }; +static inline enum fpga_mgr_states fpga_mgr_state(struct fpga_manager *mgr) +{ + if (mgr->mops->state) + return mgr->mops->state(mgr); + return FPGA_MGR_STATE_UNKNOWN; +} + static inline u64 fpga_mgr_status(struct fpga_manager *mgr) { if (mgr->mops->status) @@ -589,7 +596,7 @@ struct fpga_manager *fpga_mgr_create(struct device *parent, const char *name, struct fpga_manager *mgr; int id, ret; - if (!mops || !mops->state) { + if (!mops) { dev_err(parent, "Attempt to register without fpga_manager_ops\n"); return NULL; } @@ -707,7 +714,7 @@ int fpga_mgr_register(struct fpga_manager *mgr) * from device. FPGA may be in reset mode or may have been programmed * by bootloader or EEPROM. 
*/ - mgr->state = mgr->mops->state(mgr); + mgr->state = fpga_mgr_state(mgr); ret = device_add(&mgr->dev); if (ret) diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c index a2cea500f7cc6..047fd7f237069 100644 --- a/drivers/fpga/stratix10-soc.c +++ b/drivers/fpga/stratix10-soc.c @@ -388,13 +388,7 @@ static int s10_ops_write_complete(struct fpga_manager *mgr, return ret; } -static enum fpga_mgr_states s10_ops_state(struct fpga_manager *mgr) -{ - return FPGA_MGR_STATE_UNKNOWN; -} - static const struct fpga_manager_ops s10_ops = { - .state = s10_ops_state, .write_init = s10_ops_write_init, .write = s10_ops_write, .write_complete = s10_ops_write_complete, diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c index 101f016c6ed8c..167abb0b08d40 100644 --- a/drivers/fpga/ts73xx-fpga.c +++ b/drivers/fpga/ts73xx-fpga.c @@ -32,11 +32,6 @@ struct ts73xx_fpga_priv { struct device *dev; }; -static enum fpga_mgr_states ts73xx_fpga_state(struct fpga_manager *mgr) -{ - return FPGA_MGR_STATE_UNKNOWN; -} - static int ts73xx_fpga_write_init(struct fpga_manager *mgr, struct fpga_image_info *info, const char *buf, size_t count) @@ -98,7 +93,6 @@ static int ts73xx_fpga_write_complete(struct fpga_manager *mgr, } static const struct fpga_manager_ops ts73xx_fpga_ops = { - .state = ts73xx_fpga_state, .write_init = ts73xx_fpga_write_init, .write = ts73xx_fpga_write, .write_complete = ts73xx_fpga_write_complete, From 5daa06ffcebf1eb9560c060f5049889d2634afd1 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 25 Jun 2021 12:51:47 -0700 Subject: [PATCH 184/851] fpga: fpga-mgr: wrap the fpga_remove() op An FPGA manager is not required to provide a fpga_remove() op. Add a wrapper consistent with the other op wrappers. Move op check to wrapper. 
[mdf@kernel.org: Reworded first line] Signed-off-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/fpga-mgr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index b3380ad341d22..077c0f9edbe4c 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -25,6 +25,12 @@ struct fpga_mgr_devres { struct fpga_manager *mgr; }; +static inline void fpga_mgr_fpga_remove(struct fpga_manager *mgr) +{ + if (mgr->mops->fpga_remove) + mgr->mops->fpga_remove(mgr); +} + static inline enum fpga_mgr_states fpga_mgr_state(struct fpga_manager *mgr) { if (mgr->mops->state) @@ -745,8 +751,7 @@ void fpga_mgr_unregister(struct fpga_manager *mgr) * If the low level driver provides a method for putting fpga into * a desired state upon unregister, do it. */ - if (mgr->mops->fpga_remove) - mgr->mops->fpga_remove(mgr); + fpga_mgr_fpga_remove(mgr); device_unregister(&mgr->dev); } From c485d3bf3cc7790faed2b90c799a38caa2f69268 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 25 Jun 2021 12:51:48 -0700 Subject: [PATCH 185/851] fpga: fpga-mgr: wrap the write_sg() op An FPGA manager should not be required to provide a write_sg function. Move the op check to the wrapper. Default to -EOPNOTSUPP so its users will fail gracefully.
[mdf@kernel.org: Reworded first line] Signed-off-by: Tom Rix Signed-off-by: Moritz Fischer --- drivers/fpga/fpga-mgr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index 077c0f9edbe4c..aa30889e23208 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -83,6 +83,14 @@ static inline int fpga_mgr_write_init(struct fpga_manager *mgr, return 0; } +static inline int fpga_mgr_write_sg(struct fpga_manager *mgr, + struct sg_table *sgt) +{ + if (mgr->mops->write_sg) + return mgr->mops->write_sg(mgr, sgt); + return -EOPNOTSUPP; +} + /** * fpga_image_info_alloc - Allocate an FPGA image info struct * @dev: owning device @@ -225,7 +233,7 @@ static int fpga_mgr_buf_load_sg(struct fpga_manager *mgr, /* Write the FPGA image to the FPGA. */ mgr->state = FPGA_MGR_STATE_WRITE; if (mgr->mops->write_sg) { - ret = mgr->mops->write_sg(mgr, sgt); + ret = fpga_mgr_write_sg(mgr, sgt); } else { struct sg_mapping_iter miter; From 49c8d682f525ee94dff67e2cd1cb8542213f75ef Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 4 Jul 2021 16:01:37 -0700 Subject: [PATCH 186/851] m68k/coldfire: change pll var. to clk_pll DEFINE_CLK() makes the variable name be clk_xyz, so variable 'pll' should instead be 'clk_pll'. 
In file included from ../arch/m68k/coldfire/m525x.c:12: ../arch/m68k/coldfire/m525x.c:29:30: error: 'pll' undeclared here (not in a function) 29 | CLKDEV_INIT(NULL, "pll.0", &pll), | ^~~ ../include/linux/clkdev.h:30:10: note: in definition of macro 'CLKDEV_INIT' 30 | .clk = c, \ | ^ In file included from ../arch/m68k/coldfire/m525x.c:21: ../arch/m68k/include/asm/mcfclk.h:43:27: warning: 'clk_pll' defined but not used [-Wunused-variable] 43 | static struct clk clk_##clk_ref = { \ | ^~~~ ../arch/m68k/coldfire/m525x.c:25:1: note: in expansion of macro 'DEFINE_CLK' 25 | DEFINE_CLK(pll, "pll.0", MCF_CLK); | ^~~~~~~~~~ Fixes: 63aadb77669a ("m68k: coldfire: use clkdev_lookup on most coldfire") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Greg Ungerer Cc: linux-m68k@lists.linux-m68k.org Cc: uclinux-dev@uclinux.org Cc: Arnd Bergmann Signed-off-by: Greg Ungerer --- arch/m68k/coldfire/m525x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/coldfire/m525x.c b/arch/m68k/coldfire/m525x.c index 2c4d2ca2f20db..485375112e28a 100644 --- a/arch/m68k/coldfire/m525x.c +++ b/arch/m68k/coldfire/m525x.c @@ -26,7 +26,7 @@ DEFINE_CLK(pll, "pll.0", MCF_CLK); DEFINE_CLK(sys, "sys.0", MCF_BUSCLK); static struct clk_lookup m525x_clk_lookup[] = { - CLKDEV_INIT(NULL, "pll.0", &pll), + CLKDEV_INIT(NULL, "pll.0", &clk_pll), CLKDEV_INIT(NULL, "sys.0", &clk_sys), CLKDEV_INIT("mcftmr.0", NULL, &clk_sys), CLKDEV_INIT("mcftmr.1", NULL, &clk_sys), From 76ad1542ed9bd0b9c3c4630587ceb0dc8852e296 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 7 Jul 2021 09:37:29 -0700 Subject: [PATCH 187/851] m68k/nommu: prevent setting ROMKERNEL when ROM is not set When CONFIG_ROMKERNEL is set but CONFIG_ROM is not set, the linker complains: m68k-linux-ld:./arch/m68k/kernel/vmlinux.lds:5: undefined symbol `CONFIG_ROMSTART' referenced in expression # CONFIG_ROM is not set # CONFIG_RAMKERNEL is not set CONFIG_ROMKERNEL=y Since ROMSTART depends on ROM, make ROMKERNEL also 
depend on ROM. Signed-off-by: Randy Dunlap Cc: Greg Ungerer Cc: linux-m68k@lists.linux-m68k.org Cc: uclinux-dev@uclinux.org Signed-off-by: Greg Ungerer --- arch/m68k/Kconfig.machine | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index d964c1f273995..edb748598df53 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -464,6 +464,7 @@ config RAMKERNEL config ROMKERNEL bool "ROM" + depends on ROM help The kernel will be resident in FLASH/ROM when running. This is often referred to as Execute-in-Place (XIP), since the kernel From d8e32fc6b2892db31dfdd2c8fcef0b6a0a63b898 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Fri, 2 Jul 2021 11:48:38 +0200 Subject: [PATCH 188/851] m68k: stmark2: update board setup Add configuration for flexcan pads. Signed-off-by: Angelo Dureghello Signed-off-by: Greg Ungerer --- arch/m68k/coldfire/stmark2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/m68k/coldfire/stmark2.c b/arch/m68k/coldfire/stmark2.c index 8b5af9c83244e..036a6ae5f5992 100644 --- a/arch/m68k/coldfire/stmark2.c +++ b/arch/m68k/coldfire/stmark2.c @@ -111,7 +111,9 @@ static int __init init_stmark2(void) __raw_writeb(0x00, MCFGPIO_PAR_BE); __raw_writeb(0x00, MCFGPIO_PAR_FBCTL); __raw_writeb(0x00, MCFGPIO_PAR_CS); - __raw_writeb(0x00, MCFGPIO_PAR_CANI2C); + + /* CAN pads */ + __raw_writeb(0x50, MCFGPIO_PAR_CANI2C); platform_add_devices(stmark2_devices, ARRAY_SIZE(stmark2_devices)); @@ -121,4 +123,4 @@ static int __init init_stmark2(void) return 0; } -late_initcall(init_stmark2); +device_initcall(init_stmark2); From 64151620227a2fcb13dae0b99b6a1003edb38c67 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Fri, 2 Jul 2021 11:48:39 +0200 Subject: [PATCH 189/851] m68k: m5441x: add flexcan support Add flexcan support. Signed-off-by: Angelo Dureghello Made the flexcan resource inclusion conditional based on the enablement of the flexcan driver. 
This commit is no longer dependant on the presence of the updated driver in mainline. Signed-off-by: Greg Ungerer --- arch/m68k/coldfire/device.c | 44 +++++++++++++++++++++++++++++++ arch/m68k/coldfire/m5441x.c | 8 +++--- arch/m68k/include/asm/m5441xsim.h | 19 +++++++++++++ 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c index 59f7dfe50a4d0..0386252e9d043 100644 --- a/arch/m68k/coldfire/device.c +++ b/arch/m68k/coldfire/device.c @@ -581,6 +581,47 @@ static struct platform_device mcf_esdhc = { }; #endif /* MCFSDHC_BASE */ +#if IS_ENABLED(CONFIG_CAN_FLEXCAN) + +#include + +static struct flexcan_platform_data mcf5441x_flexcan_info = { + .clk_src = 1, + .clock_frequency = 120000000, +}; + +static struct resource mcf5441x_flexcan0_resource[] = { + { + .start = MCFFLEXCAN_BASE0, + .end = MCFFLEXCAN_BASE0 + MCFFLEXCAN_SIZE, + .flags = IORESOURCE_MEM, + }, + { + .start = MCF_IRQ_IFL0, + .end = MCF_IRQ_IFL0, + .flags = IORESOURCE_IRQ, + }, + { + .start = MCF_IRQ_BOFF0, + .end = MCF_IRQ_BOFF0, + .flags = IORESOURCE_IRQ, + }, + { + .start = MCF_IRQ_ERR0, + .end = MCF_IRQ_ERR0, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device mcf_flexcan0 = { + .name = "flexcan-mcf5441x", + .id = 0, + .num_resources = ARRAY_SIZE(mcf5441x_flexcan0_resource), + .resource = mcf5441x_flexcan0_resource, + .dev.platform_data = &mcf5441x_flexcan_info, +}; +#endif /* IS_ENABLED(CONFIG_CAN_FLEXCAN) */ + static struct platform_device *mcf_devices[] __initdata = { &mcf_uart, #if IS_ENABLED(CONFIG_FEC) @@ -616,6 +657,9 @@ static struct platform_device *mcf_devices[] __initdata = { #ifdef MCFSDHC_BASE &mcf_esdhc, #endif +#if IS_ENABLED(CONFIG_CAN_FLEXCAN) + &mcf_flexcan0, +#endif }; /* diff --git a/arch/m68k/coldfire/m5441x.c b/arch/m68k/coldfire/m5441x.c index ce14693d18b62..39855044090d7 100644 --- a/arch/m68k/coldfire/m5441x.c +++ b/arch/m68k/coldfire/m5441x.c @@ -19,8 +19,8 @@ #include DEFINE_CLK(0, "flexbus", 
2, MCF_CLK); -DEFINE_CLK(0, "mcfcan.0", 8, MCF_CLK); -DEFINE_CLK(0, "mcfcan.1", 9, MCF_CLK); +DEFINE_CLK(0, "flexcan.0", 8, MCF_CLK); +DEFINE_CLK(0, "flexcan.1", 9, MCF_CLK); DEFINE_CLK(0, "imx1-i2c.1", 14, MCF_CLK); DEFINE_CLK(0, "mcfdspi.1", 15, MCF_CLK); DEFINE_CLK(0, "edma", 17, MCF_CLK); @@ -142,6 +142,8 @@ static struct clk_lookup m5411x_clk_lookup[] = { static struct clk * const enable_clks[] __initconst = { /* make sure these clocks are enabled */ + &__clk_0_8, /* flexcan.0 */ + &__clk_0_9, /* flexcan.1 */ &__clk_0_15, /* dspi.1 */ &__clk_0_17, /* eDMA */ &__clk_0_18, /* intc0 */ @@ -162,8 +164,6 @@ static struct clk * const enable_clks[] __initconst = { &__clk_1_37, /* gpio */ }; static struct clk * const disable_clks[] __initconst = { - &__clk_0_8, /* can.0 */ - &__clk_0_9, /* can.1 */ &__clk_0_14, /* i2c.1 */ &__clk_0_22, /* i2c.0 */ &__clk_0_23, /* dspi.0 */ diff --git a/arch/m68k/include/asm/m5441xsim.h b/arch/m68k/include/asm/m5441xsim.h index e091e36d34648..f48cf63bd7822 100644 --- a/arch/m68k/include/asm/m5441xsim.h +++ b/arch/m68k/include/asm/m5441xsim.h @@ -73,6 +73,12 @@ #define MCFINT0_FECENTC1 55 /* on interrupt controller 1 */ +#define MCFINT1_FLEXCAN0_IFL 0 +#define MCFINT1_FLEXCAN0_BOFF 1 +#define MCFINT1_FLEXCAN0_ERR 3 +#define MCFINT1_FLEXCAN1_IFL 4 +#define MCFINT1_FLEXCAN1_BOFF 5 +#define MCFINT1_FLEXCAN1_ERR 7 #define MCFINT1_UART4 48 #define MCFINT1_UART5 49 #define MCFINT1_UART6 50 @@ -314,4 +320,17 @@ #define MCF_IRQ_SDHC (MCFINT2_VECBASE + MCFINT2_SDHC) #define MCFSDHC_CLK (MCFSDHC_BASE + 0x2c) +/* + * Flexcan module + */ +#define MCFFLEXCAN_BASE0 0xfc020000 +#define MCFFLEXCAN_BASE1 0xfc024000 +#define MCFFLEXCAN_SIZE 0x4000 +#define MCF_IRQ_IFL0 (MCFINT1_VECBASE + MCFINT1_FLEXCAN0_IFL) +#define MCF_IRQ_BOFF0 (MCFINT1_VECBASE + MCFINT1_FLEXCAN0_BOFF) +#define MCF_IRQ_ERR0 (MCFINT1_VECBASE + MCFINT1_FLEXCAN0_ERR) +#define MCF_IRQ_IFL1 (MCFINT1_VECBASE + MCFINT1_FLEXCAN1_IFL) +#define MCF_IRQ_BOFF1 (MCFINT1_VECBASE + 
MCFINT1_FLEXCAN1_BOFF) +#define MCF_IRQ_ERR1 (MCFINT1_VECBASE + MCFINT1_FLEXCAN1_ERR) + #endif /* m5441xsim_h */ From d016ed8d8ee821da772b6f6a8d63a7016a41d17c Mon Sep 17 00:00:00 2001 From: Biju Das Date: Fri, 11 Jun 2021 17:56:24 +0100 Subject: [PATCH 190/851] arm64: defconfig: Enable RIIC Enable RIIC driver support for Renesas RZ/G2L based platforms. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/20210611165624.30749-6-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index f423d08b9a71b..c7cf0d1ad34ed 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -466,6 +466,7 @@ CONFIG_I2C_SH_MOBILE=y CONFIG_I2C_TEGRA=y CONFIG_I2C_UNIPHIER_F=y CONFIG_I2C_RCAR=y +CONFIG_I2C_RIIC=y CONFIG_I2C_CROS_EC_TUNNEL=y CONFIG_SPI=y CONFIG_SPI_ARMADA_3700=y From ed9fc721133e4a5c7b08a69298b9374caf09179c Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 14 Jun 2021 18:31:48 +0200 Subject: [PATCH 191/851] dt-bindings: iio: accel: bma255: Fix interrupt type Bosch accelerometers similar to BMA255 are initially configured to emit an active-high interrupt signal. This is currently not re-configured in the bmc150-accel driver so the interrupt should most certainly be IRQ_TYPE_EDGE_RISING (or potentially IRQ_TYPE_LEVEL_HIGH). (Unless there is some kind of inverter installed on the board...) At the moment the bmc150-accel driver forcefully requests the IRQ using IRQF_TRIGGER_RISING, which means that the IRQ type is currently ignored in all existing device trees. 
Fixes: 6259551 ("iio: accel: bmc150-accel: Add DT bindings") Cc: Linus Walleij Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210614163150.7774-2-stephan@gerhold.net Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml index e830d5295b921..b37ba902e4a24 100644 --- a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml +++ b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml @@ -38,7 +38,7 @@ properties: description: | The first interrupt listed must be the one connected to the INT1 pin, the second (optional) interrupt listed must be the one connected to the - INT2 pin (if available). + INT2 pin (if available). The type should be IRQ_TYPE_EDGE_RISING. mount-matrix: description: an optional 3x3 mounting rotation matrix. @@ -63,7 +63,7 @@ examples: reg = <0x08>; vddio-supply = <&vddio>; vdd-supply = <&vdd>; - interrupts = <57 IRQ_TYPE_EDGE_FALLING>; + interrupts = <57 IRQ_TYPE_EDGE_RISING>; }; }; - | From 22400a3b733155658e0a5df04a74c7320dbd1760 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 14 Jun 2021 18:31:49 +0200 Subject: [PATCH 192/851] dt-bindings: iio: accel: bma255: Sort compatibles Similar to recent rework in the bmc150-accel driver, sort the compatible list in the DT schema so there is a consistent order. 
Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210614163150.7774-3-stephan@gerhold.net Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/accel/bosch,bma255.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml index b37ba902e4a24..f35c57b8105f1 100644 --- a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml +++ b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml @@ -16,15 +16,15 @@ description: properties: compatible: enum: - - bosch,bmc150_accel - - bosch,bmi055_accel + - bosch,bma222 + - bosch,bma222e + - bosch,bma250e - bosch,bma253 - bosch,bma254 - bosch,bma255 - - bosch,bma250e - - bosch,bma222 - - bosch,bma222e - bosch,bma280 + - bosch,bmc150_accel + - bosch,bmi055_accel reg: maxItems: 1 From 47acb28e4bbd4cf17e6f9f895b849d9a016387bc Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 14 Jun 2021 18:31:50 +0200 Subject: [PATCH 193/851] dt-bindings: iio: accel: bma255: Merge bosch,bma180 schema In Linux the bma180 and bmc150-accel driver cover fairly similar chips from Bosch (just with minor register differences). For the DT schema, this does not make any difference: They both represent I2C/SPI devices, have one or two interrupts plus a vdd/vddio-supply. This means there is no need to duplicate the schema, we can just document the compatibles for both drivers in a single DT schema. 
Suggested-by: Jonathan Cameron Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20210614163150.7774-4-stephan@gerhold.net Signed-off-by: Jonathan Cameron --- .../bindings/iio/accel/bosch,bma180.yaml | 61 ------------------- .../bindings/iio/accel/bosch,bma255.yaml | 9 +++ 2 files changed, 9 insertions(+), 61 deletions(-) delete mode 100644 Documentation/devicetree/bindings/iio/accel/bosch,bma180.yaml diff --git a/Documentation/devicetree/bindings/iio/accel/bosch,bma180.yaml b/Documentation/devicetree/bindings/iio/accel/bosch,bma180.yaml deleted file mode 100644 index a7e84089cc3d9..0000000000000 --- a/Documentation/devicetree/bindings/iio/accel/bosch,bma180.yaml +++ /dev/null @@ -1,61 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/iio/accel/bosch,bma180.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Bosch BMA023 / BMA150/ BMA180 / BMA250 / SMB380 triaxial accelerometers - -maintainers: - - Jonathan Cameron - -description: | - https://media.digikey.com/pdf/Data%20Sheets/Bosch/BMA150.pdf - http://omapworld.com/BMA180_111_1002839.pdf - http://ae-bst.resource.bosch.com/media/products/dokumente/bma250/bst-bma250-ds002-05.pdf - -properties: - compatible: - enum: - - bosch,bma023 - - bosch,bma150 - - bosch,bma180 - - bosch,bma250 - - bosch,smb380 - - reg: - maxItems: 1 - - vdd-supply: true - - vddio-supply: true - - interrupts: - minItems: 1 - maxItems: 2 - description: | - Type should be either IRQ_TYPE_LEVEL_HIGH or IRQ_TYPE_EDGE_RISING. - For the bma250 the first interrupt listed must be the one - connected to the INT1 pin, the second (optional) interrupt - listed must be the one connected to the INT2 pin. 
- -required: - - compatible - - reg - -additionalProperties: false - -examples: - - | - #include - i2c { - #address-cells = <1>; - #size-cells = <0>; - accel@40 { - compatible = "bosch,bma180"; - reg = <0x40>; - interrupt-parent = <&gpio6>; - interrupts = <18 (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING)>; - }; - }; -... diff --git a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml index f35c57b8105f1..5b35856b1942f 100644 --- a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml +++ b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml @@ -8,6 +8,7 @@ title: Bosch BMA255 and Similar Accelerometers maintainers: - Linus Walleij + - Stephan Gerhold description: 3 axis accelerometers with varying range and I2C or SPI @@ -16,6 +17,7 @@ description: properties: compatible: enum: + # bmc150-accel driver in Linux - bosch,bma222 - bosch,bma222e - bosch,bma250e @@ -26,6 +28,13 @@ properties: - bosch,bmc150_accel - bosch,bmi055_accel + # bma180 driver in Linux + - bosch,bma023 + - bosch,bma150 + - bosch,bma180 + - bosch,bma250 + - bosch,smb380 + reg: maxItems: 1 From 73380b9ad2478fd80fcb3056edebb61f37a89ec9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 8 Jun 2021 18:51:49 +0100 Subject: [PATCH 194/851] iio: st-sensors: Remove some unused includes and add some that should be there The st-sensors drivers have changed in structure over time, and includes have not always kept up with this. Let's bring them back to nearer the ideal. Identified with the include-what-you-use tool and careful checking of its suggestions. Note I haven't been particularly aggressive here, so this is just the cases where the include obviously isn't needed rather than the more subtle corners. Note I took the opportunity to add mod_devicetable.h as I generally prefer to see that when acpi or of match tables are present. 
Signed-off-by: Jonathan Cameron Cc: Linus Walleij Cc: Denis Ciocca Cc: Hans de Goede Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20210608175149.4019289-1-jic23@kernel.org --- drivers/iio/accel/st_accel_buffer.c | 7 +------ drivers/iio/accel/st_accel_core.c | 8 ++------ drivers/iio/accel/st_accel_i2c.c | 3 +-- drivers/iio/accel/st_accel_spi.c | 2 +- drivers/iio/common/st_sensors/st_sensors_buffer.c | 2 -- drivers/iio/common/st_sensors/st_sensors_core.c | 1 + drivers/iio/common/st_sensors/st_sensors_core.h | 1 + drivers/iio/common/st_sensors/st_sensors_i2c.c | 3 +-- drivers/iio/common/st_sensors/st_sensors_spi.c | 3 +-- drivers/iio/common/st_sensors/st_sensors_trigger.c | 1 - drivers/iio/gyro/st_gyro_buffer.c | 7 +------ drivers/iio/gyro/st_gyro_core.c | 9 ++------- drivers/iio/gyro/st_gyro_i2c.c | 2 +- drivers/iio/gyro/st_gyro_spi.c | 2 +- drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c | 1 + drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c | 3 ++- drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c | 3 ++- drivers/iio/magnetometer/st_magn_buffer.c | 7 +------ drivers/iio/magnetometer/st_magn_core.c | 11 +++-------- drivers/iio/magnetometer/st_magn_i2c.c | 2 +- drivers/iio/magnetometer/st_magn_spi.c | 2 +- drivers/iio/pressure/st_pressure_buffer.c | 7 +------ drivers/iio/pressure/st_pressure_core.c | 10 ++-------- drivers/iio/pressure/st_pressure_i2c.c | 3 ++- drivers/iio/pressure/st_pressure_spi.c | 2 +- 25 files changed, 31 insertions(+), 71 deletions(-) diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c index 492263589e044..f89770f251d97 100644 --- a/drivers/iio/accel/st_accel_buffer.c +++ b/drivers/iio/accel/st_accel_buffer.c @@ -9,14 +9,9 @@ #include #include -#include -#include -#include -#include -#include #include #include -#include +#include #include #include diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 28fceac9f2f6e..365e4e64ca187 100644 --- a/drivers/iio/accel/st_accel_core.c +++ 
b/drivers/iio/accel/st_accel_core.c @@ -9,17 +9,13 @@ #include #include +#include +#include #include #include -#include -#include -#include -#include -#include #include #include #include -#include #include #include "st_accel.h" diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c index 95e305b88d5ed..f711756e41e3d 100644 --- a/drivers/iio/accel/st_accel_i2c.c +++ b/drivers/iio/accel/st_accel_i2c.c @@ -9,11 +9,10 @@ #include #include -#include +#include #include #include #include -#include #include #include "st_accel.h" diff --git a/drivers/iio/accel/st_accel_spi.c b/drivers/iio/accel/st_accel_spi.c index 83d3308ce5ccc..bb45d9ff95b85 100644 --- a/drivers/iio/accel/st_accel_spi.c +++ b/drivers/iio/accel/st_accel_spi.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index 802f9ae04cf4e..dccc471e79da8 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -9,13 +9,11 @@ #include #include -#include #include #include #include #include #include -#include #include #include diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 7a69c1be73937..0bbb090b108c7 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/iio/common/st_sensors/st_sensors_core.h b/drivers/iio/common/st_sensors/st_sensors_core.h index e8894be55660f..09f3e602a2e2e 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.h +++ b/drivers/iio/common/st_sensors/st_sensors_core.h @@ -4,6 +4,7 @@ */ #ifndef __ST_SENSORS_CORE_H #define __ST_SENSORS_CORE_H +struct iio_dev; int st_sensors_write_data_with_mask(struct iio_dev *indio_dev, u8 reg_addr, u8 mask, u8 
data); #endif diff --git a/drivers/iio/common/st_sensors/st_sensors_i2c.c b/drivers/iio/common/st_sensors/st_sensors_i2c.c index b9e59ad32a02e..b3ff887008666 100644 --- a/drivers/iio/common/st_sensors/st_sensors_i2c.c +++ b/drivers/iio/common/st_sensors/st_sensors_i2c.c @@ -7,15 +7,14 @@ * Denis Ciocca */ +#include #include #include -#include #include #include #include - #define ST_SENSORS_I2C_MULTIREAD 0x80 static const struct regmap_config st_sensors_i2c_regmap_config = { diff --git a/drivers/iio/common/st_sensors/st_sensors_spi.c b/drivers/iio/common/st_sensors/st_sensors_spi.c index 48fc41dc5633f..0d1d66c77cd88 100644 --- a/drivers/iio/common/st_sensors/st_sensors_spi.c +++ b/drivers/iio/common/st_sensors/st_sensors_spi.c @@ -9,13 +9,12 @@ #include #include -#include #include #include #include +#include #include -#include "st_sensors_core.h" #define ST_SENSORS_SPI_MULTIREAD 0xc0 diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 0b511665dee5f..64e0a748a8558 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c index 4feb7ada71953..02b5562b65859 100644 --- a/drivers/iio/gyro/st_gyro_buffer.c +++ b/drivers/iio/gyro/st_gyro_buffer.c @@ -9,14 +9,9 @@ #include #include -#include -#include -#include -#include -#include #include #include -#include +#include #include #include diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index b86ee4d940d93..fe227ad400f0a 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -9,17 +9,12 @@ #include #include -#include -#include -#include +#include #include -#include -#include -#include +#include #include #include #include -#include #include #include "st_gyro.h" diff --git 
a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c index a25cc0379e163..3ef86e16ee656 100644 --- a/drivers/iio/gyro/st_gyro_i2c.c +++ b/drivers/iio/gyro/st_gyro_i2c.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/gyro/st_gyro_spi.c b/drivers/iio/gyro/st_gyro_spi.c index 18d6a2aeda45a..41d835493347c 100644 --- a/drivers/iio/gyro/st_gyro_spi.c +++ b/drivers/iio/gyro/st_gyro_spi.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c index 8204f7303fd7f..5e6625140db75 100644 --- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c +++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c index 50a36ab53bc3c..78bede3587470 100644 --- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c +++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c @@ -10,7 +10,8 @@ #include #include #include -#include +#include +#include #include diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c index 272c88990dd03..180b54e66438f 100644 --- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c +++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c @@ -9,7 +9,8 @@ #include #include -#include +#include +#include #include #include diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c index 4917721fa2e5a..68f01714304f1 100644 --- a/drivers/iio/magnetometer/st_magn_buffer.c +++ b/drivers/iio/magnetometer/st_magn_buffer.c @@ -9,14 +9,9 @@ #include #include -#include -#include -#include -#include -#include #include #include -#include +#include #include #include diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 
0048c3cd36eef..2c44a92590fce 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -9,16 +9,11 @@ #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include #include -#include +#include #include #include "st_magn.h" diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c index 3e23c117de8e1..2dfe4ee99591b 100644 --- a/drivers/iio/magnetometer/st_magn_i2c.c +++ b/drivers/iio/magnetometer/st_magn_i2c.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/magnetometer/st_magn_spi.c b/drivers/iio/magnetometer/st_magn_spi.c index 03c0a737aba6e..fba9787963952 100644 --- a/drivers/iio/magnetometer/st_magn_spi.c +++ b/drivers/iio/magnetometer/st_magn_spi.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/iio/pressure/st_pressure_buffer.c b/drivers/iio/pressure/st_pressure_buffer.c index 7cf6f06797e1e..b651e7c31e909 100644 --- a/drivers/iio/pressure/st_pressure_buffer.c +++ b/drivers/iio/pressure/st_pressure_buffer.c @@ -9,14 +9,9 @@ #include #include -#include -#include -#include -#include -#include #include #include -#include +#include #include #include diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 7912b5a683955..4ff6d40e3670b 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -9,17 +9,11 @@ #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include #include #include -#include #include #include diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c index f0a5af314ceb8..52fa98f24478d 100644 --- a/drivers/iio/pressure/st_pressure_i2c.c +++ b/drivers/iio/pressure/st_pressure_i2c.c @@ -7,9 +7,10 @@ * Denis Ciocca */ +#include #include #include 
-#include +#include #include #include diff --git a/drivers/iio/pressure/st_pressure_spi.c b/drivers/iio/pressure/st_pressure_spi.c index b48cf7d01cd74..ee393df54cee8 100644 --- a/drivers/iio/pressure/st_pressure_spi.c +++ b/drivers/iio/pressure/st_pressure_spi.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include From a0a6968c99595757b571147cac621cbbc25ba63d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 14 Jul 2021 13:24:51 +0200 Subject: [PATCH 195/851] arm64: dts: renesas: rcar-gen3: Add SoC model to comment headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the R-Car Gen3 SoC model present is documented in the comment header of each board DTS, on a single line. This makes it easier to identify boards that are available with different SoC or SiP options. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund -- To be queued in renesas-devel for v5.15. v2: - Add Reviewed-by, - Use a single line for easier grepping. 
Link: https://lore.kernel.org/r/251569665d7d4f4ed4bbab7267ce2ddccdef33e5.1626261816.git.geert+renesas@glider.be --- arch/arm64/boot/dts/renesas/r8a77950-ulcb-kf.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77950-ulcb.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77951-ulcb-kf.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77951-ulcb.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77960-ulcb-kf.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77960-ulcb.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77961-ulcb-kf.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77961-ulcb.dts | 3 +-- arch/arm64/boot/dts/renesas/r8a77965-ulcb-kf.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77965-ulcb.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77970-eagle.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77980-condor.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts | 2 +- arch/arm64/boot/dts/renesas/r8a77995-draak.dts | 2 +- arch/arm64/boot/dts/renesas/r8a779a0-falcon.dts | 2 +- 15 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a77950-ulcb-kf.dts b/arch/arm64/boot/dts/renesas/r8a77950-ulcb-kf.dts index dcaaf12cec40d..85f008ef63ded 100644 --- a/arch/arm64/boot/dts/renesas/r8a77950-ulcb-kf.dts +++ b/arch/arm64/boot/dts/renesas/r8a77950-ulcb-kf.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the H3ULCB Kingfisher board + * Device Tree Source for the H3ULCB Kingfisher board with R-Car H3 ES1.x * * Copyright (C) 2017 Renesas Electronics Corp. * Copyright (C) 2017 Cogent Embedded, Inc. 
diff --git a/arch/arm64/boot/dts/renesas/r8a77950-ulcb.dts b/arch/arm64/boot/dts/renesas/r8a77950-ulcb.dts index 38a6d6a108d48..5340579931e35 100644 --- a/arch/arm64/boot/dts/renesas/r8a77950-ulcb.dts +++ b/arch/arm64/boot/dts/renesas/r8a77950-ulcb.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the H3ULCB (R-Car Starter Kit Premier) board + * Device Tree Source for the H3ULCB (R-Car Starter Kit Premier) board with R-Car H3 ES1.x * * Copyright (C) 2016 Renesas Electronics Corp. * Copyright (C) 2016 Cogent Embedded, Inc. diff --git a/arch/arm64/boot/dts/renesas/r8a77951-ulcb-kf.dts b/arch/arm64/boot/dts/renesas/r8a77951-ulcb-kf.dts index 11f943a67703f..2e58a27aa2766 100644 --- a/arch/arm64/boot/dts/renesas/r8a77951-ulcb-kf.dts +++ b/arch/arm64/boot/dts/renesas/r8a77951-ulcb-kf.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the H3ULCB Kingfisher board + * Device Tree Source for the H3ULCB Kingfisher board with R-Car H3 ES2.0+ * * Copyright (C) 2017 Renesas Electronics Corp. * Copyright (C) 2017 Cogent Embedded, Inc. diff --git a/arch/arm64/boot/dts/renesas/r8a77951-ulcb.dts b/arch/arm64/boot/dts/renesas/r8a77951-ulcb.dts index 8ad8f2a539771..06d4e948eb0f1 100644 --- a/arch/arm64/boot/dts/renesas/r8a77951-ulcb.dts +++ b/arch/arm64/boot/dts/renesas/r8a77951-ulcb.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the H3ULCB (R-Car Starter Kit Premier) board + * Device Tree Source for the H3ULCB (R-Car Starter Kit Premier) board with R-Car H3 ES2.0+ * * Copyright (C) 2016 Renesas Electronics Corp. * Copyright (C) 2016 Cogent Embedded, Inc. 
diff --git a/arch/arm64/boot/dts/renesas/r8a77960-ulcb-kf.dts b/arch/arm64/boot/dts/renesas/r8a77960-ulcb-kf.dts index 2151c37d77a6c..02d61360692c0 100644 --- a/arch/arm64/boot/dts/renesas/r8a77960-ulcb-kf.dts +++ b/arch/arm64/boot/dts/renesas/r8a77960-ulcb-kf.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the M3ULCB Kingfisher board + * Device Tree Source for the M3ULCB Kingfisher board with R-Car M3-W * * Copyright (C) 2017 Renesas Electronics Corp. * Copyright (C) 2017 Cogent Embedded, Inc. diff --git a/arch/arm64/boot/dts/renesas/r8a77960-ulcb.dts b/arch/arm64/boot/dts/renesas/r8a77960-ulcb.dts index d041042a56192..4bfeb1df0488d 100644 --- a/arch/arm64/boot/dts/renesas/r8a77960-ulcb.dts +++ b/arch/arm64/boot/dts/renesas/r8a77960-ulcb.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the M3ULCB (R-Car Starter Kit Pro) board + * Device Tree Source for the M3ULCB (R-Car Starter Kit Pro) board with R-Car M3-W * * Copyright (C) 2016 Renesas Electronics Corp. * Copyright (C) 2016 Cogent Embedded, Inc. 
diff --git a/arch/arm64/boot/dts/renesas/r8a77961-ulcb-kf.dts b/arch/arm64/boot/dts/renesas/r8a77961-ulcb-kf.dts index 6ec958348eb02..d66eb27ee8c48 100644 --- a/arch/arm64/boot/dts/renesas/r8a77961-ulcb-kf.dts +++ b/arch/arm64/boot/dts/renesas/r8a77961-ulcb-kf.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the M3ULCB Kingfisher board + * Device Tree Source for the M3ULCB Kingfisher board with R-Car M3-W+ * * Copyright (C) 2020 Eugeniu Rosca */ diff --git a/arch/arm64/boot/dts/renesas/r8a77961-ulcb.dts b/arch/arm64/boot/dts/renesas/r8a77961-ulcb.dts index 294a055f117ea..70cf926667a6e 100644 --- a/arch/arm64/boot/dts/renesas/r8a77961-ulcb.dts +++ b/arch/arm64/boot/dts/renesas/r8a77961-ulcb.dts @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the M3ULCB (R-Car Starter Kit Pro) board with R-Car - * M3-W+ + * Device Tree Source for the M3ULCB (R-Car Starter Kit Pro) board with R-Car M3-W+ * * Copyright (C) 2020 Renesas Electronics Corp. */ diff --git a/arch/arm64/boot/dts/renesas/r8a77965-ulcb-kf.dts b/arch/arm64/boot/dts/renesas/r8a77965-ulcb-kf.dts index 12aa08fd6fd87..a601968c5727e 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965-ulcb-kf.dts +++ b/arch/arm64/boot/dts/renesas/r8a77965-ulcb-kf.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the M3NULCB Kingfisher board + * Device Tree Source for the M3NULCB Kingfisher board with R-Car M3-N * * Copyright (C) 2018 Renesas Electronics Corp. * Copyright (C) 2018 Cogent Embedded, Inc. 
diff --git a/arch/arm64/boot/dts/renesas/r8a77965-ulcb.dts b/arch/arm64/boot/dts/renesas/r8a77965-ulcb.dts index 964078b6cc49e..71704b67a20e1 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965-ulcb.dts +++ b/arch/arm64/boot/dts/renesas/r8a77965-ulcb.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the M3NULCB (R-Car Starter Kit Pro) board + * Device Tree Source for the M3NULCB (R-Car Starter Kit Pro) board with R-Car M3-N * * Copyright (C) 2018 Renesas Electronics Corp. * Copyright (C) 2018 Cogent Embedded, Inc. diff --git a/arch/arm64/boot/dts/renesas/r8a77970-eagle.dts b/arch/arm64/boot/dts/renesas/r8a77970-eagle.dts index 5c84681703eda..d24da54f312b9 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970-eagle.dts +++ b/arch/arm64/boot/dts/renesas/r8a77970-eagle.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the Eagle board + * Device Tree Source for the Eagle board with R-Car V3M * * Copyright (C) 2016-2017 Renesas Electronics Corp. * Copyright (C) 2017 Cogent Embedded, Inc. diff --git a/arch/arm64/boot/dts/renesas/r8a77980-condor.dts b/arch/arm64/boot/dts/renesas/r8a77980-condor.dts index 7bde0a549c098..edf7f2a2f9587 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980-condor.dts +++ b/arch/arm64/boot/dts/renesas/r8a77980-condor.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the Condor board + * Device Tree Source for the Condor board with R-Car V3H * * Copyright (C) 2018 Renesas Electronics Corp. * Copyright (C) 2018 Cogent Embedded, Inc. 
diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index df647de9015b0..9c7146084ea1c 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the ebisu board + * Device Tree Source for the Ebisu board with R-Car E3 * * Copyright (C) 2018 Renesas Electronics Corp. */ diff --git a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts index 192a7806f16ba..f0f585a404619 100644 --- a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts +++ b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the Draak board + * Device Tree Source for the Draak board with R-Car D3 * * Copyright (C) 2016-2018 Renesas Electronics Corp. * Copyright (C) 2017 Glider bvba diff --git a/arch/arm64/boot/dts/renesas/r8a779a0-falcon.dts b/arch/arm64/boot/dts/renesas/r8a779a0-falcon.dts index 687f019e79f05..dc671ff57ec76 100644 --- a/arch/arm64/boot/dts/renesas/r8a779a0-falcon.dts +++ b/arch/arm64/boot/dts/renesas/r8a779a0-falcon.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Device Tree Source for the Falcon CPU and BreakOut boards + * Device Tree Source for the Falcon CPU and BreakOut boards with R-Car V3U * * Copyright (C) 2020 Renesas Electronics Corp. 
*/ From a0b22464ce9352c4a3e100f10f976c85b0ae6690 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 12 Jul 2021 14:04:56 +0200 Subject: [PATCH 196/851] m68k: defconfig: Update defconfigs for v5.14-rc1: - Enable modular build of the new Netfilter base hook dump support, - Drop CONFIG_SCSI=y (selected by ATA, as enabled since commit b90257bfddbd01f3 ("m68k: use libata instead of the legacy ide driver")), - Disable CIFS_STATS2 (defaults to enabled since commit 0d52df81e07739db ("cifs: enable extended stats by default")), - Enable modular build of the glob self-test (visible since commit b90257bfddbd01f3 ("m68k: use libata instead of the legacy ide driver")), - Drop CONFIG_TEST_LIST_SORT=m (auto-enabled since commit ebd09577be6c15ee ("lib/test: convert lib/test_list_sort.c to use KUnit")), - Enable modular build of the new test for the scanf() family of functions. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210712120456.4119260-1-geert@linux-m68k.org --- arch/m68k/configs/amiga_defconfig | 6 ++++-- arch/m68k/configs/apollo_defconfig | 4 +++- arch/m68k/configs/atari_defconfig | 6 ++++-- arch/m68k/configs/bvme6000_defconfig | 4 +++- arch/m68k/configs/hp300_defconfig | 4 +++- arch/m68k/configs/mac_defconfig | 6 ++++-- arch/m68k/configs/multi_defconfig | 6 ++++-- arch/m68k/configs/mvme147_defconfig | 4 +++- arch/m68k/configs/mvme16x_defconfig | 4 +++- arch/m68k/configs/q40_defconfig | 6 ++++-- arch/m68k/configs/sun3_defconfig | 4 +++- arch/m68k/configs/sun3x_defconfig | 4 +++- 12 files changed, 41 insertions(+), 17 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 0a2cacf7be082..5f536286f5fce 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -84,6 +84,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set 
@@ -323,7 +324,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -502,6 +502,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -616,6 +617,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -624,7 +626,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -636,6 +637,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 4dc6dcfaf28ab..d9568644051ad 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -80,6 +80,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -458,6 +459,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -580,7 +582,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -592,6 +593,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git 
a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 23d910a692ab7..dbf1960c66697 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -87,6 +87,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -324,7 +325,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -480,6 +480,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -594,6 +595,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -602,7 +604,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -614,6 +615,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 2c3f428338469..7620db3e33e7f 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -77,6 +77,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -451,6 +452,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m 
CONFIG_NLS_CODEPAGE_437=y @@ -573,7 +575,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -585,6 +586,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 5b1898d4b249a..113a02d47ebbf 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -79,6 +79,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -460,6 +461,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -582,7 +584,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -594,6 +595,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 9606ccd8dafa8..a8e006e8da668 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -78,6 +78,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -315,7 +316,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m 
CONFIG_BLK_DEV_SR=y @@ -482,6 +482,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -596,6 +597,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -604,7 +606,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -616,6 +617,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 3175ba5007e1f..b6655907a1f3c 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -98,6 +98,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -344,7 +345,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -567,6 +567,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -681,6 +682,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -689,7 +691,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m 
CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -701,6 +702,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 793085f00c99f..563ba47db8c68 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -76,6 +76,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -450,6 +451,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -572,7 +574,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -584,6 +585,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 56fbac7943b2e..9f1b44de4706e 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -77,6 +77,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -451,6 +452,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -573,7 +575,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m 
CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -585,6 +586,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 0e15431b65e2a..1993433d08406 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -78,6 +78,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -314,7 +315,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_DUMMY_IRQ=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -469,6 +469,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -583,6 +584,7 @@ CONFIG_PRIME_NUMBERS=m CONFIG_CRC32_SELFTEST=m CONFIG_CRC64=m CONFIG_XZ_DEC_TEST=m +CONFIG_GLOB_SELFTEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y @@ -591,7 +593,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -603,6 +604,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 3490a05f29b82..56dbc63cef5bc 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -74,6 +74,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m 
CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -453,6 +454,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -574,7 +576,6 @@ CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -586,6 +587,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 4e92c8c332fc5..6bd1bba81ac32 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -74,6 +74,7 @@ CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_NETLINK_HOOK=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_ZONES=y # CONFIG_NF_CONNTRACK_PROCFS is not set @@ -452,6 +453,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_CIFS=m +# CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set CONFIG_CODA_FS=m CONFIG_NLS_CODEPAGE_437=y @@ -574,7 +576,6 @@ CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_KUNIT=m CONFIG_KUNIT_ALL_TESTS=m -CONFIG_TEST_LIST_SORT=m CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_TEST_DIV64=m @@ -586,6 +587,7 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m +CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m From a3f4ec99797c4d13a849501717dfe9d460f9d904 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Jun 2021 16:34:20 -0400 Subject: [PATCH 197/851] NFSD: Clean up splice actor A few useful observations: - The value in @size is never modified. - splice_desc.len is an unsigned int, and so is xdr_buf.page_len. 
An implicit cast to size_t is unnecessary. - The computation of .page_len is the same in all three arms of the "if" statement, so hoist it out to make it clear that the operation is an unconditional invariant. The resulting function is 18 bytes shorter on my system (-Os). Signed-off-by: Chuck Lever Reviewed-by: NeilBrown --- fs/nfsd/vfs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a224a5e23cc11..46a6d9fce3d26 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -847,26 +847,21 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct svc_rqst *rqstp = sd->u.data; struct page **pp = rqstp->rq_next_page; struct page *page = buf->page; - size_t size; - - size = sd->len; if (rqstp->rq_res.page_len == 0) { get_page(page); put_page(*rqstp->rq_next_page); *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_base = buf->offset; - rqstp->rq_res.page_len = size; } else if (page != pp[-1]) { get_page(page); if (*rqstp->rq_next_page) put_page(*rqstp->rq_next_page); *(rqstp->rq_next_page++) = page; - rqstp->rq_res.page_len += size; - } else - rqstp->rq_res.page_len += size; + } + rqstp->rq_res.page_len += sd->len; - return size; + return sd->len; } static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, From d2f1cb4e7b28def3c064f24aca96c702dbb6c888 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Jul 2021 10:03:10 -0400 Subject: [PATCH 198/851] SUNRPC: Add svc_rqst_replace_page() API Replacing a page in rq_pages[] requires a get_page(), which is a bus-locked operation, and a put_page(), which can be even more costly. To reduce the cost of replacing a page in rq_pages[], batch the put_page() operations by collecting "freed" pages in a pagevec, and then release those pages when the pagevec is full. This pagevec is also emptied when each RPC completes. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 4 ++++ net/sunrpc/svc.c | 21 +++++++++++++++++++++ net/sunrpc/svc_xprt.c | 3 +++ 3 files changed, 28 insertions(+) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e91d51ea028bb..ab9afbf0a0d8b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -19,6 +19,7 @@ #include #include #include +#include /* statistics for svc_pool structures */ struct svc_pool_stats { @@ -256,6 +257,7 @@ struct svc_rqst { struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_page_end; /* one past the last page */ + struct pagevec rq_pvec; struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; @@ -502,6 +504,8 @@ struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); +void svc_rqst_replace_page(struct svc_rqst *rqstp, + struct page *page); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); unsigned int svc_pool_map_get(void); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 0de918cb3d90d..d2d412d438277 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -838,6 +838,27 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser } EXPORT_SYMBOL_GPL(svc_set_num_threads_sync); +/** + * svc_rqst_replace_page - Replace one page in rq_pages[] + * @rqstp: svc_rqst with pages to replace + * @page: replacement page + * + * When replacing a page in rq_pages, batch the release of the + * replaced pages to avoid hammering the page allocator. 
+ */ +void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) +{ + if (*rqstp->rq_next_page) { + if (!pagevec_space(&rqstp->rq_pvec)) + __pagevec_release(&rqstp->rq_pvec); + pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page); + } + + get_page(page); + *(rqstp->rq_next_page++) = page; +} +EXPORT_SYMBOL_GPL(svc_rqst_replace_page); + /* * Called from a server thread as it's exiting. Caller must hold the "service * mutex" for the service. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d66a8e44a1aeb..682058a5ec13d 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -539,6 +539,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp) kfree(rqstp->rq_deferred); rqstp->rq_deferred = NULL; + pagevec_release(&rqstp->rq_pvec); svc_free_res_pages(rqstp); rqstp->rq_res.page_len = 0; rqstp->rq_res.page_base = 0; @@ -664,6 +665,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) struct xdr_buf *arg = &rqstp->rq_arg; unsigned long pages, filled; + pagevec_init(&rqstp->rq_pvec); + pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT; if (pages > RPCSVC_MAXPAGES) { pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n", From 54bc78aa02dd78591b71cf7bd540490a017c44dd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Jun 2021 17:24:27 -0400 Subject: [PATCH 199/851] NFSD: Batch release pages during splice read Large splice reads call put_page() repeatedly. put_page() is relatively expensive to call, so replace it with the new svc_rqst_replace_page() helper to help amortize that cost. 
Signed-off-by: Chuck Lever Reviewed-by: NeilBrown --- fs/nfsd/vfs.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 46a6d9fce3d26..7732a384f9496 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -849,15 +849,10 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct page *page = buf->page; if (rqstp->rq_res.page_len == 0) { - get_page(page); - put_page(*rqstp->rq_next_page); - *(rqstp->rq_next_page++) = page; + svc_rqst_replace_page(rqstp, page); rqstp->rq_res.page_base = buf->offset; } else if (page != pp[-1]) { - get_page(page); - if (*rqstp->rq_next_page) - put_page(*rqstp->rq_next_page); - *(rqstp->rq_next_page++) = page; + svc_rqst_replace_page(rqstp, page); } rqstp->rq_res.page_len += sd->len; From 0fa130071d9a299edfd5c41e8240d0b2e8f659e0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 16 Jul 2021 20:55:10 -0400 Subject: [PATCH 200/851] tracing: Add trace_event helper macros __string_len() and __assign_str_len() There's a few cases that a string that is to be recorded in a trace event, does not have a terminating 'nul' character, and instead, the tracepoint passes in the length of the string to record. Add two helper macros to the trace event code that lets this work easier, than tricks with "%.*s" logic. __string_len() which is similar to __string() for declaration, but takes a length argument. __assign_str_len() which is similar to __assign_str() for assigning the string, but it too takes a length argument. Note, the TRACE_EVENT() macro will allocate the location on the ring buffer to 'len + 1', that will be used to store the string into. It is a requirement that the 'len' used for this is at most the length of the string being recorded. This string can still use __get_str() just like strings created with __string() can use to retrieve the string.
Link: https://lore.kernel.org/linux-nfs/20210513105018.7539996a@gandalf.local.home/ Tested-by: Chuck Lever Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Chuck Lever --- include/trace/trace_events.h | 22 ++++++++++++++++++ samples/trace_events/trace-events-sample.h | 27 ++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index acc17194c1602..08810a4638805 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -102,6 +102,9 @@ TRACE_MAKE_SYSTEM_STR(); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) @@ -197,6 +200,9 @@ TRACE_MAKE_SYSTEM_STR(); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) @@ -459,6 +465,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) @@ -507,6 +516,9 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #define __string(item, src) __dynamic_array(char, item, \ strlen((src) ? (const char *)(src) : "(null)") + 1) +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) + /* * __bitmask_size_in_bytes_raw is the number of bytes needed to hold * num_possible_cpus(). 
@@ -670,10 +682,20 @@ static inline notrace int trace_event_get_offsets_##call( \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) +#undef __string_len +#define __string_len(item, src, len) __dynamic_array(char, item, -1) + #undef __assign_str #define __assign_str(dst, src) \ strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); +#undef __assign_str_len +#define __assign_str_len(dst, src, len) \ + do { \ + memcpy(__get_str(dst), (src), (len)); \ + __get_str(dst)[len] = '\0'; \ + } while(0) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 13a35f7cbe661..e61471ab7d141 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -141,6 +141,33 @@ * In most cases, the __assign_str() macro will take the same * parameters as the __string() macro had to declare the string. * + * __string_len: This is a helper to a __dynamic_array, but it understands + * that the array has characters in it, and with the combined + * use of __assign_str_len(), it will allocate 'len' + 1 bytes + * in the ring buffer and add a '\0' to the string. This is + * useful if the string being saved has no terminating '\0' byte. + * It requires that the length of the string is known as it acts + * like a memcpy(). + * + * Declared with: + * + * __string_len(foo, bar, len) + * + * To assign this string, use the helper macro __assign_str_len(). + * + * __assign_str_len(foo, bar, len); + * + * Then len + 1 is allocated to the ring buffer, and a nul terminating + * byte is added.
This is similar to: + * + * memcpy(__get_str(foo), bar, len); + * __get_str(foo)[len] = 0; + * + * The advantage of using this over __dynamic_array, is that it + * takes care of allocating the extra byte on the ring buffer + * for the '\0' terminating byte, and __get_str(foo) can be used + * in the TP_printk(). + * * __bitmask: This is another kind of __dynamic_array, but it expects * an array of longs, and the number of bits to parse. It takes * two parameters (name, nr_bits), where name is the name of the From 9274c259cecd89dc439d175b23444dba6bae36c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 12 May 2021 09:39:06 -0400 Subject: [PATCH 201/851] NFSD: Use new __string_len C macros for the nfs_dirent tracepoint Clean up. Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index adaec43548d11..52a43acd546c9 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -400,18 +400,16 @@ TRACE_EVENT(nfsd_dirent, TP_STRUCT__entry( __field(u32, fh_hash) __field(u64, ino) - __field(int, len) - __dynamic_array(unsigned char, name, namlen) + __string_len(name, name, namlen) ), TP_fast_assign( __entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0; __entry->ino = ino; - __entry->len = namlen; - memcpy(__get_str(name), name, namlen); + __assign_str_len(name, name, namlen) ), - TP_printk("fh_hash=0x%08x ino=%llu name=%.*s", - __entry->fh_hash, __entry->ino, - __entry->len, __get_str(name)) + TP_printk("fh_hash=0x%08x ino=%llu name=%s", + __entry->fh_hash, __entry->ino, __get_str(name) + ) ) #include "state.h" From 9b5b7a72fdc4da142b7487eea4326a5bcebed181 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 May 2021 15:34:57 -0400 Subject: [PATCH 202/851] NFSD: Use new __string_len C macros for nfsd_clid_class Clean up. 
Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 52a43acd546c9..538520957a815 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -606,7 +606,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class, __array(unsigned char, addr, sizeof(struct sockaddr_in6)) __field(unsigned long, flavor) __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) - __dynamic_array(char, name, clp->cl_name.len + 1) + __string_len(name, name, clp->cl_name.len) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; @@ -616,8 +616,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class, __entry->flavor = clp->cl_cred.cr_flavor; memcpy(__entry->verifier, (void *)&clp->cl_verifier, NFS4_VERIFIER_SIZE); - memcpy(__get_str(name), clp->cl_name.data, clp->cl_name.len); - __get_str(name)[clp->cl_name.len] = '\0'; + __assign_str_len(name, clp->cl_name.data, clp->cl_name.len); ), TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x", __entry->addr, __get_str(name), From d550173b00711d5db77815db929d0c259e09d73b Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 7 Jul 2021 00:07:01 +0100 Subject: [PATCH 203/851] arm64: dts: qcom: sm8250: fix usb2 qmp phy node Use 'lanes' as SuperSpeed lanes device node instead of just 'lane' to fix issues with TypeC support. 
Signed-off-by: Dmitry Baryshkov Fixes: be0624b99042 ("arm64: dts: qcom: sm8250: Add USB and PHY device nodes") Cc: robh+dt@kernel.org Cc: devicetree@vger.kernel.org Tested-by: Bryan O'Donoghue Signed-off-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20210706230702.299047-2-bryan.odonoghue@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8250.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 4798368b02efb..9a6eff1813a68 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -2210,7 +2210,7 @@ <&gcc GCC_USB3_PHY_SEC_BCR>; reset-names = "phy", "common"; - usb_2_ssphy: lane@88eb200 { + usb_2_ssphy: lanes@88eb200 { reg = <0 0x088eb200 0 0x200>, <0 0x088eb400 0 0x200>, <0 0x088eb800 0 0x800>; From e4548dae8fa59ab741f7d99b718187fce86cfb0d Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Tue, 6 Jul 2021 19:08:14 +0530 Subject: [PATCH 204/851] arm64: dts: qcom: sm8150: Add UFS ICE capability Add support for UFS ICE (Qualcomm Inline Crypto Engine) in sm8150 SoC dts. I tested this on SA8155p-adp board, which is a publicly available development board that uses the sa8155p Qualcomm Snapdragon SoC. SA8155p platform is similar to the SM8150, so use this as base for now. I tested the UFS ICE feature using 'fscrypt' test utility. 
Cc: Bjorn Andersson Cc: Eric Biggers Signed-off-by: Bhupesh Sharma Link: https://lore.kernel.org/r/20210706133814.621536-1-bhupesh.sharma@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 9656704898306..1c84d78d0a19b 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -1331,7 +1331,9 @@ ufs_mem_hc: ufshc@1d84000 { compatible = "qcom,sm8150-ufshc", "qcom,ufshc", "jedec,ufs-2.0"; - reg = <0 0x01d84000 0 0x2500>; + reg = <0 0x01d84000 0 0x2500>, + <0 0x01d90000 0 0x8000>; + reg-names = "std", "ice"; interrupts = ; phys = <&ufs_mem_phy_lanes>; phy-names = "ufsphy"; @@ -1350,7 +1352,8 @@ "ref_clk", "tx_lane0_sync_clk", "rx_lane0_sync_clk", - "rx_lane1_sync_clk"; + "rx_lane1_sync_clk", + "ice_core_clk"; clocks = <&gcc GCC_UFS_PHY_AXI_CLK>, <&gcc GCC_AGGRE_UFS_PHY_AXI_CLK>, @@ -1359,7 +1362,8 @@ <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_UFS_PHY_TX_SYMBOL_0_CLK>, <&gcc GCC_UFS_PHY_RX_SYMBOL_0_CLK>, - <&gcc GCC_UFS_PHY_RX_SYMBOL_1_CLK>; + <&gcc GCC_UFS_PHY_RX_SYMBOL_1_CLK>, + <&gcc GCC_UFS_PHY_ICE_CORE_CLK>; freq-table-hz = <37500000 300000000>, <0 0>, @@ -1368,7 +1372,8 @@ <0 0>, <0 0>, <0 0>, - <0 0>; + <0 0>, + <0 300000000>; status = "disabled"; }; From 9d7c31835c460dc5f5681d7519187f3cdb44d5c9 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Sun, 27 Jun 2021 17:16:14 +0530 Subject: [PATCH 205/851] arm64: dts: qcom: Use correct naming for dwc3 usb nodes in dts files The dwc3 usb nodes in several arm64 qcom dts are currently named differently, somewhere as 'usb@' and somewhere as 'dwc3@', leading to some confusion when one sees the entries in sysfs or dmesg: [ 1.943482] dwc3 a600000.usb: Adding to iommu group 1 [ 2.266127] dwc3 a800000.dwc3: Adding to iommu group 2 Name the usb nodes as 'usb@' for consistency, which is the correct convention 
as per the 'snps,dwc3' dt-binding as well (see [1]). [1]. Documentation/devicetree/bindings/usb/snps,dwc3.yaml Cc: Bjorn Andersson Signed-off-by: Bhupesh Sharma Link: https://lore.kernel.org/r/20210627114616.717101-2-bhupesh.sharma@linaro.org [bjorn: Extended to also fix ipq6018] Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/ipq6018.dtsi | 2 +- arch/arm64/boot/dts/qcom/msm8994.dtsi | 2 +- arch/arm64/boot/dts/qcom/sm8150.dtsi | 2 +- arch/arm64/boot/dts/qcom/sm8250.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sm8350.dtsi | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index b61ed01945233..01ca4b8b9d2be 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -557,7 +557,7 @@ resets = <&gcc GCC_USB1_BCR>; status = "disabled"; - dwc_1: dwc3@7000000 { + dwc_1: usb@7000000 { compatible = "snps,dwc3"; reg = <0x0 0x7000000 0x0 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index f9f0b5aa6a266..662f2f246b9b5 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -430,7 +430,7 @@ power-domains = <&gcc USB30_GDSC>; qcom,select-utmi-as-pipe-clk; - dwc3@f9200000 { + usb@f9200000 { compatible = "snps,dwc3"; reg = <0xf9200000 0xcc00>; interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 1c84d78d0a19b..62b88183174fb 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -2709,7 +2709,7 @@ resets = <&gcc GCC_USB30_SEC_BCR>; - usb_2_dwc3: dwc3@a800000 { + usb_2_dwc3: usb@a800000 { compatible = "snps,dwc3"; reg = <0 0x0a800000 0 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 9a6eff1813a68..d4432e357fcf2 100644 --- 
a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -2321,7 +2321,7 @@ resets = <&gcc GCC_USB30_PRIM_BCR>; - usb_1_dwc3: dwc3@a600000 { + usb_1_dwc3: usb@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xcd00>; interrupts = ; @@ -2372,7 +2372,7 @@ resets = <&gcc GCC_USB30_SEC_BCR>; - usb_2_dwc3: dwc3@a800000 { + usb_2_dwc3: usb@a800000 { compatible = "snps,dwc3"; reg = <0 0x0a800000 0 0xcd00>; interrupts = ; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 0d16392bb9767..a631d58166b1c 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -1273,7 +1273,7 @@ resets = <&gcc GCC_USB30_PRIM_BCR>; - usb_1_dwc3: dwc3@a600000 { + usb_1_dwc3: usb@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xcd00>; interrupts = ; @@ -1317,7 +1317,7 @@ resets = <&gcc GCC_USB30_SEC_BCR>; - usb_2_dwc3: dwc3@a800000 { + usb_2_dwc3: usb@a800000 { compatible = "snps,dwc3"; reg = <0 0x0a800000 0 0xcd00>; interrupts = ; From 27a9a8d6cdcc8b95f4df47282adf423362af90c3 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Sun, 27 Jun 2021 17:16:15 +0530 Subject: [PATCH 206/851] arm64: dts: qcom: sm8150: Sort dc_noc and gem_noc nodes Nodes should be sorted by address, so move the dc_noc and gem_noc nodes to their correct place. 
Cc: Bjorn Andersson Signed-off-by: Bhupesh Sharma Link: https://lore.kernel.org/r/20210627114616.717101-3-bhupesh.sharma@linaro.org [bjorn: Adjusted order slightly more] Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 62b88183174fb..6f4cc9dbc80b7 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -2586,20 +2586,6 @@ }; }; - dc_noc: interconnect@9160000 { - compatible = "qcom,sm8150-dc-noc"; - reg = <0 0x09160000 0 0x3200>; - #interconnect-cells = <1>; - qcom,bcm-voters = <&apps_bcm_voter>; - }; - - gem_noc: interconnect@9680000 { - compatible = "qcom,sm8150-gem-noc"; - reg = <0 0x09680000 0 0x3e200>; - #interconnect-cells = <1>; - qcom,bcm-voters = <&apps_bcm_voter>; - }; - usb_2_qmpphy: phy@88eb000 { compatible = "qcom,sm8150-qmp-usb3-uni-phy"; reg = <0 0x088eb000 0 0x200>; @@ -2631,6 +2617,20 @@ }; }; + dc_noc: interconnect@9160000 { + compatible = "qcom,sm8150-dc-noc"; + reg = <0 0x09160000 0 0x3200>; + #interconnect-cells = <1>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + + gem_noc: interconnect@9680000 { + compatible = "qcom,sm8150-gem-noc"; + reg = <0 0x09680000 0 0x3e200>; + #interconnect-cells = <1>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + usb_1: usb@a6f8800 { compatible = "qcom,sm8150-dwc3", "qcom,dwc3"; reg = <0 0x0a6f8800 0 0x400>; From 4372c82dec4abea605d311df411a2788f33fdf8e Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Sun, 27 Jun 2021 17:16:16 +0530 Subject: [PATCH 207/851] arm64: dts: qcom: Fix usb entries for SA8155p adp board SA8155p adp board has two USB A-type receptacles called USB-portB and USB-portC respectively. While USB-portB is a USB High-Speed connector/interface, the USB-portC one is a USB 3.1 Super-Speed connector/interface. 
Also the USB-portB is used as the USB emergency download port (for image download purposes). Enable both the ports on the board in USB Host mode (since all the USB interfaces are brought out to USB Type A connectors). Cc: Bjorn Andersson Signed-off-by: Bhupesh Sharma Link: https://lore.kernel.org/r/20210627114616.717101-4-bhupesh.sharma@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sa8155p-adp.dts | 60 ++++++++++++++++++++---- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts index 0da7a3b8d1bf3..5ae2ddc65f7e4 100644 --- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts @@ -307,10 +307,6 @@ status = "okay"; }; -&tlmm { - gpio-reserved-ranges = <0 4>; -}; - &uart2 { status = "okay"; }; @@ -337,6 +333,16 @@ vdda-pll-max-microamp = <18300>; }; +&usb_1 { + status = "okay"; +}; + +&usb_1_dwc3 { + dr_mode = "host"; + + pinctrl-names = "default"; + pinctrl-0 = <&usb2phy_ac_en1_default>; +}; &usb_1_hsphy { status = "okay"; @@ -346,15 +352,51 @@ }; &usb_1_qmpphy { + status = "disabled"; +}; + +&usb_2 { status = "okay"; - vdda-phy-supply = <&vreg_l8c_1p2>; - vdda-pll-supply = <&vdda_usb_ss_dp_core_1>; }; -&usb_1 { +&usb_2_dwc3 { + dr_mode = "host"; + + pinctrl-names = "default"; + pinctrl-0 = <&usb2phy_ac_en2_default>; +}; + +&usb_2_hsphy { status = "okay"; + vdda-pll-supply = <&vdd_usb_hs_core>; + vdda33-supply = <&vdda_usb_hs_3p1>; + vdda18-supply = <&vdda_usb_hs_1p8>; }; -&usb_1_dwc3 { - dr_mode = "peripheral"; +&usb_2_qmpphy { + status = "okay"; + vdda-phy-supply = <&vreg_l8c_1p2>; + vdda-pll-supply = <&vdda_usb_ss_dp_core_1>; +}; + +&tlmm { + gpio-reserved-ranges = <0 4>; + + usb2phy_ac_en1_default: usb2phy_ac_en1_default { + mux { + pins = "gpio113"; + function = "usb2phy_ac"; + bias-disable; + drive-strength = <2>; + }; + }; + + usb2phy_ac_en2_default: usb2phy_ac_en2_default { + mux { + pins = "gpio123"; + 
function = "usb2phy_ac"; + bias-disable; + drive-strength = <2>; + }; + }; }; From 2ce654adff222cf7d985a4a0206be676cb5736cc Mon Sep 17 00:00:00 2001 From: Shaik Sajida Bhanu Date: Tue, 13 Jul 2021 12:36:11 +0530 Subject: [PATCH 208/851] arm64: dts: qcom: sc7280: Add nodes for eMMC and SD card Add nodes for eMMC and SD card on sc7280. Signed-off-by: Shaik Sajida Bhanu Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1626159971-22519-1-git-send-email-sbhanu@codeaurora.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7280-idp.dts | 71 +++++++++ arch/arm64/boot/dts/qcom/sc7280.dtsi | 186 ++++++++++++++++++++++++ 2 files changed, 257 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp.dts b/arch/arm64/boot/dts/qcom/sc7280-idp.dts index 3900cfc095629..1be822cef5947 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-idp.dts +++ b/arch/arm64/boot/dts/qcom/sc7280-idp.dts @@ -7,6 +7,7 @@ /dts-v1/; +#include #include #include #include @@ -272,6 +273,34 @@ status = "okay"; }; +&sdhc_1 { + status = "okay"; + + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&sdc1_on>; + pinctrl-1 = <&sdc1_off>; + + non-removable; + no-sd; + no-sdio; + + vmmc-supply = <&vreg_l7b_2p9>; + vqmmc-supply = <&vreg_l19b_1p8>; +}; + +&sdhc_2 { + status = "okay"; + + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&sdc2_on>; + pinctrl-1 = <&sdc2_off>; + + vmmc-supply = <&vreg_l9c_2p9>; + vqmmc-supply = <&vreg_l6c_2p9>; + + cd-gpios = <&tlmm 91 GPIO_ACTIVE_LOW>; +}; + &uart5 { status = "okay"; }; @@ -291,3 +320,45 @@ bias-pull-up; }; }; + +&sdc1_on { + clk { + bias-disable; + drive-strength = <16>; + }; + + cmd { + bias-pull-up; + drive-strength = <10>; + }; + + data { + bias-pull-up; + drive-strength = <10>; + }; + + rclk { + bias-pull-down; + }; +}; + +&sdc2_on { + clk { + bias-disable; + drive-strength = <16>; + }; + + cmd { + bias-pull-up; + drive-strength = <10>; + }; + + data { + bias-pull-up; + drive-strength = <10>; + }; + + sd-cd { + bias-pull-up; + }; +}; 
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index a8c274ad74c47..da55adba1f8c0 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,11 @@ chosen { }; + aliases { + mmc1 = &sdhc_1; + mmc2 = &sdhc_2; + }; + clocks { xo_board: xo-board { compatible = "fixed-clock"; @@ -436,6 +442,60 @@ #mbox-cells = <2>; }; + sdhc_1: sdhci@7c4000 { + compatible = "qcom,sc7280-sdhci", "qcom,sdhci-msm-v5"; + status = "disabled"; + + reg = <0 0x007c4000 0 0x1000>, + <0 0x007c5000 0 0x1000>; + reg-names = "hc", "cqhci"; + + iommus = <&apps_smmu 0xc0 0x0>; + interrupts = , + ; + interrupt-names = "hc_irq", "pwr_irq"; + + clocks = <&gcc GCC_SDCC1_APPS_CLK>, + <&gcc GCC_SDCC1_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>; + clock-names = "core", "iface", "xo"; + interconnects = <&aggre1_noc MASTER_SDCC_1 0 &mc_virt SLAVE_EBI1 0>, + <&gem_noc MASTER_APPSS_PROC 0 &cnoc2 SLAVE_SDCC_1 0>; + interconnect-names = "sdhc-ddr","cpu-sdhc"; + power-domains = <&rpmhpd SC7280_CX>; + operating-points-v2 = <&sdhc1_opp_table>; + + bus-width = <8>; + supports-cqe; + + qcom,dll-config = <0x0007642c>; + qcom,ddr-config = <0x80040868>; + + mmc-ddr-1_8v; + mmc-hs200-1_8v; + mmc-hs400-1_8v; + mmc-hs400-enhanced-strobe; + + sdhc1_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-100000000 { + opp-hz = /bits/ 64 <100000000>; + required-opps = <&rpmhpd_opp_low_svs>; + opp-peak-kBps = <1800000 400000>; + opp-avg-kBps = <100000 0>; + }; + + opp-384000000 { + opp-hz = /bits/ 64 <384000000>; + required-opps = <&rpmhpd_opp_nom>; + opp-peak-kBps = <5400000 1600000>; + opp-avg-kBps = <390000 0>; + }; + }; + + }; + qupv3_id_0: geniqup@9c0000 { compatible = "qcom,geni-se-qup"; reg = <0 0x009c0000 0 0x2000>; @@ -1035,6 +1095,51 @@ }; }; + sdhc_2: sdhci@8804000 { + compatible = "qcom,sc7280-sdhci", "qcom,sdhci-msm-v5"; + status = "disabled"; + + reg = 
<0 0x08804000 0 0x1000>; + + iommus = <&apps_smmu 0x100 0x0>; + interrupts = , + ; + interrupt-names = "hc_irq", "pwr_irq"; + + clocks = <&gcc GCC_SDCC2_APPS_CLK>, + <&gcc GCC_SDCC2_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>; + clock-names = "core", "iface", "xo"; + interconnects = <&aggre1_noc MASTER_SDCC_2 0 &mc_virt SLAVE_EBI1 0>, + <&gem_noc MASTER_APPSS_PROC 0 &cnoc2 SLAVE_SDCC_2 0>; + interconnect-names = "sdhc-ddr","cpu-sdhc"; + power-domains = <&rpmhpd SC7280_CX>; + operating-points-v2 = <&sdhc2_opp_table>; + + bus-width = <4>; + + qcom,dll-config = <0x0007642c>; + + sdhc2_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-100000000 { + opp-hz = /bits/ 64 <100000000>; + required-opps = <&rpmhpd_opp_low_svs>; + opp-peak-kBps = <1800000 400000>; + opp-avg-kBps = <100000 0>; + }; + + opp-202000000 { + opp-hz = /bits/ 64 <202000000>; + required-opps = <&rpmhpd_opp_nom>; + opp-peak-kBps = <5400000 1600000>; + opp-avg-kBps = <200000 0>; + }; + }; + + }; + dc_noc: interconnect@90e0000 { reg = <0 0x090e0000 0 0x5080>; compatible = "qcom,sc7280-dc-noc"; @@ -1185,6 +1290,87 @@ pins = "gpio46", "gpio47"; function = "qup13"; }; + + sdc1_on: sdc1-on { + clk { + pins = "sdc1_clk"; + }; + + cmd { + pins = "sdc1_cmd"; + }; + + data { + pins = "sdc1_data"; + }; + + rclk { + pins = "sdc1_rclk"; + }; + }; + + sdc1_off: sdc1-off { + clk { + pins = "sdc1_clk"; + drive-strength = <2>; + bias-bus-hold; + }; + + cmd { + pins = "sdc1_cmd"; + drive-strength = <2>; + bias-bus-hold; + }; + + data { + pins = "sdc1_data"; + drive-strength = <2>; + bias-bus-hold; + }; + + rclk { + pins = "sdc1_rclk"; + bias-bus-hold; + }; + }; + + sdc2_on: sdc2-on { + clk { + pins = "sdc2_clk"; + }; + + cmd { + pins = "sdc2_cmd"; + }; + + data { + pins = "sdc2_data"; + }; + + sd-cd { + pins = "gpio91"; + }; + }; + + sdc2_off: sdc2-off { + clk { + pins = "sdc2_clk"; + drive-strength = <2>; + bias-bus-hold; + }; + + cmd { + pins ="sdc2_cmd"; + drive-strength = <2>; + bias-bus-hold; + }; + + data 
{ + pins ="sdc2_data"; + drive-strength = <2>; + bias-bus-hold; + }; + }; }; apps_smmu: iommu@15000000 { From 299ec1f78ad023fcb640e8862674667464c65aaa Mon Sep 17 00:00:00 2001 From: Sandeep Maheswaram Date: Tue, 6 Jul 2021 18:30:12 +0530 Subject: [PATCH 209/851] arm64: dts: qcom: sc7280: Add USB related nodes Add nodes for DWC3 USB controller, QMP and HS USB PHYs in sc7280 SOC. Signed-off-by: Sandeep Maheswaram Reviewed-by: Matthias Kaehlcke Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1625576413-12324-3-git-send-email-sanm@codeaurora.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 164 +++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index da55adba1f8c0..1d405c2f234f5 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -1140,6 +1140,125 @@ }; + usb_1_hsphy: phy@88e3000 { + compatible = "qcom,sc7280-usb-hs-phy", + "qcom,usb-snps-hs-7nm-phy"; + reg = <0 0x088e3000 0 0x400>; + status = "disabled"; + #phy-cells = <0>; + + clocks = <&rpmhcc RPMH_CXO_CLK>; + clock-names = "ref"; + + resets = <&gcc GCC_QUSB2PHY_PRIM_BCR>; + }; + + usb_2_hsphy: phy@88e4000 { + compatible = "qcom,sc7280-usb-hs-phy", + "qcom,usb-snps-hs-7nm-phy"; + reg = <0 0x088e4000 0 0x400>; + status = "disabled"; + #phy-cells = <0>; + + clocks = <&rpmhcc RPMH_CXO_CLK>; + clock-names = "ref"; + + resets = <&gcc GCC_QUSB2PHY_SEC_BCR>; + }; + + usb_1_qmpphy: phy-wrapper@88e9000 { + compatible = "qcom,sc7280-qmp-usb3-dp-phy", + "qcom,sm8250-qmp-usb3-dp-phy"; + reg = <0 0x088e9000 0 0x200>, + <0 0x088e8000 0 0x40>, + <0 0x088ea000 0 0x200>; + status = "disabled"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clocks = <&gcc GCC_USB3_PRIM_PHY_AUX_CLK>, + <&rpmhcc RPMH_CXO_CLK>, + <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>; + clock-names = "aux", "ref_clk_src", "com_aux"; + + resets = <&gcc GCC_USB3_DP_PHY_PRIM_BCR>, + 
<&gcc GCC_USB3_PHY_PRIM_BCR>; + reset-names = "phy", "common"; + + usb_1_ssphy: usb3-phy@88e9200 { + reg = <0 0x088e9200 0 0x200>, + <0 0x088e9400 0 0x200>, + <0 0x088e9c00 0 0x400>, + <0 0x088e9600 0 0x200>, + <0 0x088e9800 0 0x200>, + <0 0x088e9a00 0 0x100>; + #clock-cells = <0>; + #phy-cells = <0>; + clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; + clock-names = "pipe0"; + clock-output-names = "usb3_phy_pipe_clk_src"; + }; + + dp_phy: dp-phy@88ea200 { + reg = <0 0x088ea200 0 0x200>, + <0 0x088ea400 0 0x200>, + <0 0x088eac00 0 0x400>, + <0 0x088ea600 0 0x200>, + <0 0x088ea800 0 0x200>, + <0 0x088eaa00 0 0x100>; + #phy-cells = <0>; + #clock-cells = <1>; + clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; + clock-names = "pipe0"; + clock-output-names = "usb3_phy_pipe_clk_src"; + }; + }; + + usb_2: usb@8cf8800 { + compatible = "qcom,sc7280-dwc3", "qcom,dwc3"; + reg = <0 0x08cf8800 0 0x400>; + status = "disabled"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + dma-ranges; + + clocks = <&gcc GCC_CFG_NOC_USB3_SEC_AXI_CLK>, + <&gcc GCC_USB30_SEC_MASTER_CLK>, + <&gcc GCC_AGGRE_USB3_SEC_AXI_CLK>, + <&gcc GCC_USB30_SEC_MOCK_UTMI_CLK>, + <&gcc GCC_USB30_SEC_SLEEP_CLK>; + clock-names = "cfg_noc", "core", "iface","mock_utmi", + "sleep"; + + assigned-clocks = <&gcc GCC_USB30_SEC_MOCK_UTMI_CLK>, + <&gcc GCC_USB30_SEC_MASTER_CLK>; + assigned-clock-rates = <19200000>, <200000000>; + + interrupts-extended = <&intc GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 13 IRQ_TYPE_EDGE_RISING>, + <&pdc 12 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "hs_phy_irq", + "dm_hs_phy_irq", "dp_hs_phy_irq"; + + power-domains = <&gcc GCC_USB30_SEC_GDSC>; + + resets = <&gcc GCC_USB30_SEC_BCR>; + + usb_2_dwc3: usb@8c00000 { + compatible = "snps,dwc3"; + reg = <0 0x08c00000 0 0xe000>; + interrupts = ; + iommus = <&apps_smmu 0xa0 0x0>; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; + phys = <&usb_2_hsphy>; + phy-names = "usb2-phy"; + maximum-speed = "high-speed"; + }; + }; + dc_noc: 
interconnect@90e0000 { reg = <0 0x090e0000 0 0x5080>; compatible = "qcom,sc7280-dc-noc"; @@ -1168,6 +1287,51 @@ qcom,bcm-voters = <&apps_bcm_voter>; }; + usb_1: usb@a6f8800 { + compatible = "qcom,sc7280-dwc3", "qcom,dwc3"; + reg = <0 0x0a6f8800 0 0x400>; + status = "disabled"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + dma-ranges; + + clocks = <&gcc GCC_CFG_NOC_USB3_PRIM_AXI_CLK>, + <&gcc GCC_USB30_PRIM_MASTER_CLK>, + <&gcc GCC_AGGRE_USB3_PRIM_AXI_CLK>, + <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>, + <&gcc GCC_USB30_PRIM_SLEEP_CLK>; + clock-names = "cfg_noc", "core", "iface", "mock_utmi", + "sleep"; + + assigned-clocks = <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>, + <&gcc GCC_USB30_PRIM_MASTER_CLK>; + assigned-clock-rates = <19200000>, <200000000>; + + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 14 IRQ_TYPE_EDGE_BOTH>, + <&pdc 15 IRQ_TYPE_EDGE_BOTH>, + <&pdc 17 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "hs_phy_irq", "dp_hs_phy_irq", + "dm_hs_phy_irq", "ss_phy_irq"; + + power-domains = <&gcc GCC_USB30_PRIM_GDSC>; + + resets = <&gcc GCC_USB30_PRIM_BCR>; + + usb_1_dwc3: usb@a600000 { + compatible = "snps,dwc3"; + reg = <0 0x0a600000 0 0xe000>; + interrupts = ; + iommus = <&apps_smmu 0xe0 0x0>; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; + phys = <&usb_1_hsphy>, <&usb_1_ssphy>; + phy-names = "usb2-phy", "usb3-phy"; + maximum-speed = "super-speed"; + }; + }; + videocc: clock-controller@aaf0000 { compatible = "qcom,sc7280-videocc"; reg = <0 0xaaf0000 0 0x10000>; From c0cc9bce3970b3fb78620515d5e8d99ab8c72bba Mon Sep 17 00:00:00 2001 From: Sandeep Maheswaram Date: Tue, 6 Jul 2021 18:30:13 +0530 Subject: [PATCH 210/851] arm64: dts: qcom: sc7280: Add USB nodes for IDP board Add USB nodes for sc7280 IDP board. 
Signed-off-by: Sandeep Maheswaram Reviewed-by: Matthias Kaehlcke Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1625576413-12324-4-git-send-email-sanm@codeaurora.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7280-idp.dts | 39 +++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp.dts b/arch/arm64/boot/dts/qcom/sc7280-idp.dts index 1be822cef5947..73225e3c2f86f 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-idp.dts +++ b/arch/arm64/boot/dts/qcom/sc7280-idp.dts @@ -305,6 +305,45 @@ status = "okay"; }; +&usb_1 { + status = "okay"; +}; + +&usb_1_dwc3 { + dr_mode = "host"; +}; + +&usb_1_hsphy { + status = "okay"; + + vdda-pll-supply = <&vreg_l10c_0p8>; + vdda33-supply = <&vreg_l2b_3p0>; + vdda18-supply = <&vreg_l1c_1p8>; +}; + +&usb_1_qmpphy { + status = "okay"; + + vdda-phy-supply = <&vreg_l6b_1p2>; + vdda-pll-supply = <&vreg_l1b_0p8>; +}; + +&usb_2 { + status = "okay"; +}; + +&usb_2_dwc3 { + dr_mode = "peripheral"; +}; + +&usb_2_hsphy { + status = "okay"; + + vdda-pll-supply = <&vreg_l10c_0p8>; + vdda33-supply = <&vreg_l2b_3p0>; + vdda18-supply = <&vreg_l1c_1p8>; +}; + /* PINCTRL - additions to nodes defined in sc7280.dtsi */ &qup_uart5_default { From 8fc5ae26d8287e0cb33188c849a26b05c6411bab Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 20 Jul 2021 06:29:12 +0200 Subject: [PATCH 211/851] ovl: fix mmap denywrite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Overlayfs did not honor positive i_writecount on realfile for VM_DENYWRITE mappings. Similarly negative i_mmap_writable counts were ignored for VM_SHARED mappings. Fix by making vma_set_file() switch the temporary counts obtained and released by mmap_region(). 
Reported-by: Chengguang Xu Signed-off-by: Miklos Szeredi Reviewed-by: Christian König --- fs/overlayfs/file.c | 4 +++- include/linux/mm.h | 2 +- mm/mmap.c | 2 +- mm/util.c | 27 ++++++++++++++++++++++++++- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 4d53d3b7e5fe1..95cc14f3b7952 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -430,7 +430,9 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma) if (WARN_ON(file != vma->vm_file)) return -EIO; - vma_set_file(vma, realfile); + ret = vma_set_file(vma, realfile); + if (ret) + return ret; old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = call_mmap(vma->vm_file, vma); diff --git a/include/linux/mm.h b/include/linux/mm.h index 57453dba41b94..8d039c906f58f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2779,7 +2779,7 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) } #endif -void vma_set_file(struct vm_area_struct *vma, struct file *file); +int /* __must_check */ vma_set_file(struct vm_area_struct *vma, struct file *file); #ifdef CONFIG_NUMA_BALANCING unsigned long change_prot_numa(struct vm_area_struct *vma, diff --git a/mm/mmap.c b/mm/mmap.c index ca54d36d203ae..555a1b9212661 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1806,6 +1806,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ vma->vm_file = get_file(file); error = call_mmap(file, vma); + file = vma->vm_file; if (error) goto unmap_and_free_vma; @@ -1867,7 +1868,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (vm_flags & VM_DENYWRITE) allow_write_access(file); } - file = vma->vm_file; out: perf_event_mmap(vma); diff --git a/mm/util.c b/mm/util.c index 99c6cc77de9e2..ff91091a6778e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -314,12 +314,37 @@ int vma_is_stack_for_current(struct vm_area_struct *vma) /* * Change backing file, only valid to use during initial VMA setup. 
*/ -void vma_set_file(struct vm_area_struct *vma, struct file *file) +int vma_set_file(struct vm_area_struct *vma, struct file *file) { + vm_flags_t vm_flags = vma->vm_flags; + int err = 0; + /* Changing an anonymous vma with this is illegal */ get_file(file); + + /* Get temporary denial counts on replacement */ + if (vm_flags & VM_DENYWRITE) { + err = deny_write_access(file); + if (err) + goto out_put; + } + if (vm_flags & VM_SHARED) { + err = mapping_map_writable(file->f_mapping); + if (err) + goto out_allow; + } + swap(vma->vm_file, file); + + /* Undo temporary denial counts on replaced */ + if (vm_flags & VM_SHARED) + mapping_unmap_writable(file->f_mapping); +out_allow: + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); +out_put: fput(file); + return err; } EXPORT_SYMBOL(vma_set_file); From 8729ca2a6f2dd154e32b6d4858425ba29b012f7f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 24 Jun 2021 13:56:27 +0800 Subject: [PATCH 212/851] ARM: s3c: delete unneed local variable "delay" "delay" variable on line 79 can be deleted by returning "0" on line 88. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20210624055627.22295-1-wangborong@cdjrlc.com Signed-off-by: Krzysztof Kozlowski --- arch/arm/mach-s3c/mach-gta02.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mach-s3c/mach-gta02.c b/arch/arm/mach-s3c/mach-gta02.c index aec8b451c0167..418939ce0fc35 100644 --- a/arch/arm/mach-s3c/mach-gta02.c +++ b/arch/arm/mach-s3c/mach-gta02.c @@ -79,13 +79,12 @@ static struct pcf50633 *gta02_pcf; static long gta02_panic_blink(int state) { - long delay = 0; char led; led = (state) ? 
1 : 0; gpio_direction_output(GTA02_GPIO_AUX_LED, led); - return delay; + return 0; } From d3827047534caa058e64a6c8b150ae3bdfc42219 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 12 Jul 2021 16:47:15 -0400 Subject: [PATCH 213/851] dm writecache: split up writecache_map() to improve code readability writecache_map() has grown too large and can be confusing to read given all the goto statements. Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 338 ++++++++++++++++++++----------------- 1 file changed, 187 insertions(+), 151 deletions(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index e21e29e81bbf6..4ab5b318127b4 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1294,10 +1294,164 @@ static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio) bio_list_add(&wc->flush_list, bio); } -static int writecache_map(struct dm_target *ti, struct bio *bio) +enum wc_map_op { + WC_MAP_SUBMIT, + WC_MAP_REMAP, + WC_MAP_REMAP_ORIGIN, + WC_MAP_RETURN, + WC_MAP_ERROR, +}; + +static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio) { + enum wc_map_op map_op; struct wc_entry *e; + +read_next_block: + e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); + if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) { + if (WC_MODE_PMEM(wc)) { + bio_copy_block(wc, bio, memory_data(wc, e)); + if (bio->bi_iter.bi_size) + goto read_next_block; + map_op = WC_MAP_SUBMIT; + } else { + dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); + bio_set_dev(bio, wc->ssd_dev->bdev); + bio->bi_iter.bi_sector = cache_sector(wc, e); + if (!writecache_entry_is_committed(wc, e)) + writecache_wait_for_ios(wc, WRITE); + map_op = WC_MAP_REMAP; + } + } else { + if (e) { + sector_t next_boundary = + read_original_sector(wc, e) - bio->bi_iter.bi_sector; + if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) + dm_accept_partial_bio(bio, next_boundary); + } + 
map_op = WC_MAP_REMAP_ORIGIN; + } + + return map_op; +} + +static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e, bool search_used) +{ + unsigned bio_size = wc->block_size; + sector_t start_cache_sec = cache_sector(wc, e); + sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); + + while (bio_size < bio->bi_iter.bi_size) { + if (!search_used) { + struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); + if (!f) + break; + write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + + (bio_size >> SECTOR_SHIFT), wc->seq_count); + writecache_insert_entry(wc, f); + wc->uncommitted_blocks++; + } else { + struct wc_entry *f; + struct rb_node *next = rb_next(&e->rb_node); + if (!next) + break; + f = container_of(next, struct wc_entry, rb_node); + if (f != e + 1) + break; + if (read_original_sector(wc, f) != + read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT)) + break; + if (unlikely(f->write_in_progress)) + break; + if (writecache_entry_is_committed(wc, f)) + wc->overwrote_committed = true; + e = f; + } + bio_size += wc->block_size; + current_cache_sec += wc->block_size >> SECTOR_SHIFT; + } + + bio_set_dev(bio, wc->ssd_dev->bdev); + bio->bi_iter.bi_sector = start_cache_sec; + dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); + + if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { + wc->uncommitted_blocks = 0; + queue_work(wc->writeback_wq, &wc->flush_work); + } else { + writecache_schedule_autocommit(wc); + } + + return WC_MAP_REMAP; +} + +static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio) +{ + struct wc_entry *e; + + do { + bool found_entry = false; + bool search_used = false; + if (writecache_has_error(wc)) + return WC_MAP_ERROR; + e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); + if (e) { + if (!writecache_entry_is_committed(wc, e)) { + search_used = true; + goto bio_copy; + } + if 
(!WC_MODE_PMEM(wc) && !e->write_in_progress) { + wc->overwrote_committed = true; + search_used = true; + goto bio_copy; + } + found_entry = true; + } else { + if (unlikely(wc->cleaner) || + (wc->metadata_only && !(bio->bi_opf & REQ_META))) + goto direct_write; + } + e = writecache_pop_from_freelist(wc, (sector_t)-1); + if (unlikely(!e)) { + if (!WC_MODE_PMEM(wc) && !found_entry) { +direct_write: + e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); + if (e) { + sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector; + BUG_ON(!next_boundary); + if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { + dm_accept_partial_bio(bio, next_boundary); + } + } + return WC_MAP_REMAP_ORIGIN; + } + writecache_wait_on_freelist(wc); + continue; + } + write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count); + writecache_insert_entry(wc, e); + wc->uncommitted_blocks++; +bio_copy: + if (WC_MODE_PMEM(wc)) + bio_copy_block(wc, bio, memory_data(wc, e)); + else + return writecache_bio_copy_ssd(wc, bio, e, search_used); + } while (bio->bi_iter.bi_size); + + if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks)) + writecache_flush(wc); + else + writecache_schedule_autocommit(wc); + + return WC_MAP_SUBMIT; +} + +static int writecache_map(struct dm_target *ti, struct bio *bio) +{ struct dm_writecache *wc = ti->private; + enum wc_map_op map_op = WC_MAP_ERROR; bio->bi_private = NULL; @@ -1343,167 +1497,49 @@ static int writecache_map(struct dm_target *ti, struct bio *bio) } } - if (bio_data_dir(bio) == READ) { -read_next_block: - e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); - if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) { - if (WC_MODE_PMEM(wc)) { - bio_copy_block(wc, bio, memory_data(wc, e)); - if (bio->bi_iter.bi_size) - goto read_next_block; - goto unlock_submit; - } else { - dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); 
- bio_set_dev(bio, wc->ssd_dev->bdev); - bio->bi_iter.bi_sector = cache_sector(wc, e); - if (!writecache_entry_is_committed(wc, e)) - writecache_wait_for_ios(wc, WRITE); - goto unlock_remap; - } - } else { - if (e) { - sector_t next_boundary = - read_original_sector(wc, e) - bio->bi_iter.bi_sector; - if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { - dm_accept_partial_bio(bio, next_boundary); - } - } - goto unlock_remap_origin; - } - } else { - do { - bool found_entry = false; - bool search_used = false; - if (writecache_has_error(wc)) - goto unlock_error; - e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); - if (e) { - if (!writecache_entry_is_committed(wc, e)) { - search_used = true; - goto bio_copy; - } - if (!WC_MODE_PMEM(wc) && !e->write_in_progress) { - wc->overwrote_committed = true; - search_used = true; - goto bio_copy; - } - found_entry = true; - } else { - if (unlikely(wc->cleaner) || - (wc->metadata_only && !(bio->bi_opf & REQ_META))) - goto direct_write; - } - e = writecache_pop_from_freelist(wc, (sector_t)-1); - if (unlikely(!e)) { - if (!WC_MODE_PMEM(wc) && !found_entry) { -direct_write: - e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); - if (e) { - sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector; - BUG_ON(!next_boundary); - if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { - dm_accept_partial_bio(bio, next_boundary); - } - } - goto unlock_remap_origin; - } - writecache_wait_on_freelist(wc); - continue; - } - write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count); - writecache_insert_entry(wc, e); - wc->uncommitted_blocks++; -bio_copy: - if (WC_MODE_PMEM(wc)) { - bio_copy_block(wc, bio, memory_data(wc, e)); - } else { - unsigned bio_size = wc->block_size; - sector_t start_cache_sec = cache_sector(wc, e); - sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); - - while (bio_size < bio->bi_iter.bi_size) { - if 
(!search_used) { - struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); - if (!f) - break; - write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + - (bio_size >> SECTOR_SHIFT), wc->seq_count); - writecache_insert_entry(wc, f); - wc->uncommitted_blocks++; - } else { - struct wc_entry *f; - struct rb_node *next = rb_next(&e->rb_node); - if (!next) - break; - f = container_of(next, struct wc_entry, rb_node); - if (f != e + 1) - break; - if (read_original_sector(wc, f) != - read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT)) - break; - if (unlikely(f->write_in_progress)) - break; - if (writecache_entry_is_committed(wc, f)) - wc->overwrote_committed = true; - e = f; - } - bio_size += wc->block_size; - current_cache_sec += wc->block_size >> SECTOR_SHIFT; - } - - bio_set_dev(bio, wc->ssd_dev->bdev); - bio->bi_iter.bi_sector = start_cache_sec; - dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); - - if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { - wc->uncommitted_blocks = 0; - queue_work(wc->writeback_wq, &wc->flush_work); - } else { - writecache_schedule_autocommit(wc); - } - goto unlock_remap; - } - } while (bio->bi_iter.bi_size); - - if (unlikely(bio->bi_opf & REQ_FUA || - wc->uncommitted_blocks >= wc->autocommit_blocks)) - writecache_flush(wc); - else - writecache_schedule_autocommit(wc); - goto unlock_submit; - } + if (bio_data_dir(bio) == READ) + map_op = writecache_map_read(wc, bio); + else + map_op = writecache_map_write(wc, bio); + switch (map_op) { + case WC_MAP_REMAP_ORIGIN: unlock_remap_origin: - if (likely(wc->pause != 0)) { - if (bio_op(bio) == REQ_OP_WRITE) { - dm_iot_io_begin(&wc->iot, 1); - bio->bi_private = (void *)2; + if (likely(wc->pause != 0)) { + if (bio_op(bio) == REQ_OP_WRITE) { + dm_iot_io_begin(&wc->iot, 1); + bio->bi_private = (void *)2; + } } - } - bio_set_dev(bio, wc->dev->bdev); - wc_unlock(wc); - return DM_MAPIO_REMAPPED; + bio_set_dev(bio, wc->dev->bdev); + wc_unlock(wc); + 
return DM_MAPIO_REMAPPED; + case WC_MAP_REMAP: unlock_remap: - /* make sure that writecache_end_io decrements bio_in_progress: */ - bio->bi_private = (void *)1; - atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); - wc_unlock(wc); - return DM_MAPIO_REMAPPED; + /* make sure that writecache_end_io decrements bio_in_progress: */ + bio->bi_private = (void *)1; + atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); + wc_unlock(wc); + return DM_MAPIO_REMAPPED; + case WC_MAP_SUBMIT: unlock_submit: - wc_unlock(wc); - bio_endio(bio); - return DM_MAPIO_SUBMITTED; + wc_unlock(wc); + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + case WC_MAP_RETURN: unlock_return: - wc_unlock(wc); - return DM_MAPIO_SUBMITTED; + wc_unlock(wc); + return DM_MAPIO_SUBMITTED; + case WC_MAP_ERROR: unlock_error: - wc_unlock(wc); - bio_io_error(bio); - return DM_MAPIO_SUBMITTED; + wc_unlock(wc); + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } } static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status) From 58fd963fcd77b67e1b0178eadc3a18ce8097ce2a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 12 Jul 2021 16:58:49 -0400 Subject: [PATCH 214/851] dm writecache: factor out writecache_map_remap_origin() Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 4ab5b318127b4..4e4e2aa046981 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1302,6 +1302,19 @@ enum wc_map_op { WC_MAP_ERROR, }; +static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e) +{ + if (e) { + sector_t next_boundary = + read_original_sector(wc, e) - bio->bi_iter.bi_sector; + if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) + dm_accept_partial_bio(bio, next_boundary); + } + + return WC_MAP_REMAP_ORIGIN; +} + static enum wc_map_op 
writecache_map_read(struct dm_writecache *wc, struct bio *bio) { enum wc_map_op map_op; @@ -1324,13 +1337,7 @@ static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio * map_op = WC_MAP_REMAP; } } else { - if (e) { - sector_t next_boundary = - read_original_sector(wc, e) - bio->bi_iter.bi_sector; - if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) - dm_accept_partial_bio(bio, next_boundary); - } - map_op = WC_MAP_REMAP_ORIGIN; + map_op = writecache_map_remap_origin(wc, bio, e); } return map_op; @@ -1418,14 +1425,7 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio if (!WC_MODE_PMEM(wc) && !found_entry) { direct_write: e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); - if (e) { - sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector; - BUG_ON(!next_boundary); - if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { - dm_accept_partial_bio(bio, next_boundary); - } - } - return WC_MAP_REMAP_ORIGIN; + return writecache_map_remap_origin(wc, bio, e); } writecache_wait_on_freelist(wc); continue; From d9ed8449eaf375833240d281c881d741a9fd366c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 12 Jul 2021 21:21:34 -0400 Subject: [PATCH 215/851] dm writecache: further writecache_map() cleanup Factor out writecache_map_flush() and writecache_map_discard() from writecache_map(). Also eliminate the various goto labels in writecache_map(). 
Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 75 ++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 4e4e2aa046981..5b553df56b7a3 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1448,31 +1448,52 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio return WC_MAP_SUBMIT; } +static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio *bio) +{ + if (writecache_has_error(wc)) + return WC_MAP_ERROR; + + if (WC_MODE_PMEM(wc)) { + writecache_flush(wc); + if (writecache_has_error(wc)) + return WC_MAP_ERROR; + else if (unlikely(wc->cleaner) || unlikely(wc->metadata_only)) + return WC_MAP_REMAP_ORIGIN; + return WC_MAP_SUBMIT; + } + /* SSD: */ + if (dm_bio_get_target_bio_nr(bio)) + return WC_MAP_REMAP_ORIGIN; + writecache_offload_bio(wc, bio); + return WC_MAP_RETURN; +} + +static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio) +{ + if (writecache_has_error(wc)) + return WC_MAP_ERROR; + + if (WC_MODE_PMEM(wc)) { + writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio)); + return WC_MAP_REMAP_ORIGIN; + } + /* SSD: */ + writecache_offload_bio(wc, bio); + return WC_MAP_RETURN; +} + static int writecache_map(struct dm_target *ti, struct bio *bio) { struct dm_writecache *wc = ti->private; - enum wc_map_op map_op = WC_MAP_ERROR; + enum wc_map_op map_op; bio->bi_private = NULL; wc_lock(wc); if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - if (writecache_has_error(wc)) - goto unlock_error; - if (WC_MODE_PMEM(wc)) { - writecache_flush(wc); - if (writecache_has_error(wc)) - goto unlock_error; - if (unlikely(wc->cleaner) || unlikely(wc->metadata_only)) - goto unlock_remap_origin; - goto unlock_submit; - } else { - if (dm_bio_get_target_bio_nr(bio)) - goto unlock_remap_origin; - writecache_offload_bio(wc, bio); - goto unlock_return; - } + map_op 
= writecache_map_flush(wc, bio); + goto done; } bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); @@ -1482,29 +1503,22 @@ static int writecache_map(struct dm_target *ti, struct bio *bio) DMERR("I/O is not aligned, sector %llu, size %u, block size %u", (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size, wc->block_size); - goto unlock_error; + map_op = WC_MAP_ERROR; + goto done; } if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { - if (writecache_has_error(wc)) - goto unlock_error; - if (WC_MODE_PMEM(wc)) { - writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio)); - goto unlock_remap_origin; - } else { - writecache_offload_bio(wc, bio); - goto unlock_return; - } + map_op = writecache_map_discard(wc, bio); + goto done; } if (bio_data_dir(bio) == READ) map_op = writecache_map_read(wc, bio); else map_op = writecache_map_write(wc, bio); - +done: switch (map_op) { case WC_MAP_REMAP_ORIGIN: -unlock_remap_origin: if (likely(wc->pause != 0)) { if (bio_op(bio) == REQ_OP_WRITE) { dm_iot_io_begin(&wc->iot, 1); @@ -1516,7 +1530,6 @@ static int writecache_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; case WC_MAP_REMAP: -unlock_remap: /* make sure that writecache_end_io decrements bio_in_progress: */ bio->bi_private = (void *)1; atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); @@ -1524,18 +1537,16 @@ static int writecache_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; case WC_MAP_SUBMIT: -unlock_submit: wc_unlock(wc); bio_endio(bio); return DM_MAPIO_SUBMITTED; case WC_MAP_RETURN: -unlock_return: wc_unlock(wc); return DM_MAPIO_SUBMITTED; case WC_MAP_ERROR: -unlock_error: + default: wc_unlock(wc); bio_io_error(bio); return DM_MAPIO_SUBMITTED; From 06bad3bb15355c7f4fa9ec9fd75bba5a2fd5f02f Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 14 Aug 2020 16:14:34 -0700 Subject: [PATCH 216/851] tools/memory-model: Document locking corner cases Most Linux-kernel uses of locking are straightforward, but there are corner-case uses that rely on less well-known aspects of the lock and unlock primitives. This commit therefore adds a locking.txt and litmus tests in Documentation/litmus-tests/locking to explain these corner-case uses. Signed-off-by: Paul E. McKenney --- .../litmus-tests/locking/DCL-broken.litmus | 55 +++ .../litmus-tests/locking/DCL-fixed.litmus | 56 +++ .../litmus-tests/locking/RM-broken.litmus | 42 +++ .../litmus-tests/locking/RM-fixed.litmus | 42 +++ tools/memory-model/Documentation/locking.txt | 320 ++++++++++++++++++ 5 files changed, 515 insertions(+) create mode 100644 Documentation/litmus-tests/locking/DCL-broken.litmus create mode 100644 Documentation/litmus-tests/locking/DCL-fixed.litmus create mode 100644 Documentation/litmus-tests/locking/RM-broken.litmus create mode 100644 Documentation/litmus-tests/locking/RM-fixed.litmus create mode 100644 tools/memory-model/Documentation/locking.txt diff --git a/Documentation/litmus-tests/locking/DCL-broken.litmus b/Documentation/litmus-tests/locking/DCL-broken.litmus new file mode 100644 index 0000000000000..cfaa25ff82b1e --- /dev/null +++ b/Documentation/litmus-tests/locking/DCL-broken.litmus @@ -0,0 +1,55 @@ +C DCL-broken + +(* + * Result: Sometimes + * + * This litmus test demonstrates more than just locking is required to + * correctly implement double-checked locking. 
+ *) + +{ + int flag; + int data; + int lck; +} + +P0(int *flag, int *data, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + WRITE_ONCE(*flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); +} + +P1(int *flag, int *data, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + WRITE_ONCE(*flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); +} + +locations [flag;data;lck;0:r0;0:r1;1:r0;1:r1] +exists (0:r2=0 \/ 1:r2=0) diff --git a/Documentation/litmus-tests/locking/DCL-fixed.litmus b/Documentation/litmus-tests/locking/DCL-fixed.litmus new file mode 100644 index 0000000000000..579d6c246f167 --- /dev/null +++ b/Documentation/litmus-tests/locking/DCL-fixed.litmus @@ -0,0 +1,56 @@ +C DCL-fixed + +(* + * Result: Never + * + * This litmus test demonstrates that double-checked locking can be + * reliable given proper use of smp_load_acquire() and smp_store_release() + * in addition to the locking. 
+ *) + +{ + int flag; + int data; + int lck; +} + +P0(int *flag, int *data, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = smp_load_acquire(flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + smp_store_release(flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); +} + +P1(int *flag, int *data, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = smp_load_acquire(flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + smp_store_release(flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); +} + +locations [flag;data;lck;0:r0;0:r1;1:r0;1:r1] +exists (0:r2=0 \/ 1:r2=0) diff --git a/Documentation/litmus-tests/locking/RM-broken.litmus b/Documentation/litmus-tests/locking/RM-broken.litmus new file mode 100644 index 0000000000000..c586ae4b547de --- /dev/null +++ b/Documentation/litmus-tests/locking/RM-broken.litmus @@ -0,0 +1,42 @@ +C RM-broken + +(* + * Result: DEADLOCK + * + * This litmus test demonstrates that the old "roach motel" approach + * to locking, where code can be freely moved into critical sections, + * cannot be used in the Linux kernel. 
+ *) + +{ + int lck; + int x; + int y; +} + +P0(int *x, int *y, int *lck) +{ + int r2; + + spin_lock(lck); + r2 = atomic_inc_return(y); + WRITE_ONCE(*x, 1); + spin_unlock(lck); +} + +P1(int *x, int *y, int *lck) +{ + int r0; + int r1; + int r2; + + spin_lock(lck); + r0 = READ_ONCE(*x); + r1 = READ_ONCE(*x); + r2 = atomic_inc_return(y); + spin_unlock(lck); +} + +locations [x;lck;0:r2;1:r0;1:r1;1:r2] +filter (y=2 /\ 1:r0=0 /\ 1:r1=1) +exists (1:r2=1) diff --git a/Documentation/litmus-tests/locking/RM-fixed.litmus b/Documentation/litmus-tests/locking/RM-fixed.litmus new file mode 100644 index 0000000000000..672856736b42e --- /dev/null +++ b/Documentation/litmus-tests/locking/RM-fixed.litmus @@ -0,0 +1,42 @@ +C RM-fixed + +(* + * Result: Never + * + * This litmus test demonstrates that the old "roach motel" approach + * to locking, where code can be freely moved into critical sections, + * cannot be used in the Linux kernel. + *) + +{ + int lck; + int x; + int y; +} + +P0(int *x, int *y, int *lck) +{ + int r2; + + spin_lock(lck); + r2 = atomic_inc_return(y); + WRITE_ONCE(*x, 1); + spin_unlock(lck); +} + +P1(int *x, int *y, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*x); + r1 = READ_ONCE(*x); + spin_lock(lck); + r2 = atomic_inc_return(y); + spin_unlock(lck); +} + +locations [x;lck;0:r2;1:r0;1:r1;1:r2] +filter (y=2 /\ 1:r0=0 /\ 1:r1=1) +exists (1:r2=1) diff --git a/tools/memory-model/Documentation/locking.txt b/tools/memory-model/Documentation/locking.txt new file mode 100644 index 0000000000000..4e05c6d53ab72 --- /dev/null +++ b/tools/memory-model/Documentation/locking.txt @@ -0,0 +1,320 @@ +Locking +======= + +Locking is well-known and the common use cases are straightforward: Any +CPU holding a given lock sees any changes previously seen or made by any +CPU before it previously released that same lock. This last sentence +is the only part of this document that most developers will need to read. 
+ +However, developers who would like to also access lock-protected shared +variables outside of their corresponding locks should continue reading. + + +Locking and Prior Accesses +-------------------------- + +The basic rule of locking is worth repeating: + + Any CPU holding a given lock sees any changes previously seen + or made by any CPU before it previously released that same lock. + +Note that this statement is a bit stronger than "Any CPU holding a +given lock sees all changes made by any CPU during the time that CPU was +previously holding this same lock". For example, consider the following +pair of code fragments: + + /* See MP+polocks.litmus. */ + void CPU0(void) + { + WRITE_ONCE(x, 1); + spin_lock(&mylock); + WRITE_ONCE(y, 1); + spin_unlock(&mylock); + } + + void CPU1(void) + { + spin_lock(&mylock); + r0 = READ_ONCE(y); + spin_unlock(&mylock); + r1 = READ_ONCE(x); + } + +The basic rule guarantees that if CPU0() acquires mylock before CPU1(), +then both r0 and r1 must be set to the value 1. This also has the +consequence that if the final value of r0 is equal to 1, then the final +value of r1 must also be equal to 1. In contrast, the weaker rule would +say nothing about the final value of r1. + + +Locking and Subsequent Accesses +------------------------------- + +The converse to the basic rule also holds: Any CPU holding a given +lock will not see any changes that will be made by any CPU after it +subsequently acquires this same lock. This converse statement is +illustrated by the following litmus test: + + /* See MP+porevlocks.litmus. */ + void CPU0(void) + { + r0 = READ_ONCE(y); + spin_lock(&mylock); + r1 = READ_ONCE(x); + spin_unlock(&mylock); + } + + void CPU1(void) + { + spin_lock(&mylock); + WRITE_ONCE(x, 1); + spin_unlock(&mylock); + WRITE_ONCE(y, 1); + } + +This converse to the basic rule guarantees that if CPU0() acquires +mylock before CPU1(), then both r0 and r1 must be set to the value 0. 
+This also has the consequence that if the final value of r1 is equal +to 0, then the final value of r0 must also be equal to 0. In contrast, +the weaker rule would say nothing about the final value of r0. + +These examples show only a single pair of CPUs, but the effects of the +locking basic rule extend across multiple acquisitions of a given lock +across multiple CPUs. + + +Double-Checked Locking +---------------------- + +It is well known that more than just a lock is required to make +double-checked locking work correctly. This litmus test illustrates +one incorrect approach: + + /* See Documentation/litmus-tests/locking/DCL-broken.litmus. */ + P0(int *flag, int *data, int *lck) + { + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + WRITE_ONCE(*flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); + } + /* P1() is exactly the same as P0(). */ + +There are two problems. First, there is no ordering between the first +READ_ONCE() of "flag" and the READ_ONCE() of "data". Second, there is +no ordering between the two WRITE_ONCE() calls. It should therefore be +no surprise that "r2" can be zero, and a quick herd7 run confirms this. + +One way to fix this is to use smp_load_acquire() and smp_store_release() +as shown in this corrected version: + + /* See Documentation/litmus-tests/locking/DCL-fixed.litmus. */ + P0(int *flag, int *data, int *lck) + { + int r0; + int r1; + int r2; + + r0 = smp_load_acquire(flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + smp_store_release(flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); + } + /* P1() is exactly the same as P0(). */ + +The smp_load_acquire() guarantees that its load from "flag" will +be ordered before the READ_ONCE() from "data", thus solving the first +problem.
The smp_store_release() guarantees that its store will be +ordered after the WRITE_ONCE() to "data", solving the second problem. +The smp_store_release() pairs with the smp_load_acquire(), thus ensuring +that the ordering provided by each actually takes effect. Again, a +quick herd7 run confirms this. + +In short, if you access a lock-protected variable without holding the +corresponding lock, you will need to provide additional ordering, in +this case, via the smp_load_acquire() and the smp_store_release(). + + +Ordering Provided by a Lock to CPUs Not Holding That Lock +--------------------------------------------------------- + +It is not necessarily the case that accesses ordered by locking will be +seen as ordered by CPUs not holding that lock. Consider this example: + + /* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */ + void CPU0(void) + { + spin_lock(&mylock); + WRITE_ONCE(x, 1); + WRITE_ONCE(y, 1); + spin_unlock(&mylock); + } + + void CPU1(void) + { + spin_lock(&mylock); + r0 = READ_ONCE(y); + WRITE_ONCE(z, 1); + spin_unlock(&mylock); + } + + void CPU2(void) + { + WRITE_ONCE(z, 2); + smp_mb(); + r1 = READ_ONCE(x); + } + +Counter-intuitive though it might be, it is quite possible to have +the final value of r0 be 1, the final value of z be 2, and the final +value of r1 be 0. The reason for this surprising outcome is that CPU2() +never acquired the lock, and thus did not fully benefit from the lock's +ordering properties. + +Ordering can be extended to CPUs not holding the lock by careful use +of smp_mb__after_spinlock(): + + /* See Z6.0+pooncelock+poonceLock+pombonce.litmus. 
*/ + void CPU0(void) + { + spin_lock(&mylock); + WRITE_ONCE(x, 1); + WRITE_ONCE(y, 1); + spin_unlock(&mylock); + } + + void CPU1(void) + { + spin_lock(&mylock); + smp_mb__after_spinlock(); + r0 = READ_ONCE(y); + WRITE_ONCE(z, 1); + spin_unlock(&mylock); + } + + void CPU2(void) + { + WRITE_ONCE(z, 2); + smp_mb(); + r1 = READ_ONCE(x); + } + +This addition of smp_mb__after_spinlock() strengthens the lock +acquisition sufficiently to rule out the counter-intuitive outcome. +In other words, the addition of the smp_mb__after_spinlock() prohibits +the counter-intuitive result where the final value of r0 is 1, the final +value of z is 2, and the final value of r1 is 0. + + +No Roach-Motel Locking! +----------------------- + +This example requires familiarity with the herd7 "filter" clause, so +please read up on that topic in litmus-tests.txt. + +It is tempting to allow memory-reference instructions to be pulled +into a critical section, but this cannot be allowed in the general case. +For example, consider a spin loop preceding a lock-based critical section. +Now, herd7 does not model spin loops, but we can emulate one with two +loads, with a "filter" clause to constrain the first to return the +initial value and the second to return the updated value, as shown below: + + /* See Documentation/litmus-tests/locking/RM-fixed.litmus. */ + P0(int *x, int *y, int *lck) + { + int r2; + + spin_lock(lck); + r2 = atomic_inc_return(y); + WRITE_ONCE(*x, 1); + spin_unlock(lck); + } + + P1(int *x, int *y, int *lck) + { + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*x); + r1 = READ_ONCE(*x); + spin_lock(lck); + r2 = atomic_inc_return(y); + spin_unlock(lck); + } + + filter (y=2 /\ 1:r0=0 /\ 1:r1=1) + exists (1:r2=1) + +The variable "x" is the control variable for the emulated spin loop. 
+P0() sets it to "1" while holding the lock, and P1() emulates the +spin loop by reading it twice, first into "1:r0" (which should get the +initial value "0") and then into "1:r1" (which should get the updated +value "1"). + +The purpose of the variable "y" is to reject deadlocked executions. +Only those executions where the final value of "y" is "2" have avoided +deadlock. + +The "filter" clause takes all this into account, constraining "y" to +equal "2", "1:r0" to equal "0", and "1:r1" to equal "1". + +Then the "exists" clause checks to see if P1() acquired its lock first, +which should not happen given the filter clause because P0() updates +"x" while holding the lock. And herd7 confirms this. + +But suppose that the compiler was permitted to reorder the spin loop +into P1()'s critical section, like this: + + /* See Documentation/litmus-tests/locking/RM-broken.litmus. */ + P0(int *x, int *y, int *lck) + { + int r2; + + spin_lock(lck); + r2 = atomic_inc_return(y); + WRITE_ONCE(*x, 1); + spin_unlock(lck); + } + + P1(int *x, int *y, int *lck) + { + int r0; + int r1; + int r2; + + spin_lock(lck); + r0 = READ_ONCE(*x); + r1 = READ_ONCE(*x); + r2 = atomic_inc_return(y); + spin_unlock(lck); + } + + locations [x;lck;0:r2;1:r0;1:r1;1:r2] + filter (y=2 /\ 1:r0=0 /\ 1:r1=1) + exists (1:r2=1) + +If "1:r0" is equal to "0", "1:r1" can never equal "1" because P0() +cannot update "x" while P1() holds the lock. And herd7 confirms this, +showing zero executions matching the "filter" criteria. + +And this is why Linux-kernel lock and unlock primitives must prevent +code from entering critical sections. It is not sufficient to only +prevent code from leaving them. From 2151cab87a68200932809369e1987bf99df0f063 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 18 Mar 2019 11:53:50 -0700 Subject: [PATCH 217/851] tools/memory-model: Make judgelitmus.sh note timeouts Currently, judgelitmus.sh treats timeouts (as in the "--timeout" argument) as "!!! Verification error". 
This can be misleading because it is quite possible that running the test longer would have produced a verification. This commit therefore changes judgelitmus.sh to check for timeouts and to report them with "!!! Timeout". Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 0cc63875e395d..d3c313b9a458a 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -42,6 +42,14 @@ grep '^Observation' $LKMM_DESTDIR/$litmus.out if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out then : +elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out +then + echo ' !!! Timeout' $litmus + if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + then + echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmus.out 2>&1 + fi + exit 124 else echo ' !!! Verification error' $litmus if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out From 3017c1832610411a7712beac6203083c8950f1b6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 18 Mar 2019 13:07:46 -0700 Subject: [PATCH 218/851] tools/memory-model: Make cmplitmushist.sh note timeouts Currently, cmplitmushist.sh treats timeouts (as in the "--timeout" argument) as "Missing Observation line". This can be misleading because it is quite possible that running the test longer would have produced a verification. This commit therefore changes cmplitmushist.sh to check for timeouts and to report them with "Timed out". Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/cmplitmushist.sh | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/memory-model/scripts/cmplitmushist.sh b/tools/memory-model/scripts/cmplitmushist.sh index 0f498aeeccf5e..b9c174dd80042 100755 --- a/tools/memory-model/scripts/cmplitmushist.sh +++ b/tools/memory-model/scripts/cmplitmushist.sh @@ -12,12 +12,30 @@ trap 'rm -rf $T' 0 mkdir $T # comparetest oldpath newpath +timedout=0 perfect=0 obsline=0 noobsline=0 obsresult=0 badcompare=0 comparetest () { + if grep -q '^Command exited with non-zero status 124' $1 || + grep -q '^Command exited with non-zero status 124' $2 + then + if grep -q '^Command exited with non-zero status 124' $1 && + grep -q '^Command exited with non-zero status 124' $2 + then + echo Both runs timed out: $2 + elif grep -q '^Command exited with non-zero status 124' $1 + then + echo Old run timed out: $2 + elif grep -q '^Command exited with non-zero status 124' $2 + then + echo New run timed out: $2 + fi + timedout=`expr "$timedout" + 1` + return 0 + fi grep -v 'maxresident)k\|minor)pagefaults\|^Time' $1 > $T/oldout grep -v 'maxresident)k\|minor)pagefaults\|^Time' $2 > $T/newout if cmp -s $T/oldout $T/newout && grep -q '^Observation' $1 @@ -78,6 +96,10 @@ if test "$obsresult" -ne 0 then echo Matching Observation Always/Sometimes/Never result: $obsresult 1>&2 fi +if test "$timedout" -ne 0 +then + echo "!!!" Timed out: $timedout 1>&2 +fi if test "$badcompare" -ne 0 then echo "!!!" Result changed: $badcompare 1>&2 From 3d368c0285a3a4069e4160728549ca30517d1b1b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 18 Mar 2019 13:40:57 -0700 Subject: [PATCH 219/851] tools/memory-model: Make judgelitmus.sh identify bad macros Currently, judgelitmus.sh treats use of unknown primitives (such as srcu_read_lock() prior to SRCU support) as "!!! Verification error". 
This can be misleading because it fails to call out typos and running a version LKMM on a litmus test requiring a feature not provided by that version. This commit therefore changes judgelitmus.sh to check for unknown primitives and to report them, for example, with: '!!! Current LKMM version does not know "rcu_write_lock"'. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/cmplitmushist.sh | 31 ++++++++++++++++++--- tools/memory-model/scripts/judgelitmus.sh | 12 ++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/tools/memory-model/scripts/cmplitmushist.sh b/tools/memory-model/scripts/cmplitmushist.sh index b9c174dd80042..ca1ac8b646144 100755 --- a/tools/memory-model/scripts/cmplitmushist.sh +++ b/tools/memory-model/scripts/cmplitmushist.sh @@ -12,6 +12,7 @@ trap 'rm -rf $T' 0 mkdir $T # comparetest oldpath newpath +badmacnam=0 timedout=0 perfect=0 obsline=0 @@ -19,8 +20,26 @@ noobsline=0 obsresult=0 badcompare=0 comparetest () { - if grep -q '^Command exited with non-zero status 124' $1 || - grep -q '^Command exited with non-zero status 124' $2 + if grep -q ': Unknown macro ' $1 || grep -q ': Unknown macro ' $2 + then + if grep -q ': Unknown macro ' $1 + then + badname=`grep ': Unknown macro ' $1 | + sed -e 's/^.*: Unknown macro //' | + sed -e 's/ (User error).*$//'` + echo 'Current LKMM version does not know "'$badname'"' $1 + fi + if grep -q ': Unknown macro ' $2 + then + badname=`grep ': Unknown macro ' $2 | + sed -e 's/^.*: Unknown macro //' | + sed -e 's/ (User error).*$//'` + echo 'Current LKMM version does not know "'$badname'"' $2 + fi + badmacnam=`expr "$badmacnam" + 1` + return 0 + elif grep -q '^Command exited with non-zero status 124' $1 || + grep -q '^Command exited with non-zero status 124' $2 then if grep -q '^Command exited with non-zero status 124' $1 && grep -q '^Command exited with non-zero status 124' $2 @@ -56,7 +75,7 @@ comparetest () { return 0 fi else - echo Missing Observation line "(e.g., herd7 timeout)": 
$2 + echo Missing Observation line "(e.g., syntax error)": $2 noobsline=`expr "$noobsline" + 1` return 0 fi @@ -90,7 +109,7 @@ then fi if test "$noobsline" -ne 0 then - echo Missing Observation line "(e.g., herd7 timeout)": $noobsline 1>&2 + echo Missing Observation line "(e.g., syntax error)": $noobsline 1>&2 fi if test "$obsresult" -ne 0 then @@ -100,6 +119,10 @@ if test "$timedout" -ne 0 then echo "!!!" Timed out: $timedout 1>&2 fi +if test "$badmacnam" -ne 0 +then + echo "!!!" Unknown primitive: $badmacnam 1>&2 +fi if test "$badcompare" -ne 0 then echo "!!!" Result changed: $badcompare 1>&2 diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index d3c313b9a458a..d40439c7b71e0 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -42,6 +42,18 @@ grep '^Observation' $LKMM_DESTDIR/$litmus.out if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out then : +elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out +then + badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out | + sed -e 's/^.*: Unknown macro //' | + sed -e 's/ (User error).*$//'` + badmsg=' !!! Current LKMM version does not know "'$badname'"'" $litmus" + echo $badmsg + if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + then + echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmus.out 2>&1 + fi + exit 254 elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out then echo ' !!! Timeout' $litmus From 001e4e7b27b6002ff3685b9225a7f209751e9e87 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 14:27:06 -0700 Subject: [PATCH 220/851] tools/memory-model: Make judgelitmus.sh detect hard deadlocks If a litmus test specifies "Result: Never" and if it contains an unconditional ("hard") deadlock, then running checklitmus.sh on it will not flag any errors, despite the fact that there are no executions. 
This commit therefore updates judgelitmus.sh to complain about tests with no executions that are marked, but not as "Result: DEADLOCK". Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index d40439c7b71e0..84c62eee321bf 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -83,6 +83,14 @@ then fi ret=1 fi +elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$' +then + echo " !!! Unexpected non-$outcome deadlock" $litmus + if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + then + echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 + fi + ret=1 elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q $outcome || test "$outcome" = Maybe then ret=0 From 140deb9f51d4e0ad75b550025fe472069fbf1060 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 Apr 2019 07:33:18 -0700 Subject: [PATCH 221/851] tools/memory-model: Fix paulmck email address on pre-existing scripts Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkalllitmus.sh | 2 +- tools/memory-model/scripts/checklitmus.sh | 2 +- tools/memory-model/scripts/checklitmushist.sh | 2 +- tools/memory-model/scripts/judgelitmus.sh | 2 +- tools/memory-model/scripts/newlitmushist.sh | 2 +- tools/memory-model/scripts/parseargs.sh | 2 +- tools/memory-model/scripts/runlitmushist.sh | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh index 3c0c7fbbd223b..10e14d94acee5 100755 --- a/tools/memory-model/scripts/checkalllitmus.sh +++ b/tools/memory-model/scripts/checkalllitmus.sh @@ -17,7 +17,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney . 
scripts/parseargs.sh diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh index 11461ed40b5e4..638b8c610894b 100755 --- a/tools/memory-model/scripts/checklitmus.sh +++ b/tools/memory-model/scripts/checklitmus.sh @@ -15,7 +15,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney litmus=$1 herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} diff --git a/tools/memory-model/scripts/checklitmushist.sh b/tools/memory-model/scripts/checklitmushist.sh index 1d210ffb7c8af..406ecfc0aee4c 100755 --- a/tools/memory-model/scripts/checklitmushist.sh +++ b/tools/memory-model/scripts/checklitmushist.sh @@ -12,7 +12,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney . scripts/parseargs.sh diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 84c62eee321bf..d82133e75580c 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -13,7 +13,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney litmus=$1 diff --git a/tools/memory-model/scripts/newlitmushist.sh b/tools/memory-model/scripts/newlitmushist.sh index 991f8f8148817..3f4b06e299886 100755 --- a/tools/memory-model/scripts/newlitmushist.sh +++ b/tools/memory-model/scripts/newlitmushist.sh @@ -12,7 +12,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney . scripts/parseargs.sh diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index 40f52080fdbd6..afe7bd23de6b8 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -9,7 +9,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. 
McKenney T=/tmp/parseargs.sh.$$ mkdir $T diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh index 6ed376f495bb4..852786fef179f 100755 --- a/tools/memory-model/scripts/runlitmushist.sh +++ b/tools/memory-model/scripts/runlitmushist.sh @@ -13,7 +13,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney T=/tmp/runlitmushist.sh.$$ trap 'rm -rf $T' 0 From 1ddaf3208b51e57bea2dd15bd387253ddd60ac1e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 15:59:26 -0700 Subject: [PATCH 222/851] tools/memory-model: Update parseargs.sh for hardware verification This commit adds a --hw argument to parseargs.sh to specify the CPU family for a hardware verification. For example, "--hw AArch64" will specify that a C-language litmus test is to be translated to ARMv8 and the result verified. This will set the LKMM_HW_MAP_FILE environment variable accordingly. If there is no --hw argument, this environment variable will be set to the empty string. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/parseargs.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index afe7bd23de6b8..5f016fc3f3af5 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -27,6 +27,7 @@ initparam () { initparam LKMM_DESTDIR "." initparam LKMM_HERD_OPTIONS "-conf linux-kernel.cfg" +initparam LKMM_HW_MAP_FILE "" initparam LKMM_JOBS `getconf _NPROCESSORS_ONLN` initparam LKMM_PROCS "3" initparam LKMM_TIMEOUT "1m" @@ -37,10 +38,11 @@ usagehelp () { echo "Usage $scriptname [ arguments ]" echo " --destdir path (place for .litmus.out, default by .litmus)" echo " --herdopts -conf linux-kernel.cfg ..." 
+ echo " --hw AArch64" echo " --jobs N (number of jobs, default one per CPU)" echo " --procs N (litmus tests with at most this many processes)" echo " --timeout N (herd7 timeout (e.g., 10s, 1m, 2hr, 1d, '')" - echo "Defaults: --destdir '$LKMM_DESTDIR_DEF' --herdopts '$LKMM_HERD_OPTIONS_DEF' --jobs '$LKMM_JOBS_DEF' --procs '$LKMM_PROCS_DEF' --timeout '$LKMM_TIMEOUT_DEF'" + echo "Defaults: --destdir '$LKMM_DESTDIR_DEF' --herdopts '$LKMM_HERD_OPTIONS_DEF' --hw '$LKMM_HW_MAP_FILE' --jobs '$LKMM_JOBS_DEF' --procs '$LKMM_PROCS_DEF' --timeout '$LKMM_TIMEOUT_DEF'" exit 1 } @@ -95,6 +97,11 @@ do LKMM_HERD_OPTIONS="$2" shift ;; + --hw) + checkarg --hw "(.map file architecture name)" "$#" "$2" '^[A-Za-z0-9_-]\+' '^--' + LKMM_HW_MAP_FILE="$2" + shift + ;; -j[1-9]*) njobs="`echo $1 | sed -e 's/^-j//'`" trailchars="`echo $njobs | sed -e 's/[0-9]\+\(.*\)$/\1/'`" From 927d91f51f850169f3ecf274bf7401ef20c5d076 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 14:39:10 -0700 Subject: [PATCH 223/851] tools/memory-model: Make judgelitmus.sh handle hardware verifications This commit makes the judgelitmus.sh script check the --hw argument (AKA the LKMM_HW_MAP_FILE environment variable) and to adjust its judgment for a run where a C-language litmus test has been translated to assembly and the assembly version verified. In this case, the assembly verification output is checked against the C-language script's "Result:" comment. However, because hardware can be stronger than LKMM requires, the judgelitmus.sh script forgives verification mismatches featuring a "Sometimes" in the C-language script and an "Always" or "Never" assembly-language verification. Note that deadlock is not forgiven, however, this should not normally be an issue given that C-language tests containing locking, RCU, or SRCU cannot be translated to assembly. However, this issue can crop up in litmus tests that mimic deadlock by using the "filter" clause to ignore all executions. 
It can also crop up when certain herd arguments are used to autofilter everything that does not match the "exists" clause in cases where the "exists" clause cannot be satisfied. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/README | 8 +-- tools/memory-model/scripts/judgelitmus.sh | 75 ++++++++++++++--------- 2 files changed, 51 insertions(+), 32 deletions(-) diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README index 095c7eb36f9f9..0e29a52044c1a 100644 --- a/tools/memory-model/scripts/README +++ b/tools/memory-model/scripts/README @@ -43,10 +43,10 @@ initlitmushist.sh judgelitmus.sh - Given a .litmus file and its .litmus.out herd7 output, check the - .litmus.out file against the .litmus file's "Result:" comment to - judge whether the test ran correctly. Not normally run manually, - provided instead for use by other scripts. + Given a .litmus file and its herd7 output, check the output file + against the .litmus file's "Result:" comment to judge whether + the test ran correctly. Not normally run manually, provided + instead for use by other scripts. newlitmushist.sh diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index d82133e75580c..6f3c60065c8b5 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -1,9 +1,14 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0+ # -# Given a .litmus test and the corresponding .litmus.out file, check -# the .litmus.out file against the "Result:" comment to judge whether -# the test ran correctly. +# Given a .litmus test and the corresponding litmus output file, check +# the .litmus.out file against the "Result:" comment to judge whether the +# test ran correctly. If the --hw argument is omitted, check against the +# LKMM output, which is assumed to be in file.litmus.out. 
If this argument +# is provided, this is assumed to be a hardware test, and the output is +# assumed to be in file.HW.litmus.out, where "HW" is the --hw argument. +# In addition, non-Sometimes verification results will be noted, but +# forgiven. # # Usage: # judgelitmus.sh file.litmus @@ -24,11 +29,18 @@ else echo ' --- ' error: \"$litmus\" is not a readable file exit 255 fi -if test -f "$LKMM_DESTDIR/$litmus".out -a -r "$LKMM_DESTDIR/$litmus".out +if test -z "$LKMM_HW_MAP_FILE" +then + litmusout=$litmus.out +else + litmusout="`echo $litmus | + sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out" +fi +if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout" then : else - echo ' --- ' error: \"$LKMM_DESTDIR/$litmus\".out is not a readable file + echo ' --- ' error: \"$LKMM_DESTDIR/$litmusout is not a readable file exit 255 fi if grep -q '^ \* Result: ' $litmus @@ -38,69 +50,76 @@ else outcome=specified fi -grep '^Observation' $LKMM_DESTDIR/$litmus.out -if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out +grep '^Observation' $LKMM_DESTDIR/$litmusout +if grep -q '^Observation' $LKMM_DESTDIR/$litmusout then : -elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out +elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmusout then - badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out | + badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmusout | sed -e 's/^.*: Unknown macro //' | sed -e 's/ (User error).*$//'` badmsg=' !!! Current LKMM version does not know "'$badname'"'" $litmus" echo $badmsg - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmusout 2>&1 fi exit 254 -elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out +elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmusout then echo ' !!! Timeout' $litmus - if ! grep -q '!!!' 
$LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmusout 2>&1 fi exit 124 else echo ' !!! Verification error' $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo ' !!! Verification error' >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo ' !!! Verification error' >> $LKMM_DESTDIR/$litmusout 2>&1 fi exit 255 fi if test "$outcome" = DEADLOCK then - if grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$' + if grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q 'Never 0 0$' then ret=0 else echo " !!! Unexpected non-$outcome verification" $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmusout 2>&1 fi ret=1 fi -elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$' +elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q 'Never 0 0$' then echo " !!! Unexpected non-$outcome deadlock" $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmusout 2>&1 fi ret=1 -elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q $outcome || test "$outcome" = Maybe +elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q $outcome || test "$outcome" = Maybe then ret=0 else - echo " !!! Unexpected non-$outcome verification" $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if test -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes then - echo " !!! 
Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmus.out 2>&1 + flag="--- Forgiven" + ret=0 + else + flag="!!! Unexpected" + ret=1 + fi + echo " $flag non-$outcome verification" $litmus + if ! grep -qe "$flag" $LKMM_DESTDIR/$litmusout + then + echo " $flag non-$outcome verification" >> $LKMM_DESTDIR/$litmusout 2>&1 fi - ret=1 fi -tail -2 $LKMM_DESTDIR/$litmus.out | head -1 +tail -2 $LKMM_DESTDIR/$litmusout | head -1 exit $ret From 64a7c671782fba71f49aabbe199a57c417b210e6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 16:21:09 -0700 Subject: [PATCH 224/851] tools/memory-model: Add simpletest.sh to check locking, RCU, and SRCU This commit abstracts out common function to check a given litmus test for locking, RCU, and SRCU in order to avoid duplicating code. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/simpletest.sh | 35 ++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 tools/memory-model/scripts/simpletest.sh diff --git a/tools/memory-model/scripts/simpletest.sh b/tools/memory-model/scripts/simpletest.sh new file mode 100755 index 0000000000000..7edc5d3616657 --- /dev/null +++ b/tools/memory-model/scripts/simpletest.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Give zero status if this is a simple test and non-zero otherwise. +# Simple tests do not contain locking, RCU, or SRCU. +# +# Usage: +# simpletest.sh file.litmus +# +# Copyright IBM Corporation, 2019 +# +# Author: Paul E. 
McKenney + + +litmus=$1 + +if test -f "$litmus" -a -r "$litmus" +then + : +else + echo ' --- ' error: \"$litmus\" is not a readable file + exit 255 +fi +exclude="^[[:space:]]*\(" +exclude="${exclude}spin_lock(\|spin_unlock(\|spin_trylock(\|spin_is_locked(" +exclude="${exclude}\|rcu_read_lock(\|rcu_read_unlock(" +exclude="${exclude}\|synchronize_rcu(\|synchronize_rcu_expedited(" +exclude="${exclude}\|srcu_read_lock(\|srcu_read_unlock(" +exclude="${exclude}\|synchronize_srcu(\|synchronize_srcu_expedited(" +exclude="${exclude}\)" +if grep -q $exclude $litmus +then + exit 255 +fi +exit 0 From 17b48719877213d42ac83a3e7f4f9d2053e50e20 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 16:37:01 -0700 Subject: [PATCH 225/851] tools/memory-model: Fix checkalllitmus.sh comment The checkalllitmus.sh runs litmus tests in the litmus-tests directory, not those in the github archive, so this commit updates the comment to reflect this reality. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkalllitmus.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh index 10e14d94acee5..54d8da8c338e1 100755 --- a/tools/memory-model/scripts/checkalllitmus.sh +++ b/tools/memory-model/scripts/checkalllitmus.sh @@ -30,8 +30,8 @@ else exit 255 fi -# Create any new directories that have appeared in the github litmus -# repo since the last run. +# Create any new directories that have appeared in the litmus-tests +# directory since the last run. if test "$LKMM_DESTDIR" != "." then find $litmusdir -type d -print | From cc2db498017edf1d3ca3139c8eff946767838f01 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 20 Mar 2019 12:39:27 -0700 Subject: [PATCH 226/851] tools/memory-model: Hardware checking for check{,all}litmus.sh This commit makes checklitmus.sh and checkalllitmus.sh check to see if a hardware verification was specified (via the --hw command-line argument, which sets the LKMM_HW_MAP_FILE environment variable). If so, the C-language litmus test is converted to the specified type of assembly-language litmus test and herd is run on it. Hardware is permitted to be stronger than LKMM requires, so "Always" and "Never" verifications of "Sometimes" C-language litmus tests are forgiven. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkalllitmus.sh | 23 +++++------ tools/memory-model/scripts/checklitmus.sh | 42 ++++++++++++++++++-- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh index 54d8da8c338e1..2d3ee850a8399 100755 --- a/tools/memory-model/scripts/checkalllitmus.sh +++ b/tools/memory-model/scripts/checkalllitmus.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0+ # # Run herd7 tests on all .litmus files in the litmus-tests directory @@ -8,6 +8,11 @@ # "^^^". It also outputs verification results to a file whose name is # that of the specified litmus test, but with ".out" appended. # +# If the --hw argument is specified, this script translates the .litmus +# C-language file to the specified type of assembly and verifies that. +# But in this case, litmus tests using complex synchronization (such as +# locking, RCU, and SRCU) are cheerfully ignored. +# # Usage: # checkalllitmus.sh # @@ -38,21 +43,15 @@ then ( cd "$LKMM_DESTDIR"; sed -e 's/^/mkdir -p /' | sh ) fi -# Find the checklitmus script. If it is not where we expect it, then -# assume that the caller has the PATH environment variable set -# appropriately. 
-if test -x scripts/checklitmus.sh -then - clscript=scripts/checklitmus.sh -else - clscript=checklitmus.sh -fi - # Run the script on all the litmus tests in the specified directory ret=0 for i in $litmusdir/*.litmus do - if ! $clscript $i + if test -n "$LKMM_HW_MAP_FILE" && ! scripts/simpletest.sh $i + then + continue + fi + if ! scripts/checklitmus.sh $i then ret=1 fi diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh index 638b8c610894b..42ff11869cd62 100755 --- a/tools/memory-model/scripts/checklitmus.sh +++ b/tools/memory-model/scripts/checklitmus.sh @@ -6,6 +6,11 @@ # results to a file whose name is that of the specified litmus test, but # with ".out" appended. # +# If the --hw argument is specified, this script translates the .litmus +# C-language file to the specified type of assembly and verifies that. +# But in this case, litmus tests using complex synchronization (such as +# locking, RCU, and SRCU) are cheerfully ignored. +# # Usage: # checklitmus.sh file.litmus # @@ -18,8 +23,6 @@ # Author: Paul E. 
McKenney litmus=$1 -herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} - if test -f "$litmus" -a -r "$litmus" then : @@ -28,7 +31,38 @@ else exit 255 fi -echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out -/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 +if test -z "$LKMM_HW_MAP_FILE" +then + # LKMM run + herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} + echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out + /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 +else + # Hardware run + + T=/tmp/checklitmushw.sh.$$ + trap 'rm -rf $T' 0 2 + mkdir $T + + # Generate filenames + catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" + mapfile="Linux2${LKMM_HW_MAP_FILE}.map" + themefile="$T/${LKMM_HW_MAP_FILE}.theme" + herdoptions="-model $LKMM_HW_CAT_FILE" + hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` + hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` + + # Don't run on litmus tests with complex synchronization + if ! scripts/simpletest.sh $litmus + then + echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU + exit 254 + fi + + # Generate the assembly code and run herd7 on it. + gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile + jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out + /usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +fi scripts/judgelitmus.sh $litmus From 012ae5d515e5670508974cb69a0d63a9df43cb13 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 20 Mar 2019 14:37:46 -0700 Subject: [PATCH 227/851] tools/memory-model: Make judgelitmus.sh ransack .litmus.out files The judgelitmus.sh script currently relies solely on the "Result:" comment in the .litmus file. 
This is problematic when using the --hw argument, because it is necessary to check the hardware model against LKMM even in the absence of "Result:" comments. This commit therefore modifies judgelitmus.sh to check the observation in a .litmus.out file, in case one was generated by a previous LKMM run. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 6f3c60065c8b5..fe9131f8eb969 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -8,7 +8,9 @@ # is provided, this is assumed to be a hardware test, and the output is # assumed to be in file.HW.litmus.out, where "HW" is the --hw argument. # In addition, non-Sometimes verification results will be noted, but -# forgiven. +# forgiven. Furthermore, if there is no "Result:" comment but there is +# an LKMM .litmus.out file, the observation in that file will be used +# to judge the assembly-language verification. # # Usage: # judgelitmus.sh file.litmus @@ -32,9 +34,11 @@ fi if test -z "$LKMM_HW_MAP_FILE" then litmusout=$litmus.out + lkmmout= else litmusout="`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out" + lkmmout=$litmus.out fi if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout" then @@ -46,6 +50,9 @@ fi if grep -q '^ \* Result: ' $litmus then outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'` +elif test -n "$LKMM_HW_MAP_FILE" && grep -q '^Observation' $LKMM_DESTDIR/$lkmmout > /dev/null 2>&1 +then + outcome=`grep -m 1 '^Observation ' $LKMM_DESTDIR/$lkmmout | awk '{ print $3 }'` else outcome=specified fi From 0e0428edd83d318c885d3bf40904b103cb9e6e7e Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 20 Mar 2019 14:57:56 -0700 Subject: [PATCH 228/851] tools/memory-model: Split runlitmus.sh out of checklitmus.sh This commit prepares for adding --hw capability to github litmus-test scripts by splitting runlitmus.sh (which simply runs the verification) out of checklitmus.sh (which also judges the results). Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checklitmus.sh | 57 ++----------------- tools/memory-model/scripts/runlitmus.sh | 69 +++++++++++++++++++++++ 2 files changed, 73 insertions(+), 53 deletions(-) create mode 100755 tools/memory-model/scripts/runlitmus.sh diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh index 42ff11869cd62..4c1d0cf0ddadc 100755 --- a/tools/memory-model/scripts/checklitmus.sh +++ b/tools/memory-model/scripts/checklitmus.sh @@ -1,15 +1,8 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0+ # -# Run a herd7 test and invokes judgelitmus.sh to check the result against -# a "Result:" comment within the litmus test. It also outputs verification -# results to a file whose name is that of the specified litmus test, but -# with ".out" appended. -# -# If the --hw argument is specified, this script translates the .litmus -# C-language file to the specified type of assembly and verifies that. -# But in this case, litmus tests using complex synchronization (such as -# locking, RCU, and SRCU) are cheerfully ignored. +# Invokes runlitmus.sh and judgelitmus.sh on its arguments to run the +# specified litmus test and pass judgment on the results. # # Usage: # checklitmus.sh file.litmus @@ -22,47 +15,5 @@ # # Author: Paul E. 
McKenney -litmus=$1 -if test -f "$litmus" -a -r "$litmus" -then - : -else - echo ' --- ' error: \"$litmus\" is not a readable file - exit 255 -fi - -if test -z "$LKMM_HW_MAP_FILE" -then - # LKMM run - herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} - echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out - /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 -else - # Hardware run - - T=/tmp/checklitmushw.sh.$$ - trap 'rm -rf $T' 0 2 - mkdir $T - - # Generate filenames - catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" - mapfile="Linux2${LKMM_HW_MAP_FILE}.map" - themefile="$T/${LKMM_HW_MAP_FILE}.theme" - herdoptions="-model $LKMM_HW_CAT_FILE" - hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` - hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` - - # Don't run on litmus tests with complex synchronization - if ! scripts/simpletest.sh $litmus - then - echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU - exit 254 - fi - - # Generate the assembly code and run herd7 on it. - gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile - jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out - /usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 -fi - -scripts/judgelitmus.sh $litmus +scripts/runlitmus.sh $1 +scripts/judgelitmus.sh $1 diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh new file mode 100755 index 0000000000000..91af859c0e90c --- /dev/null +++ b/tools/memory-model/scripts/runlitmus.sh @@ -0,0 +1,69 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Without the -hw argument, runs a herd7 test and outputs verification +# results to a file whose name is that of the specified litmus test, +# but with ".out" appended. 
+# +# If the --hw argument is specified, this script translates the .litmus +# C-language file to the specified type of assembly and verifies that. +# But in this case, litmus tests using complex synchronization (such as +# locking, RCU, and SRCU) are cheerfully ignored. +# +# Either way, return the status of the herd7 command. +# +# Usage: +# runlitmus.sh file.litmus +# +# Run this in the directory containing the memory model, specifying the +# pathname of the litmus test to check. The caller is expected to have +# properly set up the LKMM environment variables. +# +# Copyright IBM Corporation, 2019 +# +# Author: Paul E. McKenney + +litmus=$1 +if test -f "$litmus" -a -r "$litmus" +then + : +else + echo ' --- ' error: \"$litmus\" is not a readable file + exit 255 +fi + +if test -z "$LKMM_HW_MAP_FILE" +then + # LKMM run + herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} + echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out + /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 +else + # Hardware run + + T=/tmp/checklitmushw.sh.$$ + trap 'rm -rf $T' 0 2 + mkdir $T + + # Generate filenames + catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" + mapfile="Linux2${LKMM_HW_MAP_FILE}.map" + themefile="$T/${LKMM_HW_MAP_FILE}.theme" + herdoptions="-model $LKMM_HW_CAT_FILE" + hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` + hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` + + # Don't run on litmus tests with complex synchronization + if ! scripts/simpletest.sh $litmus + then + echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU + exit 254 + fi + + # Generate the assembly code and run herd on it. + gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile + jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out + /usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +fi + +exit $? 
From 2af6b5bae709728a156c34d2e68cbedd5f83296c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 20 Mar 2019 16:41:41 -0700 Subject: [PATCH 229/851] tools/memory-model: Make runlitmus.sh generate .litmus.out for --hw In the absence of "Result:" comments, the runlitmus.sh script relies on litmus.out files from prior LKMM runs. This can be a bit user-hostile, so this commit makes runlitmus.sh generate any needed .litmus.out files that don't already exist. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/runlitmus.sh | 54 ++++++++++++++----------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index 91af859c0e90c..2865a9661b078 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -28,42 +28,48 @@ if test -f "$litmus" -a -r "$litmus" then : else - echo ' --- ' error: \"$litmus\" is not a readable file + echo ' !!! ' error: \"$litmus\" is not a readable file exit 255 fi -if test -z "$LKMM_HW_MAP_FILE" +if test -z "$LKMM_HW_MAP_FILE" -o ! -e $LKMM_DESTDIR/$litmus.out then # LKMM run herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 -else - # Hardware run + ret=$? 
+ if test -z "$LKMM_HW_MAP_FILE" + then + exit $ret + fi + echo " --- " Automatically generated LKMM output for '"'--hw $LKMM_HW_MAP_FILE'"' run +fi - T=/tmp/checklitmushw.sh.$$ - trap 'rm -rf $T' 0 2 - mkdir $T +# Hardware run - # Generate filenames - catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" - mapfile="Linux2${LKMM_HW_MAP_FILE}.map" - themefile="$T/${LKMM_HW_MAP_FILE}.theme" - herdoptions="-model $LKMM_HW_CAT_FILE" - hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` - hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` +T=/tmp/checklitmushw.sh.$$ +trap 'rm -rf $T' 0 2 +mkdir $T - # Don't run on litmus tests with complex synchronization - if ! scripts/simpletest.sh $litmus - then - echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU - exit 254 - fi +# Generate filenames +catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" +mapfile="Linux2${LKMM_HW_MAP_FILE}.map" +themefile="$T/${LKMM_HW_MAP_FILE}.theme" +herdoptions="-model $LKMM_HW_CAT_FILE" +hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` +hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` - # Generate the assembly code and run herd on it. - gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile - jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out - /usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +# Don't run on litmus tests with complex synchronization +if ! scripts/simpletest.sh $litmus +then + echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU + exit 254 fi +# Generate the assembly code and run herd7 on it. +gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile +jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out +/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 + exit $? 
From 46d280ee93f807c0be3384b024de8bd8c3c514b0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 14:06:27 -0700 Subject: [PATCH 230/851] tools/memory-model: Move from .AArch64.litmus.out to .litmus.AArch.out When the github scripts see ".litmus.out", they assume that there must be a corresponding C-language ".litmus" file. Won't they be disappointed when they instead see nothing, or, worse yet, the corresponding assembly-language litmus test? This commit therefore swaps the hardware tag with the "litmus" to avoid this sort of disappointment. This commit also adjusts the .gitignore file so as to avoid adding these new ".out" files to git. [ paulmck: Apply Akira Yokosawa feedback. ] Signed-off-by: Paul E. McKenney --- tools/memory-model/litmus-tests/.gitignore | 2 +- tools/memory-model/scripts/judgelitmus.sh | 4 ++-- tools/memory-model/scripts/runlitmus.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore index c492a1ddad91d..d65462d64816d 100644 --- a/tools/memory-model/litmus-tests/.gitignore +++ b/tools/memory-model/litmus-tests/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -*.litmus.out +*.out diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index fe9131f8eb969..9abda72fe013c 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -6,7 +6,7 @@ # test ran correctly. If the --hw argument is omitted, check against the # LKMM output, which is assumed to be in file.litmus.out. If this argument # is provided, this is assumed to be a hardware test, and the output is -# assumed to be in file.HW.litmus.out, where "HW" is the --hw argument. +# assumed to be in file.litmus.HW.out, where "HW" is the --hw argument. # In addition, non-Sometimes verification results will be noted, but # forgiven. 
Furthermore, if there is no "Result:" comment but there is # an LKMM .litmus.out file, the observation in that file will be used @@ -37,7 +37,7 @@ then lkmmout= else litmusout="`echo $litmus | - sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out" + sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'`.out" lkmmout=$litmus.out fi if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout" diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index 2865a9661b078..c84124b32bee6 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -57,7 +57,7 @@ catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" mapfile="Linux2${LKMM_HW_MAP_FILE}.map" themefile="$T/${LKMM_HW_MAP_FILE}.theme" herdoptions="-model $LKMM_HW_CAT_FILE" -hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` +hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'` hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` # Don't run on litmus tests with complex synchronization From 69164e42a56a44665a45034a057fd9428e1cddc3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 14:44:09 -0700 Subject: [PATCH 231/851] tools/memory-model: Keep assembly-language litmus tests This commit retains the assembly-language litmus tests generated from the C-language litmus tests, appending the hardware tag to the original C-language litmus test's filename. Thus, S+poonceonces.litmus.AArch64 contains the Armv8 assembly language corresponding to the C-language S+poonceonces.litmus test. This commit also updates the .gitignore to avoid committing these automatically generated assembly-language litmus tests. Signed-off-by: Paul E. 
McKenney --- tools/memory-model/litmus-tests/.gitignore | 2 +- tools/memory-model/scripts/runlitmus.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore index d65462d64816d..19c379cf069d2 100644 --- a/tools/memory-model/litmus-tests/.gitignore +++ b/tools/memory-model/litmus-tests/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -*.out +*.litmus.* diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index c84124b32bee6..62b47c7e1ba93 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -69,7 +69,7 @@ fi # Generate the assembly code and run herd7 on it. gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile -jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out -/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out +/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 exit $? From 1cada3a91f1d3d335576bebb170236ac1e80ac80 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 22 Mar 2019 08:57:20 -0700 Subject: [PATCH 232/851] tools/memory-model: Allow herd to deduce CPU type Currently, the scripts specify the CPU's .cat file to herd. But this is pointless because herd will select a good and sufficient .cat file from the assembly-language litmus test itself. This commit therefore removes the -model argument to herd, allowing herd to figure the CPU family out itself. Note that the user can override herd's choice using the "--herdopts" argument to the scripts. Suggested-by: Luc Maranget Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/runlitmus.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index 62b47c7e1ba93..afb196d7ef106 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -53,7 +53,6 @@ trap 'rm -rf $T' 0 2 mkdir $T # Generate filenames -catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" mapfile="Linux2${LKMM_HW_MAP_FILE}.map" themefile="$T/${LKMM_HW_MAP_FILE}.theme" herdoptions="-model $LKMM_HW_CAT_FILE" @@ -70,6 +69,6 @@ fi # Generate the assembly code and run herd7 on it. gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out -/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 exit $? From 4f517d311df5dbffd36401f6655120e48111ad0a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 25 Mar 2019 17:20:51 -0700 Subject: [PATCH 233/851] tools/memory-model: Make runlitmus.sh check for jingle errors It turns out that the jingle7 tool is currently a bit picky about the litmus tests it is willing to process. This commit therefore ensures that jingle7 failures are reported. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/runlitmus.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index afb196d7ef106..5f2d29b460ff0 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -69,6 +69,11 @@ fi # Generate the assembly code and run herd7 on it. 
gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out +if grep -q "Generated 0 tests" $T/$hwlitmusfile.jingle7.out +then + echo ' !!! ' jingle7 failed, no $hwlitmus generated + exit 253 +fi /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 exit $? From 3b02da4c1ed8ffde73e08faa369a963e8594c2a8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Apr 2019 12:34:56 -0700 Subject: [PATCH 234/851] tools/memory-model: Add -v flag to jingle7 runs Adding the -v flag to jingle7 invocations gives much useful information on why jingle7 didn't like a given litmus test. This commit therefore adds this flag and saves off any such information into a .err file. Suggested-by: Luc Maranget Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/runlitmus.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index 5f2d29b460ff0..dfdb1f00fcc03 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -68,10 +68,11 @@ fi # Generate the assembly code and run herd7 on it. gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile -jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out +jingle7 -v -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out if grep -q "Generated 0 tests" $T/$hwlitmusfile.jingle7.out then - echo ' !!! ' jingle7 failed, no $hwlitmus generated + echo ' !!! ' jingle7 failed, errors in $hwlitmus.err + cp $T/$hwlitmusfile.jingle7.out $LKMM_DESTDIR/$hwlitmus.err exit 253 fi /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 From 9f2af1c8ecea3dc895fe6ac6ed6b7290a233a451 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 22 Mar 2019 17:18:43 -0700 Subject: [PATCH 235/851] tools/memory-model: Implement --hw support for checkghlitmus.sh This commit enables the "--hw" argument for the checkghlitmus.sh script, causing it to convert any applicable C-language litmus tests to the specified flavor of assembly language, to verify these assembly-language litmus tests, and to check compatibility of the outcomes. Note that the conversion does not yet handle locking, RCU, SRCU, plain C-language memory accesses, or casts. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkghlitmus.sh | 9 ++++--- tools/memory-model/scripts/hwfnseg.sh | 20 +++++++++++++++ tools/memory-model/scripts/runlitmushist.sh | 27 +++++++++++++-------- 3 files changed, 42 insertions(+), 14 deletions(-) create mode 100755 tools/memory-model/scripts/hwfnseg.sh diff --git a/tools/memory-model/scripts/checkghlitmus.sh b/tools/memory-model/scripts/checkghlitmus.sh index 6589fbb6f6538..2ea220d2564b9 100755 --- a/tools/memory-model/scripts/checkghlitmus.sh +++ b/tools/memory-model/scripts/checkghlitmus.sh @@ -10,6 +10,7 @@ # parseargs.sh scripts for arguments. . scripts/parseargs.sh +. scripts/hwfnseg.sh T=/tmp/checkghlitmus.sh.$$ trap 'rm -rf $T' 0 @@ -32,9 +33,9 @@ then ( cd "$LKMM_DESTDIR"; sed -e 's/^/mkdir -p /' | sh ) fi -# Create a list of the C-language litmus tests previously run. -( cd $LKMM_DESTDIR; find litmus -name '*.litmus.out' -print ) | - sed -e 's/\.out$//' | +# Create a list of the specified litmus tests previously run. 
+( cd $LKMM_DESTDIR; find litmus -name "*.litmus${hwfnseg}.out" -print ) | + sed -e "s/${hwfnseg}"'\.out$//' | xargs -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' | xargs -r grep -L "^P${LKMM_PROCS}"> $T/list-C-already @@ -44,7 +45,7 @@ find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C xargs < $T/list-C -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' > $T/list-C-result xargs < $T/list-C-result -r grep -L "^P${LKMM_PROCS}" > $T/list-C-result-short -# Form list of tests without corresponding .litmus.out files +# Form list of tests without corresponding .out files sort $T/list-C-already $T/list-C-result-short | uniq -u > $T/list-C-needed # Run any needed tests. diff --git a/tools/memory-model/scripts/hwfnseg.sh b/tools/memory-model/scripts/hwfnseg.sh new file mode 100755 index 0000000000000..580c3281181c5 --- /dev/null +++ b/tools/memory-model/scripts/hwfnseg.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Generate the hardware extension to the litmus-test filename, or the +# empty string if this is an LKMM run. The extension is placed in +# the shell variable hwfnseg. +# +# Usage: +# . hwfnseg.sh +# +# Copyright IBM Corporation, 2019 +# +# Author: Paul E. McKenney + +if test -z "$LKMM_HW_MAP_FILE" +then + hwfnseg= +else + hwfnseg=".$LKMM_HW_MAP_FILE" +fi diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh index 852786fef179f..c6c2bdc67a502 100755 --- a/tools/memory-model/scripts/runlitmushist.sh +++ b/tools/memory-model/scripts/runlitmushist.sh @@ -15,6 +15,8 @@ # # Author: Paul E. McKenney +. scripts/hwfnseg.sh + T=/tmp/runlitmushist.sh.$$ trap 'rm -rf $T' 0 mkdir $T @@ -30,15 +32,12 @@ fi # Prefixes for per-CPU scripts for ((i=0;i<$LKMM_JOBS;i++)) do - echo dir="$LKMM_DESTDIR" > $T/$i.sh echo T=$T >> $T/$i.sh - echo herdoptions=\"$LKMM_HERD_OPTIONS\" >> $T/$i.sh cat << '___EOF___' >> $T/$i.sh runtest () { - echo ' ... 
' /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $1 '>' $dir/$1.out '2>&1' - if /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $1 > $dir/$1.out 2>&1 + if scripts/runlitmus.sh $1 then - if ! grep -q '^Observation ' $dir/$1.out + if ! grep -q '^Observation ' $LKMM_DESTDIR/$1$2.out then echo ' !!! Herd failed, no Observation:' $1 fi @@ -47,10 +46,16 @@ do if test "$exitcode" -eq 124 then exitmsg="timed out" + elif test "$exitcode" -eq 253 + then + exitmsg= else exitmsg="failed, exit code $exitcode" fi - echo ' !!! Herd' ${exitmsg}: $1 + if test -n "$exitmsg" + then + echo ' !!! Herd' ${exitmsg}: $1 + fi fi } ___EOF___ @@ -59,11 +64,13 @@ done awk -v q="'" -v b='\\' ' { print "echo `grep " q "^P[0-9]" b "+(" q " " $0 " | tail -1 | sed -e " q "s/^P" b "([0-9]" b "+" b ")(.*$/" b "1/" q "` " $0 -}' | bash | -sort -k1n | -awk -v ncpu=$LKMM_JOBS -v t=$T ' +}' | sh | sort -k1n | +awk -v dq='"' -v hwfnseg="$hwfnseg" -v ncpu="$LKMM_JOBS" -v t="$T" ' { - print "runtest " $2 >> t "/" NR % ncpu ".sh"; + print "if test -z " dq hwfnseg dq " || scripts/simpletest.sh " dq $2 dq + print "then" + print "\techo runtest " dq $2 dq " " hwfnseg " >> " t "/" NR % ncpu ".sh"; + print "fi" } END { From a5e59c6f6074759ff420118e23257d4fb3952e92 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 27 Mar 2019 11:47:14 -0700 Subject: [PATCH 236/851] tools/memory-model: Fix scripting --jobs argument The parseargs.sh regular expression for the --jobs argument incorrectly requires that the number of jobs be at least 10, that is, have at least two digits. This commit therefore adjusts this regular expression to allow single-digit numbers of jobs to be specified. Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/parseargs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index 5f016fc3f3af5..25a81ac0dfdf4 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -113,7 +113,7 @@ do LKMM_JOBS="`echo $njobs | sed -e 's/^\([0-9]\+\).*$/\1/'`" ;; --jobs|--job|-j) - checkarg --jobs "(number)" "$#" "$2" '^[1-9][0-9]\+$' '^--' + checkarg --jobs "(number)" "$#" "$2" '^[1-9][0-9]*$' '^--' LKMM_JOBS="$2" shift ;; From 537613009145e4ce50ec935ecef746c7037d8687 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 8 Apr 2019 09:27:28 -0700 Subject: [PATCH 237/851] tools/memory-model: Make checkghlitmus.sh use mselect7 The checkghlitmus.sh script currently uses grep to ignore non-C-language litmus tests, which is a bit fragile. This commit therefore enlists the aid of "mselect7 -arch C", given Luc Maranget's recent modifications that allow mselect7 to operate in filter mode. This change requires herdtools 7.52-32-g1da3e0e50977 or later. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkghlitmus.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/checkghlitmus.sh b/tools/memory-model/scripts/checkghlitmus.sh index 2ea220d2564b9..cedd0290b73f8 100755 --- a/tools/memory-model/scripts/checkghlitmus.sh +++ b/tools/memory-model/scripts/checkghlitmus.sh @@ -41,7 +41,7 @@ fi # Create a list of C-language litmus tests with "Result:" commands and # no more than the specified number of processes. 
-find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C +find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C xargs < $T/list-C -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' > $T/list-C-result xargs < $T/list-C-result -r grep -L "^P${LKMM_PROCS}" > $T/list-C-result-short From b0f15cf438d83d4e3dd3a8a6b803080d05907c80 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 8 Apr 2019 10:02:23 -0700 Subject: [PATCH 238/851] tools/memory-model: Make history-check scripts use mselect7 The history-check scripts currently use grep to ignore non-C-language litmus tests, which is a bit fragile. This commit therefore enlists the aid of "mselect7 -arch C", given Luc Maranget's recent modifications that allow mselect7 to operate in filter mode. This change requires herdtools 7.52-32-g1da3e0e50977 or later. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/initlitmushist.sh | 2 +- tools/memory-model/scripts/newlitmushist.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/scripts/initlitmushist.sh b/tools/memory-model/scripts/initlitmushist.sh index 956b6957484d8..31ea782955d3f 100755 --- a/tools/memory-model/scripts/initlitmushist.sh +++ b/tools/memory-model/scripts/initlitmushist.sh @@ -60,7 +60,7 @@ fi # Create a list of the C-language litmus tests with no more than the # specified number of processes (per the --procs argument). 
-find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C +find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C xargs < $T/list-C -r grep -L "^P${LKMM_PROCS}" > $T/list-C-short scripts/runlitmushist.sh < $T/list-C-short diff --git a/tools/memory-model/scripts/newlitmushist.sh b/tools/memory-model/scripts/newlitmushist.sh index 3f4b06e299886..25235e2049cf0 100755 --- a/tools/memory-model/scripts/newlitmushist.sh +++ b/tools/memory-model/scripts/newlitmushist.sh @@ -43,7 +43,7 @@ fi # Form full list of litmus tests with no more than the specified # number of processes (per the --procs argument). -find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C-all +find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C-all xargs < $T/list-C-all -r grep -L "^P${LKMM_PROCS}" > $T/list-C-short # Form list of new tests. Note: This does not handle litmus-test deletion! From b72e7b8b5cdc63742c37a750ccf2fc2a4c0b308a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 May 2019 09:51:57 -0700 Subject: [PATCH 239/851] tools/memory-model: Add "--" to parseargs.sh for additional arguments Currently, parseargs.sh expects to consume all the command-line arguments, which prevents the calling script from having any of its own arguments. This commit therefore causes parseargs.sh to stop consuming arguments when it encounters a "--" argument, leaving any remaining arguments for the calling script. Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/parseargs.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index 25a81ac0dfdf4..7aa58755adfc0 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -83,7 +83,7 @@ do echo "Cannot create directory --destdir '$LKMM_DESTDIR'" usage fi - if test -d "$LKMM_DESTDIR" -a -w "$LKMM_DESTDIR" -a -x "$LKMM_DESTDIR" + if test -d "$LKMM_DESTDIR" -a -x "$LKMM_DESTDIR" then : else @@ -127,6 +127,10 @@ do LKMM_TIMEOUT="$2" shift ;; + --) + shift + break + ;; *) echo Unknown argument $1 usage From 81d5bb3344c0bb23e1960718e27028400f2a9c0b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 May 2019 10:03:29 -0700 Subject: [PATCH 240/851] tools/memory-model: Repair parseargs.sh header comment Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/parseargs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index 7aa58755adfc0..08ded59098607 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0+ # -# the corresponding .litmus.out file, and does not judge the result. +# Parse arguments common to the various scripts. # # . scripts/parseargs.sh # From f108a961de9437cee99d7b3057baa92fcdfcfd49 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 May 2019 10:05:14 -0700 Subject: [PATCH 241/851] tools/memory-model: Add checktheselitmus.sh to run specified litmus tests This commit adds a checktheselitmus.sh script that runs the litmus tests specified on the command line. This is useful for verifying fixes to specific litmus tests. Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/README | 8 ++++ .../memory-model/scripts/checktheselitmus.sh | 43 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100755 tools/memory-model/scripts/checktheselitmus.sh diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README index 0e29a52044c1a..cc2c4e5be9ec1 100644 --- a/tools/memory-model/scripts/README +++ b/tools/memory-model/scripts/README @@ -27,6 +27,14 @@ checklitmushist.sh checklitmus.sh Check a single litmus test against its "Result:" expected result. + Not intended to for manual use. + +checktheselitmus.sh + + Check the specified list of litmus tests against their "Result:" + expected results. This takes optional parseargs.sh arguments, + followed by "--" followed by pathnames starting from the current + directory. cmplitmushist.sh diff --git a/tools/memory-model/scripts/checktheselitmus.sh b/tools/memory-model/scripts/checktheselitmus.sh new file mode 100755 index 0000000000000..10eeb5ecea6de --- /dev/null +++ b/tools/memory-model/scripts/checktheselitmus.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Invokes checklitmus.sh on its arguments to run the specified litmus +# test and pass judgment on the results. +# +# Usage: +# checktheselitmus.sh -- [ file1.litmus [ file2.litmus ... ] ] +# +# Run this in the directory containing the memory model, specifying the +# pathname of the litmus test to check. The usual parseargs.sh arguments +# can be specified prior to the "--". +# +# This script is intended for use with pathnames that start from the +# tools/memory-model directory. If some of the pathnames instead start at +# the root directory, they all must do so and the "--destdir /" parseargs.sh +# argument must be specified prior to the "--". Alternatively, some other +# "--destdir" argument can be supplied as long as the needed subdirectories +# are populated. +# +# Copyright IBM Corporation, 2018 +# +# Author: Paul E. McKenney + +. 
scripts/parseargs.sh + +ret=0 +for i in "$@" +do + if scripts/checklitmus.sh $i + then + : + else + ret=1 + fi +done +if test "$ret" -ne 0 +then + echo " ^^^ VERIFICATION MISMATCHES" 1>&2 +else + echo All litmus tests verified as was expected. 1>&2 +fi +exit $ret From 137706786f35673ab2e3712bf678c232b3461a16 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 3 May 2019 07:34:20 -0700 Subject: [PATCH 242/851] tools/memory-model: Add data-race capabilities to judgelitmus.sh This commit adds functionality to judgelitmus.sh to allow it to handle both the "DATARACE" markers in the "Result:" comments in litmus tests and the "Flag data-race" markers in LKMM output. For C-language tests, if either marker is present, the other must also be as well, at least for litmus tests having a "Result:" comment. If the LKMM output indicates a data race, then failures of the Always/Sometimes/Never portion of the "Result:" prediction are forgiven. The reason for forgiving "Result:" mispredictions is that data races can result in "interesting" compiler optimizations, so that all bets are off in the data-race case. [ paulmck: Apply Akira Yokosawa feedback. ] Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 40 ++++++++++++++++++----- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 9abda72fe013c..2700481d20f01 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -4,13 +4,19 @@ # Given a .litmus test and the corresponding litmus output file, check # the .litmus.out file against the "Result:" comment to judge whether the # test ran correctly. If the --hw argument is omitted, check against the -# LKMM output, which is assumed to be in file.litmus.out. 
If this argument -# is provided, this is assumed to be a hardware test, and the output is -# assumed to be in file.litmus.HW.out, where "HW" is the --hw argument. -# In addition, non-Sometimes verification results will be noted, but -# forgiven. Furthermore, if there is no "Result:" comment but there is -# an LKMM .litmus.out file, the observation in that file will be used -# to judge the assembly-language verification. +# LKMM output, which is assumed to be in file.litmus.out. If either a +# "DATARACE" marker in the "Result:" comment or a "Flag data-race" marker +# in the LKMM output is present, the other must also be as well, at least +# for litmus tests having a "Result:" comment. In this case, a failure of +# the Always/Sometimes/Never portion of the "Result:" prediction will be +# noted, but forgiven. +# +# If the --hw argument is provided, this is assumed to be a hardware +# test, and the output is assumed to be in file.litmus.HW.out, where +# "HW" is the --hw argument. In addition, non-Sometimes verification +# results will be noted, but forgiven. Furthermore, if there is no +# "Result:" comment but there is an LKMM .litmus.out file, the observation +# in that file will be used to judge the assembly-language verification. # # Usage: # judgelitmus.sh file.litmus @@ -47,9 +53,27 @@ else echo ' --- ' error: \"$LKMM_DESTDIR/$litmusout is not a readable file exit 255 fi +if grep -q '^Flag data-race$' "$LKMM_DESTDIR/$litmusout" +then + datarace_modeled=1 +fi if grep -q '^ \* Result: ' $litmus then outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'` + if grep -m1 '^ \* Result: .* DATARACE' $litmus + then + datarace_predicted=1 + fi + if test -n "$datarace_predicted" -a -z "$datarace_modeled" -a -z "$LKMM_HW_MAP_FILE" + then + echo '!!! Predicted data race not modeled' $litmus + exit 252 + elif test -z "$datarace_predicted" -a -n "$datarace_modeled" + then + # Note that hardware models currently don't model data races + echo '!!! 
Unexpected data race modeled' $litmus + exit 253 + fi elif test -n "$LKMM_HW_MAP_FILE" && grep -q '^Observation' $LKMM_DESTDIR/$lkmmout > /dev/null 2>&1 then outcome=`grep -m 1 '^Observation ' $LKMM_DESTDIR/$lkmmout | awk '{ print $3 }'` @@ -114,7 +138,7 @@ elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q $outcome || test "$o then ret=0 else - if test -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes + if test \( -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes \) -o -n "$datarace_modeled" then flag="--- Forgiven" ret=0 From 08a741c54842ce15f9bf6a25a2ba197f8562a6b6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Jun 2019 02:13:27 -0700 Subject: [PATCH 243/851] tools/memory-model: Make judgelitmus.sh handle scripted Result: tag The scripts that generate the litmus tests in the "auto" directory of the https://github.com/paulmckrcu/litmus archive place the "Result:" tag into a single-line ocaml comment, which judgelitmus.sh currently does not recognize. This commit therefore makes judgelitmus.sh recognize both the multiline comment format that it currently does and the automatically generated single-line format. Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/judgelitmus.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 2700481d20f01..1ec5d89fcfbb2 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -57,10 +57,10 @@ if grep -q '^Flag data-race$' "$LKMM_DESTDIR/$litmusout" then datarace_modeled=1 fi -if grep -q '^ \* Result: ' $litmus +if grep -q '^[( ]\* Result: ' $litmus then - outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'` - if grep -m1 '^ \* Result: .* DATARACE' $litmus + outcome=`grep -m 1 '^[( ]\* Result: ' $litmus | awk '{ print $3 }'` + if grep -m1 '^[( ]\* Result: .* DATARACE' $litmus then datarace_predicted=1 fi From 50325310cf2ab1380323954f10252bae1e3a6d0a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 24 Jun 2019 22:30:32 -0700 Subject: [PATCH 244/851] tools/memory-model: Use "-unroll 0" to keep --hw runs finite Litmus tests involving atomic operations produce LL/SC loops on a number of architectures, and unrolling these loops can result in excessive verification times or even stack overflows. This commit therefore uses the "-unroll 0" herd7 argument to avoid unrolling, on the grounds that additional passes through an LL/SC loop should not change the verification. Note however, that certain bugs in the mapping of the LL/SC loop to machine instructions may go undetected. On the other hand, herd7 might not be the best vehicle for finding such bugs in any case. (You do stress-test your architecture-specific code, don't you?) Suggested-by: Luc Maranget Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/runlitmus.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index dfdb1f00fcc03..94608d4b6502e 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -75,6 +75,6 @@ then cp $T/$hwlitmusfile.jingle7.out $LKMM_DESTDIR/$hwlitmus.err exit 253 fi -/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -unroll 0 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 exit $? From 8baded711edc11a14d51107f3be9d9e50da4485d Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Sun, 16 May 2021 00:45:11 +0800 Subject: [PATCH 245/851] rcu: Fix to include first blocked task in stall warning The for loop in rcu_print_task_stall() always omits ts[0], which points to the first task blocking the stalled grace period. This in turn fails to count this first task, which means that ndetected will be equal to zero when all CPUs have passed through their quiescent states and only one task is blocking the stalled grace period. This zero value for ndetected will in turn result in an incorrect "All QSes seen" message: rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: rcu: Tasks blocked on level-1 rcu_node (CPUs 12-23): (detected by 15, t=6504 jiffies, g=164777, q=9011209) rcu: All QSes seen, last rcu_preempt kthread activity 1 (4295252379-4295252378), jiffies_till_next_fqs=1, root ->qsmask 0x2 BUG: sleeping function called from invalid context at include/linux/uaccess.h:156 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 70613, name: msgstress04 INFO: lockdep is turned off. 
Preemption disabled at: [] create_object.isra.0+0x204/0x4b0 CPU: 15 PID: 70613 Comm: msgstress04 Kdump: loaded Not tainted 5.12.2-yoctodev-standard #1 Hardware name: Marvell OcteonTX CN96XX board (DT) Call trace: dump_backtrace+0x0/0x2cc show_stack+0x24/0x30 dump_stack+0x110/0x188 ___might_sleep+0x214/0x2d0 __might_sleep+0x7c/0xe0 This commit therefore fixes the loop to include ts[0]. Fixes: c583bcb8f5ed ("rcu: Don't invoke try_invoke_on_locked_down_task() with irqs disabled") Signed-off-by: Yanfei Xu Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_stall.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 6c76988cc019f..2e96f9741666d 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -280,8 +280,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) break; } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - for (i--; i; i--) { - t = ts[i]; + while (i) { + t = ts[--i]; if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr)) pr_cont(" P%d", t->pid); else From f6b3995a8b56dc0f89ed833ee8f78e4aeaf9514a Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Sun, 16 May 2021 17:50:10 +0800 Subject: [PATCH 246/851] rcu: Fix stall-warning deadlock due to non-release of rcu_node ->lock If rcu_print_task_stall() is invoked on an rcu_node structure that does not contain any tasks blocking the current grace period, it takes an early exit that fails to release that rcu_node structure's lock. This results in a self-deadlock, which is detected by lockdep. 
To reproduce this bug: tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 3 --trust-make --configs "TREE03" --kconfig "CONFIG_PROVE_LOCKING=y" --bootargs "rcutorture.stall_cpu=30 rcutorture.stall_cpu_block=1 rcutorture.fwd_progress=0 rcutorture.test_boost=0" This will also result in other complaints, including RCU's scheduler hook complaining about blocking rather than preemption and an rcutorture writer stall. Only a partial RCU CPU stall warning message will be printed because of the self-deadlock. This commit therefore releases the lock on the rcu_print_task_stall() function's early exit path. Fixes: c583bcb8f5ed ("rcu: Don't invoke try_invoke_on_locked_down_task() with irqs disabled") Signed-off-by: Yanfei Xu Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_stall.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 2e96f9741666d..bd4de5bc5807e 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -267,8 +267,10 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) struct task_struct *ts[8]; lockdep_assert_irqs_disabled(); - if (!rcu_preempt_blocked_readers_cgp(rnp)) + if (!rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return 0; + } pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", rnp->level, rnp->grplo, rnp->grphi); t = list_entry(rnp->gp_tasks->prev, From 343ad5a3e1ca6a8ce69d075921cb514f30fe32e8 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 18 May 2021 19:17:16 -0700 Subject: [PATCH 247/851] rcu: Remove special bit at the bottom of the ->dynticks counter Commit b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") reserved a bit at the bottom of the ->dynticks counter to defer flushing of TLBs, but this facility never has been used. 
This commit therefore removes this capability along with the rcu_eqs_special_set() function used to trigger it. Link: https://lore.kernel.org/linux-doc/CALCETrWNPOOdTrFabTDd=H7+wc6xJ9rJceg6OL1S0rTV5pfSsA@mail.gmail.com/ Suggested-by: Andy Lutomirski Signed-off-by: "Joel Fernandes (Google)" [ paulmck: Forward-port to v5.13-rc1. ] Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 3 -- kernel/rcu/tree.c | 77 ++++++++--------------------------------- 2 files changed, 14 insertions(+), 66 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 953e70fafe383..9be015305f9f9 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -14,9 +14,6 @@ #include /* for HZ */ -/* Never flag non-existent other CPUs! */ -static inline bool rcu_eqs_special_set(int cpu) { return false; } - unsigned long get_state_synchronize_rcu(void); unsigned long start_poll_synchronize_rcu(void); bool poll_state_synchronize_rcu(unsigned long oldstate); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 51f24ecd94b26..42a0032dd99f7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -74,17 +74,10 @@ /* Data structures. */ -/* - * Steal a bit from the bottom of ->dynticks for idle entry/exit - * control. Initially this is for TLB flushing. - */ -#define RCU_DYNTICK_CTRL_MASK 0x1 -#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1) - static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { .dynticks_nesting = 1, .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, - .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), + .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY, #endif @@ -266,7 +259,6 @@ void rcu_softirq_qs(void) */ static noinstr void rcu_dynticks_eqs_enter(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); int seq; /* @@ -275,13 +267,9 @@ static noinstr void rcu_dynticks_eqs_enter(void) * next idle sojourn. */ rcu_dynticks_task_trace_enter(); // Before ->dynticks update! 
- seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + seq = arch_atomic_inc_return(&this_cpu_ptr(&rcu_data)->dynticks); // RCU is no longer watching. Better be in extended quiescent state! - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (seq & RCU_DYNTICK_CTRL_CTR)); - /* Better not have special action (TLB flush) pending! */ - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (seq & RCU_DYNTICK_CTRL_MASK)); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1)); } /* @@ -291,7 +279,6 @@ static noinstr void rcu_dynticks_eqs_enter(void) */ static noinstr void rcu_dynticks_eqs_exit(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); int seq; /* @@ -299,15 +286,10 @@ static noinstr void rcu_dynticks_eqs_exit(void) * and we also must force ordering with the next RCU read-side * critical section. */ - seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + seq = arch_atomic_inc_return(&this_cpu_ptr(&rcu_data)->dynticks); // RCU is now watching. Better not be in an extended quiescent state! rcu_dynticks_task_trace_exit(); // After ->dynticks update! - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !(seq & RCU_DYNTICK_CTRL_CTR)); - if (seq & RCU_DYNTICK_CTRL_MASK) { - arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks); - smp_mb__after_atomic(); /* _exit after clearing mask. 
*/ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1)); } /* @@ -324,9 +306,9 @@ static void rcu_dynticks_eqs_online(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR) + if (atomic_read(&rdp->dynticks) & 0x1) return; - atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + atomic_inc(&rdp->dynticks); } /* @@ -336,9 +318,7 @@ static void rcu_dynticks_eqs_online(void) */ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR); + return !(arch_atomic_read(&this_cpu_ptr(&rcu_data)->dynticks) & 0x1); } /* @@ -347,9 +327,7 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) */ static int rcu_dynticks_snap(struct rcu_data *rdp) { - int snap = atomic_add_return(0, &rdp->dynticks); - - return snap & ~RCU_DYNTICK_CTRL_MASK; + return atomic_add_return(0, &rdp->dynticks); } /* @@ -358,7 +336,7 @@ static int rcu_dynticks_snap(struct rcu_data *rdp) */ static bool rcu_dynticks_in_eqs(int snap) { - return !(snap & RCU_DYNTICK_CTRL_CTR); + return !(snap & 0x1); } /* Return true if the specified CPU is currently idle from an RCU viewpoint. */ @@ -389,8 +367,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) int snap; // If not quiescent, force back to earlier extended quiescent state. - snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK | - RCU_DYNTICK_CTRL_CTR); + snap = atomic_read(&rdp->dynticks) & ~0x1; smp_rmb(); // Order ->dynticks and *vp reads. if (READ_ONCE(*vp)) @@ -398,32 +375,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) smp_rmb(); // Order *vp read and ->dynticks re-read. // If still in the same extended quiescent state, we are good! 
- return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK); -} - -/* - * Set the special (bottom) bit of the specified CPU so that it - * will take special action (such as flushing its TLB) on the - * next exit from an extended quiescent state. Returns true if - * the bit was successfully set, or false if the CPU was not in - * an extended quiescent state. - */ -bool rcu_eqs_special_set(int cpu) -{ - int old; - int new; - int new_old; - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - - new_old = atomic_read(&rdp->dynticks); - do { - old = new_old; - if (old & RCU_DYNTICK_CTRL_CTR) - return false; - new = old | RCU_DYNTICK_CTRL_MASK; - new_old = atomic_cmpxchg(&rdp->dynticks, old, new); - } while (new_old != old); - return true; + return snap == atomic_read(&rdp->dynticks); } /* @@ -442,10 +394,9 @@ notrace void rcu_momentary_dyntick_idle(void) int special; raw_cpu_write(rcu_data.rcu_need_heavy_qs, false); - special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, - &this_cpu_ptr(&rcu_data)->dynticks); + special = atomic_add_return(2, &this_cpu_ptr(&rcu_data)->dynticks); /* It is illegal to call this from idle state. */ - WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); + WARN_ON_ONCE(!(special & 0x1)); rcu_preempt_deferred_qs(current); } EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle); From 285ec70ee6cecec1ed96b21c14b290adce341fda Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 May 2021 17:25:42 -0700 Subject: [PATCH 248/851] rcu: Weaken ->dynticks accesses and updates Accesses to the rcu_data structure's ->dynticks field have always been fully ordered because it was not possible to prove that weaker ordering was safe. However, with the removal of the rcu_eqs_special_set() function and the advent of the Linux-kernel memory model, it is now easy to show that two of the four original full memory barriers can be weakened to acquire and release operations. The remaining pair must remain full memory barriers. 
This change makes the memory ordering requirements more evident, and it might well also speed up the to-idle and from-idle fastpaths on some architectures. The following litmus test, adapted from one supplied off-list by Frederic Weisbecker, models the RCU grace-period kthread detecting an idle CPU that is concurrently transitioning to non-idle: C dynticks-from-idle { DYNTICKS=0; (* Initially idle. *) } P0(int *X, int *DYNTICKS) { int dynticks; int x; // Idle. dynticks = READ_ONCE(*DYNTICKS); smp_store_release(DYNTICKS, dynticks + 1); smp_mb(); // Now non-idle x = READ_ONCE(*X); } P1(int *X, int *DYNTICKS) { int dynticks; WRITE_ONCE(*X, 1); smp_mb(); dynticks = smp_load_acquire(DYNTICKS); } exists (1:dynticks=0 /\ 0:x=1) Running "herd7 -conf linux-kernel.cfg dynticks-from-idle.litmus" verifies this transition, namely, showing that if the RCU grace-period kthread (P1) sees another CPU as idle (P0), then any memory access prior to the start of the grace period (P1's write to X) will be seen by any RCU read-side critical section following the to-non-idle transition (P0's read from X). This is a straightforward use of full memory barriers to force ordering in a store-buffering (SB) litmus test. The following litmus test, also adapted from the one supplied off-list by Frederic Weisbecker, models the RCU grace-period kthread detecting a non-idle CPU that is concurrently transitioning to idle: C dynticks-into-idle { DYNTICKS=1; (* Initially non-idle. *) } P0(int *X, int *DYNTICKS) { int dynticks; // Non-idle. WRITE_ONCE(*X, 1); dynticks = READ_ONCE(*DYNTICKS); smp_store_release(DYNTICKS, dynticks + 1); smp_mb(); // Now idle. 
} P1(int *X, int *DYNTICKS) { int x; int dynticks; smp_mb(); dynticks = smp_load_acquire(DYNTICKS); x = READ_ONCE(*X); } exists (1:dynticks=2 /\ 1:x=0) Running "herd7 -conf linux-kernel.cfg dynticks-into-idle.litmus" verifies this transition, namely, showing that if the RCU grace-period kthread (P1) sees another CPU as newly idle (P0), then any pre-idle memory access (P0's write to X) will be seen by any code following the grace period (P1's read from X). This is a simple release-acquire pair forcing ordering in a message-passing (MP) litmus test. Of course, if the grace-period kthread detects the CPU as non-idle, it will refrain from reporting a quiescent state on behalf of that CPU, so there are no ordering requirements from the grace-period kthread in that case. However, other subsystems call rcu_is_idle_cpu() to check for CPUs being non-idle from an RCU perspective. That case is also verified by the above litmus tests with the proviso that the sense of the low-order bit of the DYNTICKS counter be inverted. Suggested-by: Linus Torvalds Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 50 +++++++++++++++++++++++++++++++---------------- kernel/rcu/tree.h | 2 +- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 42a0032dd99f7..bc6ccf0ba3b24 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -77,7 +77,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { .dynticks_nesting = 1, .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, - .dynticks = ATOMIC_INIT(1), + .dynticks = 1UL, #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY, #endif @@ -251,6 +251,21 @@ void rcu_softirq_qs(void) rcu_tasks_qs(current, false); } +/* + * Increment the current CPU's rcu_data structure's ->dynticks field + * with ordering. Return the new value. 
+ */ +static noinstr unsigned long rcu_dynticks_inc(int incby) +{ + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + int seq; + + seq = READ_ONCE(rdp->dynticks) + incby; + smp_store_release(&rdp->dynticks, seq); + smp_mb(); // Fundamental RCU ordering guarantee. + return seq; +} + /* * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state, that is, @@ -267,7 +282,7 @@ static noinstr void rcu_dynticks_eqs_enter(void) * next idle sojourn. */ rcu_dynticks_task_trace_enter(); // Before ->dynticks update! - seq = arch_atomic_inc_return(&this_cpu_ptr(&rcu_data)->dynticks); + seq = rcu_dynticks_inc(1); // RCU is no longer watching. Better be in extended quiescent state! WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1)); } @@ -286,7 +301,7 @@ static noinstr void rcu_dynticks_eqs_exit(void) * and we also must force ordering with the next RCU read-side * critical section. */ - seq = arch_atomic_inc_return(&this_cpu_ptr(&rcu_data)->dynticks); + seq = rcu_dynticks_inc(1); // RCU is now watching. Better not be in an extended quiescent state! rcu_dynticks_task_trace_exit(); // After ->dynticks update! 
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1)); @@ -306,9 +321,9 @@ static void rcu_dynticks_eqs_online(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - if (atomic_read(&rdp->dynticks) & 0x1) + if (READ_ONCE(rdp->dynticks) & 0x1) return; - atomic_inc(&rdp->dynticks); + rcu_dynticks_inc(1); } /* @@ -318,7 +333,7 @@ static void rcu_dynticks_eqs_online(void) */ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { - return !(arch_atomic_read(&this_cpu_ptr(&rcu_data)->dynticks) & 0x1); + return !(READ_ONCE(this_cpu_ptr(&rcu_data)->dynticks) & 0x1); } /* @@ -327,7 +342,8 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) */ static int rcu_dynticks_snap(struct rcu_data *rdp) { - return atomic_add_return(0, &rdp->dynticks); + smp_mb(); // Fundamental RCU ordering guarantee. + return smp_load_acquire(&rdp->dynticks); } /* @@ -367,7 +383,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) int snap; // If not quiescent, force back to earlier extended quiescent state. - snap = atomic_read(&rdp->dynticks) & ~0x1; + snap = READ_ONCE(rdp->dynticks) & ~0x1; smp_rmb(); // Order ->dynticks and *vp reads. if (READ_ONCE(*vp)) @@ -375,7 +391,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) smp_rmb(); // Order *vp read and ->dynticks re-read. // If still in the same extended quiescent state, we are good! - return snap == atomic_read(&rdp->dynticks); + return snap == READ_ONCE(rdp->dynticks); } /* @@ -391,12 +407,12 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) */ notrace void rcu_momentary_dyntick_idle(void) { - int special; + int seq; raw_cpu_write(rcu_data.rcu_need_heavy_qs, false); - special = atomic_add_return(2, &this_cpu_ptr(&rcu_data)->dynticks); + seq = rcu_dynticks_inc(2); /* It is illegal to call this from idle state. 
*/ - WARN_ON_ONCE(!(special & 0x1)); + WARN_ON_ONCE(!(seq & 0x1)); rcu_preempt_deferred_qs(current); } EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle); @@ -612,7 +628,7 @@ static noinstr void rcu_eqs_enter(bool user) lockdep_assert_irqs_disabled(); instrumentation_begin(); - trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); + trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, READ_ONCE(rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); rcu_prepare_for_idle(); rcu_preempt_deferred_qs(current); @@ -747,7 +763,7 @@ noinstr void rcu_nmi_exit(void) */ if (rdp->dynticks_nmi_nesting != 1) { trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, - atomic_read(&rdp->dynticks)); + READ_ONCE(rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */ rdp->dynticks_nmi_nesting - 2); instrumentation_end(); @@ -755,7 +771,7 @@ noinstr void rcu_nmi_exit(void) } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); + trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, READ_ONCE(rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ if (!in_nmi()) @@ -863,7 +879,7 @@ static void noinstr rcu_eqs_exit(bool user) instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks)); + trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, READ_ONCE(rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); WRITE_ONCE(rdp->dynticks_nesting, 1); WARN_ON_ONCE(rdp->dynticks_nmi_nesting); @@ -1026,7 +1042,7 @@ noinstr void rcu_nmi_enter(void) trace_rcu_dyntick(incby == 1 ? 
TPS("Endirq") : TPS("++="), rdp->dynticks_nmi_nesting, - rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks)); + rdp->dynticks_nmi_nesting + incby, READ_ONCE(rdp->dynticks)); instrumentation_end(); WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdp->dynticks_nmi_nesting + incby); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 305cf6aeb4086..ce611da2ff6b3 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -184,7 +184,7 @@ struct rcu_data { int dynticks_snap; /* Per-GP tracking for dynticks. */ long dynticks_nesting; /* Track process nesting level. */ long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ - atomic_t dynticks; /* Even value for idle, else odd. */ + unsigned long dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, so heavy quiescent state! */ bool rcu_urgent_qs; /* GP old need light quiescent state. */ bool rcu_forced_tick; /* Forced tick to provide QS. */ From f690dff7cb10074b4d68d4aa215ca6501746f3c6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 20 May 2021 13:35:50 -0700 Subject: [PATCH 249/851] rcu: Mark accesses to ->rcu_read_lock_nesting KCSAN flags accesses to ->rcu_read_lock_nesting as data races, but in the past, the overhead of marked accesses was excessive. However, that was long ago, and much has changed since then, both in terms of hardware and of compilers. Here is data taken on an eight-core laptop using Intel(R) Core(TM) i9-10885H CPU @ 2.40GHz with a kernel built using gcc version 9.3.0, with all data in nanoseconds. 
Unmarked accesses (status quo), measured by three refscale runs: Minimum reader duration: 3.286 2.851 3.395 Median reader duration: 3.698 3.531 3.4695 Maximum reader duration: 4.481 5.215 5.157 Marked accesses, also measured by three refscale runs: Minimum reader duration: 3.501 3.677 3.580 Median reader duration: 4.053 3.723 3.895 Maximum reader duration: 7.307 4.999 5.511 This focused microbenchmark shows only sub-nanosecond differences which are unlikely to be visible at the system level. This commit therefore marks data-racing accesses to ->rcu_read_lock_nesting. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- kernel/rcu/tree_plugin.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d9680b798b211..cfeb43bfc719e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -53,7 +53,7 @@ void __rcu_read_unlock(void); * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. 
*/ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) +#define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting) #else /* #ifdef CONFIG_PREEMPT_RCU */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index de1dc3bb7f701..83a702a4e2963 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -405,17 +405,20 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) static void rcu_preempt_read_enter(void) { - current->rcu_read_lock_nesting++; + WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1); } static int rcu_preempt_read_exit(void) { - return --current->rcu_read_lock_nesting; + int ret = READ_ONCE(current->rcu_read_lock_nesting) - 1; + + WRITE_ONCE(current->rcu_read_lock_nesting, ret); + return ret; } static void rcu_preempt_depth_set(int val) { - current->rcu_read_lock_nesting = val; + WRITE_ONCE(current->rcu_read_lock_nesting, val); } /* From ed50f41361a72b0bd124f855935c6baef12e9aff Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 21 May 2021 12:08:29 +0200 Subject: [PATCH 250/851] rculist: Unify documentation about missing list_empty_rcu() We have two separate sections that talk about why list_empty_rcu() is not needed, so this commit consolidates them. Signed-off-by: Julian Wiedmann [ paulmck: The usual wordsmithing. ] Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index f8633d37e3581..d29740be4833e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -10,15 +10,6 @@ #include #include -/* - * Why is there no list_empty_rcu()? Because list_empty() serves this - * purpose. 
The list_empty() function fetches the RCU-protected pointer - * and compares it to the address of the list head, but neither dereferences - * this pointer itself nor provides this pointer to the caller. Therefore, - * it is not necessary to use rcu_dereference(), so that list_empty() can - * be used anywhere you would want to use a list_empty_rcu(). - */ - /* * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers * @list: list to be initialized @@ -318,21 +309,29 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, /* * Where are list_empty_rcu() and list_first_entry_rcu()? * - * Implementing those functions following their counterparts list_empty() and - * list_first_entry() is not advisable because they lead to subtle race - * conditions as the following snippet shows: + * They do not exist because they would lead to subtle race conditions: * * if (!list_empty_rcu(mylist)) { * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member); * do_something(bar); * } * - * The list may not be empty when list_empty_rcu checks it, but it may be when - * list_first_entry_rcu rereads the ->next pointer. - * - * Rereading the ->next pointer is not a problem for list_empty() and - * list_first_entry() because they would be protected by a lock that blocks - * writers. + * The list might be non-empty when list_empty_rcu() checks it, but it + * might have become empty by the time that list_first_entry_rcu() rereads + * the ->next pointer, which would result in a SEGV. + * + * When not using RCU, it is OK for list_first_entry() to re-read that + * pointer because both functions should be protected by some lock that + * blocks writers. + * + * When using RCU, list_empty() uses READ_ONCE() to fetch the + * RCU-protected ->next pointer and then compares it to the address of the + * list head. However, it neither dereferences this pointer nor provides + * this pointer to its caller. 
Thus, READ_ONCE() suffices (that is, + * rcu_dereference() is not needed), which means that list_empty() can be + * used anywhere you would want to use list_empty_rcu(). Just don't + * expect anything useful to happen if you do a subsequent lockless + * call to list_first_entry_rcu()!!! * * See list_first_or_null_rcu for an alternative. */ From b080c57738d730262e93853d02a8103a46a08150 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 22 May 2021 00:56:23 +0900 Subject: [PATCH 251/851] rcu/tree: Handle VM stoppage in stall detection The soft watchdog timer function checks if a virtual machine was suspended and hence what looks like a lockup in fact is a false positive. This is what kvm_check_and_clear_guest_paused() does: it tests guest PVCLOCK_GUEST_STOPPED (which is set by the host) and if it's set then we need to touch all watchdogs and bail out. Watchdog timer function runs from IRQ, so PVCLOCK_GUEST_STOPPED check works fine. There is, however, one more watchdog that runs from IRQ, so watchdog timer fn races with it, and that watchdog is not aware of PVCLOCK_GUEST_STOPPED - RCU stall detector. apic_timer_interrupt() smp_apic_timer_interrupt() hrtimer_interrupt() __hrtimer_run_queues() tick_sched_timer() tick_sched_handle() update_process_times() rcu_sched_clock_irq() This triggers RCU stalls on our devices during VM resume. If tick_sched_handle()->rcu_sched_clock_irq() runs on a VCPU before watchdog_timer_fn()->kvm_check_and_clear_guest_paused() then there is nothing on this VCPU that touches watchdogs and RCU reads stale gp stall timestamp and new jiffies value, which makes it think that RCU has stalled. Make RCU stall watchdog aware of PVCLOCK_GUEST_STOPPED and don't report RCU stalls when we resume the VM. Signed-off-by: Sergey Senozhatsky Signed-off-by: Paul E.
McKenney --- kernel/rcu/tree_stall.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index bd4de5bc5807e..0e7a60706d1c0 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -7,6 +7,8 @@ * Author: Paul E. McKenney */ +#include + ////////////////////////////////////////////////////////////////////////////// // // Controlling CPU stall warnings, including delay calculation. @@ -698,6 +700,14 @@ static void check_cpu_stall(struct rcu_data *rdp) (READ_ONCE(rnp->qsmask) & rdp->grpmask) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like an RCU stall. Check to see if the host + * stopped the vm. + */ + if (kvm_check_and_clear_guest_paused()) + return; + /* We haven't checked in, so go dump stack. */ print_cpu_stall(gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) @@ -707,6 +717,14 @@ static void check_cpu_stall(struct rcu_data *rdp) ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like an RCU stall. Check to see if the host + * stopped the vm. + */ + if (kvm_check_and_clear_guest_paused()) + return; + /* They had a few time units to dump stack, so complain. */ print_other_cpu_stall(gs2, gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) From 51f50c3bc0fd30074a50cecafbdf7c64e095d7b4 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 22 May 2021 00:56:24 +0900 Subject: [PATCH 252/851] rcu: Do not disable GP stall detection in rcu_cpu_stall_reset() rcu_cpu_stall_reset() is one of the functions virtual CPUs execute during VM resume in order to handle jiffies skew that can trigger false positive stall warnings. 
Paul has pointed out that this approach is problematic because rcu_cpu_stall_reset() disables RCU grace period stall-detection virtually forever, while in fact it can just restart the stall-detection timeout. Suggested-by: "Paul E. McKenney" Signed-off-by: Sergey Senozhatsky Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_stall.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 0e7a60706d1c0..e199022dce9dc 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -119,17 +119,14 @@ static void panic_on_rcu_stall(void) } /** - * rcu_cpu_stall_reset - prevent further stall warnings in current grace period - * - * Set the stall-warning timeout way off into the future, thus preventing - * any RCU CPU stall-warning messages from appearing in the current set of - * RCU grace periods. + * rcu_cpu_stall_reset - restart stall-warning timeout for current grace period * * The caller must disable hard irqs. */ void rcu_cpu_stall_reset(void) { - WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); + WRITE_ONCE(rcu_state.jiffies_stall, + jiffies + rcu_jiffies_till_stall_check()); } ////////////////////////////////////////////////////////////////////////////// From cc87c238077566bb03bc52b97d677779a0b48470 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 May 2021 14:23:03 -0700 Subject: [PATCH 253/851] rcu: Start timing stall repetitions after warning complete Systems with low-bandwidth consoles can have very large printk() latencies, and on such systems it makes no sense to have the next RCU CPU stall warning message start output before the prior message completed. This commit therefore sets the time of the next stall only after the prints have completed. While printing, the time of the next stall message is set to ULONG_MAX/2 jiffies into the future. Reviewed-by: Sergey Senozhatsky Signed-off-by: Paul E.
McKenney --- kernel/rcu/tree_stall.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index e199022dce9dc..42847caa3909b 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -647,6 +647,7 @@ static void print_cpu_stall(unsigned long gps) static void check_cpu_stall(struct rcu_data *rdp) { + bool didstall = false; unsigned long gs1; unsigned long gs2; unsigned long gps; @@ -692,7 +693,7 @@ static void check_cpu_stall(struct rcu_data *rdp) ULONG_CMP_GE(gps, js)) return; /* No stall or GP completed since entering function. */ rnp = rdp->mynode; - jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + jn = jiffies + ULONG_MAX / 2; if (rcu_gp_in_progress() && (READ_ONCE(rnp->qsmask) & rdp->grpmask) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { @@ -709,6 +710,7 @@ static void check_cpu_stall(struct rcu_data *rdp) print_cpu_stall(gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) rcu_ftrace_dump(DUMP_ALL); + didstall = true; } else if (rcu_gp_in_progress() && ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && @@ -726,6 +728,11 @@ static void check_cpu_stall(struct rcu_data *rdp) print_other_cpu_stall(gs2, gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) rcu_ftrace_dump(DUMP_ALL); + didstall = true; + } + if (didstall && READ_ONCE(rcu_state.jiffies_stall) == jn) { + jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + WRITE_ONCE(rcu_state.jiffies_stall, jn); } } From a6cf9cd06cd3b36b14e216a07efc4852d692438f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Jun 2021 16:31:38 -0700 Subject: [PATCH 254/851] srcutiny: Mark read-side data races This commit marks some interrupt-induced read-side data races in __srcu_read_lock(), __srcu_read_unlock(), and srcu_torture_stats_print(). Signed-off-by: Paul E. 
McKenney --- include/linux/srcutiny.h | 8 ++++---- kernel/rcu/srcutiny.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 0e0cf4d6a72a0..6cfaa0a9a9b96 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -61,7 +61,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) int idx; idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; - WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1); return idx; } @@ -81,11 +81,11 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, { int idx; - idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; + idx = ((data_race(READ_ONCE(ssp->srcu_idx)) + 1) & 0x2) >> 1; pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", tt, tf, idx, - READ_ONCE(ssp->srcu_lock_nesting[!idx]), - READ_ONCE(ssp->srcu_lock_nesting[idx])); + data_race(READ_ONCE(ssp->srcu_lock_nesting[!idx])), + data_race(READ_ONCE(ssp->srcu_lock_nesting[idx]))); } #endif diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 26344dc6483b0..a0ba2ed49bc61 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); */ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { - int newval = ssp->srcu_lock_nesting[idx] - 1; + int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) From c87424ec22c0bbbcc61471d3056ee29b3958bd11 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 3 Jun 2021 10:17:36 -0700 Subject: [PATCH 255/851] rcu: Mark lockless ->qsmask read in rcu_check_boost_fail() Accesses to ->qsmask are normally protected by ->lock, but there is an exception in the diagnostic code in rcu_check_boost_fail(). 
This commit therefore applies data_race() to this access to avoid KCSAN complaining about the C-language writes protected by ->lock. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_stall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 42847caa3909b..6dd6c9aa3f757 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -766,7 +766,7 @@ bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) rcu_for_each_leaf_node(rnp) { if (!cpup) { - if (READ_ONCE(rnp->qsmask)) { + if (data_race(READ_ONCE(rnp->qsmask))) { return false; } else { if (READ_ONCE(rnp->gp_tasks)) From d5bb480a1272fef16f6c020e3a0f9466a51950eb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 Jun 2021 21:57:02 -0700 Subject: [PATCH 256/851] rcu: Make rcu_gp_init() and rcu_gp_fqs_loop noinline to conserve stack The kbuild test project found an oversized stack frame in rcu_gp_kthread() for some kernel configurations. This oversizing was due to a very large amount of inlining, which is unnecessary due to the fact that this code executes infrequently. This commit therefore marks rcu_gp_init() and rcu_gp_fqs_loop noinline_for_stack to conserve stack space. Reported-by: kernel test robot Tested-by: Rong Chen [ paulmck: noinline_for_stack per Nathan Chancellor. ] Reviewed-by: Nathan Chancellor Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index bc6ccf0ba3b24..e472c78036011 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1739,7 +1739,7 @@ static void rcu_strict_gp_boundary(void *unused) /* * Initialize a new grace period. Return false if no grace period required. 
*/ -static bool rcu_gp_init(void) +static noinline_for_stack bool rcu_gp_init(void) { unsigned long firstseq; unsigned long flags; @@ -1933,7 +1933,7 @@ static void rcu_gp_fqs(bool first_time) /* * Loop doing repeated quiescent-state forcing until the grace period ends. */ -static void rcu_gp_fqs_loop(void) +static noinline_for_stack void rcu_gp_fqs_loop(void) { bool first_gp_fqs; int gf = 0; From 7798a249ab0e608faeaf3e61a42fc700ae794e52 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 8 Jun 2021 15:49:48 +0800 Subject: [PATCH 257/851] rcu: Remove trailing spaces and tabs Run the following command to find and remove the trailing spaces and tabs: find kernel/rcu/ -type f | xargs sed -r -i 's/[ \t]+$//' Signed-off-by: Zhen Lei Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e472c78036011..37bc3a702b6ea 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -259,7 +259,7 @@ static noinstr unsigned long rcu_dynticks_inc(int incby) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); int seq; - + seq = READ_ONCE(rdp->dynticks) + incby; smp_store_release(&rdp->dynticks, seq); smp_mb(); // Fundamental RCU ordering guarantee. From aba54baa110f533e324ad99c62fd47dfa8a914fb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 20 May 2021 21:08:38 -0700 Subject: [PATCH 258/851] rcu: Mark accesses in tree_stall.h This commit marks the accesses in tree_stall.h so as to both avoid undesirable compiler optimizations and to keep KCSAN focused on the accesses of the core algorithm. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_stall.h | 63 +++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 6dd6c9aa3f757..a8d0fcf0826f4 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -465,9 +465,10 @@ static void rcu_check_gp_kthread_starvation(void) pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n", rcu_state.name, j, (long)rcu_seq_current(&rcu_state.gp_seq), - data_race(rcu_state.gp_flags), - gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, - gpk ? gpk->__state : ~0, cpu); + data_race(READ_ONCE(rcu_state.gp_flags)), + gp_state_getname(rcu_state.gp_state), + data_race(READ_ONCE(rcu_state.gp_state)), + gpk ? data_race(READ_ONCE(gpk->__state)) : ~0, cpu); if (gpk) { pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name); pr_err("RCU grace-period kthread stack dump:\n"); @@ -510,7 +511,7 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void) (long)rcu_seq_current(&rcu_state.gp_seq), data_race(rcu_state.gp_flags), gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS, - gpk->__state); + data_race(READ_ONCE(gpk->__state))); pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n", cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu)); } @@ -569,11 +570,11 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) pr_err("INFO: Stall ended before state dump start\n"); } else { j = jiffies; - gpa = data_race(rcu_state.gp_activity); + gpa = data_race(READ_ONCE(rcu_state.gp_activity)); pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", rcu_state.name, j - gpa, j, gpa, - data_race(jiffies_till_next_fqs), - rcu_get_root()->qsmask); + data_race(READ_ONCE(jiffies_till_next_fqs)), + data_race(READ_ONCE(rcu_get_root()->qsmask))); } } /* Rewrite if needed in case of slow consoles. 
*/ @@ -815,32 +816,34 @@ void show_rcu_gp_kthreads(void) struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); j = jiffies; - ja = j - data_race(rcu_state.gp_activity); - jr = j - data_race(rcu_state.gp_req_activity); - js = j - data_race(rcu_state.gp_start); - jw = j - data_race(rcu_state.gp_wake_time); + ja = j - data_race(READ_ONCE(rcu_state.gp_activity)); + jr = j - data_race(READ_ONCE(rcu_state.gp_req_activity)); + js = j - data_race(READ_ONCE(rcu_state.gp_start)); + jw = j - data_race(READ_ONCE(rcu_state.gp_wake_time)); pr_info("%s: wait state: %s(%d) ->state: %#x ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n", rcu_state.name, gp_state_getname(rcu_state.gp_state), - rcu_state.gp_state, t ? t->__state : 0x1ffff, t ? t->rt_priority : 0xffU, - js, ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq), - (long)data_race(rcu_state.gp_seq), - (long)data_race(rcu_get_root()->gp_seq_needed), - data_race(rcu_state.gp_max), - data_race(rcu_state.gp_flags)); + data_race(READ_ONCE(rcu_state.gp_state)), + t ? data_race(READ_ONCE(t->__state)) : 0x1ffff, t ? 
t->rt_priority : 0xffU, + js, ja, jr, jw, (long)data_race(READ_ONCE(rcu_state.gp_wake_seq)), + (long)data_race(READ_ONCE(rcu_state.gp_seq)), + (long)data_race(READ_ONCE(rcu_get_root()->gp_seq_needed)), + data_race(READ_ONCE(rcu_state.gp_max)), + data_race(READ_ONCE(rcu_state.gp_flags))); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) && - !data_race(rnp->qsmask) && !data_race(rnp->boost_tasks) && - !data_race(rnp->exp_tasks) && !data_race(rnp->gp_tasks)) + !data_race(READ_ONCE(rnp->qsmask)) && !data_race(READ_ONCE(rnp->boost_tasks)) && + !data_race(READ_ONCE(rnp->exp_tasks)) && !data_race(READ_ONCE(rnp->gp_tasks))) continue; pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n", rnp->grplo, rnp->grphi, - (long)data_race(rnp->gp_seq), (long)data_race(rnp->gp_seq_needed), - data_race(rnp->qsmask), - ".b"[!!data_race(rnp->boost_kthread_task)], - ".B"[!!data_race(rnp->boost_tasks)], - ".E"[!!data_race(rnp->exp_tasks)], - ".G"[!!data_race(rnp->gp_tasks)], - data_race(rnp->n_boosts)); + (long)data_race(READ_ONCE(rnp->gp_seq)), + (long)data_race(READ_ONCE(rnp->gp_seq_needed)), + data_race(READ_ONCE(rnp->qsmask)), + ".b"[!!data_race(READ_ONCE(rnp->boost_kthread_task))], + ".B"[!!data_race(READ_ONCE(rnp->boost_tasks))], + ".E"[!!data_race(READ_ONCE(rnp->exp_tasks))], + ".G"[!!data_race(READ_ONCE(rnp->gp_tasks))], + data_race(READ_ONCE(rnp->n_boosts))); if (!rcu_is_leaf_node(rnp)) continue; for_each_leaf_node_possible_cpu(rnp, cpu) { @@ -850,12 +853,12 @@ void show_rcu_gp_kthreads(void) READ_ONCE(rdp->gp_seq_needed))) continue; pr_info("\tcpu %d ->gp_seq_needed %ld\n", - cpu, (long)data_race(rdp->gp_seq_needed)); + cpu, (long)data_race(READ_ONCE(rdp->gp_seq_needed))); } } for_each_possible_cpu(cpu) { rdp = per_cpu_ptr(&rcu_data, cpu); - cbs += data_race(rdp->n_cbs_invoked); + cbs += data_race(READ_ONCE(rdp->n_cbs_invoked)); if 
(rcu_segcblist_is_offloaded(&rdp->cblist)) show_rcu_nocb_state(rdp); } @@ -937,11 +940,11 @@ void rcu_fwd_progress_check(unsigned long j) if (rcu_gp_in_progress()) { pr_info("%s: GP age %lu jiffies\n", - __func__, jiffies - rcu_state.gp_start); + __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_start))); show_rcu_gp_kthreads(); } else { pr_info("%s: Last GP end %lu jiffies ago\n", - __func__, jiffies - rcu_state.gp_end); + __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_end))); preempt_disable(); rdp = this_cpu_ptr(&rcu_data); rcu_check_gp_start_stall(rdp->mynode, rdp, j); From 891fb9846876c6d9d22f1a16d17bb07f8cb68ee7 Mon Sep 17 00:00:00 2001 From: Liu Song Date: Tue, 29 Jun 2021 21:55:51 +0800 Subject: [PATCH 259/851] rcu: Remove useless "ret" update in rcu_gp_fqs_loop() Within rcu_gp_fqs_loop(), the "ret" local variable is set to the return value from swait_event_idle_timeout_exclusive(), but "ret" is unconditionally overwritten later in the code. This commit therefore removes this useless assignment. Signed-off-by: Liu Song Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 37bc3a702b6ea..7b65ac3f49e5a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1960,8 +1960,8 @@ static noinline_for_stack void rcu_gp_fqs_loop(void) trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("fqswait")); WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_FQS); - ret = swait_event_idle_timeout_exclusive( - rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j); + (void)swait_event_idle_timeout_exclusive(rcu_state.gp_wq, + rcu_gp_fqs_check_wake(&gf), j); rcu_gp_torture_wait(); WRITE_ONCE(rcu_state.gp_state, RCU_GP_DOING_FQS); /* Locking provides needed memory barriers. 
*/ From df00d93bce86ae3d0acf53ac0450080facd7e58d Mon Sep 17 00:00:00 2001 From: Liu Song Date: Wed, 30 Jun 2021 22:08:02 +0800 Subject: [PATCH 260/851] rcu: Use per_cpu_ptr to get the pointer of per_cpu variable There are a few remaining locations in kernel/rcu that still use "&per_cpu()". This commit replaces them with "per_cpu_ptr(&)", and does not introduce any functional change. Reviewed-by: Uladzislau Rezki (Sony) Reviewed-by: Neeraj Upadhyay Reviewed-by: Joel Fernandes (Google) Signed-off-by: Liu Song Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 2 +- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_stall.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 8536c55df5142..21f00194e69d7 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -920,7 +920,7 @@ static void trc_read_check_handler(void *t_in) // Allow future IPIs to be sent on CPU and for task. // Also order this IPI handler against any later manipulations of // the intended task. 
- smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^ + smp_store_release(per_cpu_ptr(&trc_ipi_to_cpu, smp_processor_id()), false); // ^^^ smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^ } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7b65ac3f49e5a..888efad0361d3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1292,7 +1292,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) */ jtsq = READ_ONCE(jiffies_to_sched_qs); ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu); - rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu); + rnhqp = per_cpu_ptr(&rcu_data.rcu_need_heavy_qs, rdp->cpu); if (!READ_ONCE(*rnhqp) && (time_after(jiffies, rcu_state.gp_start + jtsq * 2) || time_after(jiffies, rcu_state.jiffies_resched) || diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index a8d0fcf0826f4..677ee3d8671bf 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -351,7 +351,7 @@ static void rcu_dump_cpu_stacks(void) static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d", rdp->last_accelerate & 0xffff, jiffies & 0xffff, From d40011b02d4e8cb99f2eeaa0c14d670779361551 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 6 Jul 2021 01:43:43 +0200 Subject: [PATCH 261/851] rcu: Explain why rcu_all_qs() is a stub in preemptible TREE RCU The cond_resched() function reports an RCU quiescent state only in non-preemptible TREE RCU implementation. This commit therefore adds a comment explaining why cond_resched() does nothing in preemptible kernels. Signed-off-by: Frederic Weisbecker Cc: Neeraj Upadhyay Cc: Joel Fernandes Cc: Uladzislau Rezki Cc: Boqun Feng Cc: Peter Zijlstra (Intel) Cc: Ingo Molnar Signed-off-by: Paul E. 
McKenney --- kernel/sched/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d9ff40f46619..6a03c3fac55cc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7781,6 +7781,17 @@ int __sched __cond_resched(void) preempt_schedule_common(); return 1; } + /* + * In preemptible kernels, ->rcu_read_lock_nesting tells the tick + * whether the current CPU is in an RCU read-side critical section, + * so the tick can report quiescent states even for CPUs looping + * in kernel context. In contrast, in non-preemptible kernels, + * RCU readers leave no in-memory hints, which means that CPU-bound + * processes executing in kernel context might never report an + * RCU quiescent state. Therefore, the following code causes + * cond_resched() to report a quiescent state, but only when RCU + * is in urgent need of one. + */ #ifndef CONFIG_PREEMPT_RCU rcu_all_qs(); #endif From deb37cfcfc12b1acdfc79c703ccb0a6d399dd112 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 19 Jul 2021 11:52:12 -0700 Subject: [PATCH 262/851] rcu: Print human-readable message for schedule() in RCU reader The WARN_ON_ONCE() invocation within the CONFIG_PREEMPT=y version of rcu_note_context_switch() triggers when there is a voluntary context switch in an RCU read-side critical section, but there is quite a gap between the output of that WARN_ON_ONCE() and this RCU-usage error. This commit therefore converts the WARN_ON_ONCE() to a WARN_ONCE() that explicitly describes the problem in its message. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_plugin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 83a702a4e2963..e8b45ab72a799 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -346,7 +346,7 @@ void rcu_note_context_switch(bool preempt) trace_rcu_utilization(TPS("Start context switch")); lockdep_assert_irqs_disabled(); - WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0); + WARN_ONCE(!preempt && rcu_preempt_depth() > 0, "Voluntary context switch within RCU read-side critical section!"); if (rcu_preempt_depth() > 0 && !t->rcu_read_unlock_special.b.blocked) { From 3fad1f7b48cd4270cda398ac124d6806eebab689 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 May 2021 17:04:57 -0700 Subject: [PATCH 263/851] refscale: Add measurement of clock readout This commit adds a "clock" type to refscale, which checks the performance of ktime_get_real_fast_ns(). Use the "clocksource=" kernel boot parameter to select the underlying clock source. [ paulmck: Work around compiler false positive per kernel test robot. ] Signed-off-by: Paul E. 
McKenney --- kernel/rcu/refscale.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index d998a76fb5422..cba737cb246e8 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -467,6 +467,41 @@ static struct ref_scale_ops acqrel_ops = { .name = "acqrel" }; +static volatile u64 stopopts; + +static void ref_clock_section(const int nloops) +{ + u64 x = 0; + int i; + + preempt_disable(); + for (i = nloops; i >= 0; i--) { + x += ktime_get_real_fast_ns(); + } + preempt_enable(); + stopopts = x; +} + +static void ref_clock_delay_section(const int nloops, const int udl, const int ndl) +{ + u64 x = 0; + int i; + + preempt_disable(); + for (i = nloops; i >= 0; i--) { + x += ktime_get_real_fast_ns(); + un_delay(udl, ndl); + } + preempt_enable(); + stopopts = x; +} + +static struct ref_scale_ops clock_ops = { + .readsection = ref_clock_section, + .delaysection = ref_clock_delay_section, + .name = "clock" +}; + static void rcu_scale_one_reader(void) { if (readdelay <= 0) @@ -759,7 +794,7 @@ ref_scale_init(void) int firsterr = 0; static struct ref_scale_ops *scale_ops[] = { &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops, - &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, + &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops, }; if (!torture_init_begin(scale_type, verbose)) From 6580f64ccd06a391a8072305edbfb560b0dc0bef Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 16 May 2021 21:17:27 -0700 Subject: [PATCH 264/851] rcutorture: Preempt rather than block when testing task stalls Currently, rcu_torture_stall() does a one-jiffy timed wait when stall_cpu_block is set. This works, but emits a pointless splat in CONFIG_PREEMPT=y kernels. This commit avoids this splat by instead invoking preempt_schedule() in CONFIG_PREEMPT=y kernels. This uses an admittedly ugly #ifdef, but abstracted approaches just looked worse. 
A prettier approach would provide a preempt_schedule() definition with a WARN_ON() for CONFIG_PREEMPT=n kernels, but this seems quite silly. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 40ef5417d9545..ab4215266ebee 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2022,8 +2022,13 @@ static int rcu_torture_stall(void *args) __func__, raw_smp_processor_id()); while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(), stop_at)) - if (stall_cpu_block) + if (stall_cpu_block) { +#ifdef CONFIG_PREEMPTION + preempt_schedule(); +#else schedule_timeout_uninterruptible(HZ); +#endif + } if (stall_cpu_irqsoff) local_irq_enable(); else if (!stall_cpu_block) From 8f12cead1d4992631a840d1e79a6995ebe87405e Mon Sep 17 00:00:00 2001 From: "Jiangong.Han" Date: Tue, 22 Jun 2021 18:37:08 +0800 Subject: [PATCH 265/851] rcuscale: Console output claims too few grace periods The rcuscale console output claims N grace periods, numbered from zero to N, which means that there were really N+1 grace periods. The root cause of this bug is that rcu_scale_writer() stores the number of the last grace period (numbered from zero) into writer_n_durations[me] instead of the number of grace periods. This commit therefore assigns the actual number of grace periods to writer_n_durations[me], and also makes the corresponding adjustment to the loop outputting per-grace-period measurements. Sample of old console output: rcu-scale: writer 0 gps: 133 ...... rcu-scale: 0 writer-duration: 0 44003961 rcu-scale: 0 writer-duration: 1 32003582 ...... rcu-scale: 0 writer-duration: 132 28004391 rcu-scale: 0 writer-duration: 133 27996410 Sample of new console output: rcu-scale: writer 0 gps: 134 ...... rcu-scale: 0 writer-duration: 0 44003961 rcu-scale: 0 writer-duration: 1 32003582 ...... 
rcu-scale: 0 writer-duration: 132 28004391 rcu-scale: 0 writer-duration: 133 27996410 Signed-off-by: Jiangong.Han Signed-off-by: Paul E. McKenney --- kernel/rcu/rcuscale.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index dca51fe9c73f2..2cc34a22a5060 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -487,7 +487,7 @@ rcu_scale_writer(void *arg) if (gp_async) { cur_ops->gp_barrier(); } - writer_n_durations[me] = i_max; + writer_n_durations[me] = i_max + 1; torture_kthread_stopping("rcu_scale_writer"); return 0; } @@ -561,7 +561,7 @@ rcu_scale_cleanup(void) wdpp = writer_durations[i]; if (!wdpp) continue; - for (j = 0; j <= writer_n_durations[i]; j++) { + for (j = 0; j < writer_n_durations[i]; j++) { wdp = &wdpp[j]; pr_alert("%s%s %4d writer-duration: %5d %llu\n", scale_type, SCALE_FLAG, From 0dbeb44f152a3b4f6f1ba7718c15f75f53694441 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Jun 2021 15:51:48 -0700 Subject: [PATCH 266/851] locktorture: Mark statistics data races The lock_stress_stats structure's ->n_lock_fail and ->n_lock_acquired fields are incremented and sampled locklessly using plain C-language statements, which KCSAN objects to. This commit therefore marks the statistics gathering with data_race() to flag the intent. While in the area, this commit also reduces the number of accesses to the ->n_lock_acquired field, thus eliminating some possible check/use confusion. Signed-off-by: Paul E. 
McKenney --- kernel/locking/locktorture.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index b3adb40549bf3..313d5e613fbe6 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -738,20 +738,22 @@ static int lock_torture_reader(void *arg) static void __torture_print_stats(char *page, struct lock_stress_stats *statp, bool write) { + long cur; bool fail = false; int i, n_stress; - long max = 0, min = statp ? statp[0].n_lock_acquired : 0; + long max = 0, min = statp ? data_race(statp[0].n_lock_acquired) : 0; long long sum = 0; n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress; for (i = 0; i < n_stress; i++) { - if (statp[i].n_lock_fail) + if (data_race(statp[i].n_lock_fail)) fail = true; - sum += statp[i].n_lock_acquired; - if (max < statp[i].n_lock_acquired) - max = statp[i].n_lock_acquired; - if (min > statp[i].n_lock_acquired) - min = statp[i].n_lock_acquired; + cur = data_race(statp[i].n_lock_acquired); + sum += cur; + if (max < cur) + max = cur; + if (min > cur) + min = cur; } page += sprintf(page, "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", From 61700f06e186a8651c50943f69939b1f3e7633d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Jun 2021 16:04:11 -0700 Subject: [PATCH 267/851] locktorture: Count lock readers Currently, the lock_is_read_held variable is bool, so that a reader sets it to true just after lock acquisition and then to false just before lock release. This works in a rough statistical sense, but can result in false negatives just after one of a pair of concurrent readers has released the lock. This approach does have low overhead, but at the expense of the setting to true potentially never leaving the reader's store buffer, thus resulting in an unconditional false negative. 
This commit therefore converts this variable to atomic_t and makes the reader use atomic_inc() just after acquisition and atomic_dec() just before release. This does increase overhead, but this increase is negligible compared to the 10-microsecond lock hold time. Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 313d5e613fbe6..7c5a4a087cc73 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -59,7 +59,7 @@ static struct task_struct **writer_tasks; static struct task_struct **reader_tasks; static bool lock_is_write_held; -static bool lock_is_read_held; +static atomic_t lock_is_read_held; static unsigned long last_lock_release; struct lock_stress_stats { @@ -682,7 +682,7 @@ static int lock_torture_writer(void *arg) if (WARN_ON_ONCE(lock_is_write_held)) lwsp->n_lock_fail++; lock_is_write_held = true; - if (WARN_ON_ONCE(lock_is_read_held)) + if (WARN_ON_ONCE(atomic_read(&lock_is_read_held))) lwsp->n_lock_fail++; /* rare, but... */ lwsp->n_lock_acquired++; @@ -717,13 +717,13 @@ static int lock_torture_reader(void *arg) schedule_timeout_uninterruptible(1); cxt.cur_ops->readlock(tid); - lock_is_read_held = true; + atomic_inc(&lock_is_read_held); if (WARN_ON_ONCE(lock_is_write_held)) lrsp->n_lock_fail++; /* rare, but... */ lrsp->n_lock_acquired++; cxt.cur_ops->read_delay(&rand); - lock_is_read_held = false; + atomic_dec(&lock_is_read_held); cxt.cur_ops->readunlock(tid); stutter_wait("lock_torture_reader"); @@ -998,7 +998,6 @@ static int __init lock_torture_init(void) } if (nreaders_stress) { - lock_is_read_held = false; cxt.lrsa = kmalloc_array(cxt.nrealreaders_stress, sizeof(*cxt.lrsa), GFP_KERNEL); From 665c31c04981e3f66be7eafa4483f200d7c13ecf Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 24 Jun 2021 17:53:43 -0700 Subject: [PATCH 268/851] scftorture: Add RPC-like IPI tests This commit adds the weight_single_rpc module parameter, which causes the IPI handler to awaken the IPI sender. In many scheduler configurations, this will result in an IPI back to the sender that is likely to be received at a time when the sender CPU is idle. The intent is to stress IPI reception during CPU busy-to-idle transitions. Signed-off-by: Paul E. McKenney --- kernel/scftorture.c | 76 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 13 deletions(-) diff --git a/kernel/scftorture.c b/kernel/scftorture.c index 29e8fc5d91a7b..5cf40e4383191 100644 --- a/kernel/scftorture.c +++ b/kernel/scftorture.c @@ -64,6 +64,7 @@ torture_param(bool, use_cpus_read_lock, 0, "Use cpus_read_lock() to exclude CPU torture_param(int, verbose, 0, "Enable verbose debugging printk()s"); torture_param(int, weight_resched, -1, "Testing weight for resched_cpu() operations."); torture_param(int, weight_single, -1, "Testing weight for single-CPU no-wait operations."); +torture_param(int, weight_single_rpc, -1, "Testing weight for single-CPU RPC operations."); torture_param(int, weight_single_wait, -1, "Testing weight for single-CPU operations."); torture_param(int, weight_many, -1, "Testing weight for multi-CPU no-wait operations."); torture_param(int, weight_many_wait, -1, "Testing weight for multi-CPU operations."); @@ -86,6 +87,8 @@ struct scf_statistics { long long n_resched; long long n_single; long long n_single_ofl; + long long n_single_rpc; + long long n_single_rpc_ofl; long long n_single_wait; long long n_single_wait_ofl; long long n_many; @@ -101,14 +104,17 @@ static DEFINE_PER_CPU(long long, scf_invoked_count); // Data for random primitive selection #define SCF_PRIM_RESCHED 0 #define SCF_PRIM_SINGLE 1 -#define SCF_PRIM_MANY 2 -#define SCF_PRIM_ALL 3 -#define SCF_NPRIMS 7 // Need wait and no-wait versions of each, - // except for
SCF_PRIM_RESCHED. +#define SCF_PRIM_SINGLE_RPC 2 +#define SCF_PRIM_MANY 3 +#define SCF_PRIM_ALL 4 +#define SCF_NPRIMS 8 // Need wait and no-wait versions of each, + // except for SCF_PRIM_RESCHED and + // SCF_PRIM_SINGLE_RPC. static char *scf_prim_name[] = { "resched_cpu", "smp_call_function_single", + "smp_call_function_single_rpc", "smp_call_function_many", "smp_call_function", }; @@ -128,6 +134,8 @@ struct scf_check { bool scfc_out; int scfc_cpu; // -1 for not _single(). bool scfc_wait; + bool scfc_rpc; + struct completion scfc_completion; }; // Use to wait for all threads to start. @@ -158,6 +166,7 @@ static void scf_torture_stats_print(void) scfs.n_resched += scf_stats_p[i].n_resched; scfs.n_single += scf_stats_p[i].n_single; scfs.n_single_ofl += scf_stats_p[i].n_single_ofl; + scfs.n_single_rpc += scf_stats_p[i].n_single_rpc; scfs.n_single_wait += scf_stats_p[i].n_single_wait; scfs.n_single_wait_ofl += scf_stats_p[i].n_single_wait_ofl; scfs.n_many += scf_stats_p[i].n_many; @@ -168,9 +177,10 @@ static void scf_torture_stats_print(void) if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) || atomic_read(&n_mb_out_errs) || atomic_read(&n_alloc_errs)) bangstr = "!!! "; - pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld many: %lld/%lld all: %lld/%lld ", + pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld single_rpc: %lld single_rpc_ofl: %lld many: %lld/%lld all: %lld/%lld ", SCFTORT_FLAG, bangstr, isdone ? 
"VER" : "ver", invoked_count, scfs.n_resched, scfs.n_single, scfs.n_single_wait, scfs.n_single_ofl, scfs.n_single_wait_ofl, + scfs.n_single_rpc, scfs.n_single_rpc_ofl, scfs.n_many, scfs.n_many_wait, scfs.n_all, scfs.n_all_wait); torture_onoff_stats(); pr_cont("ste: %d stnmie: %d stnmoe: %d staf: %d\n", atomic_read(&n_errs), @@ -282,10 +292,13 @@ static void scf_handler(void *scfc_in) out: if (unlikely(!scfcp)) return; - if (scfcp->scfc_wait) + if (scfcp->scfc_wait) { WRITE_ONCE(scfcp->scfc_out, true); - else + if (scfcp->scfc_rpc) + complete(&scfcp->scfc_completion); + } else { kfree(scfcp); + } } // As above, but check for correct CPU. @@ -319,6 +332,7 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra scfcp->scfc_cpu = -1; scfcp->scfc_wait = scfsp->scfs_wait; scfcp->scfc_out = false; + scfcp->scfc_rpc = false; } } switch (scfsp->scfs_prim) { @@ -350,6 +364,34 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra scfcp = NULL; } break; + case SCF_PRIM_SINGLE_RPC: + if (!scfcp) + break; + cpu = torture_random(trsp) % nr_cpu_ids; + scfp->n_single_rpc++; + scfcp->scfc_cpu = cpu; + scfcp->scfc_wait = true; + init_completion(&scfcp->scfc_completion); + scfcp->scfc_rpc = true; + barrier(); // Prevent race-reduction compiler optimizations. 
+ scfcp->scfc_in = true; + ret = smp_call_function_single(cpu, scf_handler_1, (void *)scfcp, 0); + if (!ret) { + if (use_cpus_read_lock) + cpus_read_unlock(); + else + preempt_enable(); + wait_for_completion(&scfcp->scfc_completion); + if (use_cpus_read_lock) + cpus_read_lock(); + else + preempt_disable(); + } else { + scfp->n_single_rpc_ofl++; + kfree(scfcp); + scfcp = NULL; + } + break; case SCF_PRIM_MANY: if (scfsp->scfs_wait) scfp->n_many_wait++; @@ -379,10 +421,12 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra } if (scfcp && scfsp->scfs_wait) { if (WARN_ON_ONCE((num_online_cpus() > 1 || scfsp->scfs_prim == SCF_PRIM_SINGLE) && - !scfcp->scfc_out)) + !scfcp->scfc_out)) { + pr_warn("%s: Memory-ordering failure, scfs_prim: %d.\n", __func__, scfsp->scfs_prim); atomic_inc(&n_mb_out_errs); // Leak rather than trash! - else + } else { kfree(scfcp); + } barrier(); // Prevent race-reduction compiler optimizations. } if (use_cpus_read_lock) @@ -453,8 +497,8 @@ static void scftorture_print_module_parms(const char *tag) { pr_alert(SCFTORT_FLAG - "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter=%d use_cpus_read_lock=%d, weight_resched=%d, weight_single=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag, - verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter, use_cpus_read_lock, weight_resched, weight_single, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait); + "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter=%d use_cpus_read_lock=%d, weight_resched=%d, weight_single=%d, weight_single_rpc=%d, weight_single_wait=%d, weight_many=%d, weight_many_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag, + verbose, holdoff, longwait, nthreads, 
onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter, use_cpus_read_lock, weight_resched, weight_single, weight_single_rpc, weight_single_wait, weight_many, weight_many_wait, weight_all, weight_all_wait); } static void scf_cleanup_handler(void *unused) @@ -497,6 +541,7 @@ static int __init scf_torture_init(void) int firsterr = 0; unsigned long weight_resched1 = weight_resched; unsigned long weight_single1 = weight_single; + unsigned long weight_single_rpc1 = weight_single_rpc; unsigned long weight_single_wait1 = weight_single_wait; unsigned long weight_many1 = weight_many; unsigned long weight_many_wait1 = weight_many_wait; @@ -508,11 +553,13 @@ static int __init scf_torture_init(void) scftorture_print_module_parms("Start of test"); - if (weight_resched == -1 && weight_single == -1 && weight_single_wait == -1 && + if (weight_resched == -1 && + weight_single == -1 && weight_single_rpc == -1 && weight_single_wait == -1 && weight_many == -1 && weight_many_wait == -1 && weight_all == -1 && weight_all_wait == -1) { weight_resched1 = 2 * nr_cpu_ids; weight_single1 = 2 * nr_cpu_ids; + weight_single_rpc1 = 2 * nr_cpu_ids; weight_single_wait1 = 2 * nr_cpu_ids; weight_many1 = 2; weight_many_wait1 = 2; @@ -523,6 +570,8 @@ static int __init scf_torture_init(void) weight_resched1 = 0; if (weight_single == -1) weight_single1 = 0; + if (weight_single_rpc == -1) + weight_single_rpc1 = 0; if (weight_single_wait == -1) weight_single_wait1 = 0; if (weight_many == -1) @@ -534,7 +583,7 @@ static int __init scf_torture_init(void) if (weight_all_wait == -1) weight_all_wait1 = 0; } - if (weight_single1 == 0 && weight_single_wait1 == 0 && + if (weight_single1 == 0 && weight_single_rpc1 == 0 && weight_single_wait1 == 0 && weight_many1 == 0 && weight_many_wait1 == 0 && weight_all1 == 0 && weight_all_wait1 == 0) { VERBOSE_SCFTORTOUT_ERRSTRING("all zero weights makes no sense"); @@ -546,6 +595,7 @@ static int __init scf_torture_init(void) else if (weight_resched1) 
VERBOSE_SCFTORTOUT_ERRSTRING("built as module, weight_resched ignored"); scf_sel_add(weight_single1, SCF_PRIM_SINGLE, false); + scf_sel_add(weight_single_rpc1, SCF_PRIM_SINGLE_RPC, true); scf_sel_add(weight_single_wait1, SCF_PRIM_SINGLE, true); scf_sel_add(weight_many1, SCF_PRIM_MANY, false); scf_sel_add(weight_many_wait1, SCF_PRIM_MANY, true); From 05caab8b4b9e15c6fd01bfde70cc5cbf581ae96b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 9 Jul 2021 17:53:27 -0700 Subject: [PATCH 269/851] scftorture: Avoid NULL pointer exception on early exit When scftorture finds an error in the module parameters controlling the relative frequencies of smp_call_function*() variants, it takes an early exit. So early that it has not allocated memory to track the kthreads running the test, which results in a segfault. This commit therefore checks for the existence of the memory before attempting to stop the kthreads that would otherwise have been recorded in that non-existent memory. Signed-off-by: Paul E. McKenney --- kernel/scftorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/scftorture.c b/kernel/scftorture.c index 5cf40e4383191..64a08288b1a6d 100644 --- a/kernel/scftorture.c +++ b/kernel/scftorture.c @@ -513,7 +513,7 @@ static void scf_torture_cleanup(void) return; WRITE_ONCE(scfdone, true); - if (nthreads) + if (nthreads && scf_stats_p) for (i = 0; i < nthreads; i++) torture_stop_kthread("scftorture_invoker", scf_stats_p[i].task); else From e8cf874883c5d83674eec409c99b79234123d1a6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Jul 2021 15:49:40 -0700 Subject: [PATCH 270/851] torture: Make kvm.sh select per-scenario affinity masks This commit causes kvm.sh to use the new kvm-assign-cpus.sh and kvm-get-cpus-script.sh scripts to create a TORTURE_AFFINITY environment variable containing either an empty string (for no affinity) or a list of CPUs to pin the scenario's vCPUs to. 
A later commit will make use of this information to actually pin the vCPUs. Signed-off-by: Paul E. McKenney --- .../rcutorture/bin/kvm-assign-cpus.sh | 105 ++++++++++++++++++ .../rcutorture/bin/kvm-get-cpus-script.sh | 87 +++++++++++++++ .../rcutorture/bin/kvm-test-1-run-batch.sh | 1 + tools/testing/selftests/rcutorture/bin/kvm.sh | 14 ++- 4 files changed, 205 insertions(+), 2 deletions(-) create mode 100755 tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh create mode 100755 tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh new file mode 100755 index 0000000000000..494ae91cc1f0e --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh @@ -0,0 +1,105 @@ +#!/bin/sh +# +# Produce awk statements roughly depicting the system's CPU and cache +# layout. If the required information is not available, produce +# error messages as awk comments. Successful exit regardless. +# +# Usage: kvm-assign-cpus.sh /path/to/sysfs + +T=/tmp/kvm-assign-cpus.sh.$$ +trap 'rm -rf $T' 0 2 +mkdir $T + +sysfsdir=${1-/sys/devices/system/node} +if ! cd "$sysfsdir" > $T/msg 2>&1 +then + sed -e 's/^/# /' < $T/msg + exit 0 +fi +nodelist="`ls -d node*`" +for i in node* +do + if ! test -d $i/ + then + echo "# Not a directory: $sysfsdir/node*" + exit 0 + fi + for j in $i/cpu*/cache/index* + do + if ! test -d $j/ + then + echo "# Not a directory: $sysfsdir/$j" + exit 0 + else + break + fi + done + indexlist="`ls -d $i/cpu* | grep 'cpu[0-9][0-9]*' | head -1 | sed -e 's,^.*$,ls -d &/cache/index*,' | sh | sed -e 's,^.*/,,'`" + break +done +for i in node*/cpu*/cache/index*/shared_cpu_list +do + if ! test -f $i + then + echo "# Not a file: $sysfsdir/$i" + exit 0 + else + break + fi +done +firstshared= +for i in $indexlist +do + rm -f $T/cpulist + for n in node* + do + f="$n/cpu*/cache/$i/shared_cpu_list" + if ! 
cat $f > $T/msg 2>&1 + then + sed -e 's/^/# /' < $T/msg + exit 0 + fi + cat $f >> $T/cpulist + done + if grep -q '[-,]' $T/cpulist + then + if test -z "$firstshared" + then + firstshared="$i" + fi + fi +done +if test -z "$firstshared" +then + splitindex="`echo $indexlist | sed -e 's/ .*$//'`" +else + splitindex="$firstshared" +fi +nodenum=0 +for n in node* +do + cat $n/cpu*/cache/$splitindex/shared_cpu_list | sort -u -k1n | + awk -v nodenum="$nodenum" ' + BEGIN { + idx = 0; + } + + { + nlists = split($0, cpulists, ","); + for (i = 1; i <= nlists; i++) { + listsize = split(cpulists[i], cpus, "-"); + if (listsize == 1) + cpus[2] = cpus[1]; + for (j = cpus[1]; j <= cpus[2]; j++) { + print "cpu[" nodenum "][" idx "] = " j ";"; + idx++; + } + } + } + + END { + print "nodecpus[" nodenum "] = " idx ";"; + }' + nodenum=`expr $nodenum + 1` +done +echo "numnodes = $nodenum;" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh new file mode 100755 index 0000000000000..0ab7fca5bec12 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh @@ -0,0 +1,87 @@ +#!/bin/sh +# +# Create an awk script that takes as input numbers of CPUs and outputs +# lists of CPUs, one per line in both cases. +# +# Usage: kvm-get-cpus-script.sh /path/to/cpu/arrays /path/to/put/script [ /path/to/state ] +# +# The CPU arrays are output by kvm-assign-cpus.sh, and are valid awk +# statements initializing the variables describing the system's topology. +# +# The optional state is input by this script (if the file exists and is +# non-empty), and can also be output by this script. + +cpuarrays="${1-/sys/devices/system/node}" +scriptfile="${2}" +statefile="${3}" + +if ! test -f "$cpuarrays" +then + echo "File not found: $cpuarrays" 1>&2 + exit 1 +fi +scriptdir="`dirname "$scriptfile"`" +if ! test -d "$scriptdir" || ! test -x "$scriptdir" || ! 
test -w "$scriptdir" +then + echo "Directory not usable for script output: $scriptdir" + exit 1 +fi + +cat << '___EOF___' > "$scriptfile" +BEGIN { +___EOF___ +cat "$cpuarrays" >> "$scriptfile" +if test -r "$statefile" +then + cat "$statefile" >> "$scriptfile" +fi +cat << '___EOF___' >> "$scriptfile" +} + +# Do we have the system architecture to guide CPU affinity? +function gotcpus() +{ + return numnodes != ""; +} + +# Return a comma-separated list of the next n CPUs. +function nextcpus(n, i, s) +{ + for (i = 0; i < n; i++) { + if (nodecpus[curnode] == "") + curnode = 0; + if (cpu[curnode][curcpu[curnode]] == "") + curcpu[curnode] = 0; + if (s != "") + s = s ","; + s = s cpu[curnode][curcpu[curnode]]; + curcpu[curnode]++; + curnode++ + } + return s; +} + +# Dump out the current node/CPU state so that a later invocation of this +# script can continue where this one left off. Of course, this only works +# when a state file was specified and where there was valid sysfs state. +# Returns 1 if the state was dumped, 0 otherwise. +# +# Dumping the state for one system configuration and loading it into +# another isn't likely to do what you want, whatever that might be. +function dumpcpustate( i, fn) +{ +___EOF___ +echo ' fn = "'"$statefile"'";' >> $scriptfile +cat << '___EOF___' >> "$scriptfile" + if (fn != "" && gotcpus()) { + print "curnode = " curnode ";" > fn; + for (i = 0; i < numnodes; i++) + if (curcpu[i] != "") + print "curcpu[" i "] = " curcpu[i] ";" >> fn; + return 1; + } + if (fn != "") + print "# No CPU state to dump." 
> fn; + return 0; +} +___EOF___ diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh index 7ea0809e229e9..005a048c55aa0 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -54,6 +54,7 @@ for i in "$@" do echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out echo > $i/kvm-test-1-run-qemu.sh.out + export TORTURE_AFFINITY= kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 & done for i in $runfiles diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 0f26a81599f5c..f442d84fb2a3e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -430,7 +430,9 @@ then git diff HEAD >> $resdir/$ds/testid.txt fi ___EOF___ -cat << '___EOF___' > $T/dumpbatches.awk +kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk +kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk +cat << '___EOF___' >> $T/dumpbatches.awk BEGIN { i = 0; } @@ -442,7 +444,7 @@ BEGIN { } # Dump out the scripting required to run one test batch. -function dump(first, pastlast, batchnum) +function dump(first, pastlast, batchnum, affinitylist) { print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log"; print "needqemurun=" @@ -474,6 +476,14 @@ function dump(first, pastlast, batchnum) print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. 
`date` | tee -a " rd "log"; print "mkdir " rd cfr[jn] " || :"; print "touch " builddir ".wait"; + affinitylist = ""; + if (gotcpus()) { + affinitylist = nextcpus(cpusr[jn]); + } + if (affinitylist ~ /^[0-9,-][0-9,-]*$/) + print "export TORTURE_AFFINITY=" affinitylist; + else + print "export TORTURE_AFFINITY="; print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log"; print "while test -f " builddir ".wait" From 973e42b635bdf46b2ddfe9478faddc179d9ee507 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Jul 2021 17:11:09 -0700 Subject: [PATCH 271/851] torture: Don't redirect qemu-cmd comment lines Currently, kvm-test-1-run-qemu.sh applies redirection to each and every line of each qemu-cmd script. Only the first line (the only one that is not a bash comment) needs to be redirected. Although redirecting the comments is currently harmless, just adding to the comment, it is an accident waiting to happen. This commit therefore adjusts the "sed" command to redirect only the qemu-system* command itself. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index 7c6643d1f0370..2fd0868b357c4 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -40,7 +40,7 @@ grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings . 
$T/qemu-cmd-settings # Decorate qemu-cmd with redirection, backgrounding, and PID capture -sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd +sed -e 's/^[^#].*$/& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd # In case qemu refuses to run... From 4ca8f1a42ec0124c43fe84553594a6bee84ed07d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Jul 2021 18:01:29 -0700 Subject: [PATCH 272/851] torture: Make kvm-test-1-run-qemu.sh apply affinity This commit causes the kvm-test-1-run-qemu.sh script to check the TORTURE_AFFINITY environment variable and to add "taskset" commands to the qemu-cmd file. The first "taskset" command is applied only if the TORTURE_AFFINITY environment variable is a non-empty string, and this command pins the current scenario's guest OS to the specified CPUs. The second "taskset" command reports the guest OS's affinity in a new "qemu-affinity" file. Signed-off-by: Paul E. McKenney --- .../selftests/rcutorture/bin/kvm-test-1-run-qemu.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index 2fd0868b357c4..b9a0bb6f01154 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -39,9 +39,16 @@ echo ' ---' `date`: Starting kernel, PID $$ grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings . $T/qemu-cmd-settings -# Decorate qemu-cmd with redirection, backgrounding, and PID capture -sed -e 's/^[^#].*$/& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd -echo 'echo $! 
> $resdir/qemu_pid' >> $T/qemu-cmd +# Decorate qemu-cmd with affinity, redirection, backgrounding, and PID capture +taskset_command= +if test -n "$TORTURE_AFFINITY" +then + taskset_command="taskset -c $TORTURE_AFFINITY " +fi +sed -e 's/^[^#].*$/'"$taskset_command"'& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd +echo 'qemu_pid=$!' >> $T/qemu-cmd +echo 'echo $qemu_pid > $resdir/qemu_pid' >> $T/qemu-cmd +echo 'taskset -c -p $qemu_pid > $resdir/qemu-affinity' >> $T/qemu-cmd # In case qemu refuses to run... echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log From 92c93c4ec0238bd276792af50426d93c51903418 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Jul 2021 09:22:15 -0700 Subject: [PATCH 273/851] rcutorture: Upgrade two-CPU scenarios to four CPUs There is no way to place the vCPUs in a two-CPU rcutorture scenario to get variable memory latency. This commit therefore upgrades the current two-CPU rcutorture scenarios to four CPUs. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/configs/rcu/RUDE01 | 2 +- tools/testing/selftests/rcutorture/configs/rcu/TASKS01 | 2 +- tools/testing/selftests/rcutorture/configs/rcu/TASKS03 | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 index bafe94cbd7390..3ca112444ce77 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=2 +CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 index bafe94cbd7390..3ca112444ce77 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=2 +CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 index ea4399020c6c1..dc02083803ce5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=2 +CONFIG_NR_CPUS=4 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y From 695e51cda947eaa294cd81855b66309a0d5b9bd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Jul 2021 11:02:14 -0700 Subject: [PATCH 274/851] torture: Use numeric taskset argument in jitter.sh The jitter.sh script has some entertaining awk code to generate a hex mask from a randomly selected CPU number, which is handed to the "taskset" command. Except that this command has a "-c" parameter to take a comma/dash-separated list of CPU numbers. 
This commit therefore saves a few lines of awk by switching to a single-number CPU list. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/jitter.sh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index 15d937ba96cad..fd1ffaa5a1358 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -68,16 +68,12 @@ do cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN { srand(n + me + systime()); ncpus = split(cpus, ca); - curcpu = ca[int(rand() * ncpus + 1)]; - z = ""; - for (i = 1; 4 * i <= curcpu; i++) - z = z "0"; - print "0x" 2 ^ (curcpu % 4) z; + print ca[int(rand() * ncpus + 1)]; }' < /dev/null` n=$(($n+1)) - if ! taskset -p $cpumask $$ > /dev/null 2>&1 + if ! taskset -c -p $cpumask $$ > /dev/null 2>&1 then - echo taskset failure: '"taskset -p ' $cpumask $$ '"' + echo taskset failure: '"taskset -c -p ' $cpumask $$ '"' exit 1 fi From 86a8c99c82ecb6fc3e4b6ef30229fc13c1a4d420 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Jul 2021 15:35:42 -0700 Subject: [PATCH 275/851] torture: Consistently name "qemu*" test output files There is "qemu-affinity", "qemu-cmd", "qemu-retval", but also "qemu_pid". This is hard to remember, not so good for bash tab completion, and just plain inconsistent. This commit therefore renames the "qemu_pid" file to "qemu-pid". A couple of the scripts must deal with old runs, and thus must handle both "qemu_pid" and "qemu-pid", but new runs will produce "qemu-pid". Signed-off-by: Paul E. 
McKenney --- .../selftests/rcutorture/bin/kvm-again.sh | 2 +- .../selftests/rcutorture/bin/kvm-remote.sh | 2 +- .../rcutorture/bin/kvm-test-1-run-qemu.sh | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh index d8c8483c46f13..b7b8d6856d7ec 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -142,7 +142,7 @@ then echo "Cannot copy from $oldrun to $rundir." usage fi -rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log +rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log touch "$rundir/log" echo $scriptname $args | tee -a "$rundir/log" echo $oldrun > "$rundir/re-run" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index f52641fdd1fe7..19cadb1b2f2d3 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -254,7 +254,7 @@ do sleep 30 done echo " ---" Collecting results from $i `date` - ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu_pid */qemu-retval; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) + ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? 
> $T/exitcode ) | tee -a "$oldrun/remote-log" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index b9a0bb6f01154..d828a34b5b89f 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -47,7 +47,7 @@ then fi sed -e 's/^[^#].*$/'"$taskset_command"'& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd echo 'qemu_pid=$!' >> $T/qemu-cmd -echo 'echo $qemu_pid > $resdir/qemu_pid' >> $T/qemu-cmd +echo 'echo $qemu_pid > $resdir/qemu-pid' >> $T/qemu-cmd echo 'taskset -c -p $qemu_pid > $resdir/qemu-affinity' >> $T/qemu-cmd # In case qemu refuses to run... @@ -55,14 +55,14 @@ echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log # Attempt to run qemu kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` -( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & +( . $T/qemu-cmd; wait `cat $resdir/qemu-pid`; echo $? 
> $resdir/qemu-retval ) & commandcompleted=0 if test -z "$TORTURE_KCONFIG_GDB_ARG" then sleep 10 # Give qemu's pid a chance to reach the file - if test -s "$resdir/qemu_pid" + if test -s "$resdir/qemu-pid" then - qemu_pid=`cat "$resdir/qemu_pid"` + qemu_pid=`cat "$resdir/qemu-pid"` echo Monitoring qemu job at pid $qemu_pid else qemu_pid="" @@ -89,9 +89,9 @@ then fi while : do - if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" + if test -z "$qemu_pid" -a -s "$resdir/qemu-pid" then - qemu_pid=`cat "$resdir/qemu_pid"` + qemu_pid=`cat "$resdir/qemu-pid"` fi kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 @@ -122,9 +122,9 @@ do break fi done -if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" +if test -z "$qemu_pid" -a -s "$resdir/qemu-pid" then - qemu_pid=`cat "$resdir/qemu_pid"` + qemu_pid=`cat "$resdir/qemu-pid"` fi if test $commandcompleted -eq 0 -a -n "$qemu_pid" then From 5d3ba2b5d2d08d017b62a2ee6cc80ee97dde0d22 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Jul 2021 17:20:08 -0700 Subject: [PATCH 276/851] torture: Make kvm-test-1-run-batch.sh select per-scenario affinity masks This commit causes kvm-test-1-run-batch.sh to use the new kvm-assign-cpus.sh and kvm-get-cpus-script.sh scripts to create a TORTURE_AFFINITY environment variable containing either an empty string (for no affinity) or a list of CPUs to pin the scenario's vCPUs to. The additional change to kvm-test-1-run.sh places the per-scenario number-of-CPUs information where it can easily be found. If there is some reason why affinity cannot be supplied, this commit prints and logs the reason via changes to kvm-again.sh. Finally, this commit updates the kvm-remote.sh script to copy the qemu-affinity output files back to the host system. Signed-off-by: Paul E. 
McKenney --- .../selftests/rcutorture/bin/kvm-again.sh | 2 +- .../selftests/rcutorture/bin/kvm-remote.sh | 2 +- .../rcutorture/bin/kvm-test-1-run-batch.sh | 23 +++++++++++++++++++ .../rcutorture/bin/kvm-test-1-run.sh | 1 + 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh index b7b8d6856d7ec..5a0023d183dac 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -179,6 +179,6 @@ if test -n "$dryrun" then echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log" else - ( cd "$rundir"; sh $T/runbatches.sh ) + ( cd "$rundir"; sh $T/runbatches.sh ) | tee -a "$rundir/log" kvm-end-run-stats.sh "$rundir" "$starttime" fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 19cadb1b2f2d3..03126eb6ec5ae 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -254,7 +254,7 @@ do sleep 30 done echo " ---" Collecting results from $i `date` - ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) + ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? 
> $T/exitcode ) | tee -a "$oldrun/remote-log" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh index 005a048c55aa0..1e29d656501bc 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -50,11 +50,34 @@ grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings echo ---- System running test: `uname -a` echo ---- Starting kernels. `date` | tee -a log $TORTURE_JITTER_START +kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk for i in "$@" do echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out echo > $i/kvm-test-1-run-qemu.sh.out export TORTURE_AFFINITY= + kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate + cat << ' ___EOF___' >> $T/cpubatches.awk + END { + affinitylist = ""; + if (!gotcpus()) { + print "echo No CPU-affinity information, so no taskset command."; + } else if (cpu_count !~ /^[0-9][0-9]*$/) { + print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command."; + } else { + affinitylist = nextcpus(cpu_count); + if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/)) + print "echo " scenario ": Bogus CPU-affinity information, so no taskset command."; + else if (!dumpcpustate()) + print "echo " scenario ": Could not dump state, so no taskset command."; + else + print "export TORTURE_AFFINITY=" affinitylist; + } + } + ___EOF___ + cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`" + affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`" + $affinity_export kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 & done for i in $runfiles diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index f10f426f50893..f4c8055dbf7ad 100755 --- 
a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -205,6 +205,7 @@ echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cm echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd +echo "# TORTURE_CPU_COUNT=$cpu_count" >> $resdir/qemu-cmd if test -n "$TORTURE_BUILDONLY" then From d02bee54d9311740d15459f0b85f6a2181fcd703 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 9 Jul 2021 14:31:37 -0700 Subject: [PATCH 277/851] torture: Don't use "test" command's "-a" argument There was a time long ago when the "test" command's documentation claimed that the "-a" and "-o" arguments did something useful. But this documentation now suggests letting the shell execute these boolean operators, so this commit applies that suggestion to kvm-test-1-run-qemu.sh. Signed-off-by: Paul E. 
McKenney --- .../selftests/rcutorture/bin/kvm-test-1-run-qemu.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index d828a34b5b89f..ca1d49c1c2f45 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -89,7 +89,7 @@ then fi while : do - if test -z "$qemu_pid" -a -s "$resdir/qemu-pid" + if test -z "$qemu_pid" && test -s "$resdir/qemu-pid" then qemu_pid=`cat "$resdir/qemu-pid"` fi @@ -122,11 +122,11 @@ do break fi done -if test -z "$qemu_pid" -a -s "$resdir/qemu-pid" +if test -z "$qemu_pid" && test -s "$resdir/qemu-pid" then qemu_pid=`cat "$resdir/qemu-pid"` fi -if test $commandcompleted -eq 0 -a -n "$qemu_pid" +if test $commandcompleted -eq 0 && test -n "$qemu_pid" then if ! test -f "$resdir/../STOP.1" then @@ -159,11 +159,11 @@ then then last_ts=0 fi - if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) + if test "$newline" != "$oldline" && test "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) then must_continue=yes fi - if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) + if test $must_continue = no && test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) then echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 kill -KILL $qemu_pid From a044938f20f5a01b8b216d3b2bef6c90262678a8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 9 Jul 2021 15:25:31 -0700 Subject: [PATCH 278/851] torture: Add timestamps to kvm-test-1-run-qemu.sh output The kvm-test-1-run-qemu.sh script logs the torture-test start time and also when it starts getting impatient for the test to finish. However, it does not timestamp these log messages, which can make debugging needlessly challenging. 
This commit therefore adds timestamps to these messages. Signed-off-by: Paul E. McKenney --- .../selftests/rcutorture/bin/kvm-test-1-run-qemu.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index ca1d49c1c2f45..b41252033fac0 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -63,10 +63,10 @@ then if test -s "$resdir/qemu-pid" then qemu_pid=`cat "$resdir/qemu-pid"` - echo Monitoring qemu job at pid $qemu_pid + echo Monitoring qemu job at pid $qemu_pid `date` else qemu_pid="" - echo Monitoring qemu job at yet-as-unknown pid + echo Monitoring qemu job at yet-as-unknown pid `date` fi fi if test -n "$TORTURE_KCONFIG_GDB_ARG" @@ -130,14 +130,14 @@ if test $commandcompleted -eq 0 && test -n "$qemu_pid" then if ! test -f "$resdir/../STOP.1" then - echo Grace period for qemu job at pid $qemu_pid + echo Grace period for qemu job at pid $qemu_pid `date` fi oldline="`tail $resdir/console.log`" while : do if test -f "$resdir/../STOP.1" then - echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1 + echo "PID $qemu_pid killed due to run STOP.1 request `date`" >> $resdir/Warnings 2>&1 kill -KILL $qemu_pid break fi @@ -165,7 +165,7 @@ then fi if test $must_continue = no && test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) then - echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 + echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds `date`" >> $resdir/Warnings 2>&1 kill -KILL $qemu_pid break fi From 0bd06857a1d2dd4eac1ca8e9b322bb057effe772 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 9 Jul 2021 17:13:34 -0700 Subject: [PATCH 279/851] torture: Make kvm-test-1-run-qemu.sh check for reboot loops It turns out that certain types of early boot bugs can result in reboot loops, even within a guest OS running under qemu/KVM. This commit therefore upgrades the kvm-test-1-run-qemu.sh script's hang-detection heuristics to detect such situations and to terminate the run when they occur. Signed-off-by: Paul E. McKenney --- .../testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh index b41252033fac0..44280582c594e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -159,9 +159,13 @@ then then last_ts=0 fi - if test "$newline" != "$oldline" && test "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) + if test "$newline" != "$oldline" && test "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) && test "$last_ts" -gt "$TORTURE_SHUTDOWN_GRACE" then must_continue=yes + if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) + then + echo Continuing at console.log time $last_ts \"`tail -n 1 $resdir/console.log`\" `date` + fi fi if test $must_continue = no && test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) then From 69be7e0160554a1e20991174fce827cd2bbf87ac Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 13 May 2021 14:54:58 -0700 Subject: [PATCH 280/851] tools/memory-model: Add example for heuristic lockless reads This commit adds example code for heuristic lockless reads, based loosely on the sem_lock() and sem_unlock() functions. Reported-by: Manfred Spraul [ paulmck: Update per Manfred Spraul and Hillf Danton feedback. ] Signed-off-by: Paul E. 
McKenney --- .../Documentation/access-marking.txt | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt index 58bff26198767..be7d507997cf8 100644 --- a/tools/memory-model/Documentation/access-marking.txt +++ b/tools/memory-model/Documentation/access-marking.txt @@ -319,6 +319,100 @@ of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy concurrent lockless write. +Lock-Protected Writes With Heuristic Lockless Reads +--------------------------------------------------- + +For another example, suppose that the code can normally make use of +a per-data-structure lock, but there are times when a global lock +is required. These times are indicated via a global flag. The code +might look as follows, and is based loosely on nf_conntrack_lock(), +nf_conntrack_all_lock(), and nf_conntrack_all_unlock(): + + bool global_flag; + DEFINE_SPINLOCK(global_lock); + struct foo { + spinlock_t f_lock; + int f_data; + }; + + /* All foo structures are in the following array. */ + int nfoo; + struct foo *foo_array; + + void do_something_locked(struct foo *fp) + { + bool gf = true; + + /* IMPORTANT: Heuristic plus spin_lock()! */ + if (!data_race(global_flag)) { + spin_lock(&fp->f_lock); + if (!smp_load_acquire(&global_flag)) { + do_something(fp); + spin_unlock(&fp->f_lock); + return; + } + spin_unlock(&fp->f_lock); + } + spin_lock(&global_lock); + /* Lock held, thus global flag cannot change. */ + if (!global_flag) { + spin_lock(&fp->f_lock); + spin_unlock(&global_lock); + gf = false; + } + do_something(fp); + if (gf) + spin_unlock(&global_lock); + else + spin_unlock(&fp->f_lock); + } + + void begin_global(void) + { + int i; + + spin_lock(&global_lock); + WRITE_ONCE(global_flag, true); + for (i = 0; i < nfoo; i++) { + /* Wait for pre-existing local locks.
*/ + spin_lock(&foo_array[i].f_lock); + spin_unlock(&foo_array[i].f_lock); + } + } + + void end_global(void) + { + smp_store_release(&global_flag, false); + /* Pre-existing global lock acquisitions will recheck. */ + spin_unlock(&global_lock); + } + +All code paths leading from the do_something_locked() function's first +read from global_flag acquire a lock, so endless load fusing cannot +happen. + +If the value read from global_flag is true, then global_flag is rechecked +while holding global_lock, which prevents global_flag from changing. +If this recheck finds that global_flag is now false, the acquisition +of ->f_lock prior to the release of global_lock will result in any subsequent +begin_global() invocation waiting to acquire ->f_lock. + +On the other hand, if the value read from global_flag is false, then +rechecking global_flag under ->f_lock combined with synchronization +with begin_global() guarantees that any erroneous read will cause the +do_something_locked() function's first do_something() invocation to happen +before begin_global() returns. The combination of the smp_load_acquire() +in do_something_locked() and the smp_store_release() in end_global() +guarantees that either the do_something_locked() function's first +do_something() invocation happens after the call to end_global() or that +do_something_locked() acquires global_lock and rechecks under the lock. + +For this to work, only those foo structures in foo_array[] may be +passed to do_something_locked(). The reason for this is that the +synchronization with begin_global() relies on momentarily locking each +and every foo structure. + + Lockless Reads and Writes ------------------------- From fdd764e8626daa462fba019e06509c763b6b0f6a Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 14 May 2021 11:40:06 -0700 Subject: [PATCH 281/851] tools/memory-model: Heuristics using data_race() must handle all values Data loaded for use by some sorts of heuristics can tolerate the occasional erroneous value.
In this case the loads may use data_race() to give the compiler full freedom to optimize while also informing KCSAN of the intent. However, for this to work, the heuristic needs to be able to tolerate any erroneous value that could possibly arise. This commit therefore adds a paragraph spelling this out. Signed-off-by: Manfred Spraul Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/access-marking.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt index be7d507997cf8..fe4ad6d12d24c 100644 --- a/tools/memory-model/Documentation/access-marking.txt +++ b/tools/memory-model/Documentation/access-marking.txt @@ -126,6 +126,11 @@ consistent errors, which in turn are quite capable of breaking heuristics. Therefore use of data_race() should be limited to cases where some other code (such as a barrier() call) will force the occasional reload. +Note that this use case requires that the heuristic be able to handle +any possible error. In contrast, if the heuristics might be fatally +confused by one or more of the possible erroneous values, use READ_ONCE() +instead of data_race(). + In theory, plain C-language loads can also be used for this use case. However, in practice this will have the disadvantage of causing KCSAN to generate false positives because KCSAN will have no way of knowing From 50a912792cdecac347b11c9b40d229412b4d75f7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 May 2021 10:47:43 -0700 Subject: [PATCH 282/851] tools/memory-model: Document data_race(READ_ONCE()) It is possible to cause KCSAN to ignore marked accesses by applying __no_kcsan to the function or applying data_race() to the marked accesses. These approaches allow the developer to restrict compiler optimizations while also causing KCSAN to ignore diagnostic accesses. This commit therefore updates the documentation accordingly. Signed-off-by: Paul E. 
McKenney --- .../Documentation/access-marking.txt | 49 +++++++++++++------ 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt index fe4ad6d12d24c..a3dcc32e27b44 100644 --- a/tools/memory-model/Documentation/access-marking.txt +++ b/tools/memory-model/Documentation/access-marking.txt @@ -37,7 +37,9 @@ compiler's use of code-motion and common-subexpression optimizations. Therefore, if a given access is involved in an intentional data race, using READ_ONCE() for loads and WRITE_ONCE() for stores is usually preferable to data_race(), which in turn is usually preferable to plain -C-language accesses. +C-language accesses. It is permissible to combine #2 and #3, for example, +data_race(READ_ONCE(a)), which will both restrict compiler optimizations +and disable KCSAN diagnostics. KCSAN will complain about many types of data races involving plain C-language accesses, but marking all accesses involved in a given data @@ -86,6 +88,10 @@ that fail to exclude the updates. In this case, it is important to use data_race() for the diagnostic reads because otherwise KCSAN would give false-positive warnings about these diagnostic reads. +If it is necessary to both restrict compiler optimizations and disable +KCSAN diagnostics, use both data_race() and READ_ONCE(), for example, +data_race(READ_ONCE(a)). + In theory, plain C-language loads can also be used for this use case. However, in practice this will have the disadvantage of causing KCSAN to generate false positives because KCSAN will have no way of knowing @@ -279,19 +285,34 @@ tells KCSAN that data races are expected, and should be silently ignored. This data_race() also tells the human reading the code that read_foo_diagnostic() might sometimes return a bogus value. 
-However, please note that your kernel must be built with -CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to -detect a buggy lockless write. If you need KCSAN to detect such a -write even if that write did not change the value of foo, you also -need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. If you need KCSAN to -detect such a write happening in an interrupt handler running on the -same CPU doing the legitimate lock-protected write, you also need -CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these Kconfig -options set properly, KCSAN can be quite helpful, although it is not -necessarily a full replacement for hardware watchpoints. On the other -hand, neither are hardware watchpoints a full replacement for KCSAN -because it is not always easy to tell hardware watchpoint to conditionally -trap on accesses. +If it is necessary to suppress compiler optimization and also detect +buggy lockless writes, read_foo_diagnostic() can be updated as follows: + + void read_foo_diagnostic(void) + { + pr_info("Current value of foo: %d\n", data_race(READ_ONCE(foo))); + } + +Alternatively, given that KCSAN is to ignore all accesses in this function, +this function can be marked __no_kcsan and the data_race() can be dropped: + + void __no_kcsan read_foo_diagnostic(void) + { + pr_info("Current value of foo: %d\n", READ_ONCE(foo)); + } + +However, in order for KCSAN to detect buggy lockless writes, your kernel +must be built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n. If you +need KCSAN to detect such a write even if that write did not change +the value of foo, you also need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. +If you need KCSAN to detect such a write happening in an interrupt handler +running on the same CPU doing the legitimate lock-protected write, you +also need CONFIG_KCSAN_INTERRUPT_WATCHER=y. 
With some or all of these +Kconfig options set properly, KCSAN can be quite helpful, although +it is not necessarily a full replacement for hardware watchpoints. +On the other hand, neither are hardware watchpoints a full replacement +for KCSAN because it is not always easy to tell hardware watchpoint to +conditionally trap on accesses. Lock-Protected Writes With Lockless Reads From 9b073961afabcf70d0804e472ea02fc6c739dcce Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 Jul 2021 10:43:29 -0700 Subject: [PATCH 283/851] clocksource: Prohibit clocksource watchdog test when HZ<100 As noted in a comment, use of the TEST_CLOCKSOURCE_WATCHDOG kconfig option is prohibited when HZ is less than 100 in order to avoid signed integer overflow. However, comments can easily be ignored even when they are actually read. Therefore, add a "depends" clause to prohibit use of the TEST_CLOCKSOURCE_WATCHDOG kconfig option when HZ is less than 100. Reported-by: kernel test robot Tested-by: Rong Chen Signed-off-by: Paul E. McKenney Link: https://lore.kernel.org/lkml/202107040532.wqL30yFp-lkp@intel.com/ --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 831212722924c..79487f3dad430 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2612,6 +2612,7 @@ config TEST_FPU config TEST_CLOCKSOURCE_WATCHDOG tristate "Test clocksource watchdog in kernel space" depends on CLOCKSOURCE_WATCHDOG + depends on HZ >= 100 help Enable this option to create a kernel module that will trigger a test of the clocksource watchdog. This module may be loaded From c2608cebed20bb307056d202258cc96ae8a9631d Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:48:58 -0700 Subject: [PATCH 284/851] dm ima: measure data on table load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DM configures a block device with various target specific attributes passed to it as a table. 
DM loads the table, and calls each target’s respective constructors with the attributes as input parameters. Some of these attributes are critical to ensure the device meets certain security bar. Thus, IMA should measure these attributes, to ensure they are not tampered with, during the lifetime of the device. So that the external services can have high confidence in the configuration of the block-devices on a given system. Some devices may have large tables. And a given device may change its state (table-load, suspend, resume, rename, remove, table-clear etc.) many times. Measuring these attributes each time when the device changes its state will significantly increase the size of the IMA logs. Further, once configured, these attributes are not expected to change unless a new table is loaded, or a device is removed and recreated. Therefore the clear-text of the attributes should only be measured during table load, and the hash of the active/inactive table should be measured for the remaining device state changes. Export IMA function ima_measure_critical_data() to allow measurement of DM device parameters, as well as target specific attributes, during table load. Compute the hash of the inactive table and store it for measurements during future state change. If a load is called multiple times, update the inactive table hash with the hash of the latest populated table. So that the correct inactive table hash is measured when the device transitions to different states like resume, remove, rename, etc. 
Signed-off-by: Tushar Sugandhi Signed-off-by: Mike Snitzer --- drivers/md/Makefile | 4 + drivers/md/dm-core.h | 5 + drivers/md/dm-ima.c | 330 ++++++++++++++++++++++++++++++ drivers/md/dm-ima.h | 59 ++++++ drivers/md/dm-ioctl.c | 7 +- drivers/md/dm.c | 3 + include/linux/device-mapper.h | 2 +- include/uapi/linux/dm-ioctl.h | 6 + security/integrity/ima/ima_main.c | 1 + 9 files changed, 415 insertions(+), 2 deletions(-) create mode 100644 drivers/md/dm-ima.c create mode 100644 drivers/md/dm-ima.h diff --git a/drivers/md/Makefile b/drivers/md/Makefile index a74aaf8b14457..816945eeed7f3 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -96,6 +96,10 @@ ifeq ($(CONFIG_BLK_DEV_ZONED),y) dm-mod-objs += dm-zone.o endif +ifeq ($(CONFIG_IMA),y) +dm-mod-objs += dm-ima.o +endif + ifeq ($(CONFIG_DM_VERITY_FEC),y) dm-verity-objs += dm-verity-fec.o endif diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index edc1553c4eeae..55dccdfbcb22e 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -18,6 +18,7 @@ #include #include "dm.h" +#include "dm-ima.h" #define DM_RESERVED_MAX_IOS 1024 @@ -119,6 +120,10 @@ struct mapped_device { unsigned int nr_zones; unsigned int *zwp_offset; #endif + +#ifdef CONFIG_IMA + struct dm_ima_measurements ima; +#endif }; /* diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c new file mode 100644 index 0000000000000..e0caa40772279 --- /dev/null +++ b/drivers/md/dm-ima.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Microsoft Corporation + * + * Author: Tushar Sugandhi + * + * File: dm-ima.c + * Enables IMA measurements for DM targets + */ + +#include "dm-core.h" +#include "dm-ima.h" + +#include +#include +#include +#include + +#define DM_MSG_PREFIX "ima" + +/* + * Internal function to prefix separator characters in input buffer with escape + * character, so that they don't interfere with the construction of key-value pairs, + * and clients can split the key1=val1,key2=val2,key3=val3; pairs 
properly. + */ +static void fix_separator_chars(char **buf) +{ + int l = strlen(*buf); + int i, j, sp = 0; + + for (i = 0; i < l; i++) + if ((*buf)[i] == '\\' || (*buf)[i] == ';' || (*buf)[i] == '=' || (*buf)[i] == ',') + sp++; + + if (!sp) + return; + + for (i = l-1, j = i+sp; i >= 0; i--) { + (*buf)[j--] = (*buf)[i]; + if ((*buf)[i] == '\\' || (*buf)[i] == ';' || (*buf)[i] == '=' || (*buf)[i] == ',') + (*buf)[j--] = '\\'; + } +} + +/* + * Internal function to allocate memory for IMA measurements. + */ +static void *dm_ima_alloc(size_t len, gfp_t flags, bool noio) +{ + unsigned int noio_flag; + void *ptr; + + if (noio) + noio_flag = memalloc_noio_save(); + + ptr = kzalloc(len, flags); + + if (noio) + memalloc_noio_restore(noio_flag); + + return ptr; +} + +/* + * Internal function to allocate and copy name and uuid for IMA measurements. + */ +static int dm_ima_alloc_and_copy_name_uuid(struct mapped_device *md, char **dev_name, + char **dev_uuid, bool noio) +{ + int r; + *dev_name = dm_ima_alloc(DM_NAME_LEN*2, GFP_KERNEL, noio); + if (!(*dev_name)) { + r = -ENOMEM; + goto error; + } + + *dev_uuid = dm_ima_alloc(DM_UUID_LEN*2, GFP_KERNEL, noio); + if (!(*dev_uuid)) { + r = -ENOMEM; + goto error; + } + + r = dm_copy_name_and_uuid(md, *dev_name, *dev_uuid); + if (r) + goto error; + + fix_separator_chars(dev_name); + fix_separator_chars(dev_uuid); + + return 0; +error: + kfree(*dev_name); + kfree(*dev_uuid); + *dev_name = NULL; + *dev_uuid = NULL; + return r; +} + +/* + * Internal function to allocate and copy device data for IMA measurements. 
+ */ +static int dm_ima_alloc_and_copy_device_data(struct mapped_device *md, char **device_data, + unsigned int num_targets, bool noio) +{ + char *dev_name = NULL, *dev_uuid = NULL; + int r; + + r = dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio); + if (r) + return r; + + *device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio); + if (!(*device_data)) { + r = -ENOMEM; + goto error; + } + + scnprintf(*device_data, DM_IMA_DEVICE_BUF_LEN, + "name=%s,uuid=%s,major=%d,minor=%d,minor_count=%d,num_targets=%u;", + dev_name, dev_uuid, md->disk->major, md->disk->first_minor, + md->disk->minors, num_targets); +error: + kfree(dev_name); + kfree(dev_uuid); + return r; +} + +/* + * Internal wrapper function to call IMA to measure DM data. + */ +static void dm_ima_measure_data(const char *event_name, const void *buf, size_t buf_len, + bool noio) +{ + unsigned int noio_flag; + + if (noio) + noio_flag = memalloc_noio_save(); + + ima_measure_critical_data(DM_NAME, event_name, buf, buf_len, false); + + if (noio) + memalloc_noio_restore(noio_flag); +} + +/* + * Initialize/reset the dm ima related data structure variables. + */ +void dm_ima_reset_data(struct mapped_device *md) +{ + memset(&(md->ima), 0, sizeof(md->ima)); +} + +/* + * Build up the IMA data for each target, and finally measure. 
+ */ +void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) +{ + size_t device_data_buf_len, target_metadata_buf_len, target_data_buf_len, l = 0; + char *target_metadata_buf = NULL, *target_data_buf = NULL, *digest_buf = NULL; + char *ima_buf = NULL, *device_data_buf = NULL; + int digest_size, last_target_measured = -1, r; + status_type_t type = STATUSTYPE_IMA; + size_t cur_total_buf_len = 0; + unsigned int num_targets, i; + SHASH_DESC_ON_STACK(shash, NULL); + struct crypto_shash *tfm = NULL; + u8 *digest = NULL; + bool noio = false; + + ima_buf = dm_ima_alloc(DM_IMA_MEASUREMENT_BUF_LEN, GFP_KERNEL, noio); + if (!ima_buf) + return; + + target_metadata_buf = dm_ima_alloc(DM_IMA_TARGET_METADATA_BUF_LEN, GFP_KERNEL, noio); + if (!target_metadata_buf) + goto error; + + target_data_buf = dm_ima_alloc(DM_IMA_TARGET_DATA_BUF_LEN, GFP_KERNEL, noio); + if (!target_data_buf) + goto error; + + num_targets = dm_table_get_num_targets(table); + + if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio)) + goto error; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) + goto error; + + shash->tfm = tfm; + digest_size = crypto_shash_digestsize(tfm); + digest = dm_ima_alloc(digest_size, GFP_KERNEL, noio); + if (!digest) + goto error; + + r = crypto_shash_init(shash); + if (r) + goto error; + + device_data_buf_len = strlen(device_data_buf); + memcpy(ima_buf + l, device_data_buf, device_data_buf_len); + l += device_data_buf_len; + + for (i = 0; i < num_targets; i++) { + struct dm_target *ti = dm_table_get_target(table, i); + + if (!ti) + goto error; + + last_target_measured = 0; + + /* + * First retrieve the target metadata. + */ + scnprintf(target_metadata_buf, DM_IMA_TARGET_METADATA_BUF_LEN, + "target_index=%d,target_begin=%llu,target_len=%llu,", + i, ti->begin, ti->len); + target_metadata_buf_len = strlen(target_metadata_buf); + + /* + * Then retrieve the actual target data.
+ */ + if (ti->type->status) + ti->type->status(ti, type, status_flags, target_data_buf, + DM_IMA_TARGET_DATA_BUF_LEN); + else + target_data_buf[0] = '\0'; + + target_data_buf_len = strlen(target_data_buf); + + /* + * Check if the total data can fit into the IMA buffer. + */ + cur_total_buf_len = l + target_metadata_buf_len + target_data_buf_len; + + /* + * IMA measurements for DM targets are best-effort. + * If the total data buffered so far, including the current target, + * is too large to fit into DM_IMA_MEASUREMENT_BUF_LEN, measure what + * we have in the current buffer, and continue measuring the remaining + * targets by prefixing the device metadata again. + */ + if (unlikely(cur_total_buf_len >= DM_IMA_MEASUREMENT_BUF_LEN)) { + dm_ima_measure_data("table_load", ima_buf, l, noio); + r = crypto_shash_update(shash, (const u8 *)ima_buf, l); + if (r < 0) + goto error; + + memset(ima_buf, 0, DM_IMA_MEASUREMENT_BUF_LEN); + l = 0; + + /* + * Each new "table_load" entry in IMA log should have device data + * prefix, so that multiple records from the same table_load for + * a given device can be linked together. + */ + memcpy(ima_buf + l, device_data_buf, device_data_buf_len); + l += device_data_buf_len; + + /* + * If this iteration of the for loop turns out to be the last target + * in the table, dm_ima_measure_data("table_load", ...) doesn't need + * to be called again, just the hash needs to be finalized. + * "last_target_measured" tracks this state. + */ + last_target_measured = 1; + } + + /* + * Fill-in all the target metadata, so that multiple targets for the same + * device can be linked together. 
+ */ + memcpy(ima_buf + l, target_metadata_buf, target_metadata_buf_len); + l += target_metadata_buf_len; + + memcpy(ima_buf + l, target_data_buf, target_data_buf_len); + l += target_data_buf_len; + } + + if (!last_target_measured) { + dm_ima_measure_data("table_load", ima_buf, l, noio); + + r = crypto_shash_update(shash, (const u8 *)ima_buf, l); + if (r < 0) + goto error; + } + + /* + * Finalize the table hash, and store it in table->md->ima.inactive_table.hash, + * so that the table data can be verified against the future device state change + * events, e.g. resume, rename, remove, table-clear etc. + */ + r = crypto_shash_final(shash, digest); + if (r < 0) + goto error; + + digest_buf = dm_ima_alloc((digest_size*2)+1, GFP_KERNEL, noio); + if (!digest_buf) + goto error; + + for (i = 0; i < digest_size; i++) + snprintf((digest_buf+(i*2)), 3, "%02x", digest[i]); + + if (table->md->ima.active_table.hash != table->md->ima.inactive_table.hash) + kfree(table->md->ima.inactive_table.hash); + + table->md->ima.inactive_table.hash = digest_buf; + table->md->ima.inactive_table.hash_len = strlen(digest_buf); + table->md->ima.inactive_table.num_targets = num_targets; + + if (table->md->ima.active_table.device_metadata != + table->md->ima.inactive_table.device_metadata) + kfree(table->md->ima.inactive_table.device_metadata); + + table->md->ima.inactive_table.device_metadata = device_data_buf; + table->md->ima.inactive_table.device_metadata_len = device_data_buf_len; + + goto exit; +error: + kfree(digest_buf); + kfree(device_data_buf); +exit: + kfree(digest); + crypto_free_shash(tfm); + kfree(ima_buf); + kfree(target_metadata_buf); + kfree(target_data_buf); +} + diff --git a/drivers/md/dm-ima.h b/drivers/md/dm-ima.h new file mode 100644 index 0000000000000..16afd9a8c0b2a --- /dev/null +++ b/drivers/md/dm-ima.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2021 Microsoft Corporation + * + * Author: Tushar Sugandhi + * + * File: dm-ima.h + * Header 
file for device mapper IMA measurements. + */ + +#ifndef DM_IMA_H +#define DM_IMA_H + +#define DM_IMA_MEASUREMENT_BUF_LEN 4096 +#define DM_IMA_DEVICE_BUF_LEN 1024 +#define DM_IMA_TARGET_METADATA_BUF_LEN 128 +#define DM_IMA_TARGET_DATA_BUF_LEN 2048 + +#ifdef CONFIG_IMA + +struct dm_ima_device_table_metadata { + /* + * Contains data specific to the device which is common across + * all the targets in the table (e.g. name, uuid, major, minor, etc). + * The values are stored in comma separated list of key1=val1,key2=val2; + * pairs delimited by a semicolon at the end of the list. + */ + char *device_metadata; + unsigned int device_metadata_len; + unsigned int num_targets; + + /* + * Contains the sha256 hashes of the IMA measurements of the target + * attributes' key-value pairs from the active/inactive tables. + */ + char *hash; + unsigned int hash_len; +}; + +/* + * This structure contains device metadata, and table hash for + * active and inactive tables for ima measurements. + */ +struct dm_ima_measurements { + struct dm_ima_device_table_metadata active_table; + struct dm_ima_device_table_metadata inactive_table; +}; + +void dm_ima_reset_data(struct mapped_device *md); +void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags); + +#else + +static inline void dm_ima_reset_data(struct mapped_device *md) {} +static inline void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) {} + +#endif /* CONFIG_IMA */ + +#endif /* DM_IMA_H */ diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 2209cbcd84dbf..e6e9fe74baf94 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -6,7 +6,7 @@ */ #include "dm-core.h" - +#include "dm-ima.h" #include #include #include @@ -20,6 +20,7 @@ #include #include +#include #define DM_MSG_PREFIX "ioctl" #define DM_DRIVER_EMAIL "dm-devel@redhat.com" @@ -1224,6 +1225,8 @@ static void retrieve_status(struct dm_table *table, if (param->flags & DM_STATUS_TABLE_FLAG) type = 
STATUSTYPE_TABLE; + else if (param->flags & DM_IMA_MEASUREMENT_FLAG) + type = STATUSTYPE_IMA; else type = STATUSTYPE_INFO; @@ -1425,6 +1428,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si if (r) goto err_unlock_md_type; + dm_ima_measure_on_table_load(t, STATUSTYPE_IMA); + immutable_target_type = dm_get_immutable_target_type(md); if (immutable_target_type && (immutable_target_type != dm_table_get_immutable_target_type(t)) && diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2c5f9e5852117..2e82757b4ab78 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -8,6 +8,7 @@ #include "dm-core.h" #include "dm-rq.h" #include "dm-uevent.h" +#include "dm-ima.h" #include #include @@ -2006,6 +2007,8 @@ int dm_create(int minor, struct mapped_device **result) return r; } + dm_ima_reset_data(md); + *result = md; return 0; } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 7457d49acf9a5..74486c332946e 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -31,7 +31,7 @@ enum dm_queue_mode { DM_TYPE_DAX_BIO_BASED = 3, }; -typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; +typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE, STATUSTYPE_IMA } status_type_t; union map_info { void *ptr; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index e5c6e458bdf73..c12ce30b52dfc 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -376,4 +376,10 @@ enum { */ #define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */ +/* + * If set, returns in the in buffer passed by UM, the raw table information + * that would be measured by IMA subsystem on device state change. 
+ */ +#define DM_IMA_MEASUREMENT_FLAG (1 << 19) /* In */ + #endif /* _LINUX_DM_IOCTL_H */ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 287b905090066..673833f940694 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -985,6 +985,7 @@ void ima_measure_critical_data(const char *event_label, CRITICAL_DATA, 0, event_label, hash); } +EXPORT_SYMBOL_GPL(ima_measure_critical_data); static int __init init_ima(void) { From b84d99335e3b298a874dc0ec9710eebc705e0634 Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:48:59 -0700 Subject: [PATCH 285/851] dm ima: measure data on device resume A given block device can load a table multiple times, with different input parameters, before eventually resuming it. Further, a device may be suspended and then resumed. The device may never resume after a table-load. Because of the above valid scenarios for a given device, it is important to measure and log the device resume event using IMA. Also, if the table is large, measuring it in clear-text each time the device changes state, will unnecessarily increase the size of IMA log. Since the table clear-text is already measured during table-load event, measuring the hash during resume should be sufficient to validate the table contents. Measure the device parameters, and hash of the active table, when the device is resumed. 
Signed-off-by: Tushar Sugandhi Signed-off-by: Mike Snitzer --- drivers/md/dm-ima.c | 115 ++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm-ima.h | 3 ++ drivers/md/dm-ioctl.c | 8 ++- 3 files changed, 124 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index e0caa40772279..6cd032ce67e6d 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -142,6 +142,26 @@ static void dm_ima_measure_data(const char *event_name, const void *buf, size_t memalloc_noio_restore(noio_flag); } +/* + * Internal function to allocate and copy current device capacity for IMA measurements. + */ +static int dm_ima_alloc_and_copy_capacity_str(struct mapped_device *md, char **capacity_str, + bool noio) +{ + sector_t capacity; + + capacity = get_capacity(md->disk); + + *capacity_str = dm_ima_alloc(DM_IMA_DEVICE_CAPACITY_BUF_LEN, GFP_KERNEL, noio); + if (!(*capacity_str)) + return -ENOMEM; + + scnprintf(*capacity_str, DM_IMA_DEVICE_CAPACITY_BUF_LEN, "current_device_capacity=%llu;", + capacity); + + return 0; +} + /* * Initialize/reset the dm ima related data structure variables. */ @@ -328,3 +348,98 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl kfree(target_data_buf); } +/* + * Measure IMA data on device resume.
+ */ +void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) +{ + char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL; + char active[] = "active_table_hash="; + unsigned int active_len = strlen(active), capacity_len = 0; + unsigned int l = 0; + bool noio = true; + int r; + + device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio); + if (!device_table_data) + return; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) + goto error; + + if (swap) { + if (md->ima.active_table.hash != md->ima.inactive_table.hash) + kfree(md->ima.active_table.hash); + + md->ima.active_table.hash = NULL; + md->ima.active_table.hash_len = 0; + + if (md->ima.active_table.device_metadata != + md->ima.inactive_table.device_metadata) + kfree(md->ima.active_table.device_metadata); + + md->ima.active_table.device_metadata = NULL; + md->ima.active_table.device_metadata_len = 0; + md->ima.active_table.num_targets = 0; + + if (md->ima.inactive_table.hash) { + md->ima.active_table.hash = md->ima.inactive_table.hash; + md->ima.active_table.hash_len = md->ima.inactive_table.hash_len; + md->ima.inactive_table.hash = NULL; + md->ima.inactive_table.hash_len = 0; + } + + if (md->ima.inactive_table.device_metadata) { + md->ima.active_table.device_metadata = + md->ima.inactive_table.device_metadata; + md->ima.active_table.device_metadata_len = + md->ima.inactive_table.device_metadata_len; + md->ima.active_table.num_targets = md->ima.inactive_table.num_targets; + md->ima.inactive_table.device_metadata = NULL; + md->ima.inactive_table.device_metadata_len = 0; + md->ima.inactive_table.num_targets = 0; + } + } + + if (md->ima.active_table.device_metadata) { + l = md->ima.active_table.device_metadata_len; + memcpy(device_table_data, md->ima.active_table.device_metadata, l); + } + + if (md->ima.active_table.hash) { + memcpy(device_table_data + l, active, active_len); + l += active_len; + + memcpy(device_table_data + l, 
md->ima.active_table.hash, + md->ima.active_table.hash_len); + l += md->ima.active_table.hash_len; + + memcpy(device_table_data + l, ";", 1); + l++; + } + + if (!l) { + r = dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio); + if (r) + goto error; + + scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "name=%s,uuid=%s;device_resume=no_data;", + dev_name, dev_uuid); + l += strlen(device_table_data); + + } + + capacity_len = strlen(capacity_str); + memcpy(device_table_data + l, capacity_str, capacity_len); + l += capacity_len; + + dm_ima_measure_data("device_resume", device_table_data, l, noio); + + kfree(dev_name); + kfree(dev_uuid); +error: + kfree(capacity_str); + kfree(device_table_data); +} diff --git a/drivers/md/dm-ima.h b/drivers/md/dm-ima.h index 16afd9a8c0b2a..78c36b877ccf4 100644 --- a/drivers/md/dm-ima.h +++ b/drivers/md/dm-ima.h @@ -15,6 +15,7 @@ #define DM_IMA_DEVICE_BUF_LEN 1024 #define DM_IMA_TARGET_METADATA_BUF_LEN 128 #define DM_IMA_TARGET_DATA_BUF_LEN 2048 +#define DM_IMA_DEVICE_CAPACITY_BUF_LEN 128 #ifdef CONFIG_IMA @@ -48,11 +49,13 @@ struct dm_ima_measurements { void dm_ima_reset_data(struct mapped_device *md); void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags); +void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap); #else static inline void dm_ima_reset_data(struct mapped_device *md) {} static inline void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) {} +static inline void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) {} #endif /* CONFIG_IMA */ diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e6e9fe74baf94..11af40f9b9c0b 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1160,8 +1160,12 @@ static int do_resume(struct dm_ioctl *param) if (dm_suspended_md(md)) { r = dm_resume(md); - if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) - param->flags |= DM_UEVENT_GENERATED_FLAG; + 
if (!r) { + dm_ima_measure_on_device_resume(md, new_map ? true : false); + + if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) + param->flags |= DM_UEVENT_GENERATED_FLAG; + } } /* From 5c061ced276b726b9d861eaa887915d637cee90b Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:49:00 -0700 Subject: [PATCH 286/851] dm ima: measure data on device remove Presence of an active block-device, configured with expected parameters, is important for an external attestation service to determine if a system meets the attestation requirements. Therefore it is important for DM to measure the device remove events. Measure device parameters and table hashes when the device is removed, using either remove or remove_all. Signed-off-by: Tushar Sugandhi Signed-off-by: Mike Snitzer --- drivers/md/dm-ima.c | 119 ++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm-ima.h | 2 + drivers/md/dm-ioctl.c | 3 ++ 3 files changed, 124 insertions(+) diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index 6cd032ce67e6d..09324cc48384a 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -443,3 +443,122 @@ void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) kfree(capacity_str); kfree(device_table_data); } + +/* + * Measure IMA data on remove. 
+ */ +void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) +{ + char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL; + char active_table_str[] = "active_table_hash="; + char inactive_table_str[] = "inactive_table_hash="; + char device_active_str[] = "device_active_metadata="; + char device_inactive_str[] = "device_inactive_metadata="; + char remove_all_str[] = "remove_all="; + unsigned int active_table_len = strlen(active_table_str); + unsigned int inactive_table_len = strlen(inactive_table_str); + unsigned int device_active_len = strlen(device_active_str); + unsigned int device_inactive_len = strlen(device_inactive_str); + unsigned int remove_all_len = strlen(remove_all_str); + unsigned int capacity_len = 0; + unsigned int l = 0; + bool noio = true; + int r; + + device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN*2, GFP_KERNEL, noio); + if (!device_table_data) + goto exit; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) { + kfree(device_table_data); + goto exit; + } + + if (md->ima.active_table.device_metadata) { + memcpy(device_table_data + l, device_active_str, device_active_len); + l += device_active_len; + + memcpy(device_table_data + l, md->ima.active_table.device_metadata, + md->ima.active_table.device_metadata_len); + l += md->ima.active_table.device_metadata_len; + } + + if (md->ima.inactive_table.device_metadata) { + memcpy(device_table_data + l, device_inactive_str, device_inactive_len); + l += device_inactive_len; + + memcpy(device_table_data + l, md->ima.inactive_table.device_metadata, + md->ima.inactive_table.device_metadata_len); + l += md->ima.inactive_table.device_metadata_len; + } + + if (md->ima.active_table.hash) { + memcpy(device_table_data + l, active_table_str, active_table_len); + l += active_table_len; + + memcpy(device_table_data + l, md->ima.active_table.hash, + md->ima.active_table.hash_len); + l += md->ima.active_table.hash_len; + + 
memcpy(device_table_data + l, ",", 1); + l++; + } + + if (md->ima.inactive_table.hash) { + memcpy(device_table_data + l, inactive_table_str, inactive_table_len); + l += inactive_table_len; + + memcpy(device_table_data + l, md->ima.inactive_table.hash, + md->ima.inactive_table.hash_len); + l += md->ima.inactive_table.hash_len; + + memcpy(device_table_data + l, ",", 1); + l++; + } + /* + * In case both active and inactive tables, and corresponding + * device metadata is cleared/missing - record the name and uuid + * in IMA measurements. + */ + if (!l) { + if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) + goto error; + + scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "name=%s,uuid=%s;device_remove=no_data;", + dev_name, dev_uuid); + l += strlen(device_table_data); + } + + memcpy(device_table_data + l, remove_all_str, remove_all_len); + l += remove_all_len; + memcpy(device_table_data + l, remove_all ? "y;" : "n;", 2); + l += 2; + + capacity_len = strlen(capacity_str); + memcpy(device_table_data + l, capacity_str, capacity_len); + l += capacity_len; + + dm_ima_measure_data("device_remove", device_table_data, l, noio); + +error: + kfree(device_table_data); + kfree(capacity_str); +exit: + kfree(md->ima.active_table.device_metadata); + + if (md->ima.active_table.device_metadata != + md->ima.inactive_table.device_metadata) + kfree(md->ima.inactive_table.device_metadata); + + kfree(md->ima.active_table.hash); + + if (md->ima.active_table.hash != md->ima.inactive_table.hash) + kfree(md->ima.inactive_table.hash); + + dm_ima_reset_data(md); + + kfree(dev_name); + kfree(dev_uuid); +} diff --git a/drivers/md/dm-ima.h b/drivers/md/dm-ima.h index 78c36b877ccf4..59bede2454b04 100644 --- a/drivers/md/dm-ima.h +++ b/drivers/md/dm-ima.h @@ -50,12 +50,14 @@ struct dm_ima_measurements { void dm_ima_reset_data(struct mapped_device *md); void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags); void 
dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap); +void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all); #else static inline void dm_ima_reset_data(struct mapped_device *md) {} static inline void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) {} static inline void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) {} +static inline void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) {} #endif /* CONFIG_IMA */ diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 11af40f9b9c0b..2d4475f6de7d5 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -348,6 +348,7 @@ static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool dm_sync_table(md); dm_table_destroy(t); } + dm_ima_measure_on_device_remove(md, true); dm_put(md); if (likely(keep_open_devices)) dm_destroy(md); @@ -982,6 +983,8 @@ static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_si param->flags &= ~DM_DEFERRED_REMOVE; + dm_ima_measure_on_device_remove(md, false); + if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) param->flags |= DM_UEVENT_GENERATED_FLAG; From 843de84e94940c04ac38edb708461abd0f93ab60 Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:49:01 -0700 Subject: [PATCH 287/851] dm ima: measure data on table clear For a given block device, an inactive table slot contains the parameters to configure the device with. The inactive table can be cleared multiple times, accidentally or maliciously, which may impact the functionality of the device, and compromise the system. Therefore it is important to measure and log the event when a table is cleared. Measure device parameters, and table hashes when the inactive table slot is cleared. 
Signed-off-by: Tushar Sugandhi Signed-off-by: Mike Snitzer --- drivers/md/dm-ima.c | 92 +++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm-ima.h | 2 + drivers/md/dm-ioctl.c | 3 ++ 3 files changed, 97 insertions(+) diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index 09324cc48384a..bc3485558edb1 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -562,3 +562,95 @@ void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) kfree(dev_name); kfree(dev_uuid); } + +/* + * Measure ima data on table clear. + */ +void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) +{ + unsigned int l = 0, capacity_len = 0; + char *device_table_data = NULL, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL; + char inactive_str[] = "inactive_table_hash="; + unsigned int inactive_len = strlen(inactive_str); + bool noio = true; + int r; + + device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio); + if (!device_table_data) + return; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) + goto error1; + + if (md->ima.inactive_table.device_metadata_len && + md->ima.inactive_table.hash_len) { + memcpy(device_table_data + l, md->ima.inactive_table.device_metadata, + md->ima.inactive_table.device_metadata_len); + l += md->ima.inactive_table.device_metadata_len; + + memcpy(device_table_data + l, inactive_str, inactive_len); + l += inactive_len; + + memcpy(device_table_data + l, md->ima.inactive_table.hash, + md->ima.inactive_table.hash_len); + + l += md->ima.inactive_table.hash_len; + + memcpy(device_table_data + l, ";", 1); + l++; + } + + if (!l) { + if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) + goto error2; + + scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "name=%s,uuid=%s;table_clear=no_data;", dev_name, dev_uuid); + l += strlen(device_table_data); + } + + capacity_len = strlen(capacity_str); + memcpy(device_table_data + l, capacity_str, 
capacity_len); + l += capacity_len; + + dm_ima_measure_data("table_clear", device_table_data, l, noio); + + if (new_map) { + if (md->ima.inactive_table.hash && + md->ima.inactive_table.hash != md->ima.active_table.hash) + kfree(md->ima.inactive_table.hash); + + md->ima.inactive_table.hash = NULL; + md->ima.inactive_table.hash_len = 0; + + if (md->ima.inactive_table.device_metadata && + md->ima.inactive_table.device_metadata != md->ima.active_table.device_metadata) + kfree(md->ima.inactive_table.device_metadata); + + md->ima.inactive_table.device_metadata = NULL; + md->ima.inactive_table.device_metadata_len = 0; + md->ima.inactive_table.num_targets = 0; + + if (md->ima.active_table.hash) { + md->ima.inactive_table.hash = md->ima.active_table.hash; + md->ima.inactive_table.hash_len = md->ima.active_table.hash_len; + } + + if (md->ima.active_table.device_metadata) { + md->ima.inactive_table.device_metadata = + md->ima.active_table.device_metadata; + md->ima.inactive_table.device_metadata_len = + md->ima.active_table.device_metadata_len; + md->ima.inactive_table.num_targets = + md->ima.active_table.num_targets; + } + } + + kfree(dev_name); + kfree(dev_uuid); +error2: + kfree(capacity_str); +error1: + kfree(device_table_data); +} diff --git a/drivers/md/dm-ima.h b/drivers/md/dm-ima.h index 59bede2454b04..caa5c84017b1d 100644 --- a/drivers/md/dm-ima.h +++ b/drivers/md/dm-ima.h @@ -51,6 +51,7 @@ void dm_ima_reset_data(struct mapped_device *md); void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags); void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap); void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all); +void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map); #else @@ -58,6 +59,7 @@ static inline void dm_ima_reset_data(struct mapped_device *md) {} static inline void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) {} static inline void 
dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) {} static inline void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) {} +static inline void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) {} #endif /* CONFIG_IMA */ diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 2d4475f6de7d5..b07c19037c7c0 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1508,6 +1508,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s struct hash_cell *hc; struct mapped_device *md; struct dm_table *old_map = NULL; + bool has_new_map = false; down_write(&_hash_lock); @@ -1521,6 +1522,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s if (hc->new_map) { old_map = hc->new_map; hc->new_map = NULL; + has_new_map = true; } param->flags &= ~DM_INACTIVE_PRESENT_FLAG; @@ -1532,6 +1534,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s dm_sync_table(md); dm_table_destroy(old_map); } + dm_ima_measure_on_table_clear(md, has_new_map); dm_put(md); return 0; From 7fefe259be8dd46663878010fceec8f0ceeb82a0 Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:49:02 -0700 Subject: [PATCH 288/851] dm ima: measure data on device rename A given block device is identified by its name and UUID. However, both these parameters can be renamed. For an external attestation service to correctly attest a given device, it needs to keep track of these rename events. Update the device data with the new values for IMA measurements. Measure both old and new device name/UUID parameters in the same IMA measurement event, so that the old and the new values can be connected later.
Signed-off-by: Tushar Sugandhi Signed-off-by: Mike Snitzer --- drivers/md/dm-ima.c | 48 +++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm-ima.h | 2 ++ drivers/md/dm-ioctl.c | 3 +++ 3 files changed, 53 insertions(+) diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c index bc3485558edb1..706140d4a9f7b 100644 --- a/drivers/md/dm-ima.c +++ b/drivers/md/dm-ima.c @@ -654,3 +654,51 @@ void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) error1: kfree(device_table_data); } + +/* + * Measure IMA data on device rename. + */ +void dm_ima_measure_on_device_rename(struct mapped_device *md) +{ + char *old_device_data = NULL, *new_device_data = NULL, *combined_device_data = NULL; + char *new_dev_name = NULL, *new_dev_uuid = NULL, *capacity_str = NULL; + bool noio = true; + int r; + + if (dm_ima_alloc_and_copy_device_data(md, &new_device_data, + md->ima.active_table.num_targets, noio)) + return; + + if (dm_ima_alloc_and_copy_name_uuid(md, &new_dev_name, &new_dev_uuid, noio)) + goto error; + + combined_device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN * 2, GFP_KERNEL, noio); + if (!combined_device_data) + goto error; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) + goto error; + + old_device_data = md->ima.active_table.device_metadata; + + md->ima.active_table.device_metadata = new_device_data; + md->ima.active_table.device_metadata_len = strlen(new_device_data); + + scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2, "%snew_name=%s,new_uuid=%s;%s", + old_device_data, new_dev_name, new_dev_uuid, capacity_str); + + dm_ima_measure_data("device_rename", combined_device_data, strlen(combined_device_data), + noio); + + goto exit; + +error: + kfree(new_device_data); +exit: + kfree(capacity_str); + kfree(combined_device_data); + kfree(old_device_data); + kfree(new_dev_name); + kfree(new_dev_uuid); +} diff --git a/drivers/md/dm-ima.h b/drivers/md/dm-ima.h index caa5c84017b1d..6e6f18bf05b42 100644 --- 
a/drivers/md/dm-ima.h +++ b/drivers/md/dm-ima.h @@ -52,6 +52,7 @@ void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_fl void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap); void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all); void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map); +void dm_ima_measure_on_device_rename(struct mapped_device *md); #else @@ -60,6 +61,7 @@ static inline void dm_ima_measure_on_table_load(struct dm_table *table, unsigned static inline void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) {} static inline void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) {} static inline void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) {} +static inline void dm_ima_measure_on_device_rename(struct mapped_device *md) {} #endif /* CONFIG_IMA */ diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b07c19037c7c0..e45f6c6ef84af 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -485,6 +485,9 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, param->flags |= DM_UEVENT_GENERATED_FLAG; md = hc->md; + + dm_ima_measure_on_device_rename(md); + up_write(&_hash_lock); kfree(old_name); From fdac9de80c2e66d6df999ac810382c66b0cb2830 Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:49:03 -0700 Subject: [PATCH 289/851] dm: update target status functions to support IMA measurement For device mapper targets to take advantage of IMA's measurement capabilities, the status functions for the individual targets need to be updated to handle the status_type_t case for value STATUSTYPE_IMA. Update status functions for the following target types, to log their respective attributes to be measured using IMA. 01. cache 02. crypt 03. integrity 04. linear 05. mirror 06. multipath 07. raid 08. snapshot 09. striped 10. 
verity For rest of the targets, handle the STATUSTYPE_IMA case by setting the measurement buffer to NULL. For IMA to measure the data on a given system, the IMA policy on the system needs to be updated to have the following line, and the system needs to be restarted for the measurements to take effect. /etc/ima/ima-policy measure func=CRITICAL_DATA label=device-mapper template=ima-buf The measurements will be reflected in the IMA logs, which are located at: /sys/kernel/security/integrity/ima/ascii_runtime_measurements /sys/kernel/security/integrity/ima/binary_runtime_measurements These IMA logs can later be consumed by various attestation clients running on the system, and send them to external services for attesting the system. The DM target data measured by IMA subsystem can alternatively be queried from userspace by setting DM_IMA_MEASUREMENT_FLAG with DM_TABLE_STATUS_CMD. Signed-off-by: Tushar Sugandhi Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 24 +++++++++++++ drivers/md/dm-clone-target.c | 5 +++ drivers/md/dm-crypt.c | 27 ++++++++++++++ drivers/md/dm-delay.c | 4 +++ drivers/md/dm-dust.c | 4 +++ drivers/md/dm-ebs-target.c | 3 ++ drivers/md/dm-era-target.c | 4 +++ drivers/md/dm-flakey.c | 4 +++ drivers/md/dm-integrity.c | 25 +++++++++++++ drivers/md/dm-linear.c | 10 ++++-- drivers/md/dm-log-userspace-base.c | 3 ++ drivers/md/dm-log-writes.c | 4 +++ drivers/md/dm-log.c | 10 ++++++ drivers/md/dm-mpath.c | 28 +++++++++++++++ drivers/md/dm-ps-historical-service-time.c | 3 ++ drivers/md/dm-ps-io-affinity.c | 3 ++ drivers/md/dm-ps-queue-length.c | 3 ++ drivers/md/dm-ps-round-robin.c | 4 +++ drivers/md/dm-ps-service-time.c | 3 ++ drivers/md/dm-raid.c | 38 ++++++++++++++++++++ drivers/md/dm-raid1.c | 17 +++++++++ drivers/md/dm-snap-persistent.c | 4 +++ drivers/md/dm-snap-transient.c | 4 +++ drivers/md/dm-snap.c | 13 +++++++ drivers/md/dm-stripe.c | 15 ++++++++ drivers/md/dm-switch.c | 4 +++ drivers/md/dm-thin.c | 8 +++++ drivers/md/dm-unstripe.c | 4 
+++ drivers/md/dm-verity-target.c | 41 ++++++++++++++++++++++ drivers/md/dm-writecache.c | 3 ++ drivers/md/dm-zoned-target.c | 3 ++ include/linux/device-mapper.h | 4 +++ 32 files changed, 327 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 8e4ced5a25160..bdd500447dea2 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3122,6 +3122,30 @@ static void cache_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s", cache->ctr_args[i]); if (cache->nr_ctr_args) DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); + break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + if (get_cache_mode(cache) == CM_FAIL) + DMEMIT(",metadata_mode=fail"); + else if (get_cache_mode(cache) == CM_READ_ONLY) + DMEMIT(",metadata_mode=ro"); + else + DMEMIT(",metadata_mode=rw"); + + format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); + DMEMIT(",cache_metadata_device=%s", buf); + format_dev_t(buf, cache->cache_dev->bdev->bd_dev); + DMEMIT(",cache_device=%s", buf); + format_dev_t(buf, cache->origin_dev->bdev->bd_dev); + DMEMIT(",cache_origin_device=%s", buf); + DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n'); + DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n'); + DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n'); + DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n'); + DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ? 
'n' : 'y'); + DMEMIT(";"); + break; } return; diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index a90bdf9b2ca6b..84dbe08ad2053 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -1499,6 +1499,11 @@ static void clone_status(struct dm_target *ti, status_type_t type, for (i = 0; i < clone->nr_ctr_args; i++) DMEMIT(" %s", clone->ctr_args[i]); + break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 50f4cbd600d58..80fdc42ce3c85 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3485,7 +3485,34 @@ static void crypt_status(struct dm_target *ti, status_type_t type, if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) DMEMIT(" iv_large_sectors"); } + break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n'); + DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n'); + DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ? + 'y' : 'n'); + DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ? + 'y' : 'n'); + DMEMIT(",no_write_workqueue=%c", test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags) ? + 'y' : 'n'); + DMEMIT(",iv_large_sectors=%c", test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags) ? 
+ 'y' : 'n'); + if (cc->on_disk_tag_size) + DMEMIT(",integrity_tag_size=%u,cipher_auth=%s", + cc->on_disk_tag_size, cc->cipher_auth); + if (cc->sector_size != (1 << SECTOR_SHIFT)) + DMEMIT(",sector_size=%d", cc->sector_size); + if (cc->cipher_string) + DMEMIT(",cipher_string=%s", cc->cipher_string); + + DMEMIT(",key_size=%u", cc->key_size); + DMEMIT(",key_parts=%u", cc->key_parts); + DMEMIT(",key_extra_size=%u", cc->key_extra_size); + DMEMIT(",key_mac_size=%u", cc->key_mac_size); + DMEMIT(";"); break; } } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2628a832787b0..59e51d285b0e5 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -326,6 +326,10 @@ static void delay_status(struct dm_target *ti, status_type_t type, DMEMIT_DELAY_CLASS(&dc->flush); } break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c index cbe1058ee589a..3163e2b1418e7 100644 --- a/drivers/md/dm-dust.c +++ b/drivers/md/dm-dust.c @@ -527,6 +527,10 @@ static void dust_status(struct dm_target *ti, status_type_t type, DMEMIT("%s %llu %u", dd->dev->name, (unsigned long long)dd->start, dd->blksz); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 71475a2410bea..ec8426611cc66 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -401,6 +401,9 @@ static void ebs_status(struct dm_target *ti, status_type_t type, snprintf(result, maxlen, ec->u_bs_set ? 
"%s %llu %u %u" : "%s %llu %u", ec->dev->name, (unsigned long long) ec->start, ec->e_bs, ec->u_bs); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 3b748393fca5d..2a78f68741431 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1644,6 +1644,10 @@ static void era_status(struct dm_target *ti, status_type_t type, format_dev_t(buf, era->origin_dev->bdev->bd_dev); DMEMIT("%s %u", buf, era->sectors_per_block); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 5877220c01edb..4b94ffe6f2d4f 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -440,6 +440,10 @@ static void flakey_status(struct dm_target *ti, status_type_t type, fc->corrupt_bio_value, fc->corrupt_bio_flags); break; + + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 20f2510db1f67..40f8116c8e443 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3306,6 +3306,31 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, EMIT_ALG(journal_mac_alg, "journal_mac"); break; } + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c", + ic->dev->name, ic->start, ic->tag_size, ic->mode); + + if (ic->meta_dev) + DMEMIT(",meta_device=%s", ic->meta_dev->name); + if (ic->sectors_per_block != 1) + DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT); + + DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ? + 'y' : 'n'); + DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n'); + DMEMIT(",fix_padding=%c", + ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n'); + DMEMIT(",fix_hmac=%c", + ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 
'y' : 'n'); + DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 'y' : 'n'); + + DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS); + DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors); + DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors); + DMEMIT(",mode=%c", ic->mode); + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index c91f1e2e2f656..679b4c0a2eea1 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -106,6 +106,7 @@ static void linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; + size_t sz = 0; switch (type) { case STATUSTYPE_INFO: @@ -113,8 +114,13 @@ static void linear_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s %llu", lc->dev->name, - (unsigned long long)lc->start); + DMEMIT("%s %llu", lc->dev->name, (unsigned long long)lc->start); + break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",device_name=%s,start=%llu;", lc->dev->name, + (unsigned long long)lc->start); break; } } diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 52090bee17c28..9ab93ebea8895 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -820,6 +820,9 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, DMEMIT("integrated_flush "); DMEMIT("%s ", table_args); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return (r) ? 
0 : (int)sz; } diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 57882654ffee7..d93a4db235124 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -834,6 +834,10 @@ static void log_writes_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: DMEMIT("%s %s", lc->dev->name, lc->logdev->name); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 33e71ea6cc143..1ecf75ef276a4 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -793,6 +793,11 @@ static int core_status(struct dm_dirty_log *log, status_type_t status, DMEMIT("%s %u %u ", log->type->name, lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size); DMEMIT_SYNC; + break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; @@ -817,6 +822,11 @@ static int disk_status(struct dm_dirty_log *log, status_type_t status, lc->sync == DEFAULTSYNC ? 2 : 3, lc->log_dev->name, lc->region_size); DMEMIT_SYNC; + break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index bced42f082b02..c3c514a9edbb9 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1904,6 +1904,34 @@ static void multipath_status(struct dm_target *ti, status_type_t type, } } break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + list_for_each_entry(pg, &m->priority_groups, list) { + if (pg->bypassed) + state = 'D'; /* Disabled */ + else if (pg == m->current_pg) + state = 'A'; /* Currently Active */ + else + state = 'E'; /* Enabled */ + DMEMIT(",pg_state=%c", state); + DMEMIT(",nr_pgpaths=%u", pg->nr_pgpaths); + DMEMIT(",path_selector_name=%s", pg->ps.type->name); + + list_for_each_entry(p, &pg->pgpaths, list) { + DMEMIT(",path_name=%s,is_active=%c,fail_count=%u", + p->path.dev->name, p->is_active ? 
'A' : 'F', + p->fail_count); + if (pg->ps.type->status) { + DMEMIT(",path_selector_status="); + sz += pg->ps.type->status(&pg->ps, &p->path, + type, result + sz, + maxlen - sz); + } + } + } + DMEMIT(";"); + break; } spin_unlock_irqrestore(&m->lock, flags); diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 186f91e2752c1..1856a1b125cc1 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -255,6 +255,9 @@ static int hst_status(struct path_selector *ps, struct dm_path *path, case STATUSTYPE_TABLE: DMEMIT("0 "); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c index cb8e83bfb1a7d..f74501e65a8ed 100644 --- a/drivers/md/dm-ps-io-affinity.c +++ b/drivers/md/dm-ps-io-affinity.c @@ -170,6 +170,9 @@ static int ioa_status(struct path_selector *ps, struct dm_path *path, pi = path->pscontext; DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask)); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c index 5fd018d184187..cef70657bbbc2 100644 --- a/drivers/md/dm-ps-queue-length.c +++ b/drivers/md/dm-ps-queue-length.c @@ -102,6 +102,9 @@ static int ql_status(struct path_selector *ps, struct dm_path *path, case STATUSTYPE_TABLE: DMEMIT("%u ", pi->repeat_count); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c index bdbb7e6e8212b..27f44c5fa04e8 100644 --- a/drivers/md/dm-ps-round-robin.c +++ b/drivers/md/dm-ps-round-robin.c @@ -100,6 +100,10 @@ static int rr_status(struct path_selector *ps, struct dm_path *path, pi = path->pscontext; DMEMIT("%u ", pi->repeat_count); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c index 
9cfda665e9ebd..3ec9c33265c52 100644 --- a/drivers/md/dm-ps-service-time.c +++ b/drivers/md/dm-ps-service-time.c @@ -99,6 +99,9 @@ static int st_status(struct path_selector *ps, struct dm_path *path, DMEMIT("%u %u ", pi->repeat_count, pi->relative_throughput); break; + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index bf4a467fc73a4..75829ddad60e4 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3671,6 +3671,44 @@ static void raid_status(struct dm_target *ti, status_type_t type, for (i = 0; i < rs->raid_disks; i++) DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev), __get_dev_name(rs->dev[i].data_dev)); + break; + + case STATUSTYPE_IMA: + rt = get_raid_type_by_ll(mddev->new_level, mddev->new_layout); + if (!rt) + return; + + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",raid_type=%s,raid_disks=%d", rt->name, mddev->raid_disks); + + /* Access most recent mddev properties for status output */ + smp_rmb(); + state = decipher_sync_action(mddev, recovery); + DMEMIT(",raid_state=%s", sync_str(state)); + + for (i = 0; i < rs->raid_disks; i++) { + DMEMIT(",raid_device_%d_status=", i); + DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev)); + } + + if (rt_is_raid456(rt)) { + DMEMIT(",journal_dev_mode="); + switch (rs->journal_dev.mode) { + case R5C_JOURNAL_MODE_WRITE_THROUGH: + DMEMIT("%s", + _raid456_journal_mode[R5C_JOURNAL_MODE_WRITE_THROUGH].param); + break; + case R5C_JOURNAL_MODE_WRITE_BACK: + DMEMIT("%s", + _raid456_journal_mode[R5C_JOURNAL_MODE_WRITE_BACK].param); + break; + default: + DMEMIT("invalid"); + break; + } + } + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ebb4810cc3b40..8811d484fdd14 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1435,6 +1435,23 @@ static void mirror_status(struct dm_target *ti, status_type_t type, } break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + 
DMEMIT(",nr_mirrors=%d", ms->nr_mirrors); + for (m = 0; m < ms->nr_mirrors; m++) { + DMEMIT(",mirror_device_%d=%s", m, ms->mirror[m].dev->name); + DMEMIT(",mirror_device_%d_status=%c", + m, device_status_char(&(ms->mirror[m]))); + } + + DMEMIT(",handle_errors=%c", errors_handled(ms) ? 'y' : 'n'); + DMEMIT(",keep_log=%c", keep_log(ms) ? 'y' : 'n'); + + DMEMIT(",log_type_status="); + sz += log->type->status(log, type, result+sz, maxlen-sz); + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 9ab4bf651ca93..3bb5cff5d6fc3 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -908,6 +908,10 @@ static unsigned persistent_status(struct dm_exception_store *store, case STATUSTYPE_TABLE: DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P", (unsigned long long)store->chunk_size); + break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 4d50a12cf00c6..0e0ae4c36b374 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -95,6 +95,10 @@ static unsigned transient_status(struct dm_exception_store *store, break; case STATUSTYPE_TABLE: DMEMIT(" N %llu", (unsigned long long)store->chunk_size); + break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 751ec5ea1dbb5..dcf34c6b05ad3 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2390,6 +2390,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, DMEMIT(" discard_passdown_origin"); } break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",snap_origin_name=%s", snap->origin->name); + DMEMIT(",snap_cow_name=%s", snap->cow->name); + DMEMIT(",snap_valid=%c", snap->valid ? 'y' : 'n'); + DMEMIT(",snap_merge_failed=%c", snap->merge_failed ? 
'y' : 'n'); + DMEMIT(",snapshot_overflowed=%c", snap->snapshot_overflowed ? 'y' : 'n'); + DMEMIT(";"); + break; } } @@ -2734,6 +2744,9 @@ static void origin_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: snprintf(result, maxlen, "%s", o->dev->name); break; + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index df359d33cda89..6660b6b53d5bf 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -428,6 +428,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s %llu", sc->stripe[i].dev->name, (unsigned long long)sc->stripe[i].physical_start); break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",stripes=%d,chunk_size=%llu", sc->stripes, + (unsigned long long)sc->chunk_size); + + for (i = 0; i < sc->stripes; i++) { + DMEMIT(",stripe_%d_device_name=%s", i, sc->stripe[i].dev->name); + DMEMIT(",stripe_%d_physical_start=%llu", i, + (unsigned long long)sc->stripe[i].physical_start); + DMEMIT(",stripe_%d_status=%c", i, + atomic_read(&(sc->stripe[i].error_count)) ? 
'D' : 'A'); + } + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 262e2b0fd9750..028a92ff6d576 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -504,6 +504,10 @@ static void switch_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name, (unsigned long long)sctx->path_list[path_nr].start); break; + + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 985baee3a678e..4c67b77c23c1b 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4012,6 +4012,10 @@ static void pool_status(struct dm_target *ti, status_type_t type, (unsigned long long)pt->low_water_blocks); emit_flags(&pt->requested_pf, result, sz, maxlen); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; @@ -4423,6 +4427,10 @@ static void thin_status(struct dm_target *ti, status_type_t type, if (tc->origin_dev) DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev)); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index 7357c1bd58631..fdc8921e5c19f 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -156,6 +156,10 @@ static void unstripe_status(struct dm_target *ti, status_type_t type, uc->stripes, (unsigned long long)uc->chunk_size, uc->unstripe, uc->dev->name, (unsigned long long)uc->physical_start); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index d3e76aefc1a6c..bfefa100c265c 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -772,6 +772,47 @@ static void verity_status(struct dm_target *ti, status_type_t type, DMEMIT(" " DM_VERITY_ROOT_HASH_VERIFICATION_OPT_SIG_KEY " %s", v->signature_key_desc); break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + 
DMEMIT(",hash_failed=%c", v->hash_failed ? 'C' : 'V'); + DMEMIT(",verity_version=%u", v->version); + DMEMIT(",data_device_name=%s", v->data_dev->name); + DMEMIT(",hash_device_name=%s", v->hash_dev->name); + DMEMIT(",verity_algorithm=%s", v->alg_name); + + DMEMIT(",root_digest="); + for (x = 0; x < v->digest_size; x++) + DMEMIT("%02x", v->root_digest[x]); + + DMEMIT(",salt="); + if (!v->salt_size) + DMEMIT("-"); + else + for (x = 0; x < v->salt_size; x++) + DMEMIT("%02x", v->salt[x]); + + DMEMIT(",ignore_zero_blocks=%c", v->zero_digest ? 'y' : 'n'); + DMEMIT(",check_at_most_once=%c", v->validated_blocks ? 'y' : 'n'); + + if (v->mode != DM_VERITY_MODE_EIO) { + DMEMIT(",verity_mode="); + switch (v->mode) { + case DM_VERITY_MODE_LOGGING: + DMEMIT(DM_VERITY_OPT_LOGGING); + break; + case DM_VERITY_MODE_RESTART: + DMEMIT(DM_VERITY_OPT_RESTART); + break; + case DM_VERITY_MODE_PANIC: + DMEMIT(DM_VERITY_OPT_PANIC); + break; + default: + DMEMIT("invalid"); + } + } + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 5b553df56b7a3..510f9b8417d20 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -2671,6 +2671,9 @@ static void writecache_status(struct dm_target *ti, status_type_t type, if (wc->pause_set) DMEMIT(" pause_writeback %u", wc->pause_value); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 7e88df64d197b..ae1bc48c0043d 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1119,6 +1119,9 @@ static void dmz_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s", buf); } break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 74486c332946e..a92260273eb2c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -603,6 +603,10 @@ void 
dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm); #define DMEMIT(x...) sz += ((sz >= maxlen) ? \ 0 : scnprintf(result + sz, maxlen - sz, x)) +#define DMEMIT_TARGET_NAME_VERSION(y) \ + DMEMIT("target_name=%s,target_version=%u.%u.%u", \ + (y)->name, (y)->version[0], (y)->version[1], (y)->version[2]) + /* * Definitions of return values from target end_io function. */ From e820ba87f9d15399fa565ceba4a92b902c879d29 Mon Sep 17 00:00:00 2001 From: Tushar Sugandhi Date: Mon, 12 Jul 2021 17:49:04 -0700 Subject: [PATCH 290/851] dm: add documentation for IMA measurement support To interpret various DM target measurement data in IMA logs, a separate documentation page is needed under Documentation/admin-guide/device-mapper. Add documentation to help system administrators and attestation client/server component owners to interpret the measurement data generated by various DM targets, on various device/table state changes. Signed-off-by: Tushar Sugandhi Signed-off-by: Mike Snitzer --- .../admin-guide/device-mapper/dm-ima.rst | 306 ++++++++++++++++++ .../admin-guide/device-mapper/index.rst | 1 + 2 files changed, 307 insertions(+) create mode 100644 Documentation/admin-guide/device-mapper/dm-ima.rst diff --git a/Documentation/admin-guide/device-mapper/dm-ima.rst b/Documentation/admin-guide/device-mapper/dm-ima.rst new file mode 100644 index 0000000000000..41894112ff26f --- /dev/null +++ b/Documentation/admin-guide/device-mapper/dm-ima.rst @@ -0,0 +1,306 @@ +====== +dm-ima +====== + +For a given system, various external services/infrastructure tools +(including the attestation service) interact with it - both during the +setup and during rest of the system run-time. They share sensitive data +and/or execute critical workload on that system. The external services +may want to verify the current run-time state of the relevant kernel +subsystems before fully trusting the system with business-critical +data/workload. 
+ +Device mapper plays a critical role on a given system by providing +various important functionalities to the block devices using various +target types like crypt, verity, integrity etc. Each of these target +types’ functionalities can be configured with various attributes. +The attributes chosen to configure these target types can significantly +impact the security profile of the block device, and in-turn, of the +system itself. For instance, the type of encryption algorithm and the +key size determines the strength of encryption for a given block device. + +Therefore, verifying the current state of various block devices as well +as their various target attributes is crucial for external services before +fully trusting the system with business-critical data/workload. + +IMA kernel subsystem provides the necessary functionality for +device mapper to measure the state and configuration of +various block devices - + - BY device mapper itself, from within the kernel, + - in a tamper resistant way, + - and re-measured - triggered on state/configuration change. + +Setting the IMA Policy: +======================= +For IMA to measure the data on a given system, the IMA policy on the +system needs to be updated to have following line, and the system needs +to be restarted for the measurements to take effect. + +/etc/ima/ima-policy + measure func=CRITICAL_DATA label=device-mapper template=ima-buf + +The measurements will be reflected in the IMA logs, which are located at: + +/sys/kernel/security/integrity/ima/ascii_runtime_measurements +/sys/kernel/security/integrity/ima/binary_runtime_measurements + +Then IMA ASCII measurement log has the following format: +PCR TEMPLATE_DIGEST TEMPLATE ALG:EVENT_DIGEST EVENT_NAME EVENT_DATA + +PCR := Platform Configuration Register, in which the values are registered. + This is applicable if TPM chip is in use. +TEMPLATE_DIGEST := Template digest of the IMA record. +TEMPLATE := Template that registered the integrity value (e.g. ima-buf). 
+ALG:EVENT_DIGEST := Algorithm to compute event digest, followed by digest of event data +EVENT_NAME := Description of the event (e.g. 'table_load'). +EVENT_DATA := The event data to be measured. + +The DM target data measured by IMA subsystem can alternatively +be queried from userspace by setting DM_IMA_MEASUREMENT_FLAG with +DM_TABLE_STATUS_CMD. + +Supported Device States: +======================== +Following device state changes will trigger IMA measurements. +01. Table load +02. Device resume +03. Device remove +04. Table clear +05. Device rename + +01. Table load: +--------------- +When a new table is loaded in a device's inactive table slot, +the device information and target specific details from the +targets in the table are measured. + +For instance, if a linear device is created with the following table entries, +# dmsetup create linear1 +0 2 linear /dev/loop0 512 +2 2 linear /dev/loop0 512 +4 2 linear /dev/loop0 512 +6 2 linear /dev/loop0 512 + +Then IMA ASCII measurement log will have an entry with: +EVENT_NAME := table_load +EVENT_DATA := [device_data];[target_data_row_1];[target_data_row_2];...[target_data_row_n]; + +E.g. +(converted from ASCII to text for readability) +10 a8c5ff755561c7a28146389d1514c318592af49a ima-buf sha256:4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72 +table_load +name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4; +target_index=0,target_begin=0,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; +target_index=1,target_begin=2,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; +target_index=2,target_begin=4,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; +target_index=3,target_begin=6,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; + +02.
Device resume: +------------------ +When a suspended device is resumed, the device information and a sha256 hash of the +data from previous load of an active table are measured. + +For instance, if a linear device is resumed with the following command, +#dmsetup resume linear1 + +Then IMA ASCII measurement log will have an entry with: +EVENT_NAME := device_resume +EVENT_DATA := [device_data];active_table_hash=(sha256hash([device_data];[target_data_row_1];...[target_data_row_n]); + current_device_capacity=; + +E.g. +(converted from ASCII to text for readability) +10 56c00cc062ffc24ccd9ac2d67d194af3282b934e ima-buf sha256:e7d12c03b958b4e0e53e7363a06376be88d98a1ac191fdbd3baf5e4b77f329b6 +device_resume +name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4; +active_table_hash=4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72;current_device_capacity=8; + +03. Device remove: +------------------ +When a device is removed, the device information and a sha256 hash of the +data from an active and inactive table are measured. 
+ +For instance, if a linear device is removed with the following command, +# dmsetup remove linear1 + +Then IMA ASCII measurement log will have an entry with: +EVENT_NAME := device_remove +EVENT_DATA := [device_active_metadata];[device_inactive_metadata]; + [active_table_hash=(sha256hash([device_active_metadata];[active_table_row_1];...[active_table_row_n]), + [inactive_table_hash=(sha256hash([device_inactive_metadata];[inactive_table_row_1];...[inactive_table_row_n]), + remove_all=[y|n];current_device_capacity=; + +E.g +(converted from ASCII to text for readability) +10 499812b621b705061c4514d643894483e16d2619 ima-buf sha256:c3f26b02f09bf5b464925589454bdd4d354077ce430fd1e75c9e96ce29cd1cad +device_remove +device_active_metadata=name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4; +device_inactive_metadata=name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=2; +active_table_hash=4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72, +inactive_table_hash=5596cc857b0e887fd0c5d58dc6382513284596b07f09fd37efae2da224bd521d,remove_all=n; +current_device_capacity=8; + + +04. Table clear: +---------------- +When an inactive table is cleared from the device, the device information and a sha256 hash of the +data from an inactive table are measured. + +For instance, if a linear device's inactive table is cleared with the following command, + +# dmsetup clear linear1 + +Then IMA ASCII measurement log will have an entry with: +EVENT_NAME := table_clear +EVENT_DATA := [device_data];inactive_table_hash=(sha256hash([device_data];[inactive_table_row_1];...[inactive_table_row_n]); +current_device_capacity=; + +E.g. 
+(converted from ASCII to text for readability) +10 9c11e284d792875352d51c09f6643c96649484be ima-buf sha256:84b22b364ea4d8264fa33c38635c18ef448fa9077731fa7e5f969b1da2003ea4 +table_clear +name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=2; +inactive_table_hash=5596cc857b0e887fd0c5d58dc6382513284596b07f09fd37efae2da224bd521d;current_device_capacity=0; + + +05. Device rename: +------------------ +When a device's NAME or UUID is changed, the device information and the new NAME and UUID +are measured. + +For instance, if a linear device's name is changed with the following command, + +#dmsetup rename linear1 linear=2 +Then IMA ASCII measurement log will have an entry with: +EVENT_NAME := device_rename +EVENT_DATA := [current_device_data];new_name=;new_uuid=;current_device_capacity=; + +E.g 1: +#dmsetup rename linear1 --setuuid 1234-5678 + +IMA Log entry: +(converted from ASCII to text for readability) +10 7380ef4d1349fe1ebd74affa54e9fcc960e3cbf5 ima-buf sha256:9759e36a17a967ea43c1bf3455279395a40bd0401105ec5ad8edb9a52054efc7 +device_rename +name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=1;new_name=linear1,new_uuid=1234-5678;current_device_capacity=2; + +E.g 2: +# dmsetup rename linear1 linear=2 +10 092c8266fc36e44f74c59f123ecfe15310f249f4 ima-buf sha256:4cf8b85c81fa6fedaeb602b05019124dbbb0605dce58fcdeea56887a8a3874cd +device_rename +name=linear1,uuid=1234-5678,major=253,minor=0,minor_count=1,num_targets=1;new_name=linear\=2,new_uuid=1234-5678;current_device_capacity=2; + + +Supported targets: +================== +Following targets are supported to measure their data using IMA. + +01. cache +02. crypt +03. integrity +04. linear +05. mirror +06. multipath +07. raid +08. snapshot +09. striped +10. verity + +01. cache +--------- +<> + +02. crypt +--------- +When a crypt target is loaded, then IMA ASCII measurement log will have an entry +similar to the following, depicting what crypt attributes are measured in EVENT_DATA.
+ +(converted from ASCII to text for readability) +10 fe3b80a35b155bd282df778e2625066c05fc068c ima-buf sha256:2d86ce9d6f16a4a97607318aa123ae816e0ceadefeea7903abf7f782f2cb78ad +table_load +name=test-crypt,uuid=,major=253,minor=0,minor_count=1,num_targets=1; +target_index=0,target_begin=0,target_len=1953125,target_name=crypt,target_version=1.23.0, +allow_discards=y,same_cpu_crypt=n,submit_from_crypt_cpus=n,no_read_workqueue=n,no_write_workqueue=n, +iv_large_sectors=n,cipher_string=aes-xts-plain64,key_size=32,key_parts=1,key_extra_size=0,key_mac_size=0; + +03. integrity +------------- +<> + + +04. linear +---------- +When a linear target is loaded, then IMA ASCII measurement log will have an entry +similar to the following, depicting what linear attributes are measured in EVENT_DATA. + +(converted from ASCII to text for readability) +10 a8c5ff755561c7a28146389d1514c318592af49a ima-buf sha256:4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72 +table_load +name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4; +target_index=0,target_begin=0,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; +target_index=1,target_begin=2,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; +target_index=2,target_begin=4,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; +target_index=3,target_begin=6,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; + +05. mirror +---------- +When a mirror target is loaded, then IMA ASCII measurement log will have an entry +similar to the following, depicting what mirror attributes are measured in EVENT_DATA.
+ +(converted from ASCII to text for readability) +10 90ff9113a00c367df823595dc347425ce3bfc50a ima-buf sha256:8da0678ed3bf616533573d9e61e5342f2bd16cb0b3145a08262641a743806c2e +table_load +name=test-mirror,uuid=,major=253,minor=4,minor_count=1,num_targets=1; +target_index=0,target_begin=0,target_len=1953125,target_name=mirror,target_version=1.14.0, +nr_mirrors=2,mirror_device_0=253:2,mirror_device_0_status=A,mirror_device_1=253:3,mirror_device_1_status=A, +handle_errors=y,keep_log=n,log_type_status=; + +06. multipath +------------- +<> + +07. raid +-------- +When a raid target is loaded, then IMA ASCII measurement log will have an entry +similar to the following, depicting what raid attributes are measured in EVENT_DATA. + +(converted from ASCII to text for readability) +10 76cb30d0cd0fe099966f20f5c82e3a2ac29b21a0 ima-buf sha256:52250f20b27376fcfb348bdfa1e1cf5acfd6646e0f3ad1a72952cffd9f818753 +table_load +name=test-raid1,uuid=,major=253,minor=2,minor_count=1,num_targets=1; +target_index=0,target_begin=0,target_len=1953125,target_name=raid,target_version=1.15.1, +raid_type=raid1,raid_disks=2,raid_state=idle,raid_device_0_status=A,raid_device_1_status=A; + +08. snapshot +------------ +<> + +09. striped +----------- +When a striped target is loaded, then IMA ASCII measurement log will have an entry +similar to the following, depicting what striped attributes are measured in EVENT_DATA. 
+ +(converted from ASCII to text for readability) +10 7bd94fa8f799169b9f12d97b9dbdce4dc5509233 ima-buf sha256:0d148eda69887f7833f1a6042767b54359cd23b64fa941b9e1856879eee1f778 +table_load +name=test-raid0,uuid=,major=253,minor=8,minor_count=1,num_targets=1; +target_index=0,target_begin=0,target_len=7812096,target_name=striped,target_version=1.6.0,stripes=4,chunk_size=128, +stripe_0_device_name=253:1,stripe_0_physical_start=0,stripe_0_status=A, +stripe_1_device_name=253:3,stripe_1_physical_start=0,stripe_1_status=A, +stripe_2_device_name=253:5,stripe_2_physical_start=0,stripe_2_status=A, +stripe_3_device_name=253:7,stripe_3_physical_start=0,stripe_3_status=A; + +10. verity +---------- +When a verity target is loaded, then IMA ASCII measurement log will have an entry +similar to the following, depicting what verity attributes are measured in EVENT_DATA. + +(converted from ASCII to text for readability) +10 fced5f575b140fc0efac302c88a635174cd663da ima-buf sha256:021370c1cc93929460b06922c606334fb1d7ea5ecf04f2384f3157a446894283 +table_load +name=test-verity,uuid=,major=253,minor=2,minor_count=1,num_targets=1; +target_index=0,target_begin=0,target_len=1953120,target_name=verity,target_version=1.8.0,hash_failed=V, +verity_version=1,data_device_name=253:1,hash_device_name=253:0,verity_algorithm=sha256, +root_digest=29cb87e60ce7b12b443ba6008266f3e41e93e403d7f298f8e3f316b29ff89c5e, +salt=e48da609055204e89ae53b655ca2216dd983cf3cb829f34f63a297d106d53e2d, +ignore_zero_blocks=n,check_at_most_once=n; diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst index 6cf8adc86fa80..cde52cc09645e 100644 --- a/Documentation/admin-guide/device-mapper/index.rst +++ b/Documentation/admin-guide/device-mapper/index.rst @@ -13,6 +13,7 @@ Device Mapper dm-dust dm-ebs dm-flakey + dm-ima dm-init dm-integrity dm-io From 6160d948cc6f83d429813449a032e64dfadd1d39 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 20 Jul 2021 14:54:17 -0700 
Subject: [PATCH 291/851] hardening: Clarify Kconfig text for auto-var-init Clarify the details around the automatic variable initialization modes available. Specifically this details the values used for pattern init and expands on the rationale for zero init safety. Additionally makes zero init the default when available. Cc: Gustavo A. R. Silva Cc: glider@google.com Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: linux-security-module@vger.kernel.org Cc: clang-built-linux@googlegroups.com Signed-off-by: Kees Cook --- security/Kconfig.hardening | 52 +++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 023aea5e117c2..90cbaff86e13a 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -29,6 +29,7 @@ choice prompt "Initialize kernel stack variables at function entry" default GCC_PLUGIN_STRUCTLEAK_BYREF_ALL if COMPILE_TEST && GCC_PLUGINS default INIT_STACK_ALL_PATTERN if COMPILE_TEST && CC_HAS_AUTO_VAR_INIT_PATTERN + default INIT_STACK_ALL_ZERO if CC_HAS_AUTO_VAR_INIT_PATTERN default INIT_STACK_NONE help This option enables initialization of stack variables at @@ -39,11 +40,11 @@ choice syscalls. This chooses the level of coverage over classes of potentially - uninitialized variables. The selected class will be + uninitialized variables. The selected class of variable will be initialized before use in a function. config INIT_STACK_NONE - bool "no automatic initialization (weakest)" + bool "no automatic stack variable initialization (weakest)" help Disable automatic stack variable initialization. This leaves the kernel vulnerable to the standard @@ -80,7 +81,7 @@ choice and is disallowed. 
config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL - bool "zero-init anything passed by reference (very strong)" + bool "zero-init everything passed by reference (very strong)" depends on GCC_PLUGINS depends on !(KASAN && KASAN_STACK) select GCC_PLUGIN_STRUCTLEAK @@ -91,33 +92,44 @@ choice of uninitialized stack variable exploits and information exposures. + As a side-effect, this keeps a lot of variables on the + stack that can otherwise be optimized out, so combining + this with CONFIG_KASAN_STACK can lead to a stack overflow + and is disallowed. + config INIT_STACK_ALL_PATTERN - bool "0xAA-init everything on the stack (strongest)" + bool "pattern-init everything (strongest)" depends on CC_HAS_AUTO_VAR_INIT_PATTERN help - Initializes everything on the stack with a 0xAA - pattern. This is intended to eliminate all classes - of uninitialized stack variable exploits and information - exposures, even variables that were warned to have been - left uninitialized. + Initializes everything on the stack (including padding) + with a specific debug value. This is intended to eliminate + all classes of uninitialized stack variable exploits and + information exposures, even variables that were warned about + having been left uninitialized. Pattern initialization is known to provoke many existing bugs related to uninitialized locals, e.g. pointers receive - non-NULL values, buffer sizes and indices are very big. + non-NULL values, buffer sizes and indices are very big. The + pattern is situation-specific; Clang on 64-bit uses 0xAA + repeating for all types and padding except float and double + which use 0xFF repeating (-NaN). Clang on 32-bit uses 0xFF + repeating for all types and padding. config INIT_STACK_ALL_ZERO - bool "zero-init everything on the stack (strongest and safest)" + bool "zero-init everything (strongest and safest)" depends on CC_HAS_AUTO_VAR_INIT_ZERO help - Initializes everything on the stack with a zero - value. 
This is intended to eliminate all classes - of uninitialized stack variable exploits and information - exposures, even variables that were warned to have been - left uninitialized. - - Zero initialization provides safe defaults for strings, - pointers, indices and sizes, and is therefore - more suitable as a security mitigation measure. + Initializes everything on the stack (including padding) + with a zero value. This is intended to eliminate all + classes of uninitialized stack variable exploits and + information exposures, even variables that were warned + about having been left uninitialized. + + Zero initialization provides safe defaults for strings + (immediately NUL-terminated), pointers (NULL), indices + (index 0), and sizes (0 length), so it is therefore more + suitable as a production security mitigation than pattern + initialization. endchoice From 28ca0f6a07301d279502423192b1a0553c8c8d19 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 21 Jul 2021 11:40:59 +1000 Subject: [PATCH 292/851] userfaultfd: do not untag user pointers Patch series "userfaultfd: do not untag user pointers", v5. If a user program uses userfaultfd on ranges of heap memory, it may end up passing a tagged pointer to the kernel in the range.start field of the UFFDIO_REGISTER ioctl. This can happen when using an MTE-capable allocator, or on Android if using the Tagged Pointers feature for MTE readiness [1]. When a fault subsequently occurs, the tag is stripped from the fault address returned to the application in the fault.address field of struct uffd_msg. However, from the application's perspective, the tagged address *is* the memory address, so if the application is unaware of memory tags, it may get confused by receiving an address that is, from its point of view, outside of the bounds of the allocation. We observed this behavior in the kselftest for userfaultfd [2] but other applications could have the same problem. 
Address this by not untagging pointers passed to the userfaultfd ioctls. Instead, let the system call fail. Also change the kselftest to use mmap so that it doesn't encounter this problem. [1] https://source.android.com/devices/tech/debug/tagged-pointers [2] tools/testing/selftests/vm/userfaultfd.c This patch (of 2): If a user program uses userfaultfd on ranges of heap memory, it may end up passing a tagged pointer to the kernel in the range.start field of the UFFDIO_REGISTER ioctl. This can happen when using an MTE-capable allocator, or on Android if using the Tagged Pointers feature for MTE readiness [1]. When a fault subsequently occurs, the tag is stripped from the fault address returned to the application in the fault.address field of struct uffd_msg. However, from the application's perspective, the tagged address *is* the memory address, so if the application is unaware of memory tags, it may get confused by receiving an address that is, from its point of view, outside of the bounds of the allocation. We observed this behavior in the kselftest for userfaultfd [2] but other applications could have the same problem. Address this by not untagging pointers passed to the userfaultfd ioctls. Instead, let the system call fail. This will provide an early indication of problems with tag-unaware userspace code instead of letting the code get confused later, and is consistent with how we decided to handle brk/mmap/mremap in commit dcde237319e6 ("mm: Avoid creating virtual address aliases in brk()/mmap()/mremap()"), as well as being consistent with the existing tagged address ABI documentation relating to how ioctl arguments are handled. The code change is a revert of commit 7d0325749a6c ("userfaultfd: untag user pointers") plus some fixups to some additional calls to validate_range that have appeared since then. 
[1] https://source.android.com/devices/tech/debug/tagged-pointers [2] tools/testing/selftests/vm/userfaultfd.c Link: https://lkml.kernel.org/r/20210714195437.118982-1-pcc@google.com Link: https://lkml.kernel.org/r/20210714195437.118982-2-pcc@google.com Link: https://linux-review.googlesource.com/id/I761aa9f0344454c482b83fcfcce547db0a25501b Fixes: 63f0c6037965 ("arm64: Introduce prctl() options to control the tagged user addresses ABI") Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Reviewed-by: Catalin Marinas Cc: Alistair Delva Cc: Andrea Arcangeli Cc: Dave Martin Cc: Evgenii Stepanov Cc: Lokesh Gidra Cc: Mitch Phillips Cc: Vincenzo Frascino Cc: Will Deacon Cc: William McVicker Cc: [5.4] Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/arm64/tagged-address-abi.rst | 26 +++++++++++++++------- fs/userfaultfd.c | 26 ++++++++++------------ 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index 459e6b66ff68c..0c9120ec58ae6 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -45,14 +45,24 @@ how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space management (e.g. ``mprotect()``, ``madvise()``). The use of valid - tagged pointers in this context is allowed with the exception of - ``brk()``, ``mmap()`` and the ``new_address`` argument to - ``mremap()`` as these have the potential to alias with existing - user addresses. - - NOTE: This behaviour changed in v5.6 and so some earlier kernels may - incorrectly accept valid tagged pointers for the ``brk()``, - ``mmap()`` and ``mremap()`` system calls. + tagged pointers in this context is allowed with these exceptions: + + - ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. 
+ + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. + + - The ``range.start``, ``start`` and ``dst`` arguments to the + ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from + ``userfaultfd()``, as fault addresses subsequently obtained by reading + the file descriptor will be untagged, which may otherwise confuse + tag-unaware programs. + + NOTE: This behaviour changed in v5.14 and so some earlier kernels may + incorrectly accept valid tagged pointers for this system call. 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI relaxation is disabled by default and the application thread needs to diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f6e0f0c0d0e57..5c2d806e6ae53 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, } static __always_inline int validate_range(struct mm_struct *mm, - __u64 *start, __u64 len) + __u64 start, __u64 len) { __u64 task_size = mm->task_size; - *start = untagged_addr(*start); - - if (*start & ~PAGE_MASK) + if (start & ~PAGE_MASK) return -EINVAL; if (len & ~PAGE_MASK) return -EINVAL; if (!len) return -EINVAL; - if (*start < mmap_min_addr) + if (start < mmap_min_addr) return -EINVAL; - if (*start >= task_size) + if (start >= task_size) return -EINVAL; - if (len > task_size - *start) + if (len > task_size - start) return -EINVAL; return 0; } @@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vm_flags |= VM_UFFD_MINOR; } - ret = validate_range(mm, &uffdio_register.range.start, + ret = validate_range(mm, uffdio_register.range.start, uffdio_register.range.len); if (ret) goto out; @@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) goto out; - ret = 
validate_range(mm, &uffdio_unregister.start, + ret = validate_range(mm, uffdio_unregister.start, uffdio_unregister.len); if (ret) goto out; @@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) goto out; - ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len); + ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); if (ret) goto out; @@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, sizeof(uffdio_copy)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len); + ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len); if (ret) goto out; /* @@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, sizeof(uffdio_zeropage)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_zeropage.range.start, + ret = validate_range(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); if (ret) goto out; @@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, sizeof(struct uffdio_writeprotect))) return -EFAULT; - ret = validate_range(ctx->mm, &uffdio_wp.range.start, + ret = validate_range(ctx->mm, uffdio_wp.range.start, uffdio_wp.range.len); if (ret) return ret; @@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) sizeof(uffdio_continue) - (sizeof(__s64)))) goto out; - ret = validate_range(ctx->mm, &uffdio_continue.range.start, + ret = validate_range(ctx->mm, uffdio_continue.range.start, uffdio_continue.range.len); if (ret) goto out; From c8723c6f17bfe8b6873b9b048d3a5dd02ca09601 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 21 Jul 2021 11:41:00 +1000 Subject: [PATCH 293/851] selftest: use mmap instead of posix_memalign to allocate memory This test passes pointers obtained from anon_allocate_area to the userfaultfd and mremap APIs. 
This causes a problem if the system allocator returns tagged pointers because with the tagged address ABI the kernel rejects tagged addresses passed to these APIs, which would end up causing the test to fail. To make this test compatible with such system allocators, stop using the system allocator to allocate memory in anon_allocate_area, and instead just use mmap. Link: https://lkml.kernel.org/r/20210714195437.118982-3-pcc@google.com Link: https://linux-review.googlesource.com/id/Icac91064fcd923f77a83e8e133f8631c5b8fc241 Fixes: c47174fc362a ("userfaultfd: selftest") Co-developed-by: Lokesh Gidra Signed-off-by: Lokesh Gidra Signed-off-by: Peter Collingbourne Reviewed-by: Catalin Marinas Cc: Vincenzo Frascino Cc: Dave Martin Cc: Will Deacon Cc: Andrea Arcangeli Cc: Alistair Delva Cc: William McVicker Cc: Evgenii Stepanov Cc: Mitch Phillips Cc: Andrey Konovalov Cc: [5.4] Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/userfaultfd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index e363bdaff59d4..2ea438e6b8b1f 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area) static void anon_allocate_area(void **alloc_area) { - if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) - err("posix_memalign() failed"); + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (*alloc_area == MAP_FAILED) + err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) From 2aac83e9a0deb765a735e2350f5dfa811c8787df Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Wed, 21 Jul 2021 11:41:01 +1000 Subject: [PATCH 294/851] kfence: defer kfence_test_init to ensure that kunit debugfs is created 
kfence_test_init and kunit_init both use the same level late_initcall, which means if kfence_test_init is linked ahead of kunit_init, kfence_test_init will get a NULL debugfs_rootdir as parent dentry, then kfence_test_init and kfence_debugfs_init both create a debugfs node named "kfence" under debugfs_mount->mnt_root, and it will throw out "debugfs: Directory 'kfence' with parent '/' already present!" with EEXIST. So kfence_test_init should be deferred. Link: https://lkml.kernel.org/r/20210714113140.2949995-1-o451686892@gmail.com Signed-off-by: Weizhao Ouyang Tested-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/kfence_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 7f24b9bcb2ec5..942cbc16ad26b 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -852,7 +852,7 @@ static void kfence_test_exit(void) tracepoint_synchronize_unregister(); } -late_initcall(kfence_test_init); +late_initcall_sync(kfence_test_init); module_exit(kfence_test_exit); MODULE_LICENSE("GPL v2"); From 93b3caca7d683951510d7650ceb3792624978144 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 21 Jul 2021 11:41:01 +1000 Subject: [PATCH 295/851] kfence: move the size check to the beginning of __kfence_alloc() Check the allocation size before toggling kfence_allocation_gate. This way allocations that can't be served by KFENCE will not result in waiting for another CONFIG_KFENCE_SAMPLE_INTERVAL without allocating anything. 
Link: https://lkml.kernel.org/r/20210714092222.1890268-1-glider@google.com Signed-off-by: Alexander Potapenko Suggested-by: Marco Elver Reviewed-by: Marco Elver Cc: Dmitry Vyukov Cc: Marco Elver Cc: Greg Kroah-Hartman Cc: [5.12+] Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index d7666ace9d2e4..2623ff401a104 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -733,6 +733,13 @@ void kfence_shutdown_cache(struct kmem_cache *s) void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { + /* + * Perform size check before switching kfence_allocation_gate, so that + * we don't disable KFENCE without making an allocation. + */ + if (size > PAGE_SIZE) + return NULL; + /* * allocation_gate only needs to become non-zero, so it doesn't make * sense to continue writing to it and pay the associated contention @@ -757,9 +764,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (!READ_ONCE(kfence_enabled)) return NULL; - if (size > PAGE_SIZE) - return NULL; - return kfence_guarded_alloc(s, size, flags); } From caf1a449d226576b2773e0817ef87fb7a12d66cb Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 21 Jul 2021 11:41:02 +1000 Subject: [PATCH 296/851] kfence: skip all GFP_ZONEMASK allocations Allocation requests outside ZONE_NORMAL (MOVABLE, HIGHMEM or DMA) cannot be fulfilled by KFENCE, because KFENCE memory pool is located in a zone different from the requested one. Because callers of kmem_cache_alloc() may actually rely on the allocation to reside in the requested zone (e.g. memory allocations done with __GFP_DMA must be DMAable), skip all allocations done with GFP_ZONEMASK and/or respective SLAB flags (SLAB_CACHE_DMA and SLAB_CACHE_DMA32). 
Link: https://lkml.kernel.org/r/20210714092222.1890268-2-glider@google.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Alexander Potapenko Reviewed-by: Marco Elver Acked-by: Souptick Joarder Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Souptick Joarder Cc: [5.12+] Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 2623ff401a104..575c685aa6422 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -740,6 +740,15 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (size > PAGE_SIZE) return NULL; + /* + * Skip allocations from non-default zones, including DMA. We cannot + * guarantee that pages in the KFENCE pool will have the requested + * properties (e.g. reside in DMAable memory). + */ + if ((flags & GFP_ZONEMASK) || + (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) + return NULL; + /* * allocation_gate only needs to become non-zero, so it doesn't make * sense to continue writing to it and pay the associated contention From d77c489a114e6ee3d1f6d631132a66243fc4deae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 11:41:03 +1000 Subject: [PATCH 297/851] mm: call flush_dcache_page() in memcpy_to_page() and memzero_page() memcpy_to_page and memzero_page can write to arbitrary pages, which could be in the page cache or in high memory, so call flush_dcache_page to flush the dcache. This is a problem when using these helpers on dcache challenged architectures. Right now there are just a few users, chances are no one used the PC floppy driver, the aha1542 driver for an ISA SCSI HBA, and a few advanced and optional btrfs and ext4 features on those platforms yet since the conversion. 
Link: https://lkml.kernel.org/r/20210713055231.137602-2-hch@lst.de Fixes: bb90d4bc7b6a ("mm/highmem: Lift memcpy_[to|from]_page to core") Fixes: 28961998f858 ("iov_iter: lift memzero_page() to highmem.h") Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Cc: Chaitanya Kulkarni Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/highmem.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8c6e8e996c875..8e7e50a53a129 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -318,6 +318,7 @@ static inline void memcpy_to_page(struct page *page, size_t offset, VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); + flush_dcache_page(page); kunmap_local(to); } @@ -325,6 +326,7 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_atomic(page); memset(addr + offset, 0, len); + flush_dcache_page(page); kunmap_atomic(addr); } From dfdd50b7eb0bb2d17ce696355eaf9166ee0d7f5a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 11:41:03 +1000 Subject: [PATCH 298/851] mm: use kmap_local_page in memzero_page The commit message introducing the global memzero_page explicitly mentions switching to kmap_local_page in the commit log but doesn't actually do that. 
Link: https://lkml.kernel.org/r/20210713055231.137602-3-hch@lst.de Fixes: 28961998f858 ("iov_iter: lift memzero_page() to highmem.h") Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ira Weiny Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/highmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8e7e50a53a129..d9a606a9fc64a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -324,10 +324,10 @@ static inline void memcpy_to_page(struct page *page, size_t offset, static inline void memzero_page(struct page *page, size_t offset, size_t len) { - char *addr = kmap_atomic(page); + char *addr = kmap_local_page(page); memset(addr + offset, 0, len); flush_dcache_page(page); - kunmap_atomic(addr); + kunmap_local(addr); } #endif /* _LINUX_HIGHMEM_H */ From 30f0a9076e8dd69909deeaec1aa9435f5bbaf460 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Wed, 21 Jul 2021 11:41:04 +1000 Subject: [PATCH 299/851] mm: page_alloc: fix page_poison=1 / INIT_ON_ALLOC_DEFAULT_ON interaction To reproduce the failure we need the following system: - kernel command: page_poison=1 init_on_free=0 init_on_alloc=0 - kernel config: * CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y * CONFIG_INIT_ON_FREE_DEFAULT_ON=y * CONFIG_PAGE_POISONING=y 0000000085629bdd: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 0000000022861832: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000000c597f5b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
CPU: 11 PID: 15195 Comm: bash Kdump: loaded Tainted: G U O 5.13.1-gentoo-x86_64 #1 Hardware name: System manufacturer System Product Name/PRIME Z370-A, BIOS 2801 01/13/2021 Call Trace: dump_stack+0x64/0x7c __kernel_unpoison_pages.cold+0x48/0x84 post_alloc_hook+0x60/0xa0 get_page_from_freelist+0xdb8/0x1000 __alloc_pages+0x163/0x2b0 __get_free_pages+0xc/0x30 pgd_alloc+0x2e/0x1a0 ? dup_mm+0x37/0x4f0 mm_init+0x185/0x270 dup_mm+0x6b/0x4f0 ? __lock_task_sighand+0x35/0x70 copy_process+0x190d/0x1b10 kernel_clone+0xba/0x3b0 __do_sys_clone+0x8f/0xb0 do_syscall_64+0x68/0x80 ? do_syscall_64+0x11/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Before the 51cba1eb ("init_on_alloc: Optimize static branches") init_on_alloc never enabled static branch by default. It could only be enabled explicitly by init_mem_debugging_and_hardening(). But after the 51cba1eb static branch could already be enabled by default. There was no code to ever disable it. That caused page_poison=1 / init_on_free=1 conflict. This change extends init_mem_debugging_and_hardening() to also disable the static branches when needed. 
Link: https://lkml.kernel.org/r/20210714031935.4094114-1-keescook@chromium.org Link: https://lore.kernel.org/r/20210712215816.1512739-1-slyfox@gentoo.org Fixes: 51cba1ebc60d ("init_on_alloc: Optimize static branches") Signed-off-by: Sergei Trofimovich Signed-off-by: Kees Cook Co-developed-by: Kees Cook Reported-by: Mikhail Morfikov Reported-by: Tested-by: Reviewed-by: David Hildenbrand Cc: Alexander Potapenko Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e97e68aef7a8..856b175c15a4f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -840,21 +840,24 @@ void init_mem_debugging_and_hardening(void) } #endif - if (_init_on_alloc_enabled_early) { - if (page_poisoning_requested) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_alloc\n"); - else - static_branch_enable(&init_on_alloc); - } - if (_init_on_free_enabled_early) { - if (page_poisoning_requested) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_free\n"); - else - static_branch_enable(&init_on_free); + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) && + page_poisoning_requested) { + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " + "will take precedence over init_on_alloc and init_on_free\n"); + _init_on_alloc_enabled_early = false; + _init_on_free_enabled_early = false; } + if (_init_on_alloc_enabled_early) + static_branch_enable(&init_on_alloc); + else + static_branch_disable(&init_on_alloc); + + if (_init_on_free_enabled_early) + static_branch_enable(&init_on_free); + else + static_branch_disable(&init_on_free); + #ifdef CONFIG_DEBUG_PAGEALLOC if (!debug_pagealloc_enabled()) return; From e75b8b41cf3f3c25b8f4b7b176377cd3aa000eb4 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: 
Wed, 21 Jul 2021 11:41:04 +1000 Subject: [PATCH 300/851] memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions Commit b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") didn't take into account that when there is a movable_node parameter in the kernel command line, for_each_mem_range() would skip ranges marked with MEMBLOCK_HOTPLUG. The page table setup code in POWER uses for_each_mem_range() to create the linear mapping of the physical memory and since the regions marked as MEMBLOCK_HOTPLUG are skipped, they never make it to the linear map. A later access to the memory in those ranges will fail: [ 2.271743] BUG: Unable to handle kernel data access on write at 0xc000000400000000 [ 2.271984] Faulting instruction address: 0xc00000000008a3c0 [ 2.272568] Oops: Kernel access of bad area, sig: 11 [#1] [ 2.272683] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries [ 2.273063] Modules linked in: [ 2.273435] CPU: 0 PID: 53 Comm: kworker/u2:0 Not tainted 5.13.0 #7 [ 2.273832] NIP: c00000000008a3c0 LR: c0000000003c1ed8 CTR: 0000000000000040 [ 2.273918] REGS: c000000008a57770 TRAP: 0300 Not tainted (5.13.0) [ 2.274036] MSR: 8000000002009033 CR: 84222202 XER: 20040000 [ 2.274454] CFAR: c0000000003c1ed4 DAR: c000000400000000 DSISR: 42000000 IRQMASK: 0 [ 2.274454] GPR00: c0000000003c1ed8 c000000008a57a10 c0000000019da700 c000000400000000 [ 2.274454] GPR04: 0000000000000280 0000000000000180 0000000000000400 0000000000000200 [ 2.274454] GPR08: 0000000000000100 0000000000000080 0000000000000040 0000000000000300 [ 2.274454] GPR12: 0000000000000380 c000000001bc0000 c0000000001660c8 c000000006337e00 [ 2.274454] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 2.274454] GPR20: 0000000040000000 0000000020000000 c000000001a81990 c000000008c30000 [ 2.274454] GPR24: c000000008c20000 c000000001a81998 000fffffffff0000 c000000001a819a0 [ 2.274454] GPR28: c000000001a81908 c00c000001000000 c000000008c40000 
c000000008a64680 [ 2.275520] NIP [c00000000008a3c0] clear_user_page+0x50/0x80 [ 2.276333] LR [c0000000003c1ed8] __handle_mm_fault+0xc88/0x1910 [ 2.276688] Call Trace: [ 2.276839] [c000000008a57a10] [c0000000003c1e94] __handle_mm_fault+0xc44/0x1910 (unreliable) [ 2.277142] [c000000008a57af0] [c0000000003c2c90] handle_mm_fault+0x130/0x2a0 [ 2.277331] [c000000008a57b40] [c0000000003b5f08] __get_user_pages+0x248/0x610 [ 2.277541] [c000000008a57c40] [c0000000003b848c] __get_user_pages_remote+0x12c/0x3e0 [ 2.277768] [c000000008a57cd0] [c000000000473f24] get_arg_page+0x54/0xf0 [ 2.277959] [c000000008a57d10] [c000000000474a7c] copy_string_kernel+0x11c/0x210 [ 2.278159] [c000000008a57d80] [c00000000047663c] kernel_execve+0x16c/0x220 [ 2.278361] [c000000008a57dd0] [c000000000166270] call_usermodehelper_exec_async+0x1b0/0x2f0 [ 2.278543] [c000000008a57e10] [c00000000000d5ec] ret_from_kernel_thread+0x5c/0x70 [ 2.278870] Instruction dump: [ 2.279214] 79280fa4 79271764 79261f24 794ae8e2 7ca94214 7d683a14 7c893a14 7d893050 [ 2.279416] 7d4903a6 60000000 60000000 60000000 <7c001fec> 7c091fec 7c081fec 7c051fec [ 2.280193] ---[ end trace 490b8c67e6075e09 ]--- Making for_each_mem_range() include MEMBLOCK_HOTPLUG regions in the traversal fixes this issue. 
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1976100 Link: https://lkml.kernel.org/r/20210712071132.20902-1-rppt@kernel.org Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Signed-off-by: Mike Rapoport Tested-by: Greg Kurz Reviewed-by: David Hildenbrand Cc: [5.10+] Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memblock.h | 4 ++-- mm/memblock.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cbf46f56d1053..4a53c3ca86bdc 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range(i, p_start, p_end) \ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_mem_range_rev - reverse iterate through memblock areas from @@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range_rev(i, p_start, p_end) \ __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_reserved_mem_range - iterate over all reserved memblock areas diff --git a/mm/memblock.c b/mm/memblock.c index 0041ff62c584e..de7b553baa500 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type, return true; /* skip hotpluggable memory regions if needed */ - if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) + if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && + !(flags & MEMBLOCK_HOTPLUG)) return true; /* if we want mirror memory skip non-mirror memory regions */ From 8db71ecdeac16d62c9bcd79e79179672bca7b01f Mon Sep 17 00:00:00 2001 From: Roman Gushchin 
Date: Wed, 21 Jul 2021 11:41:05 +1000 Subject: [PATCH 301/851] writeback, cgroup: remove wb from offline list before releasing refcnt Boyang reported that the commit c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") causes the kernel to crash while running xfstests generic/256 on ext4 on aarch64 and ppc64le. [ 4366.380974] run fstests generic/256 at 2021-07-12 05:41:40 [ 4368.337078] EXT4-fs (vda3): mounted filesystem with ordered data mode. Opts: . Quota mode: none. [ 4371.275986] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 4371.278210] Mem abort info: [ 4371.278880] ESR = 0x96000005 [ 4371.279603] EC = 0x25: DABT (current EL), IL = 32 bits [ 4371.280878] SET = 0, FnV = 0 [ 4371.281621] EA = 0, S1PTW = 0 [ 4371.282396] FSC = 0x05: level 1 translation fault [ 4371.283635] Data abort info: [ 4371.284333] ISV = 0, ISS = 0x00000005 [ 4371.285246] CM = 0, WnR = 0 [ 4371.285975] user pgtable: 64k pages, 48-bit VAs, pgdp=00000000b0502000 [ 4371.287640] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 4371.290016] Internal error: Oops: 96000005 [#1] SMP [ 4371.291251] Modules linked in: dm_flakey dm_snapshot dm_bufio dm_zero dm_mod loop tls rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill sunrpc ext4 vfat fat mbcache jbd2 drm fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce virtio_blk virtio_net net_failover virtio_console failover virtio_mmio aes_neon_bs [last unloaded: scsi_debug] [ 4371.300059] CPU: 0 PID: 408468 Comm: kworker/u8:5 Tainted: G X --------- --- 5.14.0-0.rc1.15.bx.el9.aarch64 #1 [ 4371.303009] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 4371.304685] Workqueue: events_unbound cleanup_offline_cgwbs_workfn [ 4371.306329] pstate: 004000c5 (nzcv daIF +PAN -UAO -TCO BTYPE=--) [ 4371.307867] pc : cleanup_offline_cgwbs_workfn+0x320/0x394 [ 4371.309254] lr : 
cleanup_offline_cgwbs_workfn+0xe0/0x394 [ 4371.310597] sp : ffff80001554fd10 [ 4371.311443] x29: ffff80001554fd10 x28: 0000000000000000 x27: 0000000000000001 [ 4371.313320] x26: 0000000000000000 x25: 00000000000000e0 x24: ffffd2a2fbe671a8 [ 4371.315159] x23: ffff80001554fd88 x22: ffffd2a2fbe67198 x21: ffffd2a2fc25a730 [ 4371.316945] x20: ffff210412bc3000 x19: ffff210412bc3280 x18: 0000000000000000 [ 4371.318690] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 4371.320437] x14: 0000000000000000 x13: 0000000000000030 x12: 0000000000000040 [ 4371.322444] x11: ffff210481572238 x10: ffff21048157223a x9 : ffffd2a2fa276c60 [ 4371.324243] x8 : ffff210484106b60 x7 : 0000000000000000 x6 : 000000000007d18a [ 4371.326049] x5 : ffff210416a86400 x4 : ffff210412bc0280 x3 : 0000000000000000 [ 4371.327898] x2 : ffff80001554fd88 x1 : ffff210412bc0280 x0 : 0000000000000003 [ 4371.329748] Call trace: [ 4371.330372] cleanup_offline_cgwbs_workfn+0x320/0x394 [ 4371.331694] process_one_work+0x1f4/0x4b0 [ 4371.332767] worker_thread+0x184/0x540 [ 4371.333732] kthread+0x114/0x120 [ 4371.334535] ret_from_fork+0x10/0x18 [ 4371.335440] Code: d63f0020 97f99963 17ffffa6 f8588263 (f9400061) [ 4371.337174] ---[ end trace e250fe289272792a ]--- [ 4371.338365] Kernel panic - not syncing: Oops: Fatal exception [ 4371.339884] SMP: stopping secondary CPUs [ 4372.424137] SMP: failed to stop secondary CPUs 0-2 [ 4372.436894] Kernel Offset: 0x52a2e9fa0000 from 0xffff800010000000 [ 4372.438408] PHYS_OFFSET: 0xfff0defca0000000 [ 4372.439496] CPU features: 0x00200251,23200840 [ 4372.440603] Memory Limit: none [ 4372.441374] ---[ end Kernel panic - not syncing: Oops: Fatal exception ]--- The problem happens when cgwb_release_workfn() races with cleanup_offline_cgwbs_workfn(): wb_tryget() in cleanup_offline_cgwbs_workfn() can be called after percpu_ref_exit() in cgwb_release_workfn(), which is basically a use-after-free error.
Fix the problem by removing the writeback structure from the offline list before releasing the percpu reference counter. This guarantees that cleanup_offline_cgwbs_workfn() will neither see nor access writeback structures which are about to be released. Link: https://lkml.kernel.org/r/20210716201039.3762203-1-guro@fb.com Fixes: c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") Signed-off-by: Roman Gushchin Reported-by: Boyang Xue Suggested-by: Jan Kara Tested-by: Darrick J. Wong Cc: Will Deacon Cc: Dave Chinner Cc: Murphy Zhou Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/backing-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 271f2ca862c82..f5561ea7d90ad 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work) blkcg_unpin_online(blkcg); fprop_local_destroy_percpu(&wb->memcg_completions); - percpu_ref_exit(&wb->refcnt); spin_lock_irq(&cgwb_lock); list_del(&wb->offline_node); spin_unlock_irq(&cgwb_lock); + percpu_ref_exit(&wb->refcnt); wb_exit(wb); WARN_ON_ONCE(!list_empty(&wb->b_attached)); kfree_rcu(wb, rcu); From 45d78575d78af151f0ceabb340bf48fb93e3efbb Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 21 Jul 2021 11:41:05 +1000 Subject: [PATCH 302/851] writeback, cgroup: do not reparent dax inodes The inode switching code is not suited for dax inodes. An attempt to switch a dax inode to a parent writeback structure (as a part of a writeback cleanup procedure) results in a panic like this: [ 987.071651] run fstests generic/270 at 2021-07-15 05:54:02 [ 988.704940] XFS (pmem0p2): EXPERIMENTAL big timestamp feature in use. Use at your own risk! [ 988.746847] XFS (pmem0p2): DAX enabled. Warning: EXPERIMENTAL, use at your own risk [ 988.786070] XFS (pmem0p2): EXPERIMENTAL inode btree counters feature in use. Use at your own risk!
[ 988.828639] XFS (pmem0p2): Mounting V5 Filesystem [ 988.854019] XFS (pmem0p2): Ending clean mount [ 988.874550] XFS (pmem0p2): Quotacheck needed: Please wait. [ 988.900618] XFS (pmem0p2): Quotacheck: Done. [ 989.090783] XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) [ 989.092751] XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) [ 989.092962] XFS (pmem0p2): xlog_verify_grant_tail: space > BBTOB(tail_blocks) [ 1010.105586] BUG: unable to handle page fault for address: 0000000005b0f669 [ 1010.141817] #PF: supervisor read access in kernel mode [ 1010.167824] #PF: error_code(0x0000) - not-present page [ 1010.191499] PGD 0 P4D 0 [ 1010.203346] Oops: 0000 [#1] SMP PTI [ 1010.219596] CPU: 13 PID: 10479 Comm: kworker/13:16 Not tainted 5.14.0-rc1-master-8096acd7442e+ #8 [ 1010.260441] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 09/13/2016 [ 1010.297792] Workqueue: inode_switch_wbs inode_switch_wbs_work_fn [ 1010.324832] RIP: 0010:inode_do_switch_wbs+0xaf/0x470 [ 1010.347261] Code: 00 30 0f 85 c1 03 00 00 0f 1f 44 00 00 31 d2 48 c7 c6 ff ff ff ff 48 8d 7c 24 08 e8 eb 49 1a 00 48 85 c0 74 4a bb ff ff ff ff <48> 8b 50 08 48 8d 4a ff 83 e2 01 48 0f 45 c1 48 8b 00 a8 08 0f 85 [ 1010.434307] RSP: 0018:ffff9c66691abdc8 EFLAGS: 00010002 [ 1010.457795] RAX: 0000000005b0f661 RBX: 00000000ffffffff RCX: ffff89e6a21382b0 [ 1010.489922] RDX: 0000000000000001 RSI: ffff89e350230248 RDI: ffffffffffffffff [ 1010.522085] RBP: ffff89e681d19400 R08: 0000000000000000 R09: 0000000000000228 [ 1010.554234] R10: ffffffffffffffff R11: ffffffffffffffc0 R12: ffff89e6a2138130 [ 1010.586414] R13: ffff89e316af7400 R14: ffff89e316af6e78 R15: ffff89e6a21382b0 [ 1010.619394] FS: 0000000000000000(0000) GS:ffff89ee5fb40000(0000) knlGS:0000000000000000 [ 1010.658874] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1010.688085] CR2: 0000000005b0f669 CR3: 0000000cb2410004 CR4: 00000000001706e0 [ 1010.722129] Call Trace: [ 1010.733132] 
inode_switch_wbs_work_fn+0xb6/0x2a0 [ 1010.754121] process_one_work+0x1e6/0x380 [ 1010.772512] worker_thread+0x53/0x3d0 [ 1010.789221] ? process_one_work+0x380/0x380 [ 1010.807964] kthread+0x10f/0x130 [ 1010.822043] ? set_kthread_struct+0x40/0x40 [ 1010.840818] ret_from_fork+0x22/0x30 [ 1010.856851] Modules linked in: xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter nf_tables nfnetlink bridge stp llc rfkill sunrpc intel_rapl_msr intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm mgag200 i2c_algo_bit iTCO_wdt irqbypass drm_kms_helper iTCO_vendor_support acpi_ipmi rapl syscopyarea sysfillrect intel_cstate ipmi_si sysimgblt ioatdma dax_pmem_compat fb_sys_fops ipmi_devintf device_dax i2c_i801 pcspkr intel_uncore hpilo nd_pmem cec dax_pmem_core dca i2c_smbus acpi_tad lpc_ich ipmi_msghandler acpi_power_meter drm fuse xfs libcrc32c sd_mod t10_pi crct10dif_pclmul crc32_pclmul crc32c_intel tg3 ghash_clmulni_intel serio_raw hpsa hpwdt scsi_transport_sas wmi dm_mirror dm_region_hash dm_log dm_mod [ 1011.200864] CR2: 0000000005b0f669 [ 1011.215700] ---[ end trace ed2105faff8384f3 ]--- [ 1011.241727] RIP: 0010:inode_do_switch_wbs+0xaf/0x470 [ 1011.264306] Code: 00 30 0f 85 c1 03 00 00 0f 1f 44 00 00 31 d2 48 c7 c6 ff ff ff ff 48 8d 7c 24 08 e8 eb 49 1a 00 48 85 c0 74 4a bb ff ff ff ff <48> 8b 50 08 48 8d 4a ff 83 e2 01 48 0f 45 c1 48 8b 00 a8 08 0f 85 [ 1011.348821] RSP: 0018:ffff9c66691abdc8 EFLAGS: 00010002 [ 1011.372734] RAX: 0000000005b0f661 RBX: 00000000ffffffff RCX: ffff89e6a21382b0 [ 1011.405826] RDX: 0000000000000001 RSI: ffff89e350230248 RDI: ffffffffffffffff [ 1011.437852] RBP: ffff89e681d19400 R08: 0000000000000000 R09: 0000000000000228 [ 1011.469926] R10: ffffffffffffffff R11: ffffffffffffffc0 R12: ffff89e6a2138130 [ 1011.502179] R13: ffff89e316af7400 R14: ffff89e316af6e78 R15: ffff89e6a21382b0 [ 1011.534233] FS: 
0000000000000000(0000) GS:ffff89ee5fb40000(0000) knlGS:0000000000000000 [ 1011.571247] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1011.597063] CR2: 0000000005b0f669 CR3: 0000000cb2410004 CR4: 00000000001706e0 [ 1011.629160] Kernel panic - not syncing: Fatal exception [ 1011.653802] Kernel Offset: 0x15200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 1011.713723] ---[ end Kernel panic - not syncing: Fatal exception ]--- The crash happens on an attempt to iterate over attached pagecache pages and check the dirty flag: a dax inode's xarray contains pfns instead of generic struct page pointers. This happens for DAX and not for other kinds of non-page entries in the inodes because it's a tagged iteration, and shadow/swap entries are never tagged; only DAX entries get tagged. Fix the problem by bailing out (with the false return value) of inode_prepare_wbs_switch() if a dax inode is passed. [willy@infradead.org: changelog addition] Link: https://lkml.kernel.org/r/20210719171350.3876830-1-guro@fb.com Fixes: c22d70a162d3 ("writeback, cgroup: release dying cgwbs by switching attached inodes") Signed-off-by: Roman Gushchin Reported-by: Murphy Zhou Reported-by: Darrick J. Wong Tested-by: Darrick J.
Wong Tested-by: Murphy Zhou Acked-by: Matthew Wilcox (Oracle) Cc: Jan Kara Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/fs-writeback.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 06d04a74ab6c7..4c33705489825 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode, */ smp_mb(); + if (IS_DAX(inode)) + return false; + /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); if (!(inode->i_sb->s_flags & SB_ACTIVE) || From ec74e4327618f64411f46bcc7f7b54b7a641df94 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Jul 2021 11:41:06 +1000 Subject: [PATCH 303/851] mm/secretmem: wire up ->set_page_dirty Make secretmem up to date with the changes done in commit 0af573780b0b ("mm: require ->set_page_dirty to be explicitly wired up") so that unconditional call to this method won't cause crashes. 
Link: https://lkml.kernel.org/r/20210716063933.31633-1-rppt@kernel.org Fixes: 0af573780b0b ("mm: require ->set_page_dirty to be explicitly wired up") Signed-off-by: Mike Rapoport Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/secretmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/secretmem.c b/mm/secretmem.c index f77d25467a14a..030f02ddc7c1d 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page) } const struct address_space_operations secretmem_aops = { + .set_page_dirty = __set_page_dirty_no_writeback, .freepage = secretmem_freepage, .migratepage = secretmem_migratepage, .isolate_page = secretmem_isolate_page, From f59897409f3e20d78a05ebf1724be5d40bf310cf Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Wed, 21 Jul 2021 11:41:06 +1000 Subject: [PATCH 304/851] mm: mmap_lock: fix disabling preemption directly Commit 832b50725373 ("mm: mmap_lock: use local locks instead of disabling preemption") fixed a bug by using local locks. But commit d01079f3d0c0 ("mm/mmap_lock: remove dead code for !CONFIG_TRACING configurations") changed those lines back to the original version. I guess this was introduced while fixing conflicts during merging. Link: https://lkml.kernel.org/r/20210720074228.76342-1-songmuchun@bytedance.com Fixes: d01079f3d0c0 ("mm/mmap_lock: remove dead code for !CONFIG_TRACING configurations") Signed-off-by: Muchun Song Acked-by: Mel Gorman Reviewed-by: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mmap_lock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index f5852a058ce0b..1854850b4b897 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void) #define TRACE_MMAP_LOCK_EVENT(type, mm, ...)
\ do { \ const char *memcg_path; \ - preempt_disable(); \ + local_lock(&memcg_paths.lock); \ memcg_path = get_mm_memcg_path(mm); \ trace_mmap_lock_##type(mm, \ memcg_path != NULL ? memcg_path : "", \ ##__VA_ARGS__); \ if (likely(memcg_path != NULL)) \ put_memcg_path_buf(); \ - preempt_enable(); \ + local_unlock(&memcg_paths.lock); \ } while (0) #else /* !CONFIG_MEMCG */ From e63c8bb994e2b871de7aaa5b0c9a58517683f35d Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 21 Jul 2021 11:41:07 +1000 Subject: [PATCH 305/851] procfs: prevent unprivileged processes accessing fdinfo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file permissions on the fdinfo dir were changed from S_IRUSR|S_IXUSR to S_IRUGO|S_IXUGO, and a PTRACE_MODE_READ check was added for opening the fdinfo files [1]. However, the ptrace permission check was not added to the directory, allowing anyone to get the open FD numbers by reading the fdinfo directory. Add the missing ptrace permission check for opening the fdinfo directory. The check is also added for readdir and lseek in the case that an unprivileged process inherits an open FD to the fdinfo dir after an exec. For the same reason, similar checks are added for fdinfo files which previously only checked the ptrace permission in open. [1] https://lkml.kernel.org/r/20210308170651.919148-1-kaleshsingh@google.com Link: https://lkml.kernel.org/r/20210708155647.44208-1-kaleshsingh@google.com Fixes: 7bc3fa0172a4 ("procfs: allow reading fdinfo with PTRACE_MODE_READ") Signed-off-by: Kalesh Singh Cc: Kees Cook Cc: Eric W.
Biederman Cc: Christian Brauner Cc: Christian König Cc: Suren Baghdasaryan Cc: Hridya Valsaraju Cc: Jann Horn Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/fd.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 172c86270b312..aea59e243bae1 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -72,7 +72,7 @@ static int seq_show(struct seq_file *m, void *v) return 0; } -static int seq_fdinfo_open(struct inode *inode, struct file *file) +static int proc_fdinfo_access_allowed(struct inode *inode) { bool allowed = false; struct task_struct *task = get_proc_task(inode); @@ -86,13 +86,44 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file) if (!allowed) return -EACCES; + return 0; +} + +static int seq_fdinfo_open(struct inode *inode, struct file *file) +{ + int ret = proc_fdinfo_access_allowed(inode); + + if (ret) + return ret; + return single_open(file, seq_show, inode); } +static ssize_t seq_fdinfo_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos) +{ + int ret = proc_fdinfo_access_allowed(file_inode(file)); + + if (ret) + return ret; + + return seq_read(file, buf, size, ppos); +} + +static loff_t seq_fdinfo_lseek(struct file *file, loff_t offset, int whence) +{ + int ret = proc_fdinfo_access_allowed(file_inode(file)); + + if (ret) + return ret; + + return seq_lseek(file, offset, whence); +} + static const struct file_operations proc_fdinfo_file_operations = { .open = seq_fdinfo_open, - .read = seq_read, - .llseek = seq_lseek, + .read = seq_fdinfo_read, + .llseek = seq_fdinfo_lseek, .release = single_release, }; @@ -344,17 +375,43 @@ proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) static int proc_readfdinfo(struct file *file, struct dir_context *ctx) { + int ret = proc_fdinfo_access_allowed(file_inode(file)); + + if (ret) + return ret; + return proc_readfd_common(file, ctx, 
proc_fdinfo_instantiate); } +static loff_t proc_llseek_fdinfo(struct file *file, loff_t offset, int whence) +{ + int ret = proc_fdinfo_access_allowed(file_inode(file)); + + if (ret) + return ret; + + return generic_file_llseek(file, offset, whence); +} + +static int proc_open_fdinfo(struct inode *inode, struct file *file) +{ + int ret = proc_fdinfo_access_allowed(inode); + + if (ret) + return ret; + + return 0; +} + const struct inode_operations proc_fdinfo_inode_operations = { .lookup = proc_lookupfdinfo, .setattr = proc_setattr, }; const struct file_operations proc_fdinfo_operations = { + .open = proc_open_fdinfo, .read = generic_read_dir, .iterate_shared = proc_readfdinfo, - .llseek = generic_file_llseek, + .llseek = proc_llseek_fdinfo, }; From a09a517056b8943350ac55bffabe3622f15f0352 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 21 Jul 2021 11:41:07 +1000 Subject: [PATCH 306/851] shm: skip shm_destroy if task IPC namespace was changed Patch series "shm: omit forced shm destroy if task IPC namespace was changed". Sysvshm has the shm_rmid_forced feature, which is per IPC namespace and controlled by the kernel.shm_rmid_forced sysctl. When the feature is turned on, then during task exit (and unshare(CLONE_NEWIPC)) all sysvshm's will be destroyed by the exit_shm(struct task_struct *task) function. But there is a problem if the task has changed its IPC namespace since the shmget() call. In such a situation the exit_shm() function will try to call shm_destroy() with the task's new IPC namespace and a sysvshm object from the old IPC namespace, which leads to the situation where the sysvshm object is still attached to the old IPC namespace but freed; later, during the old IPC namespace cleanup, we will try to free such a sysvshm object for the second time and will get the problem :) The first patch solves this problem by postponing shm_destroy to the moment when IPC namespace cleanup is called. The second patch is useful to prevent (or easily catch) such bugs in the future by adding corresponding WARNings.
This patch (of 2): A task may change its IPC namespace by doing setns(), but its sysvshm objects remain in the original IPC namespace (= the IPC namespace the task was in when shmget() was called). Let's skip the forced shm destroy in such a case because we can't determine the IPC namespace from the shm alone. These problematic sysvshm's will be destroyed on IPC namespace cleanup. Link: https://lkml.kernel.org/r/20210706132259.71740-1-alexander.mikhalitsyn@virtuozzo.com Link: https://lkml.kernel.org/r/20210706132259.71740-2-alexander.mikhalitsyn@virtuozzo.com Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity") Signed-off-by: Alexander Mikhalitsyn Cc: Milton Miller Cc: Jack Miller Cc: Pavel Tikhomirov Cc: Alexander Mikhalitsyn Cc: Manfred Spraul Cc: Davidlohr Bueso Cc: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- ipc/shm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ipc/shm.c b/ipc/shm.c index 748933e376cad..70a41171b8bb5 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -173,6 +173,14 @@ static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace return container_of(ipcp, struct shmid_kernel, shm_perm); } +static inline bool is_shm_in_ns(struct ipc_namespace *ns, struct shmid_kernel *shp) +{ + int idx = ipcid_to_idx(shp->shm_perm.id); + struct shmid_kernel *tshp = shm_obtain_object(ns, idx); + + return !IS_ERR(tshp) && tshp == shp; +} + /* * shm_lock_(check_) routines are called in the paths where the rwsem * is not necessarily held.
@@ -415,7 +423,7 @@ void exit_shm(struct task_struct *task) list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { shp->shm_creator = NULL; - if (shm_may_destroy(ns, shp)) { + if (is_shm_in_ns(ns, shp) && shm_may_destroy(ns, shp)) { shm_lock_by_ptr(shp); shm_destroy(ns, shp); } From b808027815d33c6b03a27fe5cdb4e82a1bf9c0dd Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 21 Jul 2021 11:41:08 +1000 Subject: [PATCH 307/851] ipc: WARN if trying to remove ipc object which is absent Let's produce a warning if we are trying to remove a non-existing IPC object from the IPC namespace kht/idr structures. This allows catching possible bugs when the ipc_rmid() function was called with inconsistent struct ipc_ids*, struct kern_ipc_perm* arguments. Link: https://lkml.kernel.org/r/20210706132259.71740-3-alexander.mikhalitsyn@virtuozzo.com Signed-off-by: Alexander Mikhalitsyn Cc: Milton Miller Cc: Jack Miller Cc: Pavel Tikhomirov Cc: Alexander Mikhalitsyn Cc: Davidlohr Bueso Cc: "Eric W.
Biederman" Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- ipc/util.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ipc/util.c b/ipc/util.c index 0027e47626b7b..45bb8ce6c42ca 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -447,8 +447,8 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) { if (ipcp->key != IPC_PRIVATE) - rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, - ipc_kht_params); + WARN_ON(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, + ipc_kht_params)); } /** @@ -498,7 +498,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) { int idx = ipcid_to_idx(ipcp->id); - idr_remove(&ids->ipcs_idr, idx); + WARN_ON(idr_remove(&ids->ipcs_idr, idx) != ipcp); ipc_kht_remove(ids, ipcp); ids->in_use--; ipcp->deleted = true; From 2ff2187ca9ed41b9db0f273a3dadb045b855dc63 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 21 Jul 2021 11:41:09 +1000 Subject: [PATCH 308/851] lib/test_string.c: move string selftest in the Runtime Testing menu STRING_SELFTEST is presented in the "Library routines" menu. 
Move it in Kernel hacking > Kernel Testing and Coverage > Runtime Testing together with other similar tests found in lib/ --- Runtime Testing <*> Test functions located in the hexdump module at runtime <*> Test string functions (NEW) <*> Test functions located in the string_helpers module at runtime <*> Test strscpy*() family of functions at runtime <*> Test kstrto*() family of functions at runtime <*> Test printf() family of functions at runtime <*> Test scanf() family of functions at runtime Link: https://lkml.kernel.org/r/20210719185158.190371-1-mcroce@linux.microsoft.com Signed-off-by: Matteo Croce Cc: Peter Rosin Cc: Geert Uytterhoeven Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/Kconfig | 3 --- lib/Kconfig.debug | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig b/lib/Kconfig index d241fe476fdac..5c9c0687f76d1 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -683,9 +683,6 @@ config PARMAN config OBJAGG tristate "objagg" if COMPILE_TEST -config STRING_SELFTEST - tristate "Test string functions" - endmenu config GENERIC_IOREMAP diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 831212722924c..5ddd575159fb8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2180,6 +2180,9 @@ config ASYNC_RAID6_TEST config TEST_HEXDUMP tristate "Test functions located in the hexdump module at runtime" +config STRING_SELFTEST + tristate "Test string functions at runtime" + config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" From baddef428bc79afa56eb5163f1be5f526880d54c Mon Sep 17 00:00:00 2001 From: Toshiki Fukasawa Date: Wed, 21 Jul 2021 11:41:09 +1000 Subject: [PATCH 309/851] /proc/kpageflags: prevent an integer overflow in stable_page_flags() stable_page_flags() returns kpageflags info in u64, but it uses "1 << KPF_*" internally which is considered as int. 
This type mismatch causes no visible problem now, but it will if you set bit 32 or more as done in a subsequent patch. So use BIT_ULL in order to avoid future overflow issues. Link: http://lkml.kernel.org/r/20190725023100.31141-2-t-fukasawa@vx.jp.nec.com Signed-off-by: Toshiki Fukasawa Cc: Michal Hocko Cc: Dan Williams Cc: Alexey Dobriyan Cc: Christoph Hellwig Cc: Naoya Horiguchi Cc: Junichi Nomura Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/page.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/proc/page.c b/fs/proc/page.c index 9f1077d94cde1..265f4fca15e29 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -115,7 +115,7 @@ u64 stable_page_flags(struct page *page) * it differentiates a memory hole from a page with no flags */ if (!page) - return 1 << KPF_NOPAGE; + return BIT_ULL(KPF_NOPAGE); k = page->flags; u = 0; @@ -127,22 +127,22 @@ u64 stable_page_flags(struct page *page) * simple test in page_mapped() is not enough. 
*/ if (!PageSlab(page) && page_mapped(page)) - u |= 1 << KPF_MMAP; + u |= BIT_ULL(KPF_MMAP); if (PageAnon(page)) - u |= 1 << KPF_ANON; + u |= BIT_ULL(KPF_ANON); if (PageKsm(page)) - u |= 1 << KPF_KSM; + u |= BIT_ULL(KPF_KSM); /* * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ if (PageHead(page)) - u |= 1 << KPF_COMPOUND_HEAD; + u |= BIT_ULL(KPF_COMPOUND_HEAD); if (PageTail(page)) - u |= 1 << KPF_COMPOUND_TAIL; + u |= BIT_ULL(KPF_COMPOUND_TAIL); if (PageHuge(page)) - u |= 1 << KPF_HUGE; + u |= BIT_ULL(KPF_HUGE); /* * PageTransCompound can be true for non-huge compound pages (slab * pages or pages allocated by drivers with __GFP_COMP) because it @@ -153,14 +153,13 @@ u64 stable_page_flags(struct page *page) struct page *head = compound_head(page); if (PageLRU(head) || PageAnon(head)) - u |= 1 << KPF_THP; + u |= BIT_ULL(KPF_THP); else if (is_huge_zero_page(head)) { - u |= 1 << KPF_ZERO_PAGE; - u |= 1 << KPF_THP; + u |= BIT_ULL(KPF_ZERO_PAGE); + u |= BIT_ULL(KPF_THP); } } else if (is_zero_pfn(page_to_pfn(page))) - u |= 1 << KPF_ZERO_PAGE; - + u |= BIT_ULL(KPF_ZERO_PAGE); /* * Caveats on high order pages: page->_refcount will only be set @@ -168,23 +167,23 @@ u64 stable_page_flags(struct page *page) * SLOB won't set PG_slab at all on compound pages. 
*/ if (PageBuddy(page)) - u |= 1 << KPF_BUDDY; + u |= BIT_ULL(KPF_BUDDY); else if (page_count(page) == 0 && is_free_buddy_page(page)) - u |= 1 << KPF_BUDDY; + u |= BIT_ULL(KPF_BUDDY); if (PageOffline(page)) - u |= 1 << KPF_OFFLINE; + u |= BIT_ULL(KPF_OFFLINE); if (PageTable(page)) - u |= 1 << KPF_PGTABLE; + u |= BIT_ULL(KPF_PGTABLE); if (page_is_idle(page)) - u |= 1 << KPF_IDLE; + u |= BIT_ULL(KPF_IDLE); u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); if (PageTail(page) && PageSlab(compound_head(page))) - u |= 1 << KPF_SLAB; + u |= BIT_ULL(KPF_SLAB); u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); @@ -197,7 +196,7 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); if (PageSwapCache(page)) - u |= 1 << KPF_SWAPCACHE; + u |= BIT_ULL(KPF_SWAPCACHE); u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); From 38abd2c4a86caf54ecb747b0366a245af4bb8cb3 Mon Sep 17 00:00:00 2001 From: Toshiki Fukasawa Date: Wed, 21 Jul 2021 11:41:10 +1000 Subject: [PATCH 310/851] /proc/kpageflags: do not use uninitialized struct pages A kernel panic was observed during reading /proc/kpageflags for first few pfns allocated by pmem namespace: BUG: unable to handle page fault for address: fffffffffffffffe [ 114.495280] #PF: supervisor read access in kernel mode [ 114.495738] #PF: error_code(0x0000) - not-present page [ 114.496203] PGD 17120e067 P4D 17120e067 PUD 171210067 PMD 0 [ 114.496713] Oops: 0000 [#1] SMP PTI [ 114.497037] CPU: 9 PID: 1202 Comm: page-types Not tainted 5.3.0-rc1 #1 [ 114.497621] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 [ 114.498706] RIP: 0010:stable_page_flags+0x27/0x3f0 [ 114.499142] Code: 82 66 90 66 66 66 66 90 48 85 ff 0f 84 d1 03 00 00 41 54 55 48 89 fd 53 48 8b 57 08 48 8b 1f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 
c4 02 0f 84 57 03 00 00 45 31 e4 48 8b 55 08 48 89 ef [ 114.500788] RSP: 0018:ffffa5e601a0fe60 EFLAGS: 00010202 [ 114.501373] RAX: fffffffffffffffe RBX: ffffffffffffffff RCX: 0000000000000000 [ 114.502009] RDX: 0000000000000001 RSI: 00007ffca13a7310 RDI: ffffd07489000000 [ 114.502637] RBP: ffffd07489000000 R08: 0000000000000001 R09: 0000000000000000 [ 114.503270] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000240000 [ 114.503896] R13: 0000000000080000 R14: 00007ffca13a7310 R15: ffffa5e601a0ff08 [ 114.504530] FS: 00007f0266c7f540(0000) GS:ffff962dbbac0000(0000) knlGS:0000000000000000 [ 114.505245] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 114.505754] CR2: fffffffffffffffe CR3: 000000023a204000 CR4: 00000000000006e0 [ 114.506401] Call Trace: [ 114.506660] kpageflags_read+0xb1/0x130 [ 114.507051] proc_reg_read+0x39/0x60 [ 114.507387] vfs_read+0x8a/0x140 [ 114.507686] ksys_pread64+0x61/0xa0 [ 114.508021] do_syscall_64+0x5f/0x1a0 [ 114.508372] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 114.508844] RIP: 0033:0x7f0266ba426b The reason for the panic is that stable_page_flags() which parses the page flags uses uninitialized struct pages reserved by the ZONE_DEVICE driver. Earlier approach to fix this was discussed here: https://marc.info/?l=linux-mm&m=152964770000672&w=2 This is another approach. To avoid using the uninitialized struct page, immediately return with KPF_RESERVED at the beginning of stable_page_flags() if the page is reserved by ZONE_DEVICE driver. Dan said: : The nvdimm implementation uses vmem_altmap to arrange for the 'struct : page' array to be allocated from a reservation of a pmem namespace. A : namespace in this mode contains an info-block that consumes the first : 8K of the namespace capacity, capacity designated for page mapping, : capacity for padding the start of data to optionally 4K, 2MB, or 1GB : (on x86), and then the namespace data itself. 
The implementation : specifies a section aligned (now sub-section aligned) address to : arch_add_memory() to establish the linear mapping to map the metadata, : and then vmem_altmap indicates to memmap_init_zone() which pfns : represent data. The implementation only specifies enough 'struct page' : capacity for pfn_to_page() to operate on the data space, not the : namespace metadata space. : : The proposal to validate ZONE_DEVICE pfns against the altmap seems the : right approach to me. Link: http://lkml.kernel.org/r/20190725023100.31141-3-t-fukasawa@vx.jp.nec.com Signed-off-by: Toshiki Fukasawa Cc: Alexey Dobriyan Cc: Christoph Hellwig Cc: Dan Williams Cc: Junichi Nomura Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/page.c | 3 +++ include/linux/memremap.h | 6 ++++++ mm/memremap.c | 20 ++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/fs/proc/page.c b/fs/proc/page.c index 265f4fca15e29..4dcbcd506cb6e 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -117,6 +117,9 @@ u64 stable_page_flags(struct page *page) if (!page) return BIT_ULL(KPF_NOPAGE); + if (pfn_zone_device_reserved(page_to_pfn(page))) + return BIT_ULL(KPF_RESERVED); + k = page->flags; u = 0; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index c0e9d35889e8d..119f130ef8f10 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -131,6 +131,7 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) } #ifdef CONFIG_ZONE_DEVICE +bool pfn_zone_device_reserved(unsigned long pfn); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); @@ -143,6 +144,11 @@ unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); unsigned long memremap_compat_align(void); #else +static 
inline bool pfn_zone_device_reserved(unsigned long pfn) +{ + return false; +} + static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { diff --git a/mm/memremap.c b/mm/memremap.c index 15a074ffb8d73..805d761740c42 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -109,6 +109,26 @@ static unsigned long pfn_next(unsigned long pfn) return pfn + 1; } +/* + * This returns true if the page is reserved by ZONE_DEVICE driver. + */ +bool pfn_zone_device_reserved(unsigned long pfn) +{ + struct dev_pagemap *pgmap; + struct vmem_altmap *altmap; + bool ret = false; + + pgmap = get_dev_pagemap(pfn, NULL); + if (!pgmap) + return ret; + altmap = pgmap_altmap(pgmap); + if (altmap && pfn < (altmap->base_pfn + altmap->reserve)) + ret = true; + put_dev_pagemap(pgmap); + + return ret; +} + #define for_each_device_pfn(pfn, map, i) \ for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn)) From 5c764c97fe3da4b9e8bf46726a5c17f81ccf74d8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 21 Jul 2021 11:41:11 +1000 Subject: [PATCH 311/851] ocfs2: remove an unnecessary condition The case where "tmp_oh" is NULL is handled at the start of the function. At this point we know it's non-NULL so this will always return 1. Link: https://lkml.kernel.org/r/YOcItgIXtisi3MaO@mwanda Signed-off-by: Dan Carpenter Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Larry Chen Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/dlmglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 48fd369c29a4b..33fbdc823278c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2721,7 +2721,7 @@ int ocfs2_inode_lock_tracker(struct inode *inode, return status; } } - return tmp_oh ? 
1 : 0; + return 1; } void ocfs2_inode_unlock_tracker(struct inode *inode, From 55e6b033831fc0efbb98770569e9d6ca3f67fa3c Mon Sep 17 00:00:00 2001 From: Wangyan Date: Wed, 21 Jul 2021 11:41:12 +1000 Subject: [PATCH 312/851] ocfs2: clear links count in ocfs2_mknod() if an error occurs In this condition, the inode cannot be wiped when an error happens. ocfs2_mkdir() ->ocfs2_mknod() ->ocfs2_mknod_locked() ->__ocfs2_mknod_locked() ->ocfs2_set_links_count() // i_links_count is 2 -> ... // an error occurs, goto roll_back or leave. ->ocfs2_commit_trans() ->iput(inode) ->evict() ->ocfs2_evict_inode() ->ocfs2_delete_inode() ->ocfs2_inode_lock() ->ocfs2_inode_lock_update() ->ocfs2_refresh_inode() ->set_nlink(); // inode->i_nlink is 2 now. /* if wipe is 0, it will goto bail_unlock_inode */ ->ocfs2_query_inode_wipe() ->if (inode->i_nlink) return; // wipe is 0. /* inode can not be wiped */ ->ocfs2_wipe_inode() So, we need to clear the links count before the transaction is committed. Link: http://lkml.kernel.org/r/d8147c41-fb2b-bdf7-b660-1f3c8448c33f@huawei.com Signed-off-by: Yan Wang Reviewed-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/namei.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 2c46ff6ba4ea2..e717061f0dbe1 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -453,8 +453,12 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns, leave: if (status < 0 && did_quota_inode) dquot_free_inode(inode); - if (handle) + if (handle) { + if (status < 0 && new_fe_bh != NULL) + ocfs2_set_links_count((struct ocfs2_dinode *) + new_fe_bh->b_data, 0); ocfs2_commit_trans(osb, handle); + } ocfs2_inode_unlock(dir, 1); if (did_block_signals) @@ -598,6 +602,8 @@ static int __ocfs2_mknod_locked(struct inode *dir, leave: if (status < 0) { if (*new_fe_bh) { + if (fe) + ocfs2_set_links_count(fe, 
0); brelse(*new_fe_bh); *new_fe_bh = NULL; } @@ -2027,8 +2033,12 @@ static int ocfs2_symlink(struct user_namespace *mnt_userns, ocfs2_clusters_to_bytes(osb->sb, 1)); if (status < 0 && did_quota_inode) dquot_free_inode(inode); - if (handle) + if (handle) { + if (status < 0 && new_fe_bh != NULL) + ocfs2_set_links_count((struct ocfs2_dinode *) + new_fe_bh->b_data, 0); ocfs2_commit_trans(osb, handle); + } ocfs2_inode_unlock(dir, 1); if (did_block_signals) From 1e10dca18d886789dd2519923490b826949520dc Mon Sep 17 00:00:00 2001 From: Wangyan Date: Wed, 21 Jul 2021 11:41:13 +1000 Subject: [PATCH 313/851] ocfs2: fix ocfs2 corrupt when iputting an inode In this condition, a BUG_ON will be hit when an error occurs. ocfs2_mkdir() ->ocfs2_mknod() ->ocfs2_mknod_locked() ->__ocfs2_mknod_locked() //Assume inode->i_generation is genN. ->inode->i_generation = osb->s_next_generation++; // The inode lockres has been initialized. ->ocfs2_populate_inode() ->ocfs2_create_new_inode_locks() ->An error happened, returned value is non-zero // free the start_bit x in bg_blkno ->ocfs2_free_suballoc_bits() ->... /* Another process executes mkdir successfully at this point, and it occupies the start_bit x in bg_blkno which has been freed before. Its inode->i_generation is genN + 1 */ ->iput(inode) ->evict() ->ocfs2_evict_inode() ->ocfs2_delete_inode() ->ocfs2_inode_lock() ->ocfs2_inode_lock_update() /* Bug on here, genN != genN + 1 */ ->mlog_bug_on_msg(inode->i_generation != le32_to_cpu(fe->i_generation)) So, we must not reclaim the inode once inode->ip_inode_lockres has been initialized. It will be freed in iput(). 
Link: http://lkml.kernel.org/r/ef080ca3-5d74-e276-17a1-d9e7c7e662c9@huawei.com Fixes: b1529a41f777 ("ocfs2: should reclaim the inode if '__ocfs2_mknod_locked' returns an error") Signed-off-by: Yan Wang Reviewed-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index e717061f0dbe1..307dd2222874b 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -640,7 +640,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, parent_fe_bh, handle, inode_ac, fe_blkno, suballoc_loc, suballoc_bit); - if (status < 0) { + if (status < 0 && !(OCFS2_I(inode)->ip_inode_lockres.l_flags & + OCFS2_LOCK_INITIALIZED)) { u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit); int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode, inode_ac->ac_bh, suballoc_bit, bg_blkno, 1); From a7e4936dbc798218e74c5b099ca037f78d1e1f9f Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Wed, 21 Jul 2021 11:41:14 +1000 Subject: [PATCH 314/851] arch/csky/kernel/probes/kprobes.c: fix bugon.cocci warnings Use BUG_ON instead of an if condition followed by BUG. 
Generated by: scripts/coccinelle/misc/bugon.cocci Link: https://lkml.kernel.org/r/alpine.DEB.2.22.394.2107061049150.7197@hadrien Fixes: 7d37cb2c912d ("lib: fix kconfig dependency on ARCH_WANT_FRAME_POINTERS") Signed-off-by: kernel test robot Signed-off-by: Julia Lawall Reported-by: kernel test robot Cc: Julian Braha Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/csky/kernel/probes/kprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c index 68b22b499aebf..8fffa34d4e1c5 100644 --- a/arch/csky/kernel/probes/kprobes.c +++ b/arch/csky/kernel/probes/kprobes.c @@ -283,8 +283,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) * normal page fault. */ regs->pc = (unsigned long) cur->addr; - if (!instruction_pointer(regs)) - BUG(); + BUG_ON(!instruction_pointer(regs)); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); From f2eecc7e6e084c3218eda10c9947a060cb8dc67d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Jul 2021 11:41:14 +1000 Subject: [PATCH 315/851] mm: move kvmalloc-related functions to slab.h Not all files in the kernel should include mm.h. Migrating callers from kmalloc to kvmalloc is easier if the kvmalloc functions are in slab.h. 
Link: https://lkml.kernel.org/r/20210622215757.3525604-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Pekka Enberg Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/of/kexec.c | 1 + include/linux/mm.h | 32 -------------------------------- include/linux/slab.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index f335d941a716e..b90660c05f306 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -16,6 +16,7 @@ #include #include #include +#include #include /* relevant device tree properties */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ca22e6e694a8..629df6a245276 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -800,38 +800,6 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif -extern void *kvmalloc_node(size_t size, gfp_t flags, int node); -static inline void *kvmalloc(size_t size, gfp_t flags) -{ - return kvmalloc_node(size, flags, NUMA_NO_NODE); -} -static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) -{ - return kvmalloc_node(size, flags | __GFP_ZERO, node); -} -static inline void *kvzalloc(size_t size, gfp_t flags) -{ - return kvmalloc(size, flags | __GFP_ZERO); -} - -static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) -{ - size_t bytes; - - if (unlikely(check_mul_overflow(n, size, &bytes))) - return NULL; - - return kvmalloc(bytes, flags); -} - -static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) -{ - return kvmalloc_array(n, size, flags | __GFP_ZERO); -} - -extern void kvfree(const void *addr); -extern void kvfree_sensitive(const void *addr, size_t len); - static inline int head_compound_mapcount(struct page *head) { return atomic_read(compound_mapcount_ptr(head)) + 1; diff --git a/include/linux/slab.h b/include/linux/slab.h index 
083f3ce550bca..2c0d80cca6b8c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -732,6 +732,38 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } +void *kvmalloc_node(size_t size, gfp_t flags, int node); +static inline void *kvmalloc(size_t size, gfp_t flags) +{ + return kvmalloc_node(size, flags, NUMA_NO_NODE); +} +static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) +{ + return kvmalloc_node(size, flags | __GFP_ZERO, node); +} +static inline void *kvzalloc(size_t size, gfp_t flags) +{ + return kvmalloc(size, flags | __GFP_ZERO); +} + +static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + return kvmalloc(bytes, flags); +} + +static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) +{ + return kvmalloc_array(n, size, flags | __GFP_ZERO); +} + +void kvfree(const void *addr); +void kvfree_sensitive(const void *addr, size_t len); + unsigned int kmem_cache_size(struct kmem_cache *s); void __init kmem_cache_init_late(void); From a9aac8ea55160ae3ba8b0b6dd3cf4e80f97fe148 Mon Sep 17 00:00:00 2001 From: Vijayanand Jitta Date: Wed, 21 Jul 2021 11:41:15 +1000 Subject: [PATCH 316/851] mm: slub: Fix slub_debug disablement for list of slabs Consider the scenario where CONFIG_SLUB_DEBUG_ON is set and we would want to disable slub_debug for few slabs. Using boot parameter with slub_debug=-,slab_name syntax doesn't work as expected i.e; only disabling debugging for the specified list of slabs, instead it disables debugging for all slabs. Fix this. 
Link: https://lkml.kernel.org/r/1626176750-13099-1-git-send-email-vjitta@codeaurora.org Signed-off-by: Vijayanand Jitta Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 090fa14628f92..6dad2b6fda6fa 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1404,6 +1404,7 @@ static int __init setup_slub_debug(char *str) char *slab_list; bool global_slub_debug_changed = false; bool slab_list_specified = false; + bool slab_list_debug_disable = true; slub_debug = DEBUG_DEFAULT_FLAGS; if (*str++ != '=' || !*str) @@ -1411,7 +1412,6 @@ static int __init setup_slub_debug(char *str) * No options specified. Switch on full debugging. */ goto out; - saved_str = str; while (str) { str = parse_slub_debug_flags(str, &flags, &slab_list, true); @@ -1420,6 +1420,8 @@ static int __init setup_slub_debug(char *str) slub_debug = flags; global_slub_debug_changed = true; } else { + if (flags || !IS_ENABLED(CONFIG_SLUB_DEBUG_ON)) + slab_list_debug_disable = false; slab_list_specified = true; } } @@ -1431,7 +1433,7 @@ static int __init setup_slub_debug(char *str) * long as there is no option specifying flags without a slab list. */ if (slab_list_specified) { - if (!global_slub_debug_changed) + if (!global_slub_debug_changed && !slab_list_debug_disable) slub_debug = 0; slub_debug_string = saved_str; } From 5b79c4a1469bcc5793a361ef57d5d2e2393cabcc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:16 +1000 Subject: [PATCH 317/851] mm/debug_vm_pgtable: introduce struct pgtable_debug_args Patch series "mm/debug_vm_pgtable: Enhancements", v3. There are a couple of issues with current implementations and this series tries to resolve the issues: (a) All needed information are scattered in variables, passed to various test functions. 
The code is organized in a pretty relaxed fashion. (b) The page isn't allocated from buddy during page table entry modifying tests. The page can be invalid, conflicting with the implementations of set_xxx_at() on ARM64. The target page is accessed so that the iCache can be flushed when execution permission is given on ARM64. Besides, the target page can be unmapped and accessing it causes a kernel crash. "struct pgtable_debug_args" is introduced to address issue (a). For issue (b), the used page is allocated from buddy in page table entry modifying tests. The corresponding tests will be skipped if we fail to allocate the (huge) page. For other test cases, the original page around the kernel symbol (@start_kernel) is still used. The patches are organized as below. PATCH[2-10] could be combined to one patch, but it will make the review harder: PATCH[1] introduces "struct pgtable_debug_args" as place holder of all needed information. With it, the old and new implementation can coexist. PATCH[2-10] uses "struct pgtable_debug_args" in various test functions. PATCH[11] removes the unused code for old implementation. PATCH[12] fixes the issue of corrupted page flag for ARM64. This patch (of 12): In debug_vm_pgtable(), there are many local variables introduced to track the needed information and they are passed to the functions for various test cases. It'd be better to introduce a struct as a place holder for this information. With it, what the functions for various test cases need is the struct, to simplify the code. It also makes the code easier to maintain. Besides, set_xxx_at() could access the data on the corresponding pages in the page table modifying tests. So the accessed pages in the tests should have been allocated from buddy. Otherwise, we're accessing pages that aren't owned by us. This causes issues like page flag corruption. This introduces "struct pgtable_debug_args". The struct is initialized and destroyed, but the information in the struct isn't used yet. 
They will be used in subsequent patches. Link: https://lkml.kernel.org/r/20210719130613.334901-1-gshan@redhat.com Link: https://lkml.kernel.org/r/20210719130613.334901-2-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 197 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 196 insertions(+), 1 deletion(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 1c922691aa616..ea153ff40d232 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -58,6 +58,36 @@ #define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK) #define RANDOM_NZVALUE GENMASK(7, 0) +struct pgtable_debug_args { + struct mm_struct *mm; + struct vm_area_struct *vma; + + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + p4d_t *start_p4dp; + pud_t *start_pudp; + pmd_t *start_pmdp; + pgtable_t start_ptep; + + unsigned long vaddr; + pgprot_t page_prot; + pgprot_t page_prot_none; + + unsigned long pud_pfn; + unsigned long pmd_pfn; + unsigned long pte_pfn; + + unsigned long fixed_pgd_pfn; + unsigned long fixed_p4d_pfn; + unsigned long fixed_pud_pfn; + unsigned long fixed_pmd_pfn; + unsigned long fixed_pte_pfn; +}; + static void __init pte_basic_tests(unsigned long pfn, int idx) { pgprot_t prot = protection_map[idx]; @@ -955,8 +985,167 @@ static unsigned long __init get_random_vaddr(void) return random_vaddr; } +static void __init destroy_args(struct pgtable_debug_args *args) +{ + struct page *page = NULL; + + /* Free (huge) page */ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) && + has_transparent_hugepage() && + args->pud_pfn != ULONG_MAX) { + page = pfn_to_page(args->pud_pfn); + __free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT); + } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + has_transparent_hugepage() && + args->pmd_pfn != 
ULONG_MAX) { + page = pfn_to_page(args->pmd_pfn); + __free_pages(page, HPAGE_PMD_ORDER); + } else if (args->pte_pfn != ULONG_MAX) { + page = pfn_to_page(args->pte_pfn); + __free_pages(page, 0); + } + + /* Free page table */ + if (args->start_ptep) { + pte_free(args->mm, args->start_ptep); + mm_dec_nr_ptes(args->mm); + } + + if (args->start_pmdp) { + pmd_free(args->mm, args->start_pmdp); + mm_dec_nr_pmds(args->mm); + } + + if (args->start_pudp) { + pud_free(args->mm, args->start_pudp); + mm_dec_nr_puds(args->mm); + } + + if (args->start_p4dp) + p4d_free(args->mm, args->p4dp); + + /* Free vma and mm struct */ + if (args->vma) + vm_area_free(args->vma); + if (args->mm) + mmdrop(args->mm); +} + +static int __init init_args(struct pgtable_debug_args *args) +{ + struct page *page = NULL; + phys_addr_t phys; + int ret = 0; + + /* Initialize the debugging data */ + memset(args, 0, sizeof(*args)); + args->page_prot = vm_get_page_prot(VMFLAGS); + args->page_prot_none = __P000; + args->pud_pfn = ULONG_MAX; + args->pmd_pfn = ULONG_MAX; + args->pte_pfn = ULONG_MAX; + args->fixed_pgd_pfn = ULONG_MAX; + args->fixed_p4d_pfn = ULONG_MAX; + args->fixed_pud_pfn = ULONG_MAX; + args->fixed_pmd_pfn = ULONG_MAX; + args->fixed_pte_pfn = ULONG_MAX; + + /* Allocate mm and vma */ + args->mm = mm_alloc(); + if (!args->mm) { + pr_err("Failed to allocate mm struct\n"); + ret = -ENOMEM; + goto error; + } + + args->vma = vm_area_alloc(args->mm); + if (!args->vma) { + pr_err("Failed to allocate vma\n"); + ret = -ENOMEM; + goto error; + } + + /* Figure out the virtual address and allocate page table entries */ + args->vaddr = get_random_vaddr(); + args->pgdp = pgd_offset(args->mm, args->vaddr); + args->p4dp = p4d_alloc(args->mm, args->pgdp, args->vaddr); + args->pudp = args->p4dp ? + pud_alloc(args->mm, args->p4dp, args->vaddr) : NULL; + args->pmdp = args->pudp ? + pmd_alloc(args->mm, args->pudp, args->vaddr) : NULL; + args->ptep = args->pmdp ? 
+ pte_alloc_map(args->mm, args->pmdp, args->vaddr) : NULL; + if (!args->ptep) { + pr_err("Failed to allocate page table\n"); + ret = -ENOMEM; + goto error; + } + + /* + * The above page table entries will be modified. Lets save the + * page table entries so that they can be released when the tests + * are completed. + */ + args->start_p4dp = p4d_offset(args->pgdp, 0UL); + args->start_pudp = pud_offset(args->p4dp, 0UL); + args->start_pmdp = pmd_offset(args->pudp, 0UL); + args->start_ptep = pmd_pgtable(READ_ONCE(*(args->pmdp))); + + /* + * Figure out the fixed addresses, which are all around the kernel + * symbol (@start_kernel). The corresponding PFNs might be invalid, + * but it's fine as the following tests won't access the pages. + */ + phys = __pa_symbol(&start_kernel); + args->fixed_pgd_pfn = __phys_to_pfn(phys & PGDIR_MASK); + args->fixed_p4d_pfn = __phys_to_pfn(phys & P4D_MASK); + args->fixed_pud_pfn = __phys_to_pfn(phys & PUD_MASK); + args->fixed_pmd_pfn = __phys_to_pfn(phys & PMD_MASK); + args->fixed_pte_pfn = __phys_to_pfn(phys & PAGE_MASK); + + /* + * Allocate (huge) pages because some of the tests need to access + * the data in the pages. The corresponding tests will be skipped + * if we fail to allocate (huge) pages. 
+ */ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) && + has_transparent_hugepage()) { + page = alloc_pages(GFP_KERNEL | __GFP_NOWARN, + HPAGE_PUD_SHIFT - PAGE_SHIFT); + if (page) { + args->pud_pfn = page_to_pfn(page); + args->pmd_pfn = args->pud_pfn; + args->pte_pfn = args->pud_pfn; + return 0; + } + } + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + has_transparent_hugepage()) { + page = alloc_pages(GFP_KERNEL | __GFP_NOWARN, HPAGE_PMD_ORDER); + if (page) { + args->pmd_pfn = page_to_pfn(page); + args->pte_pfn = args->pmd_pfn; + return 0; + } + } + + page = alloc_pages(GFP_KERNEL, 0); + if (page) + args->pte_pfn = page_to_pfn(page); + + return 0; + +error: + destroy_args(args); + return ret; +} + static int __init debug_vm_pgtable(void) { + struct pgtable_debug_args args; struct vm_area_struct *vma; struct mm_struct *mm; pgd_t *pgdp; @@ -970,9 +1159,13 @@ static int __init debug_vm_pgtable(void) unsigned long vaddr, pte_aligned, pmd_aligned; unsigned long pud_aligned, p4d_aligned, pgd_aligned; spinlock_t *ptl = NULL; - int idx; + int idx, ret; pr_info("Validating architecture page table helpers\n"); + ret = init_args(&args); + if (ret) + return ret; + prot = vm_get_page_prot(VMFLAGS); vaddr = get_random_vaddr(); mm = mm_alloc(); @@ -1127,6 +1320,8 @@ static int __init debug_vm_pgtable(void) mm_dec_nr_pmds(mm); mm_dec_nr_ptes(mm); mmdrop(mm); + + destroy_args(&args); return 0; } late_initcall(debug_vm_pgtable); From 41b399706a09873c5385327bd1d1d59543fcf4da Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:17 +1000 Subject: [PATCH 318/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in basic tests This uses struct pgtable_debug_args in the basic test functions. There are more cleanups applied: * Move hugetlb_basic_tests() so that the basic tests are treated as a bundle. * Drop parameters to {pgd, p4d}_basic_tests() as they're never used. 
* Drop unused variables @{pgd, p4d}_aligned in debug_vm_pgtable(). Link: https://lkml.kernel.org/r/20210719130613.334901-3-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 51 ++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index ea153ff40d232..5f79d725f8d85 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -88,10 +88,10 @@ struct pgtable_debug_args { unsigned long fixed_pte_pfn; }; -static void __init pte_basic_tests(unsigned long pfn, int idx) +static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx) { pgprot_t prot = protection_map[idx]; - pte_t pte = pfn_pte(pfn, prot); + pte_t pte = pfn_pte(args->fixed_pte_pfn, prot); unsigned long val = idx, *ptr = &val; pr_debug("Validating PTE basic (%pGv)\n", ptr); @@ -173,7 +173,7 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static void __init pmd_basic_tests(unsigned long pfn, int idx) +static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { pgprot_t prot = protection_map[idx]; unsigned long val = idx, *ptr = &val; @@ -183,7 +183,7 @@ static void __init pmd_basic_tests(unsigned long pfn, int idx) return; pr_debug("Validating PMD basic (%pGv)\n", ptr); - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->fixed_pmd_pfn, prot); /* * This test needs to be executed after the given page table entry @@ -295,7 +295,7 @@ static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) +static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { pgprot_t prot = protection_map[idx]; unsigned long val = 
idx, *ptr = &val; @@ -305,7 +305,7 @@ static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int return; pr_debug("Validating PUD basic (%pGv)\n", ptr); - pud = pfn_pud(pfn, prot); + pud = pfn_pud(args->fixed_pud_pfn, prot); /* * This test needs to be executed after the given page table entry @@ -326,7 +326,7 @@ static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud)))); WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud)))); - if (mm_pmd_folded(mm)) + if (mm_pmd_folded(args->mm)) return; /* @@ -403,7 +403,7 @@ static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) WARN_ON(!pud_leaf(pud)); } #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ -static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) { } +static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { } static void __init pud_advanced_tests(struct mm_struct *mm, struct vm_area_struct *vma, pud_t *pudp, unsigned long pfn, unsigned long vaddr, @@ -413,8 +413,8 @@ static void __init pud_advanced_tests(struct mm_struct *mm, static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { } #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ -static void __init pmd_basic_tests(unsigned long pfn, int idx) { } -static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) { } +static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { } +static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { } static void __init pmd_advanced_tests(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmdp, unsigned long pfn, unsigned long vaddr, @@ -475,7 +475,7 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) { } #endif /* 
CONFIG_HAVE_ARCH_HUGE_VMAP */ -static void __init p4d_basic_tests(unsigned long pfn, pgprot_t prot) +static void __init p4d_basic_tests(void) { p4d_t p4d; @@ -484,7 +484,7 @@ static void __init p4d_basic_tests(unsigned long pfn, pgprot_t prot) WARN_ON(!p4d_same(p4d, p4d)); } -static void __init pgd_basic_tests(unsigned long pfn, pgprot_t prot) +static void __init pgd_basic_tests(void) { pgd_t pgd; @@ -889,7 +889,7 @@ static void __init swap_migration_tests(void) } #ifdef CONFIG_HUGETLB_PAGE -static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) +static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { struct page *page; pte_t pte; @@ -899,21 +899,21 @@ static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) * Accessing the page associated with the pfn is safe here, * as it was previously derived from a real kernel symbol. */ - page = pfn_to_page(pfn); - pte = mk_huge_pte(page, prot); + page = pfn_to_page(args->fixed_pmd_pfn); + pte = mk_huge_pte(page, args->page_prot); WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte))); WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte)))); WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte)))); #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB - pte = pfn_pte(pfn, prot); + pte = pfn_pte(args->fixed_pmd_pfn, args->page_prot); WARN_ON(!pte_huge(pte_mkhuge(pte))); #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ } #else /* !CONFIG_HUGETLB_PAGE */ -static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) { } +static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -1157,7 +1157,7 @@ static int __init debug_vm_pgtable(void) pgprot_t prot, protnone; phys_addr_t paddr; unsigned long vaddr, pte_aligned, pmd_aligned; - unsigned long pud_aligned, p4d_aligned, pgd_aligned; + unsigned long pud_aligned; spinlock_t *ptl = NULL; int idx, ret; @@ -1200,8 +1200,6 @@ static int 
__init debug_vm_pgtable(void) pte_aligned = (paddr & PAGE_MASK) >> PAGE_SHIFT; pmd_aligned = (paddr & PMD_MASK) >> PAGE_SHIFT; pud_aligned = (paddr & PUD_MASK) >> PAGE_SHIFT; - p4d_aligned = (paddr & P4D_MASK) >> PAGE_SHIFT; - pgd_aligned = (paddr & PGDIR_MASK) >> PAGE_SHIFT; WARN_ON(!pfn_valid(pte_aligned)); pgdp = pgd_offset(mm, vaddr); @@ -1235,9 +1233,9 @@ static int __init debug_vm_pgtable(void) * given page table entry. */ for (idx = 0; idx < ARRAY_SIZE(protection_map); idx++) { - pte_basic_tests(pte_aligned, idx); - pmd_basic_tests(pmd_aligned, idx); - pud_basic_tests(mm, pud_aligned, idx); + pte_basic_tests(&args, idx); + pmd_basic_tests(&args, idx); + pud_basic_tests(&args, idx); } /* @@ -1247,8 +1245,9 @@ static int __init debug_vm_pgtable(void) * the above iteration for now to save some test execution * time. */ - p4d_basic_tests(p4d_aligned, prot); - pgd_basic_tests(pgd_aligned, prot); + p4d_basic_tests(); + pgd_basic_tests(); + hugetlb_basic_tests(&args); pmd_leaf_tests(pmd_aligned, prot); pud_leaf_tests(pud_aligned, prot); @@ -1277,8 +1276,6 @@ static int __init debug_vm_pgtable(void) pmd_thp_tests(pmd_aligned, prot); pud_thp_tests(pud_aligned, prot); - hugetlb_basic_tests(pte_aligned, prot); - /* * Page table modifying tests. They need to hold * proper page table lock. From cd8c1cffe7deeea417c453f8b1f3dae5d4312050 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:18 +1000 Subject: [PATCH 319/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in leaf and savewrite tests This uses struct pgtable_debug_args in the leaf and savewrite test functions. 
Link: https://lkml.kernel.org/r/20210719130613.334901-4-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 5f79d725f8d85..b4b33afae9428 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -160,9 +160,9 @@ static void __init pte_advanced_tests(struct mm_struct *mm, WARN_ON(pte_young(pte)); } -static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot) +static void __init pte_savedwrite_tests(struct pgtable_debug_args *args) { - pte_t pte = pfn_pte(pfn, prot); + pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none); if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) return; @@ -261,7 +261,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm, pgtable = pgtable_trans_huge_withdraw(mm, pmdp); } -static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) +static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -269,7 +269,7 @@ static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PMD leaf\n"); - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot); /* * PMD based THP is a leaf entry. 
@@ -278,7 +278,7 @@ static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) WARN_ON(!pmd_leaf(pmd)); } -static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) +static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -289,7 +289,7 @@ static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PMD saved write\n"); - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none); WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd)))); WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd)))); } @@ -387,7 +387,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm, pudp_huge_get_and_clear(mm, vaddr, pudp); } -static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) +static void __init pud_leaf_tests(struct pgtable_debug_args *args) { pud_t pud; @@ -395,7 +395,7 @@ static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PUD leaf\n"); - pud = pfn_pud(pfn, prot); + pud = pfn_pud(args->fixed_pud_pfn, args->page_prot); /* * PUD based THP is a leaf entry. 
*/ @@ -410,7 +410,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm, pgprot_t prot) { } -static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pud_leaf_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { } @@ -427,9 +427,9 @@ static void __init pud_advanced_tests(struct mm_struct *mm, pgprot_t prot) { } -static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) { } -static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { } -static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { } +static void __init pud_leaf_tests(struct pgtable_debug_args *args) { } +static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP @@ -1249,11 +1249,11 @@ static int __init debug_vm_pgtable(void) pgd_basic_tests(); hugetlb_basic_tests(&args); - pmd_leaf_tests(pmd_aligned, prot); - pud_leaf_tests(pud_aligned, prot); + pmd_leaf_tests(&args); + pud_leaf_tests(&args); - pte_savedwrite_tests(pte_aligned, protnone); - pmd_savedwrite_tests(pmd_aligned, protnone); + pte_savedwrite_tests(&args); + pmd_savedwrite_tests(&args); pte_special_tests(pte_aligned, prot); pte_protnone_tests(pte_aligned, protnone); From fb6c79d1fb3a74fc7bc8cced28f81da35fe95e52 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:18 +1000 Subject: [PATCH 320/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in protnone and devmap tests This uses struct pgtable_debug_args in protnone and devmap test functions. After that, the unused variable @protnone in debug_vm_pgtable() is dropped. 
Link: https://lkml.kernel.org/r/20210719130613.334901-5-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 58 +++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index b4b33afae9428..1ae204831484e 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -661,9 +661,9 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp, WARN_ON(pmd_bad(pmd)); } -static void __init pte_special_tests(unsigned long pfn, pgprot_t prot) +static void __init pte_special_tests(struct pgtable_debug_args *args) { - pte_t pte = pfn_pte(pfn, prot); + pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) return; @@ -672,9 +672,9 @@ static void __init pte_special_tests(unsigned long pfn, pgprot_t prot) WARN_ON(!pte_special(pte_mkspecial(pte))); } -static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot) +static void __init pte_protnone_tests(struct pgtable_debug_args *args) { - pte_t pte = pfn_pte(pfn, prot); + pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none); if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) return; @@ -685,7 +685,7 @@ static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) +static void __init pmd_protnone_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -696,25 +696,25 @@ static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PMD protnone\n"); - pmd = pmd_mkhuge(pfn_pmd(pfn, prot)); + pmd = pmd_mkhuge(pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none)); WARN_ON(!pmd_protnone(pmd)); WARN_ON(!pmd_present(pmd)); } #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ -static void 
__init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pmd_protnone_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP -static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) +static void __init pte_devmap_tests(struct pgtable_debug_args *args) { - pte_t pte = pfn_pte(pfn, prot); + pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); pr_debug("Validating PTE devmap\n"); WARN_ON(!pte_devmap(pte_mkdevmap(pte))); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) +static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -722,12 +722,12 @@ static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PMD devmap\n"); - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot); WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd))); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) +static void __init pud_devmap_tests(struct pgtable_debug_args *args) { pud_t pud; @@ -735,20 +735,20 @@ static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PUD devmap\n"); - pud = pfn_pud(pfn, prot); + pud = pfn_pud(args->fixed_pud_pfn, args->page_prot); WARN_ON(!pud_devmap(pud_mkdevmap(pud))); } #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ -static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pud_devmap_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { } -static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { } +static void __init 
pud_devmap_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #else -static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) { } -static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { } -static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pte_devmap_tests(struct pgtable_debug_args *args) { } +static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { } +static void __init pud_devmap_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot) @@ -1154,7 +1154,7 @@ static int __init debug_vm_pgtable(void) pmd_t *pmdp, *saved_pmdp, pmd; pte_t *ptep; pgtable_t saved_ptep; - pgprot_t prot, protnone; + pgprot_t prot; phys_addr_t paddr; unsigned long vaddr, pte_aligned, pmd_aligned; unsigned long pud_aligned; @@ -1174,12 +1174,6 @@ static int __init debug_vm_pgtable(void) return 1; } - /* - * __P000 (or even __S000) will help create page table entries with - * PROT_NONE permission as required for pxx_protnone_tests(). 
- */ - protnone = __P000; - vma = vm_area_alloc(mm); if (!vma) { pr_err("vma allocation failed\n"); @@ -1255,13 +1249,13 @@ static int __init debug_vm_pgtable(void) pte_savedwrite_tests(&args); pmd_savedwrite_tests(&args); - pte_special_tests(pte_aligned, prot); - pte_protnone_tests(pte_aligned, protnone); - pmd_protnone_tests(pmd_aligned, protnone); + pte_special_tests(&args); + pte_protnone_tests(&args); + pmd_protnone_tests(&args); - pte_devmap_tests(pte_aligned, prot); - pmd_devmap_tests(pmd_aligned, prot); - pud_devmap_tests(pud_aligned, prot); + pte_devmap_tests(&args); + pmd_devmap_tests(&args); + pud_devmap_tests(&args); pte_soft_dirty_tests(pte_aligned, prot); pmd_soft_dirty_tests(pmd_aligned, prot); From ee33d1ef728c48e157d1f2414106092dcdfa7c7b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:19 +1000 Subject: [PATCH 321/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in soft_dirty and swap tests This uses struct pgtable_debug_args in the soft_dirty and swap test functions. 
Link: https://lkml.kernel.org/r/20210719130613.334901-6-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 48 +++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 1ae204831484e..a20ed77bf05f5 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -751,9 +751,9 @@ static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { } static void __init pud_devmap_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ -static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot) +static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) { - pte_t pte = pfn_pte(pfn, prot); + pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) return; @@ -763,9 +763,9 @@ static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot) WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte))); } -static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot) +static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) { - pte_t pte = pfn_pte(pfn, prot); + pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) return; @@ -776,7 +776,7 @@ static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) +static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -787,12 +787,12 @@ static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PMD soft dirty\n"); - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot); 
WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd))); WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd))); } -static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot) +static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -804,31 +804,29 @@ static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PMD swap soft dirty\n"); - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot); WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd))); WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd))); } #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ -static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { } -static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot) -{ -} +static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { } +static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot) +static void __init pte_swap_tests(struct pgtable_debug_args *args) { swp_entry_t swp; pte_t pte; pr_debug("Validating PTE swap\n"); - pte = pfn_pte(pfn, prot); + pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); swp = __pte_to_swp_entry(pte); pte = __swp_entry_to_pte(swp); - WARN_ON(pfn != pte_pfn(pte)); + WARN_ON(args->fixed_pte_pfn != pte_pfn(pte)); } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION -static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) +static void __init pmd_swap_tests(struct pgtable_debug_args *args) { swp_entry_t swp; pmd_t pmd; @@ -837,13 +835,13 @@ static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PMD swap\n"); - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot); swp = __pmd_to_swp_entry(pmd); pmd = __swp_entry_to_pmd(swp); - WARN_ON(pfn != pmd_pfn(pmd)); + 
WARN_ON(args->fixed_pmd_pfn != pmd_pfn(pmd)); } #else /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */ -static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pmd_swap_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static void __init swap_migration_tests(void) @@ -1257,13 +1255,13 @@ static int __init debug_vm_pgtable(void) pmd_devmap_tests(&args); pud_devmap_tests(&args); - pte_soft_dirty_tests(pte_aligned, prot); - pmd_soft_dirty_tests(pmd_aligned, prot); - pte_swap_soft_dirty_tests(pte_aligned, prot); - pmd_swap_soft_dirty_tests(pmd_aligned, prot); + pte_soft_dirty_tests(&args); + pmd_soft_dirty_tests(&args); + pte_swap_soft_dirty_tests(&args); + pmd_swap_soft_dirty_tests(&args); - pte_swap_tests(pte_aligned, prot); - pmd_swap_tests(pmd_aligned, prot); + pte_swap_tests(&args); + pmd_swap_tests(&args); swap_migration_tests(); From d5a676bd7334716c58974781107fd16bde0e7d72 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:20 +1000 Subject: [PATCH 322/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in migration and thp tests This uses struct pgtable_debug_args in the migration and thp test functions. It's notable that the pre-allocated page is used in swap_migration_tests() as set_pte_at() is used there. 
Link: https://lkml.kernel.org/r/20210719130613.334901-7-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index a20ed77bf05f5..d32e55a95c558 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -844,7 +844,7 @@ static void __init pmd_swap_tests(struct pgtable_debug_args *args) static void __init pmd_swap_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -static void __init swap_migration_tests(void) +static void __init swap_migration_tests(struct pgtable_debug_args *args) { struct page *page; swp_entry_t swp; @@ -860,9 +860,10 @@ static void __init swap_migration_tests(void) * problematic. Lets allocate a dedicated page explicitly for this * purpose that will be freed subsequently. */ - page = alloc_page(GFP_KERNEL); + page = (args->pte_pfn != ULONG_MAX) ? + pfn_to_page(args->pte_pfn) : NULL; if (!page) { - pr_err("page allocation failed\n"); + pr_err("no page available\n"); return; } @@ -883,7 +884,6 @@ static void __init swap_migration_tests(void) WARN_ON(!is_migration_entry(swp)); WARN_ON(is_writable_migration_entry(swp)); __ClearPageLocked(page); - __free_page(page); } #ifdef CONFIG_HUGETLB_PAGE @@ -915,7 +915,7 @@ static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot) +static void __init pmd_thp_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -934,7 +934,7 @@ static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot) * needs to return true. pmd_present() should be true whenever * pmd_trans_huge() returns true. 
*/ - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot); WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd))); #ifndef __HAVE_ARCH_PMDP_INVALIDATE @@ -944,7 +944,7 @@ static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot) } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD -static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) +static void __init pud_thp_tests(struct pgtable_debug_args *args) { pud_t pud; @@ -952,7 +952,7 @@ static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) return; pr_debug("Validating PUD based THP\n"); - pud = pfn_pud(pfn, prot); + pud = pfn_pud(args->fixed_pud_pfn, args->page_prot); WARN_ON(!pud_trans_huge(pud_mkhuge(pud))); /* @@ -964,11 +964,11 @@ static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) */ } #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ -static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pud_thp_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ -static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot) { } -static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) { } +static void __init pmd_thp_tests(struct pgtable_debug_args *args) { } +static void __init pud_thp_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static unsigned long __init get_random_vaddr(void) @@ -1263,10 +1263,10 @@ static int __init debug_vm_pgtable(void) pte_swap_tests(&args); pmd_swap_tests(&args); - swap_migration_tests(); + swap_migration_tests(&args); - pmd_thp_tests(pmd_aligned, prot); - pud_thp_tests(pud_aligned, prot); + pmd_thp_tests(&args); + pud_thp_tests(&args); /* * Page table modifying tests. 
They need to hold From 0f452aa3e5819d3c2dde71022be49653adbed0dc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:21 +1000 Subject: [PATCH 323/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in PTE modifying tests This uses struct pgtable_debug_args in PTE modifying tests. The allocated page is used as set_pte_at() is used there. The tests are skipped if the allocated page doesn't exist. Besides, the unused variables @ptep and @pte_aligned in debug_vm_pgtable() are dropped. Link: https://lkml.kernel.org/r/20210719130613.334901-8-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 75 ++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index d32e55a95c558..eb6dda88e0d9e 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -116,10 +116,7 @@ static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx) WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte)))); } -static void __init pte_advanced_tests(struct mm_struct *mm, - struct vm_area_struct *vma, pte_t *ptep, - unsigned long pfn, unsigned long vaddr, - pgprot_t prot) +static void __init pte_advanced_tests(struct pgtable_debug_args *args) { pte_t pte; @@ -130,33 +127,38 @@ static void __init pte_advanced_tests(struct mm_struct *mm, */ pr_debug("Validating PTE advanced\n"); - pte = pfn_pte(pfn, prot); - set_pte_at(mm, vaddr, ptep, pte); - ptep_set_wrprotect(mm, vaddr, ptep); - pte = ptep_get(ptep); + if (args->pte_pfn == ULONG_MAX) { + pr_debug("%s: Skipped\n", __func__); + return; + } + + pte = pfn_pte(args->pte_pfn, args->page_prot); + set_pte_at(args->mm, args->vaddr, args->ptep, pte); + ptep_set_wrprotect(args->mm, args->vaddr, args->ptep); + pte = ptep_get(args->ptep); WARN_ON(pte_write(pte)); - ptep_get_and_clear(mm, vaddr, 
ptep); - pte = ptep_get(ptep); + ptep_get_and_clear(args->mm, args->vaddr, args->ptep); + pte = ptep_get(args->ptep); WARN_ON(!pte_none(pte)); - pte = pfn_pte(pfn, prot); + pte = pfn_pte(args->pte_pfn, args->page_prot); pte = pte_wrprotect(pte); pte = pte_mkclean(pte); - set_pte_at(mm, vaddr, ptep, pte); + set_pte_at(args->mm, args->vaddr, args->ptep, pte); pte = pte_mkwrite(pte); pte = pte_mkdirty(pte); - ptep_set_access_flags(vma, vaddr, ptep, pte, 1); - pte = ptep_get(ptep); + ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1); + pte = ptep_get(args->ptep); WARN_ON(!(pte_write(pte) && pte_dirty(pte))); - ptep_get_and_clear_full(mm, vaddr, ptep, 1); - pte = ptep_get(ptep); + ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1); + pte = ptep_get(args->ptep); WARN_ON(!pte_none(pte)); - pte = pfn_pte(pfn, prot); + pte = pfn_pte(args->pte_pfn, args->page_prot); pte = pte_mkyoung(pte); - set_pte_at(mm, vaddr, ptep, pte); - ptep_test_and_clear_young(vma, vaddr, ptep); - pte = ptep_get(ptep); + set_pte_at(args->mm, args->vaddr, args->ptep, pte); + ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep); + pte = ptep_get(args->ptep); WARN_ON(pte_young(pte)); } @@ -617,20 +619,24 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp, } #endif /* PAGETABLE_P4D_FOLDED */ -static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep, - unsigned long pfn, unsigned long vaddr, - pgprot_t prot) +static void __init pte_clear_tests(struct pgtable_debug_args *args) { - pte_t pte = pfn_pte(pfn, prot); + pte_t pte; pr_debug("Validating PTE clear\n"); + if (args->pte_pfn == ULONG_MAX) { + pr_debug("%s: Skipped\n", __func__); + return; + } + + pte = pfn_pte(args->pte_pfn, args->page_prot); #ifndef CONFIG_RISCV pte = __pte(pte_val(pte) | RANDOM_ORVALUE); #endif - set_pte_at(mm, vaddr, ptep, pte); + set_pte_at(args->mm, args->vaddr, args->ptep, pte); barrier(); - pte_clear(mm, vaddr, ptep); - pte = ptep_get(ptep); + 
pte_clear(args->mm, args->vaddr, args->ptep); + pte = ptep_get(args->ptep); WARN_ON(!pte_none(pte)); } @@ -1150,11 +1156,10 @@ static int __init debug_vm_pgtable(void) p4d_t *p4dp, *saved_p4dp; pud_t *pudp, *saved_pudp; pmd_t *pmdp, *saved_pmdp, pmd; - pte_t *ptep; pgtable_t saved_ptep; pgprot_t prot; phys_addr_t paddr; - unsigned long vaddr, pte_aligned, pmd_aligned; + unsigned long vaddr, pmd_aligned; unsigned long pud_aligned; spinlock_t *ptl = NULL; int idx, ret; @@ -1189,10 +1194,8 @@ static int __init debug_vm_pgtable(void) */ paddr = __pa_symbol(&start_kernel); - pte_aligned = (paddr & PAGE_MASK) >> PAGE_SHIFT; pmd_aligned = (paddr & PMD_MASK) >> PAGE_SHIFT; pud_aligned = (paddr & PUD_MASK) >> PAGE_SHIFT; - WARN_ON(!pfn_valid(pte_aligned)); pgdp = pgd_offset(mm, vaddr); p4dp = p4d_alloc(mm, pgdp, vaddr); @@ -1272,11 +1275,11 @@ static int __init debug_vm_pgtable(void) * Page table modifying tests. They need to hold * proper page table lock. */ - - ptep = pte_offset_map_lock(mm, pmdp, vaddr, &ptl); - pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot); - pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot); - pte_unmap_unlock(ptep, ptl); + ptl = pte_lockptr(args.mm, args.pmdp); + spin_lock(ptl); + pte_clear_tests(&args); + pte_advanced_tests(&args); + spin_unlock(ptl); ptl = pmd_lock(mm, pmdp); pmd_clear_tests(mm, pmdp); From 795a854b60482eab4efbcea22c0548ae2a5cf6c9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:22 +1000 Subject: [PATCH 324/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in PMD modifying tests This uses struct pgtable_debug_args in PMD modifying tests. The allocated huge page is used when set_pmd_at() is used. The corresponding tests are skipped if the huge page doesn't exist. Besides, the unused variable @pmd_aligned in debug_vm_pgtable() is dropped. 
Link: https://lkml.kernel.org/r/20210719130613.334901-9-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 102 ++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index eb6dda88e0d9e..cec3cbf99a6b2 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -213,54 +213,54 @@ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) WARN_ON(!pmd_bad(pmd_mkhuge(pmd))); } -static void __init pmd_advanced_tests(struct mm_struct *mm, - struct vm_area_struct *vma, pmd_t *pmdp, - unsigned long pfn, unsigned long vaddr, - pgprot_t prot, pgtable_t pgtable) +static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { pmd_t pmd; + unsigned long vaddr = (args->vaddr & HPAGE_PMD_MASK); if (!has_transparent_hugepage()) return; pr_debug("Validating PMD advanced\n"); - /* Align the address wrt HPAGE_PMD_SIZE */ - vaddr &= HPAGE_PMD_MASK; + if (args->pmd_pfn == ULONG_MAX) { + pr_debug("%s: Skipped\n", __func__); + return; + } - pgtable_trans_huge_deposit(mm, pmdp, pgtable); + pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep); - pmd = pfn_pmd(pfn, prot); - set_pmd_at(mm, vaddr, pmdp, pmd); - pmdp_set_wrprotect(mm, vaddr, pmdp); - pmd = READ_ONCE(*pmdp); + pmd = pfn_pmd(args->pmd_pfn, args->page_prot); + set_pmd_at(args->mm, vaddr, args->pmdp, pmd); + pmdp_set_wrprotect(args->mm, vaddr, args->pmdp); + pmd = READ_ONCE(*(args->pmdp)); WARN_ON(pmd_write(pmd)); - pmdp_huge_get_and_clear(mm, vaddr, pmdp); - pmd = READ_ONCE(*pmdp); + pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp); + pmd = READ_ONCE(*(args->pmdp)); WARN_ON(!pmd_none(pmd)); - pmd = pfn_pmd(pfn, prot); + pmd = pfn_pmd(args->pmd_pfn, args->page_prot); pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); - set_pmd_at(mm, vaddr, pmdp, 
pmd); + set_pmd_at(args->mm, vaddr, args->pmdp, pmd); pmd = pmd_mkwrite(pmd); pmd = pmd_mkdirty(pmd); - pmdp_set_access_flags(vma, vaddr, pmdp, pmd, 1); - pmd = READ_ONCE(*pmdp); + pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1); + pmd = READ_ONCE(*(args->pmdp)); WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd))); - pmdp_huge_get_and_clear_full(vma, vaddr, pmdp, 1); - pmd = READ_ONCE(*pmdp); + pmdp_huge_get_and_clear_full(args->vma, vaddr, args->pmdp, 1); + pmd = READ_ONCE(*(args->pmdp)); WARN_ON(!pmd_none(pmd)); - pmd = pmd_mkhuge(pfn_pmd(pfn, prot)); + pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot)); pmd = pmd_mkyoung(pmd); - set_pmd_at(mm, vaddr, pmdp, pmd); - pmdp_test_and_clear_young(vma, vaddr, pmdp); - pmd = READ_ONCE(*pmdp); + set_pmd_at(args->mm, vaddr, args->pmdp, pmd); + pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp); + pmd = READ_ONCE(*(args->pmdp)); WARN_ON(pmd_young(pmd)); /* Clear the pte entries */ - pmdp_huge_get_and_clear(mm, vaddr, pmdp); - pgtable = pgtable_trans_huge_withdraw(mm, pmdp); + pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp); + pgtable_trans_huge_withdraw(args->mm, args->pmdp); } static void __init pmd_leaf_tests(struct pgtable_debug_args *args) @@ -417,12 +417,7 @@ static void __init pud_leaf_tests(struct pgtable_debug_args *args) { } #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { } static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { } -static void __init pmd_advanced_tests(struct mm_struct *mm, - struct vm_area_struct *vma, pmd_t *pmdp, - unsigned long pfn, unsigned long vaddr, - pgprot_t prot, pgtable_t pgtable) -{ -} +static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { } static void __init pud_advanced_tests(struct mm_struct *mm, struct vm_area_struct *vma, pud_t *pudp, unsigned long pfn, unsigned long vaddr, @@ -435,11 +430,11 @@ static void __init pmd_savedwrite_tests(struct 
pgtable_debug_args *args) { } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) +static void __init pmd_huge_tests(struct pgtable_debug_args *args) { pmd_t pmd; - if (!arch_vmap_pmd_supported(prot)) + if (!arch_vmap_pmd_supported(args->page_prot)) return; pr_debug("Validating PMD huge\n"); @@ -447,10 +442,11 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) * X86 defined pmd_set_huge() verifies that the given * PMD is not a populated non-leaf entry. */ - WRITE_ONCE(*pmdp, __pmd(0)); - WARN_ON(!pmd_set_huge(pmdp, __pfn_to_phys(pfn), prot)); - WARN_ON(!pmd_clear_huge(pmdp)); - pmd = READ_ONCE(*pmdp); + WRITE_ONCE(*(args->pmdp), __pmd(0)); + WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), + args->page_prot)); + WARN_ON(!pmd_clear_huge(args->pmdp)); + pmd = READ_ONCE(*(args->pmdp)); WARN_ON(!pmd_none(pmd)); } @@ -473,7 +469,7 @@ static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) WARN_ON(!pud_none(pud)); } #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ -static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) { } +static void __init pmd_huge_tests(struct pgtable_debug_args *args) { } static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) { } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ @@ -640,20 +636,19 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args) WARN_ON(!pte_none(pte)); } -static void __init pmd_clear_tests(struct mm_struct *mm, pmd_t *pmdp) +static void __init pmd_clear_tests(struct pgtable_debug_args *args) { - pmd_t pmd = READ_ONCE(*pmdp); + pmd_t pmd = READ_ONCE(*(args->pmdp)); pr_debug("Validating PMD clear\n"); pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); - WRITE_ONCE(*pmdp, pmd); - pmd_clear(pmdp); - pmd = READ_ONCE(*pmdp); + WRITE_ONCE(*(args->pmdp), pmd); + pmd_clear(args->pmdp); + pmd = 
READ_ONCE(*(args->pmdp)); WARN_ON(!pmd_none(pmd)); } -static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable) +static void __init pmd_populate_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -662,8 +657,8 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp, * This entry points to next level page table page. * Hence this must not qualify as pmd_bad(). */ - pmd_populate(mm, pmdp, pgtable); - pmd = READ_ONCE(*pmdp); + pmd_populate(args->mm, args->pmdp, args->start_ptep); + pmd = READ_ONCE(*(args->pmdp)); WARN_ON(pmd_bad(pmd)); } @@ -1159,7 +1154,7 @@ static int __init debug_vm_pgtable(void) pgtable_t saved_ptep; pgprot_t prot; phys_addr_t paddr; - unsigned long vaddr, pmd_aligned; + unsigned long vaddr; unsigned long pud_aligned; spinlock_t *ptl = NULL; int idx, ret; @@ -1194,7 +1189,6 @@ static int __init debug_vm_pgtable(void) */ paddr = __pa_symbol(&start_kernel); - pmd_aligned = (paddr & PMD_MASK) >> PAGE_SHIFT; pud_aligned = (paddr & PUD_MASK) >> PAGE_SHIFT; pgdp = pgd_offset(mm, vaddr); @@ -1281,11 +1275,11 @@ static int __init debug_vm_pgtable(void) pte_advanced_tests(&args); spin_unlock(ptl); - ptl = pmd_lock(mm, pmdp); - pmd_clear_tests(mm, pmdp); - pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep); - pmd_huge_tests(pmdp, pmd_aligned, prot); - pmd_populate_tests(mm, pmdp, saved_ptep); + ptl = pmd_lock(args.mm, args.pmdp); + pmd_clear_tests(&args); + pmd_advanced_tests(&args); + pmd_huge_tests(&args); + pmd_populate_tests(&args); spin_unlock(ptl); ptl = pud_lock(mm, pudp); From 25ec4f487b9b4d6bd3b027856de409baf3928272 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:23 +1000 Subject: [PATCH 325/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in PUD modifying tests This uses struct pgtable_debug_args in PUD modifying tests. The allocated huge page is used when set_pud_at() is used. 
The corresponding tests are skipped if the huge page doesn't exist. Besides, the following unused variables in debug_vm_pgtable() are dropped: @prot, @paddr, @pud_aligned. Link: https://lkml.kernel.org/r/20210719130613.334901-10-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 130 ++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 80 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index cec3cbf99a6b2..57b7ead0708ba 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -338,55 +338,55 @@ static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) WARN_ON(!pud_bad(pud_mkhuge(pud))); } -static void __init pud_advanced_tests(struct mm_struct *mm, - struct vm_area_struct *vma, pud_t *pudp, - unsigned long pfn, unsigned long vaddr, - pgprot_t prot) +static void __init pud_advanced_tests(struct pgtable_debug_args *args) { + unsigned long vaddr = (args->vaddr & HPAGE_PUD_MASK); pud_t pud; if (!has_transparent_hugepage()) return; pr_debug("Validating PUD advanced\n"); - /* Align the address wrt HPAGE_PUD_SIZE */ - vaddr &= HPAGE_PUD_MASK; + if (args->pud_pfn == ULONG_MAX) { + pr_debug("%s: Skipped\n", __func__); + return; + } - pud = pfn_pud(pfn, prot); - set_pud_at(mm, vaddr, pudp, pud); - pudp_set_wrprotect(mm, vaddr, pudp); - pud = READ_ONCE(*pudp); + pud = pfn_pud(args->pud_pfn, args->page_prot); + set_pud_at(args->mm, vaddr, args->pudp, pud); + pudp_set_wrprotect(args->mm, vaddr, args->pudp); + pud = READ_ONCE(*(args->pudp)); WARN_ON(pud_write(pud)); #ifndef __PAGETABLE_PMD_FOLDED - pudp_huge_get_and_clear(mm, vaddr, pudp); - pud = READ_ONCE(*pudp); + pudp_huge_get_and_clear(args->mm, vaddr, args->pudp); + pud = READ_ONCE(*(args->pudp)); WARN_ON(!pud_none(pud)); #endif /* __PAGETABLE_PMD_FOLDED */ - pud = pfn_pud(pfn, prot); + pud = 
pfn_pud(args->pud_pfn, args->page_prot); pud = pud_wrprotect(pud); pud = pud_mkclean(pud); - set_pud_at(mm, vaddr, pudp, pud); + set_pud_at(args->mm, vaddr, args->pudp, pud); pud = pud_mkwrite(pud); pud = pud_mkdirty(pud); - pudp_set_access_flags(vma, vaddr, pudp, pud, 1); - pud = READ_ONCE(*pudp); + pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1); + pud = READ_ONCE(*(args->pudp)); WARN_ON(!(pud_write(pud) && pud_dirty(pud))); #ifndef __PAGETABLE_PMD_FOLDED - pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1); - pud = READ_ONCE(*pudp); + pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1); + pud = READ_ONCE(*(args->pudp)); WARN_ON(!pud_none(pud)); #endif /* __PAGETABLE_PMD_FOLDED */ - pud = pfn_pud(pfn, prot); + pud = pfn_pud(args->pud_pfn, args->page_prot); pud = pud_mkyoung(pud); - set_pud_at(mm, vaddr, pudp, pud); - pudp_test_and_clear_young(vma, vaddr, pudp); - pud = READ_ONCE(*pudp); + set_pud_at(args->mm, vaddr, args->pudp, pud); + pudp_test_and_clear_young(args->vma, vaddr, args->pudp); + pud = READ_ONCE(*(args->pudp)); WARN_ON(pud_young(pud)); - pudp_huge_get_and_clear(mm, vaddr, pudp); + pudp_huge_get_and_clear(args->mm, vaddr, args->pudp); } static void __init pud_leaf_tests(struct pgtable_debug_args *args) @@ -406,24 +406,14 @@ static void __init pud_leaf_tests(struct pgtable_debug_args *args) } #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { } -static void __init pud_advanced_tests(struct mm_struct *mm, - struct vm_area_struct *vma, pud_t *pudp, - unsigned long pfn, unsigned long vaddr, - pgprot_t prot) -{ -} +static void __init pud_advanced_tests(struct pgtable_debug_args *args) { } static void __init pud_leaf_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { } static void __init 
pud_basic_tests(struct pgtable_debug_args *args, int idx) { } static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { } -static void __init pud_advanced_tests(struct mm_struct *mm, - struct vm_area_struct *vma, pud_t *pudp, - unsigned long pfn, unsigned long vaddr, - pgprot_t prot) -{ -} +static void __init pud_advanced_tests(struct pgtable_debug_args *args) { } static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { } static void __init pud_leaf_tests(struct pgtable_debug_args *args) { } static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { } @@ -450,11 +440,11 @@ static void __init pmd_huge_tests(struct pgtable_debug_args *args) WARN_ON(!pmd_none(pmd)); } -static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) +static void __init pud_huge_tests(struct pgtable_debug_args *args) { pud_t pud; - if (!arch_vmap_pud_supported(prot)) + if (!arch_vmap_pud_supported(args->page_prot)) return; pr_debug("Validating PUD huge\n"); @@ -462,15 +452,16 @@ static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) * X86 defined pud_set_huge() verifies that the given * PUD is not a populated non-leaf entry. 
*/ - WRITE_ONCE(*pudp, __pud(0)); - WARN_ON(!pud_set_huge(pudp, __pfn_to_phys(pfn), prot)); - WARN_ON(!pud_clear_huge(pudp)); - pud = READ_ONCE(*pudp); + WRITE_ONCE(*(args->pudp), __pud(0)); + WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), + args->page_prot)); + WARN_ON(!pud_clear_huge(args->pudp)); + pud = READ_ONCE(*(args->pudp)); WARN_ON(!pud_none(pud)); } #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ static void __init pmd_huge_tests(struct pgtable_debug_args *args) { } -static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) { } +static void __init pud_huge_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ static void __init p4d_basic_tests(void) @@ -492,27 +483,26 @@ static void __init pgd_basic_tests(void) } #ifndef __PAGETABLE_PUD_FOLDED -static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp) +static void __init pud_clear_tests(struct pgtable_debug_args *args) { - pud_t pud = READ_ONCE(*pudp); + pud_t pud = READ_ONCE(*(args->pudp)); - if (mm_pmd_folded(mm)) + if (mm_pmd_folded(args->mm)) return; pr_debug("Validating PUD clear\n"); pud = __pud(pud_val(pud) | RANDOM_ORVALUE); - WRITE_ONCE(*pudp, pud); - pud_clear(pudp); - pud = READ_ONCE(*pudp); + WRITE_ONCE(*(args->pudp), pud); + pud_clear(args->pudp); + pud = READ_ONCE(*(args->pudp)); WARN_ON(!pud_none(pud)); } -static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp, - pmd_t *pmdp) +static void __init pud_populate_tests(struct pgtable_debug_args *args) { pud_t pud; - if (mm_pmd_folded(mm)) + if (mm_pmd_folded(args->mm)) return; pr_debug("Validating PUD populate\n"); @@ -520,16 +510,13 @@ static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp, * This entry points to next level page table page. * Hence this must not qualify as pud_bad(). 
*/ - pud_populate(mm, pudp, pmdp); - pud = READ_ONCE(*pudp); + pud_populate(args->mm, args->pudp, args->start_pmdp); + pud = READ_ONCE(*(args->pudp)); WARN_ON(pud_bad(pud)); } #else /* !__PAGETABLE_PUD_FOLDED */ -static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp) { } -static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp, - pmd_t *pmdp) -{ -} +static void __init pud_clear_tests(struct pgtable_debug_args *args) { } +static void __init pud_populate_tests(struct pgtable_debug_args *args) { } #endif /* PAGETABLE_PUD_FOLDED */ #ifndef __PAGETABLE_P4D_FOLDED @@ -1152,10 +1139,7 @@ static int __init debug_vm_pgtable(void) pud_t *pudp, *saved_pudp; pmd_t *pmdp, *saved_pmdp, pmd; pgtable_t saved_ptep; - pgprot_t prot; - phys_addr_t paddr; unsigned long vaddr; - unsigned long pud_aligned; spinlock_t *ptl = NULL; int idx, ret; @@ -1164,7 +1148,6 @@ static int __init debug_vm_pgtable(void) if (ret) return ret; - prot = vm_get_page_prot(VMFLAGS); vaddr = get_random_vaddr(); mm = mm_alloc(); if (!mm) { @@ -1178,19 +1161,6 @@ static int __init debug_vm_pgtable(void) return 1; } - /* - * PFN for mapping at PTE level is determined from a standard kernel - * text symbol. But pfns for higher page table levels are derived by - * masking lower bits of this real pfn. These derived pfns might not - * exist on the platform but that does not really matter as pfn_pxx() - * helpers will still create appropriate entries for the test. This - * helps avoid large memory block allocations to be used for mapping - * at higher page table levels. 
- */ - paddr = __pa_symbol(&start_kernel); - - pud_aligned = (paddr & PUD_MASK) >> PAGE_SHIFT; - pgdp = pgd_offset(mm, vaddr); p4dp = p4d_alloc(mm, pgdp, vaddr); pudp = pud_alloc(mm, p4dp, vaddr); @@ -1282,11 +1252,11 @@ static int __init debug_vm_pgtable(void) pmd_populate_tests(&args); spin_unlock(ptl); - ptl = pud_lock(mm, pudp); - pud_clear_tests(mm, pudp); - pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot); - pud_huge_tests(pudp, pud_aligned, prot); - pud_populate_tests(mm, pudp, saved_pmdp); + ptl = pud_lock(args.mm, args.pudp); + pud_clear_tests(&args); + pud_advanced_tests(&args); + pud_huge_tests(&args); + pud_populate_tests(&args); spin_unlock(ptl); spin_lock(&mm->page_table_lock); From a55eb457c4a090afaa168d15dee41eb568567578 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:24 +1000 Subject: [PATCH 326/851] mm/debug_vm_pgtable: use struct pgtable_debug_args in PGD and P4D modifying tests This uses struct pgtable_debug_args in PGD/P4D modifying tests. No allocated huge page is used in these tests. Besides, the unused variables @saved_p4dp and @saved_pudp are dropped. 
Link: https://lkml.kernel.org/r/20210719130613.334901-11-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 86 +++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 48 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 57b7ead0708ba..5ebacc940b685 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -520,27 +520,26 @@ static void __init pud_populate_tests(struct pgtable_debug_args *args) { } #endif /* PAGETABLE_PUD_FOLDED */ #ifndef __PAGETABLE_P4D_FOLDED -static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp) +static void __init p4d_clear_tests(struct pgtable_debug_args *args) { - p4d_t p4d = READ_ONCE(*p4dp); + p4d_t p4d = READ_ONCE(*(args->p4dp)); - if (mm_pud_folded(mm)) + if (mm_pud_folded(args->mm)) return; pr_debug("Validating P4D clear\n"); p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); - WRITE_ONCE(*p4dp, p4d); - p4d_clear(p4dp); - p4d = READ_ONCE(*p4dp); + WRITE_ONCE(*(args->p4dp), p4d); + p4d_clear(args->p4dp); + p4d = READ_ONCE(*(args->p4dp)); WARN_ON(!p4d_none(p4d)); } -static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp, - pud_t *pudp) +static void __init p4d_populate_tests(struct pgtable_debug_args *args) { p4d_t p4d; - if (mm_pud_folded(mm)) + if (mm_pud_folded(args->mm)) return; pr_debug("Validating P4D populate\n"); @@ -548,34 +547,33 @@ static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp, * This entry points to next level page table page. * Hence this must not qualify as p4d_bad(). 
*/ - pud_clear(pudp); - p4d_clear(p4dp); - p4d_populate(mm, p4dp, pudp); - p4d = READ_ONCE(*p4dp); + pud_clear(args->pudp); + p4d_clear(args->p4dp); + p4d_populate(args->mm, args->p4dp, args->start_pudp); + p4d = READ_ONCE(*(args->p4dp)); WARN_ON(p4d_bad(p4d)); } -static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp) +static void __init pgd_clear_tests(struct pgtable_debug_args *args) { - pgd_t pgd = READ_ONCE(*pgdp); + pgd_t pgd = READ_ONCE(*(args->pgdp)); - if (mm_p4d_folded(mm)) + if (mm_p4d_folded(args->mm)) return; pr_debug("Validating PGD clear\n"); pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); - WRITE_ONCE(*pgdp, pgd); - pgd_clear(pgdp); - pgd = READ_ONCE(*pgdp); + WRITE_ONCE(*(args->pgdp), pgd); + pgd_clear(args->pgdp); + pgd = READ_ONCE(*(args->pgdp)); WARN_ON(!pgd_none(pgd)); } -static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp, - p4d_t *p4dp) +static void __init pgd_populate_tests(struct pgtable_debug_args *args) { pgd_t pgd; - if (mm_p4d_folded(mm)) + if (mm_p4d_folded(args->mm)) return; pr_debug("Validating PGD populate\n"); @@ -583,23 +581,17 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp, * This entry points to next level page table page. * Hence this must not qualify as pgd_bad(). 
*/ - p4d_clear(p4dp); - pgd_clear(pgdp); - pgd_populate(mm, pgdp, p4dp); - pgd = READ_ONCE(*pgdp); + p4d_clear(args->p4dp); + pgd_clear(args->pgdp); + pgd_populate(args->mm, args->pgdp, args->start_p4dp); + pgd = READ_ONCE(*(args->pgdp)); WARN_ON(pgd_bad(pgd)); } #else /* !__PAGETABLE_P4D_FOLDED */ -static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp) { } -static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp) { } -static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp, - pud_t *pudp) -{ -} -static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp, - p4d_t *p4dp) -{ -} +static void __init p4d_clear_tests(struct pgtable_debug_args *args) { } +static void __init pgd_clear_tests(struct pgtable_debug_args *args) { } +static void __init p4d_populate_tests(struct pgtable_debug_args *args) { } +static void __init pgd_populate_tests(struct pgtable_debug_args *args) { } #endif /* PAGETABLE_P4D_FOLDED */ static void __init pte_clear_tests(struct pgtable_debug_args *args) @@ -1135,8 +1127,8 @@ static int __init debug_vm_pgtable(void) struct vm_area_struct *vma; struct mm_struct *mm; pgd_t *pgdp; - p4d_t *p4dp, *saved_p4dp; - pud_t *pudp, *saved_pudp; + p4d_t *p4dp; + pud_t *pudp; pmd_t *pmdp, *saved_pmdp, pmd; pgtable_t saved_ptep; unsigned long vaddr; @@ -1180,8 +1172,6 @@ static int __init debug_vm_pgtable(void) * page table pages. 
*/ pmd = READ_ONCE(*pmdp); - saved_p4dp = p4d_offset(pgdp, 0UL); - saved_pudp = pud_offset(p4dp, 0UL); saved_pmdp = pmd_offset(pudp, 0UL); saved_ptep = pmd_pgtable(pmd); @@ -1259,15 +1249,15 @@ static int __init debug_vm_pgtable(void) pud_populate_tests(&args); spin_unlock(ptl); - spin_lock(&mm->page_table_lock); - p4d_clear_tests(mm, p4dp); - pgd_clear_tests(mm, pgdp); - p4d_populate_tests(mm, p4dp, saved_pudp); - pgd_populate_tests(mm, pgdp, saved_p4dp); - spin_unlock(&mm->page_table_lock); + spin_lock(&(args.mm->page_table_lock)); + p4d_clear_tests(&args); + pgd_clear_tests(&args); + p4d_populate_tests(&args); + pgd_populate_tests(&args); + spin_unlock(&(args.mm->page_table_lock)); - p4d_free(mm, saved_p4dp); - pud_free(mm, saved_pudp); + p4d_free(mm, p4d_offset(pgdp, 0UL)); + pud_free(mm, pud_offset(p4dp, 0UL)); pmd_free(mm, saved_pmdp); pte_free(mm, saved_ptep); From 023d1e9e0e824562b0e6c498d422f15946aa9d17 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:25 +1000 Subject: [PATCH 327/851] mm/debug_vm_pgtable: remove unused code The variables used by the old implementation aren't needed as we switched to "struct pgtable_debug_args". Let's remove them and related code in debug_vm_pgtable(). 
Link: https://lkml.kernel.org/r/20210719130613.334901-12-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 54 ------------------------------------------- 1 file changed, 54 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 5ebacc940b685..4f7bf1c9724ae 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -1124,14 +1124,6 @@ static int __init init_args(struct pgtable_debug_args *args) static int __init debug_vm_pgtable(void) { struct pgtable_debug_args args; - struct vm_area_struct *vma; - struct mm_struct *mm; - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp, *saved_pmdp, pmd; - pgtable_t saved_ptep; - unsigned long vaddr; spinlock_t *ptl = NULL; int idx, ret; @@ -1140,41 +1132,6 @@ static int __init debug_vm_pgtable(void) if (ret) return ret; - vaddr = get_random_vaddr(); - mm = mm_alloc(); - if (!mm) { - pr_err("mm_struct allocation failed\n"); - return 1; - } - - vma = vm_area_alloc(mm); - if (!vma) { - pr_err("vma allocation failed\n"); - return 1; - } - - pgdp = pgd_offset(mm, vaddr); - p4dp = p4d_alloc(mm, pgdp, vaddr); - pudp = pud_alloc(mm, p4dp, vaddr); - pmdp = pmd_alloc(mm, pudp, vaddr); - /* - * Allocate pgtable_t - */ - if (pte_alloc(mm, pmdp)) { - pr_err("pgtable allocation failed\n"); - return 1; - } - - /* - * Save all the page table page addresses as the page table - * entries will be used for testing with random or garbage - * values. These saved addresses will be used for freeing - * page table pages. 
- */ - pmd = READ_ONCE(*pmdp); - saved_pmdp = pmd_offset(pudp, 0UL); - saved_ptep = pmd_pgtable(pmd); - /* * Iterate over the protection_map[] to make sure that all * the basic page table transformation validations just hold @@ -1256,17 +1213,6 @@ static int __init debug_vm_pgtable(void) pgd_populate_tests(&args); spin_unlock(&(args.mm->page_table_lock)); - p4d_free(mm, p4d_offset(pgdp, 0UL)); - pud_free(mm, pud_offset(p4dp, 0UL)); - pmd_free(mm, saved_pmdp); - pte_free(mm, saved_ptep); - - vm_area_free(vma); - mm_dec_nr_puds(mm); - mm_dec_nr_pmds(mm); - mm_dec_nr_ptes(mm); - mmdrop(mm); - destroy_args(&args); return 0; } From 61fdc72b0082f1f5de8f441efd9a5b5f2e68b045 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 21 Jul 2021 11:41:26 +1000 Subject: [PATCH 328/851] mm/debug_vm_pgtable: fix corrupted page flag In page table entry modifying tests, set_xxx_at() are used to populate the page table entries. On ARM64, PG_arch_1 is set to the target page flag if execution permission is given. The page flag is kept when the page is free'd to buddy's free area list. However, it will trigger page checking failure when it's pulled from the buddy's free area list, as the following warning messages indicate. BUG: Bad page state in process memhog pfn:08000 page:0000000015c0a628 refcount:0 mapcount:0 \ mapping:0000000000000000 index:0x1 pfn:0x8000 flags: 0x7ffff8000000800(arch_1|node=0|zone=0|lastcpupid=0xfffff) raw: 07ffff8000000800 dead000000000100 dead000000000122 0000000000000000 raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag(s) set This fixes the issue by clearing PG_arch_1 through flush_dcache_page() after set_xxx_at() is called. 
Link: https://lkml.kernel.org/r/20210719130613.334901-13-gshan@redhat.com Signed-off-by: Gavin Shan Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 4f7bf1c9724ae..de9def6f7ce16 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -29,6 +29,8 @@ #include #include #include + +#include #include #include @@ -118,6 +120,7 @@ static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx) static void __init pte_advanced_tests(struct pgtable_debug_args *args) { + struct page *page; pte_t pte; /* @@ -127,13 +130,16 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args) */ pr_debug("Validating PTE advanced\n"); - if (args->pte_pfn == ULONG_MAX) { + page = (args->pte_pfn != ULONG_MAX) ? + pfn_to_page(args->pte_pfn) : NULL; + if (!page) { pr_debug("%s: Skipped\n", __func__); return; } pte = pfn_pte(args->pte_pfn, args->page_prot); set_pte_at(args->mm, args->vaddr, args->ptep, pte); + flush_dcache_page(page); ptep_set_wrprotect(args->mm, args->vaddr, args->ptep); pte = ptep_get(args->ptep); WARN_ON(pte_write(pte)); @@ -145,6 +151,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args) pte = pte_wrprotect(pte); pte = pte_mkclean(pte); set_pte_at(args->mm, args->vaddr, args->ptep, pte); + flush_dcache_page(page); pte = pte_mkwrite(pte); pte = pte_mkdirty(pte); ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1); @@ -157,6 +164,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args) pte = pfn_pte(args->pte_pfn, args->page_prot); pte = pte_mkyoung(pte); set_pte_at(args->mm, args->vaddr, args->ptep, pte); + flush_dcache_page(page); ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep); pte = ptep_get(args->ptep); 
WARN_ON(pte_young(pte)); @@ -215,6 +223,7 @@ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { + struct page *page; pmd_t pmd; unsigned long vaddr = (args->vaddr & HPAGE_PMD_MASK); @@ -222,7 +231,9 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args) return; pr_debug("Validating PMD advanced\n"); - if (args->pmd_pfn == ULONG_MAX) { + page = (args->pmd_pfn != ULONG_MAX) ? + pfn_to_page(args->pmd_pfn) : NULL; + if (!page) { pr_debug("%s: Skipped\n", __func__); return; } @@ -231,6 +242,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args) pmd = pfn_pmd(args->pmd_pfn, args->page_prot); set_pmd_at(args->mm, vaddr, args->pmdp, pmd); + flush_dcache_page(page); pmdp_set_wrprotect(args->mm, vaddr, args->pmdp); pmd = READ_ONCE(*(args->pmdp)); WARN_ON(pmd_write(pmd)); @@ -242,6 +254,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args) pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); set_pmd_at(args->mm, vaddr, args->pmdp, pmd); + flush_dcache_page(page); pmd = pmd_mkwrite(pmd); pmd = pmd_mkdirty(pmd); pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1); @@ -254,6 +267,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args) pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot)); pmd = pmd_mkyoung(pmd); set_pmd_at(args->mm, vaddr, args->pmdp, pmd); + flush_dcache_page(page); pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp); pmd = READ_ONCE(*(args->pmdp)); WARN_ON(pmd_young(pmd)); @@ -340,6 +354,7 @@ static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) static void __init pud_advanced_tests(struct pgtable_debug_args *args) { + struct page *page; unsigned long vaddr = (args->vaddr & HPAGE_PUD_MASK); pud_t pud; @@ -347,13 +362,16 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) return; pr_debug("Validating PUD advanced\n"); - if 
(args->pud_pfn == ULONG_MAX) { + page = (args->pud_pfn != ULONG_MAX) ? + pfn_to_page(args->pud_pfn) : NULL; + if (!page) { pr_debug("%s: Skipped\n", __func__); return; } pud = pfn_pud(args->pud_pfn, args->page_prot); set_pud_at(args->mm, vaddr, args->pudp, pud); + flush_dcache_page(page); pudp_set_wrprotect(args->mm, vaddr, args->pudp); pud = READ_ONCE(*(args->pudp)); WARN_ON(pud_write(pud)); @@ -367,6 +385,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) pud = pud_wrprotect(pud); pud = pud_mkclean(pud); set_pud_at(args->mm, vaddr, args->pudp, pud); + flush_dcache_page(page); pud = pud_mkwrite(pud); pud = pud_mkdirty(pud); pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1); @@ -382,6 +401,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args) pud = pfn_pud(args->pud_pfn, args->page_prot); pud = pud_mkyoung(pud); set_pud_at(args->mm, vaddr, args->pudp, pud); + flush_dcache_page(page); pudp_test_and_clear_young(args->vma, vaddr, args->pudp); pud = READ_ONCE(*(args->pudp)); WARN_ON(pud_young(pud)); @@ -596,10 +616,13 @@ static void __init pgd_populate_tests(struct pgtable_debug_args *args) { } static void __init pte_clear_tests(struct pgtable_debug_args *args) { + struct page *page; pte_t pte; pr_debug("Validating PTE clear\n"); - if (args->pte_pfn == ULONG_MAX) { + page = (args->pte_pfn != ULONG_MAX) ? 
+ pfn_to_page(args->pte_pfn) : NULL; + if (!page) { pr_debug("%s: Skipped\n", __func__); return; } @@ -609,6 +632,7 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args) pte = __pte(pte_val(pte) | RANDOM_ORVALUE); #endif set_pte_at(args->mm, args->vaddr, args->ptep, pte); + flush_dcache_page(page); barrier(); pte_clear(args->mm, args->vaddr, args->ptep); pte = ptep_get(args->ptep); From a1393e0e5383b03dadeb37b12c5ba76ba11d84d2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 21 Jul 2021 11:41:27 +1000 Subject: [PATCH 329/851] mm: mark idle page tracking as BROKEN In discussion with other MM developers around how idle page tracking should be fixed for transparent huge pages, several expressed the opinion that it should be removed as it is inefficient at accomplishing the job that it is supposed to, and we have better mechanisms (eg uffd) for accomplishing the same goals these days. Mark the feature as BROKEN for now and we can remove it entirely in a few months if nobody complains. It is not enabled by Android, ChromeOS, Debian, Fedora or SUSE. Red Hat enabled it with RHEL-8.1 and UEK followed suit, but I have been unable to find why RHEL enabled it. Link: https://lkml.kernel.org/r/20210612000714.775825-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Yu Zhao Acked-by: Kirill A. 
Shutemov SeongJae Park Cc: Heiko Carstens Cc: Rafael Aquini Cc: Vlastimil Babka Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index 40a9bfcd5062e..5dc28e9205e00 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -744,7 +744,7 @@ config DEFERRED_STRUCT_PAGE_INIT config IDLE_PAGE_TRACKING bool "Enable idle page tracking" - depends on SYSFS && MMU + depends on SYSFS && MMU && BROKEN select PAGE_EXTENSION if !64BIT help This feature allows to estimate the amount of user pages that have From 4dd7a4fe832141ffbbaad30bf5d936c04f6b7e80 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jul 2021 11:41:28 +1000 Subject: [PATCH 330/851] writeback: track number of inodes under writeback Patch series "writeback: Fix bandwidth estimates", v4. Fix estimate of writeback throughput when device is not fully busy doing writeback. Michael Stapelberg has reported that such workload (e.g. generated by linking) tends to push estimated throughput down to 0 and as a result writeback on the device is practically stalled. The first three patches fix the reported issue, the remaining two patches are unrelated cleanups of problems I've noticed when reading the code. This patch (of 4): Track number of inodes under writeback for each bdi_writeback structure. We will use this to decide whether wb does any IO and so we can estimate its writeback throughput. In principle we could use number of pages under writeback (WB_WRITEBACK counter) for this however normal percpu counter reads are too inaccurate for our purposes and summing the counter is too expensive. 
Link: https://lkml.kernel.org/r/20210713104519.16394-1-jack@suse.cz Link: https://lkml.kernel.org/r/20210713104716.22868-1-jack@suse.cz Signed-off-by: Jan Kara Cc: Wu Fengguang Cc: Michael Stapelberg Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/fs-writeback.c | 5 +++++ include/linux/backing-dev-defs.h | 1 + mm/backing-dev.c | 1 + mm/page-writeback.c | 22 ++++++++++++++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4c33705489825..7439ecd44ac9e 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -406,6 +406,11 @@ static bool inode_do_switch_wbs(struct inode *inode, inc_wb_stat(new_wb, WB_WRITEBACK); } + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { + atomic_dec(&old_wb->writeback_inodes); + atomic_inc(&new_wb->writeback_inodes); + } + wb_get(new_wb); /* diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 1d7edad9914fc..06fb8e13f6bc5 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -116,6 +116,7 @@ struct bdi_writeback { struct list_head b_dirty_time; /* time stamps are dirty */ spinlock_t list_lock; /* protects the b_* lists */ + atomic_t writeback_inodes; /* number of inodes under writeback */ struct percpu_counter stat[NR_WB_STAT_ITEMS]; unsigned long congested; /* WB_[a]sync_congested flags */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f5561ea7d90ad..b4c707ddedb1b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -293,6 +293,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, INIT_LIST_HEAD(&wb->b_dirty_time); spin_lock_init(&wb->list_lock); + atomic_set(&wb->writeback_inodes, 0); wb->bw_time_stamp = jiffies; wb->balanced_dirty_ratelimit = INIT_BW; wb->dirty_ratelimit = INIT_BW; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 9f63548f247c3..e1aa1c9d8e362 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2731,6 
+2731,16 @@ int clear_page_dirty_for_io(struct page *page) } EXPORT_SYMBOL(clear_page_dirty_for_io); +static void wb_inode_writeback_start(struct bdi_writeback *wb) +{ + atomic_inc(&wb->writeback_inodes); +} + +static void wb_inode_writeback_end(struct bdi_writeback *wb) +{ + atomic_dec(&wb->writeback_inodes); +} + int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); @@ -2752,6 +2762,9 @@ int test_clear_page_writeback(struct page *page) dec_wb_stat(wb, WB_WRITEBACK); __wb_writeout_inc(wb); + if (!mapping_tagged(mapping, + PAGECACHE_TAG_WRITEBACK)) + wb_inode_writeback_end(wb); } } @@ -2794,8 +2807,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write) PAGECACHE_TAG_WRITEBACK); xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); - if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) - inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK); + if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { + struct bdi_writeback *wb = inode_to_wb(inode); + + inc_wb_stat(wb, WB_WRITEBACK); + if (!on_wblist) + wb_inode_writeback_start(wb); + } /* * We can come through here when swapping anonymous From ed5cbc697a363ada636e2688ac79d42b49a56ebf Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jul 2021 11:41:29 +1000 Subject: [PATCH 331/851] writeback: reliably update bandwidth estimation Currently we trigger writeback bandwidth estimation from balance_dirty_pages() and from wb_writeback(). However neither of these need to trigger when the system is relatively idle and writeback is triggered e.g. from fsync(2). Make sure writeback estimates happen reliably by triggering them from do_writepages(). 
Link: https://lkml.kernel.org/r/20210713104716.22868-2-jack@suse.cz Signed-off-by: Jan Kara Cc: Michael Stapelberg Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/fs-writeback.c | 3 --- include/linux/backing-dev.h | 19 ++++++++++++++++++ include/linux/writeback.h | 1 - mm/page-writeback.c | 39 +++++++++++++++++++++++++------------ 4 files changed, 46 insertions(+), 16 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7439ecd44ac9e..867984e778c3d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2004,7 +2004,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, static long wb_writeback(struct bdi_writeback *wb, struct wb_writeback_work *work) { - unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; unsigned long dirtied_before = jiffies; struct inode *inode; @@ -2058,8 +2057,6 @@ static long wb_writeback(struct bdi_writeback *wb, progress = __writeback_inodes_wb(wb, work); trace_writeback_written(wb, work); - wb_update_bandwidth(wb, wb_start); - /* * Did we write something? Try for more * diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 44df4fcef65c1..a5d7d625dcc6f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -288,6 +288,17 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) return inode->i_wb; } +static inline struct bdi_writeback *inode_to_wb_wbc( + struct inode *inode, + struct writeback_control *wbc) +{ + /* + * If wbc does not have inode attached, it means cgroup writeback was + * disabled when wbc started. Just use the default wb in that case. + */ + return wbc->wb ? 
wbc->wb : &inode_to_bdi(inode)->wb; +} + /** * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction * @inode: target inode @@ -366,6 +377,14 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) return &inode_to_bdi(inode)->wb; } +static inline struct bdi_writeback *inode_to_wb_wbc( + struct inode *inode, + struct writeback_control *wbc) +{ + return inode_to_wb(inode); +} + + static inline struct bdi_writeback * unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 667e86cfbdcfe..2480322c06a76 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -379,7 +379,6 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); -void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); void balance_dirty_pages_ratelimited(struct address_space *mapping); bool wb_over_bg_thresh(struct bdi_writeback *wb); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e1aa1c9d8e362..e4a381b8944b6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1332,7 +1332,6 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, struct dirty_throttle_control *mdtc, - unsigned long start_time, bool update_ratelimit) { struct bdi_writeback *wb = gdtc->wb; @@ -1352,13 +1351,6 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]); written = percpu_counter_read(&wb->stat[WB_WRITTEN]); - /* - * Skip quiet periods when disk bandwidth is under-utilized. 
- * (at least 1s idle time between two flusher runs) - */ - if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time)) - goto snapshot; - if (update_ratelimit) { domain_update_bandwidth(gdtc, now); wb_update_dirty_ratelimit(gdtc, dirtied, elapsed); @@ -1374,17 +1366,36 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, } wb_update_write_bandwidth(wb, elapsed, written); -snapshot: wb->dirtied_stamp = dirtied; wb->written_stamp = written; wb->bw_time_stamp = now; } -void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time) +static void wb_update_bandwidth(struct bdi_writeback *wb) { struct dirty_throttle_control gdtc = { GDTC_INIT(wb) }; - __wb_update_bandwidth(&gdtc, NULL, start_time, false); + spin_lock(&wb->list_lock); + __wb_update_bandwidth(&gdtc, NULL, false); + spin_unlock(&wb->list_lock); +} + +/* Interval after which we consider wb idle and don't estimate bandwidth */ +#define WB_BANDWIDTH_IDLE_JIF (HZ) + +static void wb_bandwidth_estimate_start(struct bdi_writeback *wb) +{ + unsigned long now = jiffies; + unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp); + + if (elapsed > WB_BANDWIDTH_IDLE_JIF && + !atomic_read(&wb->writeback_inodes)) { + spin_lock(&wb->list_lock); + wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED); + wb->written_stamp = wb_stat(wb, WB_WRITTEN); + wb->bw_time_stamp = now; + spin_unlock(&wb->list_lock); + } } /* @@ -1713,7 +1724,7 @@ static void balance_dirty_pages(struct bdi_writeback *wb, if (time_is_before_jiffies(wb->bw_time_stamp + BANDWIDTH_INTERVAL)) { spin_lock(&wb->list_lock); - __wb_update_bandwidth(gdtc, mdtc, start_time, true); + __wb_update_bandwidth(gdtc, mdtc, true); spin_unlock(&wb->list_lock); } @@ -2347,9 +2358,12 @@ EXPORT_SYMBOL(generic_writepages); int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; + struct bdi_writeback *wb; if (wbc->nr_to_write <= 0) return 0; + wb = inode_to_wb_wbc(mapping->host, wbc); + 
wb_bandwidth_estimate_start(wb); while (1) { if (mapping->a_ops->writepages) ret = mapping->a_ops->writepages(mapping, wbc); @@ -2360,6 +2374,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) cond_resched(); congestion_wait(BLK_RW_ASYNC, HZ/50); } + wb_update_bandwidth(wb); return ret; } From e4165cfcd3399b51fffed7960f27aeb1efe57c0b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jul 2021 11:41:30 +1000 Subject: [PATCH 332/851] writeback: fix bandwidth estimate for spiky workload Michael Stapelberg has reported that for workload with short big spikes of writes (GCC linker seem to trigger this frequently) the write throughput is heavily underestimated and tends to steadily sink until it reaches zero. This has rather bad impact on writeback throttling (causing stalls). The problem is that writeback throughput estimate gets updated at most once per 200 ms. One update happens early after we submit pages for writeback (at that point writeout of only small fraction of pages is completed and thus observed throughput is tiny). Next update happens only during the next write spike (updates happen only from inode writeback and dirty throttling code) and if that is more than 1s after previous spike, we decide system was idle and just ignore whatever was written until this moment. Fix the problem by making sure writeback throughput estimate is also updated shortly after writeback completes to get reasonable estimate of throughput for spiky workloads. 
Link: https://lore.kernel.org/lkml/20210617095309.3542373-1-stapelberg+linux@google.com Link: https://lkml.kernel.org/r/20210713104716.22868-3-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Michael Stapelberg Tested-by: Michael Stapelberg Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/backing-dev-defs.h | 1 + include/linux/writeback.h | 1 + mm/backing-dev.c | 10 +++++++++ mm/page-writeback.c | 35 +++++++++++++++++--------------- 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 06fb8e13f6bc5..33207004cfded 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -143,6 +143,7 @@ struct bdi_writeback { spinlock_t work_lock; /* protects work_list & dwork scheduling */ struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ unsigned long dirty_sleep; /* last wait */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 2480322c06a76..cbaef099645ec 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -379,6 +379,7 @@ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); +void wb_update_bandwidth(struct bdi_writeback *wb); void balance_dirty_pages_ratelimited(struct address_space *mapping); bool wb_over_bg_thresh(struct bdi_writeback *wb); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b4c707ddedb1b..6122c78ce9146 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -271,6 +271,14 @@ void wb_wakeup_delayed(struct bdi_writeback *wb) spin_unlock_bh(&wb->work_lock); } +static void wb_update_bandwidth_workfn(struct work_struct *work) +{ + struct bdi_writeback *wb = 
container_of(to_delayed_work(work), + struct bdi_writeback, bw_dwork); + + wb_update_bandwidth(wb); +} + /* * Initial write bandwidth: 100 MB/s */ @@ -303,6 +311,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, spin_lock_init(&wb->work_lock); INIT_LIST_HEAD(&wb->work_list); INIT_DELAYED_WORK(&wb->dwork, wb_workfn); + INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn); wb->dirty_sleep = jiffies; err = fprop_local_init_percpu(&wb->completions, gfp); @@ -351,6 +360,7 @@ static void wb_shutdown(struct bdi_writeback *wb) mod_delayed_work(bdi_wq, &wb->dwork, 0); flush_delayed_work(&wb->dwork); WARN_ON(!list_empty(&wb->work_list)); + flush_delayed_work(&wb->bw_dwork); } static void wb_exit(struct bdi_writeback *wb) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e4a381b8944b6..eb55c8882db01 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1340,14 +1340,7 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, unsigned long dirtied; unsigned long written; - lockdep_assert_held(&wb->list_lock); - - /* - * rate-limit, only update once every 200ms. 
- */ - if (elapsed < BANDWIDTH_INTERVAL) - return; - + spin_lock(&wb->list_lock); dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]); written = percpu_counter_read(&wb->stat[WB_WRITTEN]); @@ -1369,15 +1362,14 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, wb->dirtied_stamp = dirtied; wb->written_stamp = written; wb->bw_time_stamp = now; + spin_unlock(&wb->list_lock); } -static void wb_update_bandwidth(struct bdi_writeback *wb) +void wb_update_bandwidth(struct bdi_writeback *wb) { struct dirty_throttle_control gdtc = { GDTC_INIT(wb) }; - spin_lock(&wb->list_lock); __wb_update_bandwidth(&gdtc, NULL, false); - spin_unlock(&wb->list_lock); } /* Interval after which we consider wb idle and don't estimate bandwidth */ @@ -1722,11 +1714,8 @@ static void balance_dirty_pages(struct bdi_writeback *wb, wb->dirty_exceeded = 1; if (time_is_before_jiffies(wb->bw_time_stamp + - BANDWIDTH_INTERVAL)) { - spin_lock(&wb->list_lock); + BANDWIDTH_INTERVAL)) __wb_update_bandwidth(gdtc, mdtc, true); - spin_unlock(&wb->list_lock); - } /* throttle according to the chosen dtc */ dirty_ratelimit = wb->dirty_ratelimit; @@ -2374,7 +2363,13 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) cond_resched(); congestion_wait(BLK_RW_ASYNC, HZ/50); } - wb_update_bandwidth(wb); + /* + * Usually few pages are written by now from those we've just submitted + * but if there's constant writeback being submitted, this makes sure + * writeback bandwidth is updated once in a while. + */ + if (time_is_before_jiffies(wb->bw_time_stamp + BANDWIDTH_INTERVAL)) + wb_update_bandwidth(wb); return ret; } @@ -2754,6 +2749,14 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb) static void wb_inode_writeback_end(struct bdi_writeback *wb) { atomic_dec(&wb->writeback_inodes); + /* + * Make sure estimate of writeback throughput gets updated after + * writeback completed. 
We delay the update by BANDWIDTH_INTERVAL + * (which is the interval other bandwidth updates use for batching) so + * that if multiple inodes end writeback at a similar time, they get + * batched into one bandwidth update. + */ + queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL); } int test_clear_page_writeback(struct page *page) From 2db75123e2f9b06e86f9711bafd6f95abe4e9876 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jul 2021 11:41:30 +1000 Subject: [PATCH 333/851] writeback: rename domain_update_bandwidth() Rename domain_update_bandwidth() to domain_update_dirty_limit(). The original name is a misnomer. The function has nothing to do with a bandwidth, it updates dirty limits. Link: https://lkml.kernel.org/r/20210713104716.22868-4-jack@suse.cz Signed-off-by: Jan Kara Cc: Michael Stapelberg Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page-writeback.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index eb55c8882db01..ae5ace94e86ee 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1147,8 +1147,8 @@ static void update_dirty_limit(struct dirty_throttle_control *dtc) dom->dirty_limit = limit; } -static void domain_update_bandwidth(struct dirty_throttle_control *dtc, - unsigned long now) +static void domain_update_dirty_limit(struct dirty_throttle_control *dtc, + unsigned long now) { struct wb_domain *dom = dtc_dom(dtc); @@ -1345,7 +1345,7 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, written = percpu_counter_read(&wb->stat[WB_WRITTEN]); if (update_ratelimit) { - domain_update_bandwidth(gdtc, now); + domain_update_dirty_limit(gdtc, now); wb_update_dirty_ratelimit(gdtc, dirtied, elapsed); /* @@ -1353,7 +1353,7 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, * compiler has no way to figure that out. Help it. 
*/ if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) { - domain_update_bandwidth(mdtc, now); + domain_update_dirty_limit(mdtc, now); wb_update_dirty_ratelimit(mdtc, dirtied, elapsed); } } From 0911d9f9dc317feb94500ed77544591063011a3a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jul 2021 11:41:31 +1000 Subject: [PATCH 334/851] writeback: use READ_ONCE for unlocked reads of writeback stats We do some unlocked reads of writeback statistics like avg_write_bandwidth, dirty_ratelimit, or bw_time_stamp. Generally we are fine with getting somewhat out-of-date values but actually getting different values in various parts of the functions because the compiler decided to reload value from original memory location could confuse calculations. Use READ_ONCE for these unlocked accesses and WRITE_ONCE for the updates to be on the safe side. Link: https://lkml.kernel.org/r/20210713104716.22868-5-jack@suse.cz Signed-off-by: Jan Kara Cc: Michael Stapelberg Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page-writeback.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ae5ace94e86ee..57b98ea365e27 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -183,7 +183,7 @@ static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb) static void wb_min_max_ratio(struct bdi_writeback *wb, unsigned long *minp, unsigned long *maxp) { - unsigned long this_bw = wb->avg_write_bandwidth; + unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth); unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); unsigned long long min = wb->bdi->min_ratio; unsigned long long max = wb->bdi->max_ratio; @@ -892,7 +892,7 @@ static long long pos_ratio_polynom(unsigned long setpoint, static void wb_position_ratio(struct dirty_throttle_control *dtc) { struct bdi_writeback *wb = dtc->wb; - unsigned long write_bw = 
wb->avg_write_bandwidth; + unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth); unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh); unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh); unsigned long wb_thresh = dtc->wb_thresh; @@ -1115,7 +1115,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, &wb->bdi->tot_write_bandwidth) <= 0); } wb->write_bandwidth = bw; - wb->avg_write_bandwidth = avg; + WRITE_ONCE(wb->avg_write_bandwidth, avg); } static void update_dirty_limit(struct dirty_throttle_control *dtc) @@ -1324,7 +1324,7 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc, else dirty_ratelimit -= step; - wb->dirty_ratelimit = max(dirty_ratelimit, 1UL); + WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL)); wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit; trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit); @@ -1336,11 +1336,12 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, { struct bdi_writeback *wb = gdtc->wb; unsigned long now = jiffies; - unsigned long elapsed = now - wb->bw_time_stamp; + unsigned long elapsed; unsigned long dirtied; unsigned long written; spin_lock(&wb->list_lock); + elapsed = now - wb->bw_time_stamp; dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]); written = percpu_counter_read(&wb->stat[WB_WRITTEN]); @@ -1361,7 +1362,7 @@ static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc, wb->dirtied_stamp = dirtied; wb->written_stamp = written; - wb->bw_time_stamp = now; + WRITE_ONCE(wb->bw_time_stamp, now); spin_unlock(&wb->list_lock); } @@ -1385,7 +1386,7 @@ static void wb_bandwidth_estimate_start(struct bdi_writeback *wb) spin_lock(&wb->list_lock); wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED); wb->written_stamp = wb_stat(wb, WB_WRITTEN); - wb->bw_time_stamp = now; + WRITE_ONCE(wb->bw_time_stamp, now); spin_unlock(&wb->list_lock); } } @@ -1410,7 +1411,7 @@ static unsigned long 
dirty_poll_interval(unsigned long dirty, static unsigned long wb_max_pause(struct bdi_writeback *wb, unsigned long wb_dirty) { - unsigned long bw = wb->avg_write_bandwidth; + unsigned long bw = READ_ONCE(wb->avg_write_bandwidth); unsigned long t; /* @@ -1432,8 +1433,8 @@ static long wb_min_pause(struct bdi_writeback *wb, unsigned long dirty_ratelimit, int *nr_dirtied_pause) { - long hi = ilog2(wb->avg_write_bandwidth); - long lo = ilog2(wb->dirty_ratelimit); + long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth)); + long lo = ilog2(READ_ONCE(wb->dirty_ratelimit)); long t; /* target pause */ long pause; /* estimated next pause */ int pages; /* target nr_dirtied_pause */ @@ -1713,12 +1714,12 @@ static void balance_dirty_pages(struct bdi_writeback *wb, if (dirty_exceeded && !wb->dirty_exceeded) wb->dirty_exceeded = 1; - if (time_is_before_jiffies(wb->bw_time_stamp + + if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + BANDWIDTH_INTERVAL)) __wb_update_bandwidth(gdtc, mdtc, true); /* throttle according to the chosen dtc */ - dirty_ratelimit = wb->dirty_ratelimit; + dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit); task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >> RATELIMIT_CALC_SHIFT; max_pause = wb_max_pause(wb, sdtc->wb_dirty); @@ -2368,7 +2369,8 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) * but if there's constant writeback being submitted, this makes sure * writeback bandwidth is updated once in a while. 
*/ - if (time_is_before_jiffies(wb->bw_time_stamp + BANDWIDTH_INTERVAL)) + if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) + + BANDWIDTH_INTERVAL)) wb_update_bandwidth(wb); return ret; } From 36d8e52bdb0157d550247277047f96130d01fbde Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 21 Jul 2021 11:41:32 +1000 Subject: [PATCH 335/851] mm: remove irqsave/restore locking from contexts with irqs enabled The page cache deletion paths all have interrupts enabled, so no need to use irqsave/irqrestore locking variants. They used to have irqs disabled by the memcg lock added in commit c4843a7593a9 ("memcg: add per cgroup dirty page accounting"), but that has since been replaced by memcg taking the page lock instead, commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API"). Link: https://lkml.kernel.org/r/20210614211904.14420-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/filemap.c | 15 ++++++--------- mm/truncate.c | 8 +++----- mm/vmscan.c | 9 ++++----- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index d1458ecf2f51e..4926f16ec52db 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -258,12 +258,11 @@ static void page_cache_free_page(struct address_space *mapping, void delete_from_page_cache(struct page *page) { struct address_space *mapping = page_mapping(page); - unsigned long flags; BUG_ON(!PageLocked(page)); - xa_lock_irqsave(&mapping->i_pages, flags); + xa_lock_irq(&mapping->i_pages); __delete_from_page_cache(page, NULL); - xa_unlock_irqrestore(&mapping->i_pages, flags); + xa_unlock_irq(&mapping->i_pages); page_cache_free_page(mapping, page); } @@ -335,19 +334,18 @@ void delete_from_page_cache_batch(struct address_space *mapping, struct pagevec *pvec) { int i; - unsigned long flags; if (!pagevec_count(pvec)) return; - xa_lock_irqsave(&mapping->i_pages, flags); + xa_lock_irq(&mapping->i_pages); for (i = 0; i < pagevec_count(pvec); i++) { 
trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); unaccount_page_cache_page(mapping, pvec->pages[i]); } page_cache_delete_batch(mapping, pvec); - xa_unlock_irqrestore(&mapping->i_pages, flags); + xa_unlock_irq(&mapping->i_pages); for (i = 0; i < pagevec_count(pvec); i++) page_cache_free_page(mapping, pvec->pages[i]); @@ -821,7 +819,6 @@ void replace_page_cache_page(struct page *old, struct page *new) void (*freepage)(struct page *) = mapping->a_ops->freepage; pgoff_t offset = old->index; XA_STATE(xas, &mapping->i_pages, offset); - unsigned long flags; VM_BUG_ON_PAGE(!PageLocked(old), old); VM_BUG_ON_PAGE(!PageLocked(new), new); @@ -833,7 +830,7 @@ void replace_page_cache_page(struct page *old, struct page *new) mem_cgroup_migrate(old, new); - xas_lock_irqsave(&xas, flags); + xas_lock_irq(&xas); xas_store(&xas, new); old->mapping = NULL; @@ -846,7 +843,7 @@ void replace_page_cache_page(struct page *old, struct page *new) __dec_lruvec_page_state(old, NR_SHMEM); if (PageSwapBacked(new)) __inc_lruvec_page_state(new, NR_SHMEM); - xas_unlock_irqrestore(&xas, flags); + xas_unlock_irq(&xas); if (freepage) freepage(old); put_page(old); diff --git a/mm/truncate.c b/mm/truncate.c index 234ddd879caa1..2adff8f800bbc 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -560,21 +560,19 @@ void invalidate_mapping_pagevec(struct address_space *mapping, static int invalidate_complete_page2(struct address_space *mapping, struct page *page) { - unsigned long flags; - if (page->mapping != mapping) return 0; if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; - xa_lock_irqsave(&mapping->i_pages, flags); + xa_lock_irq(&mapping->i_pages); if (PageDirty(page)) goto failed; BUG_ON(page_has_private(page)); __delete_from_page_cache(page, NULL); - xa_unlock_irqrestore(&mapping->i_pages, flags); + xa_unlock_irq(&mapping->i_pages); if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); @@ -582,7 +580,7 @@ invalidate_complete_page2(struct address_space 
*mapping, struct page *page) put_page(page); /* pagecache ref */ return 1; failed: - xa_unlock_irqrestore(&mapping->i_pages, flags); + xa_unlock_irq(&mapping->i_pages); return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 4620df62f0ffa..6f784e1653821 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1049,14 +1049,13 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) static int __remove_mapping(struct address_space *mapping, struct page *page, bool reclaimed, struct mem_cgroup *target_memcg) { - unsigned long flags; int refcount; void *shadow = NULL; BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); - xa_lock_irqsave(&mapping->i_pages, flags); + xa_lock_irq(&mapping->i_pages); /* * The non racy check for a busy page. * @@ -1097,7 +1096,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (reclaimed && !mapping_exiting(mapping)) shadow = workingset_eviction(page, target_memcg); __delete_from_swap_cache(page, swap, shadow); - xa_unlock_irqrestore(&mapping->i_pages, flags); + xa_unlock_irq(&mapping->i_pages); put_swap_page(page, swap); } else { void (*freepage)(struct page *); @@ -1123,7 +1122,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, !mapping_exiting(mapping) && !dax_mapping(mapping)) shadow = workingset_eviction(page, target_memcg); __delete_from_page_cache(page, shadow); - xa_unlock_irqrestore(&mapping->i_pages, flags); + xa_unlock_irq(&mapping->i_pages); if (freepage != NULL) freepage(page); @@ -1132,7 +1131,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, return 1; cannot_free: - xa_unlock_irqrestore(&mapping->i_pages, flags); + xa_unlock_irq(&mapping->i_pages); return 0; } From d91c7d5c667f793375318439c3dc4cfdf7a40637 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 21 Jul 2021 11:41:33 +1000 Subject: [PATCH 336/851] fs: drop_caches: fix skipping over shadow cache inodes When drop_caches truncates the 
page cache in an inode it also includes any shadow entries for evicted pages. However, there is a preliminary check on whether the inode has pages: if it has *only* shadow entries, it will skip running truncation on the inode and leave it behind. Fix the check to mapping_empty(), such that it runs truncation on any inode that has cache entries at all. Link: https://lkml.kernel.org/r/20210614211904.14420-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Roman Gushchin Acked-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/drop_caches.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index f00fcc4a4f721..e619c31b6bd92 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -3,6 +3,7 @@ * Implement the manual drop-all-pagecache function */ +#include #include #include #include @@ -27,7 +28,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) * we need to reschedule to avoid softlockups. */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || - (inode->i_mapping->nrpages == 0 && !need_resched())) { + (mapping_empty(inode->i_mapping) && !need_resched())) { spin_unlock(&inode->i_lock); continue; } From 3351123e0d0ccaefe8eb4229b7857a7609847a59 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 21 Jul 2021 11:41:34 +1000 Subject: [PATCH 337/851] fs: inode: count invalidated shadow pages in pginodesteal pginodesteal is supposed to capture the impact that inode reclaim has on the page cache state. Currently, it doesn't consider shadow pages that get dropped this way, even though this can have a significant impact on paging behavior, memory pressure calculations etc. To improve visibility into these effects, make sure shadow pages get counted when they get dropped through inode reclaim. 
This changes the return value semantics of invalidate_mapping_pages() slightly, but the only two users are the inode shrinker itself and a usb driver that logs it for debugging purposes. Link: https://lkml.kernel.org/r/20210614211904.14420-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/inode.c | 2 +- mm/truncate.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index c93500d84264d..8830a727b0af0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -768,7 +768,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, return LRU_ROTATE; } - if (inode_has_buffers(inode) || inode->i_data.nrpages) { + if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(lru_lock); diff --git a/mm/truncate.c b/mm/truncate.c index 2adff8f800bbc..787b35f2cdc16 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -483,8 +483,9 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping, index = indices[i]; if (xa_is_value(page)) { - invalidate_exceptional_entry(mapping, index, - page); + count += invalidate_exceptional_entry(mapping, + index, + page); continue; } index += thp_nr_pages(page) - 1; @@ -512,19 +513,18 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping, } /** - * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode - * @mapping: the address_space which holds the pages to invalidate + * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode + * @mapping: the address_space which holds the cache to invalidate * @start: the offset 'from' which to invalidate * @end: the offset 'to' which to invalidate (inclusive) * - * This function only removes the unlocked pages, if you want to - * remove all the pages of one inode, you must call truncate_inode_pages. 
+ * This function removes pages that are clean, unmapped and unlocked, + * as well as shadow entries. It will not block on IO activity. * - * invalidate_mapping_pages() will not block on IO activity. It will not - * invalidate pages which are dirty, locked, under writeback or mapped into - * pagetables. + * If you want to remove all the pages of one inode, regardless of + * their use and writeback state, use truncate_inode_pages(). * - * Return: the number of the pages that were invalidated + * Return: the number of the cache entries that were invalidated */ unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end) From 935ce608b674009b9ecb4a5238063d6b81929aa4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 21 Jul 2021 11:41:35 +1000 Subject: [PATCH 338/851] vfs: keep inodes with page cache off the inode shrinker LRU Historically (pre-2.5), the inode shrinker used to reclaim only empty inodes and skip over those that still contained page cache. This caused problems on highmem hosts: struct inode could fill lowmem zones before the cache was getting reclaimed in the highmem zones. To address this, the inode shrinker started to strip page cache to facilitate reclaiming lowmem. However, this comes with its own set of problems: the shrinkers may drop actively used page cache just because the inodes are not currently open or dirty - think working with a large git tree. It further doesn't respect cgroup memory protection settings and can cause priority inversions between containers. Nowadays, the page cache also holds non-resident info for evicted cache pages in order to detect refaults. We've come to rely heavily on this data inside reclaim for protecting the cache workingset and driving swap behavior. We also use it to quantify and report workload health through psi. 
The latter in turn is used for fleet health monitoring, as well as driving automated memory sizing of workloads and containers, proactive reclaim and memory offloading schemes. The consequence of dropping page cache prematurely is that we're seeing subtle and not-so-subtle failures in all of the above-mentioned scenarios, with the workload generally entering unexpected thrashing states while losing the ability to reliably detect it. To fix this on non-highmem systems at least, going back to rotating inodes on the LRU isn't feasible. We've tried (commit a76cf1a474d7 ("mm: don't reclaim inodes with many attached pages")) and failed (commit 69056ee6a8a3 ("Revert "mm: don't reclaim inodes with many attached pages"")). The issue is mostly that shrinker pools attract pressure based on their size, and when objects get skipped the shrinkers remember this as deferred reclaim work. This accumulates excessive pressure on the remaining inodes, and we can quickly eat into heavily used ones, or dirty ones that require IO to reclaim, when there potentially is plenty of cold, clean cache around still. Instead, this patch keeps populated inodes off the inode LRU in the first place - just like an open file or dirty state would. An otherwise clean and unused inode then gets queued when the last cache entry disappears. This solves the problem without reintroducing the reclaim issues, and generally is a bit more scalable than having to wade through potentially hundreds of thousands of busy inodes. Locking is a bit tricky because the locks protecting the inode state (i_lock) and the inode LRU (lru_list.lock) don't nest inside the irq-safe page cache lock (i_pages.xa_lock). Page cache deletions are serialized through i_lock, taken before the i_pages lock, to make sure depopulated inodes are queued reliably. Additions may race with deletions, but we'll check again in the shrinker. 
If additions race with the shrinker itself, we're protected by the i_lock: if find_inode() or iput() win, the shrinker will bail on the elevated i_count or I_REFERENCED; if the shrinker wins and goes ahead with the inode, it will set I_FREEING and inhibit further igets(), which will cause the other side to create a new instance of the inode instead. Link: https://lkml.kernel.org/r/20210614211904.14420-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Roman Gushchin Cc: Tejun Heo Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/inode.c | 46 +++++++++++++++++++++---------------- fs/internal.h | 1 - include/linux/fs.h | 1 + include/linux/pagemap.h | 50 +++++++++++++++++++++++++++++++++++++++++ mm/filemap.c | 8 +++++++ mm/truncate.c | 19 ++++++++++++++-- mm/vmscan.c | 7 ++++++ mm/workingset.c | 10 +++++++++ 8 files changed, 120 insertions(+), 22 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 8830a727b0af0..6b74701c19547 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -424,11 +424,20 @@ void ihold(struct inode *inode) } EXPORT_SYMBOL(ihold); -static void inode_lru_list_add(struct inode *inode) +static void __inode_add_lru(struct inode *inode, bool rotate) { + if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) + return; + if (atomic_read(&inode->i_count)) + return; + if (!(inode->i_sb->s_flags & SB_ACTIVE)) + return; + if (!mapping_shrinkable(&inode->i_data)) + return; + if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); - else + else if (rotate) inode->i_state |= I_REFERENCED; } @@ -439,16 +448,11 @@ static void inode_lru_list_add(struct inode *inode) */ void inode_add_lru(struct inode *inode) { - if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC | - I_FREEING | I_WILL_FREE)) && - !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE) - inode_lru_list_add(inode); + __inode_add_lru(inode, false); } - static void inode_lru_list_del(struct inode *inode) { - if 
(list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_dec(nr_unused); } @@ -724,10 +728,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) /* * Isolate the inode from the LRU in preparation for freeing it. * - * Any inodes which are pinned purely because of attached pagecache have their - * pagecache removed. If the inode has metadata buffers attached to - * mapping->private_list then try to remove them. - * * If the inode has the I_REFERENCED flag set, then it means that it has been * used recently - the flag is set in iput_final(). When we encounter such an * inode, clear the flag and move it to the back of the LRU so it gets another @@ -743,31 +743,39 @@ static enum lru_status inode_lru_isolate(struct list_head *item, struct inode *inode = container_of(item, struct inode, i_lru); /* - * we are inverting the lru lock/inode->i_lock here, so use a trylock. - * If we fail to get the lock, just skip it. + * We are inverting the lru lock/inode->i_lock here, so use a + * trylock. If we fail to get the lock, just skip it. */ if (!spin_trylock(&inode->i_lock)) return LRU_SKIP; /* - * Referenced or dirty inodes are still in use. Give them another pass - * through the LRU as we canot reclaim them now. + * Inodes can get referenced, redirtied, or repopulated while + * they're already on the LRU, and this can make them + * unreclaimable for a while. Remove them lazily here; iput, + * sync, or the last page cache deletion will requeue them. 
*/ if (atomic_read(&inode->i_count) || - (inode->i_state & ~I_REFERENCED)) { + (inode->i_state & ~I_REFERENCED) || + !mapping_shrinkable(&inode->i_data)) { list_lru_isolate(lru, &inode->i_lru); spin_unlock(&inode->i_lock); this_cpu_dec(nr_unused); return LRU_REMOVED; } - /* recently referenced inodes get one more pass */ + /* Recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; } + /* + * On highmem systems, mapping_shrinkable() permits dropping + * page cache in order to free up struct inodes: lowmem might + * be under pressure before the cache inside the highmem zone. + */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { __iget(inode); spin_unlock(&inode->i_lock); @@ -1634,7 +1642,7 @@ static void iput_final(struct inode *inode) if (!drop && !(inode->i_state & I_DONTCACHE) && (sb->s_flags & SB_ACTIVE)) { - inode_add_lru(inode); + __inode_add_lru(inode, true); spin_unlock(&inode->i_lock); return; } diff --git a/fs/internal.h b/fs/internal.h index 3ce8edbaa3ca2..ea26c910d0850 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -146,7 +146,6 @@ extern int vfs_open(const struct path *, struct file *); * inode.c */ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); -extern void inode_add_lru(struct inode *inode); extern int dentry_needs_remove_privs(struct dentry *dentry); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216c..8057699362df8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3219,6 +3219,7 @@ static inline void remove_inode_hash(struct inode *inode) } extern void inode_sb_list_add(struct inode *inode); +extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ed02aa5222634..b071babc66299 100644 --- 
a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -23,6 +23,56 @@ static inline bool mapping_empty(struct address_space *mapping) return xa_empty(&mapping->i_pages); } +/* + * mapping_shrinkable - test if page cache state allows inode reclaim + * @mapping: the page cache mapping + * + * This checks the mapping's cache state for the pupose of inode + * reclaim and LRU management. + * + * The caller is expected to hold the i_lock, but is not required to + * hold the i_pages lock, which usually protects cache state. That's + * because the i_lock and the list_lru lock that protect the inode and + * its LRU state don't nest inside the irq-safe i_pages lock. + * + * Cache deletions are performed under the i_lock, which ensures that + * when an inode goes empty, it will reliably get queued on the LRU. + * + * Cache additions do not acquire the i_lock and may race with this + * check, in which case we'll report the inode as shrinkable when it + * has cache pages. This is okay: the shrinker also checks the + * refcount and the referenced bit, which will be elevated or set in + * the process of adding new cache pages to an inode. + */ +static inline bool mapping_shrinkable(struct address_space *mapping) +{ + void *head; + + /* + * On highmem systems, there could be lowmem pressure from the + * inodes before there is highmem pressure from the page + * cache. Make inodes shrinkable regardless of cache state. + */ + if (IS_ENABLED(CONFIG_HIGHMEM)) + return true; + + /* Cache completely empty? Shrink away. */ + head = rcu_access_pointer(mapping->i_pages.xa_head); + if (!head) + return true; + + /* + * The xarray stores single offset-0 entries directly in the + * head pointer, which allows non-resident page cache entries + * to escape the shadow shrinker's list of xarray nodes. The + * inode shrinker needs to pick them up under memory pressure. + */ + if (!xa_is_node(head) && xa_is_value(head)) + return true; + + return false; +} + /* * Bits in mapping->flags. 
*/ diff --git a/mm/filemap.c b/mm/filemap.c index 4926f16ec52db..867df9ac20e8a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -260,9 +260,13 @@ void delete_from_page_cache(struct page *page) struct address_space *mapping = page_mapping(page); BUG_ON(!PageLocked(page)); + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); __delete_from_page_cache(page, NULL); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); page_cache_free_page(mapping, page); } @@ -338,6 +342,7 @@ void delete_from_page_cache_batch(struct address_space *mapping, if (!pagevec_count(pvec)) return; + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); for (i = 0; i < pagevec_count(pvec); i++) { trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); @@ -346,6 +351,9 @@ void delete_from_page_cache_batch(struct address_space *mapping, } page_cache_delete_batch(mapping, pvec); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); for (i = 0; i < pagevec_count(pvec); i++) page_cache_free_page(mapping, pvec->pages[i]); diff --git a/mm/truncate.c b/mm/truncate.c index 787b35f2cdc16..9cc0a638c6447 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -45,9 +45,13 @@ static inline void __clear_shadow_entry(struct address_space *mapping, static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, void *entry) { + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); __clear_shadow_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); } /* @@ -73,8 +77,10 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, return; dax = dax_mapping(mapping); - if (!dax) + if (!dax) { + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); + } 
for (i = j; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; @@ -93,8 +99,12 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, __clear_shadow_entry(mapping, index, page); } - if (!dax) + if (!dax) { xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); + } pvec->nr = j; } @@ -566,6 +576,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); if (PageDirty(page)) goto failed; @@ -573,6 +584,9 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); __delete_from_page_cache(page, NULL); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); @@ -581,6 +595,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) return 1; failed: xa_unlock_irq(&mapping->i_pages); + spin_unlock(&mapping->host->i_lock); return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 6f784e1653821..a7602f71ec04f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1055,6 +1055,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); + if (!PageSwapCache(page)) + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); /* * The non racy check for a busy page. 
@@ -1123,6 +1125,9 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, shadow = workingset_eviction(page, target_memcg); __delete_from_page_cache(page, shadow); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); if (freepage != NULL) freepage(page); @@ -1132,6 +1137,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, cannot_free: xa_unlock_irq(&mapping->i_pages); + if (!PageSwapCache(page)) + spin_unlock(&mapping->host->i_lock); return 0; } diff --git a/mm/workingset.c b/mm/workingset.c index 5ba3e42446fa6..cf8bb34fb321b 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -542,6 +542,13 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, goto out; } + if (!spin_trylock(&mapping->host->i_lock)) { + xa_unlock(&mapping->i_pages); + spin_unlock_irq(lru_lock); + ret = LRU_RETRY; + goto out; + } + list_lru_isolate(lru, item); __dec_lruvec_kmem_state(node, WORKINGSET_NODES); @@ -561,6 +568,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, out_invalid: xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); ret = LRU_REMOVED_RETRY; out: cond_resched(); From 220758bb4eaed14626157507558722f9d3c97b2c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 21 Jul 2021 11:41:36 +1000 Subject: [PATCH 339/851] fs, mm: fix race in unlinking swapfile We had a recurring situation in which admin procedures setting up swapfiles would race with test preparation clearing away swapfiles; and just occasionally that got stuck on a swapfile "(deleted)" which could never be swapped off. That is not supposed to be possible. 2.6.28 commit f9454548e17c ("don't unlink an active swapfile") admitted that it was leaving a race window open: now close it. 
may_delete() makes the IS_SWAPFILE check (amongst many others) before inode_lock has been taken on target: now repeat just that simple check in vfs_unlink() and vfs_rename(), after taking inode_lock. Which goes most of the way to fixing the race, but swapon() must also check after it acquires inode_lock, that the file just opened has not already been unlinked. Link: https://lkml.kernel.org/r/e17b91ad-a578-9a15-5e3-4989e0f999b5@google.com Fixes: f9454548e17c ("don't unlink an active swapfile") Signed-off-by: Hugh Dickins Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/namei.c | 8 +++++++- mm/swapfile.c | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index bf6d8a738c599..ff866c07f4d2b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4024,7 +4024,9 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir, return -EPERM; inode_lock(target); - if (is_local_mountpoint(dentry)) + if (IS_SWAPFILE(target)) + error = -EPERM; + else if (is_local_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); @@ -4526,6 +4528,10 @@ int vfs_rename(struct renamedata *rd) else if (target) inode_lock(target); + error = -EPERM; + if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target))) + goto out; + error = -EBUSY; if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) goto out; diff --git a/mm/swapfile.c b/mm/swapfile.c index 1e07d1c776f2a..7527afd95284e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3130,6 +3130,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) struct filename *name; struct file *swap_file = NULL; struct address_space *mapping; + struct dentry *dentry; int prio; int error; union swap_header *swap_header; @@ -3173,6 +3174,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->swap_file = swap_file; mapping = swap_file->f_mapping; + dentry = 
swap_file->f_path.dentry; inode = mapping->host; error = claim_swapfile(p, inode); @@ -3180,6 +3182,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap; inode_lock(inode); + if (d_unlinked(dentry) || cant_mount(dentry)) { + error = -ENOENT; + goto bad_swap_unlock_inode; + } if (IS_SWAPFILE(inode)) { error = -EBUSY; goto bad_swap_unlock_inode; From 8df74428b638d970618e2b714a9eb17910e69786 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 21 Jul 2021 11:41:37 +1000 Subject: [PATCH 340/851] mm, memcg: add mem_cgroup_disabled checks in vmpressure and swap-related functions Add mem_cgroup_disabled check in vmpressure, mem_cgroup_uncharge_swap and cgroup_throttle_swaprate functions. This minimizes the memcg overhead in the pagefault and exit_mmap paths when memcgs are disabled using cgroup_disable=memory command-line option. This change results in ~2.1% overhead reduction when running PFT test [1] comparing {CONFIG_MEMCG=n, CONFIG_MEMCG_SWAP=n} against {CONFIG_MEMCG=y, CONFIG_MEMCG_SWAP=y, cgroup_disable=memory} configuration on an 8-core ARM64 Android device. 
[1] https://lkml.org/lkml/2006/8/29/294 also used in mmtests suite Link: https://lkml.kernel.org/r/20210713010934.299876-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Shakeel Butt Reviewed-by: Muchun Song Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Alex Shi Cc: Alistair Popple Cc: David Hildenbrand Cc: Jens Axboe Cc: Joonsoo Kim Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Minchan Kim Cc: Roman Gushchin Cc: Tejun Heo Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 3 +++ mm/swapfile.c | 3 +++ mm/vmpressure.c | 7 ++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ae1f5d0cb5810..a228cd51c4bd3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7305,6 +7305,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) struct mem_cgroup *memcg; unsigned short id; + if (mem_cgroup_disabled()) + return; + id = swap_cgroup_record(entry, 0, nr_pages); rcu_read_lock(); memcg = mem_cgroup_from_id(id); diff --git a/mm/swapfile.c b/mm/swapfile.c index 7527afd95284e..627b16aed1dc8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3784,6 +3784,9 @@ void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) struct swap_info_struct *si, *next; int nid = page_to_nid(page); + if (mem_cgroup_disabled()) + return; + if (!(gfp_mask & __GFP_IO)) return; diff --git a/mm/vmpressure.c b/mm/vmpressure.c index d69019fc37898..9b172561fded7 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -240,7 +240,12 @@ static void vmpressure_work_fn(struct work_struct *work) void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, unsigned long scanned, unsigned long reclaimed) { - struct vmpressure *vmpr = memcg_to_vmpressure(memcg); + struct vmpressure *vmpr; + + if (mem_cgroup_disabled()) + return; + + vmpr = memcg_to_vmpressure(memcg); /* * Here we only want to account pressure that userland is able to From 
1f4c6a1cf27476819ad04bb91f192686b5ce77ed Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 21 Jul 2021 11:41:38 +1000 Subject: [PATCH 341/851] mm, memcg: inline mem_cgroup_{charge/uncharge} to improve disabled memcg config Inline mem_cgroup_{charge/uncharge} and mem_cgroup_uncharge_list functions to perform mem_cgroup_disabled static key check inline before calling the main body of the function. This minimizes the memcg overhead in the pagefault and exit_mmap paths when memcgs are disabled using cgroup_disable=memory command-line option. This change results in ~0.4% overhead reduction when running PFT test [1] comparing {CONFIG_MEMCG=n} against {CONFIG_MEMCG=y, cgroup_disable=memory} configuration on an 8-core ARM64 Android device. [1] https://lkml.org/lkml/2006/8/29/294 also used in mmtests suite Link: https://lkml.kernel.org/r/20210713010934.299876-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Shakeel Butt Reviewed-by: Muchun Song Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Alex Shi Cc: Alistair Popple Cc: David Hildenbrand Cc: Jens Axboe Cc: Joonsoo Kim Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Minchan Kim Cc: Roman Gushchin Cc: Tejun Heo Cc: Wei Yang Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memcontrol.h | 28 +++++++++++++++++++++++++--- mm/memcontrol.c | 33 ++++++++++++--------------------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bfe5c486f4add..39fa88051a428 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -693,13 +693,35 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) page_counter_read(&memcg->memory); } -int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask); +static inline int mem_cgroup_charge(struct page *page,
struct mm_struct *mm, + gfp_t gfp_mask) +{ + if (mem_cgroup_disabled()) + return 0; + return __mem_cgroup_charge(page, mm, gfp_mask); +} + int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); -void mem_cgroup_uncharge(struct page *page); -void mem_cgroup_uncharge_list(struct list_head *page_list); +void __mem_cgroup_uncharge(struct page *page); +static inline void mem_cgroup_uncharge(struct page *page) +{ + if (mem_cgroup_disabled()) + return; + __mem_cgroup_uncharge(page); +} + +void __mem_cgroup_uncharge_list(struct list_head *page_list); +static inline void mem_cgroup_uncharge_list(struct list_head *page_list) +{ + if (mem_cgroup_disabled()) + return; + __mem_cgroup_uncharge_list(page_list); +} void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a228cd51c4bd3..6adf50acdbe2f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6701,8 +6701,7 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, atomic_long_read(&parent->memory.children_low_usage))); } -static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, - gfp_t gfp) +static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) { unsigned int nr_pages = thp_nr_pages(page); int ret; @@ -6723,7 +6722,7 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, } /** - * mem_cgroup_charge - charge a newly allocated page to a cgroup + * __mem_cgroup_charge - charge a newly allocated page to a cgroup * @page: page to charge * @mm: mm context of the victim * @gfp_mask: reclaim mode @@ -6736,16 +6735,14 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, * * Returns 0 on success. Otherwise, an error code is returned. 
*/ -int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) +int __mem_cgroup_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask) { struct mem_cgroup *memcg; int ret; - if (mem_cgroup_disabled()) - return 0; - memcg = get_mem_cgroup_from_mm(mm); - ret = __mem_cgroup_charge(page, memcg, gfp_mask); + ret = charge_memcg(page, memcg, gfp_mask); css_put(&memcg->css); return ret; @@ -6780,7 +6777,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, memcg = get_mem_cgroup_from_mm(mm); rcu_read_unlock(); - ret = __mem_cgroup_charge(page, memcg, gfp); + ret = charge_memcg(page, memcg, gfp); css_put(&memcg->css); return ret; @@ -6916,18 +6913,15 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) } /** - * mem_cgroup_uncharge - uncharge a page + * __mem_cgroup_uncharge - uncharge a page * @page: page to uncharge * - * Uncharge a page previously charged with mem_cgroup_charge(). + * Uncharge a page previously charged with __mem_cgroup_charge(). */ -void mem_cgroup_uncharge(struct page *page) +void __mem_cgroup_uncharge(struct page *page) { struct uncharge_gather ug; - if (mem_cgroup_disabled()) - return; - /* Don't touch page->lru of any random page, pre-check: */ if (!page_memcg(page)) return; @@ -6938,20 +6932,17 @@ void mem_cgroup_uncharge(struct page *page) } /** - * mem_cgroup_uncharge_list - uncharge a list of page + * __mem_cgroup_uncharge_list - uncharge a list of page * @page_list: list of pages to uncharge * * Uncharge a list of pages previously charged with - * mem_cgroup_charge(). + * __mem_cgroup_charge(). 
*/ -void mem_cgroup_uncharge_list(struct list_head *page_list) +void __mem_cgroup_uncharge_list(struct list_head *page_list) { struct uncharge_gather ug; struct page *page; - if (mem_cgroup_disabled()) - return; - uncharge_gather_clear(&ug); list_for_each_entry(page, page_list, lru) uncharge_page(page, &ug); From 2e22ac3037b62e81e086e8790ab6e340265e6b06 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 21 Jul 2021 11:41:39 +1000 Subject: [PATCH 342/851] mm, memcg: inline swap-related functions to improve disabled memcg config Inline mem_cgroup_try_charge_swap, mem_cgroup_uncharge_swap and cgroup_throttle_swaprate functions to perform mem_cgroup_disabled static key check inline before calling the main body of the function. This minimizes the memcg overhead in the pagefault and exit_mmap paths when memcgs are disabled using cgroup_disable=memory command-line option. This change results in ~1% overhead reduction when running PFT test [1] comparing {CONFIG_MEMCG=n} against {CONFIG_MEMCG=y, cgroup_disable=memory} configuration on an 8-core ARM64 Android device. 
[1] https://lkml.org/lkml/2006/8/29/294 also used in mmtests suite Link: https://lkml.kernel.org/r/20210713010934.299876-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Shakeel Butt Reviewed-by: Muchun Song Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Cc: Roman Gushchin Cc: Yang Shi Cc: Alex Shi Cc: Wei Yang Cc: Jens Axboe Cc: Joonsoo Kim Cc: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Alistair Popple Cc: Minchan Kim Cc: Miaohe Lin Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/swap.h | 26 +++++++++++++++++++++++--- mm/memcontrol.c | 14 ++++---------- mm/swapfile.c | 5 +---- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 6f5a43251593c..f30d26b0f71db 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -721,7 +721,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) #endif #if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -extern void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); +extern void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask); +static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) +{ + if (mem_cgroup_disabled()) + return; + __cgroup_throttle_swaprate(page, gfp_mask); +} #else static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) { @@ -730,8 +736,22 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) #ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); -extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); -extern void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); +extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); +static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) +{ + if (mem_cgroup_disabled()) + return 
0; + return __mem_cgroup_try_charge_swap(page, entry); +} + +extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); +static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) +{ + if (mem_cgroup_disabled()) + return; + __mem_cgroup_uncharge_swap(entry, nr_pages); +} + extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct page *page); #else diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6adf50acdbe2f..b8aab19009958 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7234,7 +7234,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) } /** - * mem_cgroup_try_charge_swap - try charging swap space for a page + * __mem_cgroup_try_charge_swap - try charging swap space for a page * @page: page being added to swap * @entry: swap entry to charge * @@ -7242,16 +7242,13 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * * Returns 0 on success, -ENOMEM on failure. 
*/ -int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) +int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) { unsigned int nr_pages = thp_nr_pages(page); struct page_counter *counter; struct mem_cgroup *memcg; unsigned short oldid; - if (mem_cgroup_disabled()) - return 0; - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return 0; @@ -7287,18 +7284,15 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) } /** - * mem_cgroup_uncharge_swap - uncharge swap space + * __mem_cgroup_uncharge_swap - uncharge swap space * @entry: swap entry to uncharge * @nr_pages: the amount of swap space to uncharge */ -void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) +void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) { struct mem_cgroup *memcg; unsigned short id; - if (mem_cgroup_disabled()) - return; - id = swap_cgroup_record(entry, 0, nr_pages); rcu_read_lock(); memcg = mem_cgroup_from_id(id); diff --git a/mm/swapfile.c b/mm/swapfile.c index 627b16aed1dc8..22d10f7138487 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3779,14 +3779,11 @@ static void free_swap_count_continuations(struct swap_info_struct *si) } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) +void __cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) { struct swap_info_struct *si, *next; int nid = page_to_nid(page); - if (mem_cgroup_disabled()) - return; - if (!(gfp_mask & __GFP_IO)) return; From 484d6cd671b091cea30e2a0c2c51ee3d5bd5ab5e Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 21 Jul 2021 11:41:40 +1000 Subject: [PATCH 343/851] memcg: enable accounting for pids in nested pid namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5d097056c9a0 ("kmemcg: account certain kmem allocations to memcg") enabled memcg accounting for pids allocated from init_pid_ns.pid_cachep, but 
forgot to adjust the setting for nested pid namespaces. As a result, pid memory is not accounted exactly where it is really needed, inside memcg-limited containers with their own pid namespaces. Pid was one of the first kernel objects enabled for memcg accounting. init_pid_ns.pid_cachep is marked with SLAB_ACCOUNT and we can expect that any new pids in the system are memcg-accounted. Though recently I've noticed that this is wrong: nested pid namespaces create their own slab caches for pid objects, because nested pids have an increased size: they contain ids both for all parent pid namespaces and for their own. The problem is that these slab caches are _NOT_ marked with SLAB_ACCOUNT; as a result, any pids allocated in nested pid namespaces are not memcg-accounted. A pid struct in a nested pid namespace consumes up to 500 bytes of memory, so 100000 such objects give us up to ~50MB of unaccounted memory, which allows a container to exceed its assigned memcg limits. Link: https://lkml.kernel.org/r/8b6de616-fd1a-02c6-cbdb-976ecdcfa604@virtuozzo.com Fixes: 5d097056c9a0 ("kmemcg: account certain kmem allocations to memcg") Cc: stable@vger.kernel.org Signed-off-by: Vasily Averin Reviewed-by: Michal Koutný Reviewed-by: Shakeel Butt Acked-by: Christian Brauner Acked-by: Roman Gushchin Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/pid_namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index ca43239a255ad..cb5a25a8a0cc7 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -51,7 +51,8 @@ static struct kmem_cache *create_pid_cachep(unsigned int level) mutex_lock(&pid_caches_mutex); /* Name collision forces to do allocation under mutex. */ if (!*pkc) - *pkc = kmem_cache_create(name, len, 0, SLAB_HWCACHE_ALIGN, 0); + *pkc = kmem_cache_create(name, len, 0, + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, 0); mutex_unlock(&pid_caches_mutex); /* current can fail, but someone else can succeed.
*/ return READ_ONCE(*pkc); From 3b4b2143d68dec24470df7e77465fdd7b4dfc73e Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 21 Jul 2021 11:41:41 +1000 Subject: [PATCH 344/851] memcg: switch lruvec stats to rstat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 2d146aa3aa84 ("mm: memcontrol: switch to rstat") switched memcg stats to rstat infrastructure but skipped the conversion of the lruvec stats as such stats are read in the performance critical code paths and flushing stats may have impacted the performance of applications. This patch converts the lruvec stats to rstat and later patches add mechanisms to keep the performance impact to a minimum. The rstat conversion comes with a price, i.e. memory cost. Effectively this patch reverts the savings done by the commit f3344adf38bd ("mm: memcontrol: optimize per-lruvec stats counter memory usage"). However this cost is justified due to the negative impact of the inaccurate lruvec stats on many heuristics. One such case is reported in [1]. The memory reclaim code is filled with a plethora of heuristics, and many of those heuristics read the lruvec stats. So, inaccurate stats can make such heuristics ineffective. [1] reports the impact of inaccurate lruvec stats on the "cache trim mode" heuristic. Inaccurate lruvec stats can impact the deactivation and aging anon heuristics as well.
[1] https://lore.kernel.org/linux-mm/20210311004449.1170308-1-ying.huang@intel.com/ Link: https://lkml.kernel.org/r/20210714013948.270662-1-shakeelb@google.com Signed-off-by: Shakeel Butt Cc: Tejun Heo Cc: Johannes Weiner Cc: Muchun Song Cc: Michal Hocko Cc: Roman Gushchin Cc: Huang Ying Cc: Hillf Danton Cc: Michal Koutný Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memcontrol.h | 42 +++++++------- mm/memcontrol.c | 114 +++++++++++++------------------------ 2 files changed, 58 insertions(+), 98 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 39fa88051a428..540a0539ba105 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -105,14 +105,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -struct lruvec_stat { - long count[NR_VM_NODE_STAT_ITEMS]; -}; - -struct batched_lruvec_stat { - s32 count[NR_VM_NODE_STAT_ITEMS]; -}; - /* * Bitmap and deferred work of shrinker::id corresponding to memcg-aware * shrinkers, which have elements charged to this memcg. @@ -123,24 +115,30 @@ struct shrinker_info { unsigned long *map; }; +struct lruvec_stats_percpu { + /* Local (CPU and cgroup) state */ + long state[NR_VM_NODE_STAT_ITEMS]; + + /* Delta calculation for lockless upward propagation */ + long state_prev[NR_VM_NODE_STAT_ITEMS]; +}; + +struct lruvec_stats { + /* Aggregated (CPU and subtree) state */ + long state[NR_VM_NODE_STAT_ITEMS]; + + /* Pending child counts during tree propagation */ + long state_pending[NR_VM_NODE_STAT_ITEMS]; +}; + /* * per-node information in memory controller. */ struct mem_cgroup_per_node { struct lruvec lruvec; - /* - * Legacy local VM stats. This should be struct lruvec_stat and - * cannot be optimized to struct batched_lruvec_stat. Because - * the threshold of the lruvec_stat_cpu can be as big as - * MEMCG_CHARGE_BATCH * PAGE_SIZE. It can fit into s32. But this - * filed has no upper limit. 
- */ - struct lruvec_stat __percpu *lruvec_stat_local; - - /* Subtree VM stats (batched updates) */ - struct batched_lruvec_stat __percpu *lruvec_stat_cpu; - atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; + struct lruvec_stats_percpu __percpu *lruvec_stats_percpu; + struct lruvec_stats lruvec_stats; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; @@ -987,7 +985,7 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec, return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - x = atomic_long_read(&pn->lruvec_stat[idx]); + x = READ_ONCE(pn->lruvec_stats.state[idx]); #ifdef CONFIG_SMP if (x < 0) x = 0; @@ -1007,7 +1005,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); for_each_possible_cpu(cpu) - x += per_cpu(pn->lruvec_stat_local->count[idx], cpu); + x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu); #ifdef CONFIG_SMP if (x < 0) x = 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b8aab19009958..d5efcf95758d1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -671,23 +671,11 @@ static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx) return x; } -static struct mem_cgroup_per_node * -parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid) -{ - struct mem_cgroup *parent; - - parent = parent_mem_cgroup(pn->memcg); - if (!parent) - return NULL; - return parent->nodeinfo[nid]; -} - void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { struct mem_cgroup_per_node *pn; struct mem_cgroup *memcg; - long x, threshold = MEMCG_CHARGE_BATCH; pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); memcg = pn->memcg; @@ -696,21 +684,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __mod_memcg_state(memcg, idx, val); /* Update lruvec */ - __this_cpu_add(pn->lruvec_stat_local->count[idx], val); - 
- if (vmstat_item_in_bytes(idx)) - threshold <<= PAGE_SHIFT; - - x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]); - if (unlikely(abs(x) > threshold)) { - pg_data_t *pgdat = lruvec_pgdat(lruvec); - struct mem_cgroup_per_node *pi; - - for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id)) - atomic_long_add(x, &pi->lruvec_stat[idx]); - x = 0; - } - __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); + __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val); } /** @@ -2289,40 +2263,13 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) mutex_unlock(&percpu_charge_mutex); } -static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg, int cpu) -{ - int nid; - - for_each_node(nid) { - struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid]; - unsigned long stat[NR_VM_NODE_STAT_ITEMS]; - struct batched_lruvec_stat *lstatc; - int i; - - lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpu); - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { - stat[i] = lstatc->count[i]; - lstatc->count[i] = 0; - } - - do { - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) - atomic_long_add(stat[i], &pn->lruvec_stat[i]); - } while ((pn = parent_nodeinfo(pn, nid))); - } -} - static int memcg_hotplug_cpu_dead(unsigned int cpu) { struct memcg_stock_pcp *stock; - struct mem_cgroup *memcg; stock = &per_cpu(memcg_stock, cpu); drain_stock(stock); - for_each_mem_cgroup(memcg) - memcg_flush_lruvec_page_state(memcg, cpu); - return 0; } @@ -5126,17 +5073,9 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat, - GFP_KERNEL_ACCOUNT); - if (!pn->lruvec_stat_local) { - kfree(pn); - return 1; - } - - pn->lruvec_stat_cpu = alloc_percpu_gfp(struct batched_lruvec_stat, - GFP_KERNEL_ACCOUNT); - if (!pn->lruvec_stat_cpu) { - free_percpu(pn->lruvec_stat_local); + pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu, + GFP_KERNEL_ACCOUNT); + if 
(!pn->lruvec_stats_percpu) { kfree(pn); return 1; } @@ -5157,8 +5096,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return; - free_percpu(pn->lruvec_stat_cpu); - free_percpu(pn->lruvec_stat_local); + free_percpu(pn->lruvec_stats_percpu); kfree(pn); } @@ -5174,15 +5112,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) static void mem_cgroup_free(struct mem_cgroup *memcg) { - int cpu; - memcg_wb_domain_exit(memcg); - /* - * Flush percpu lruvec stats to guarantee the value - * correctness on parent's and all ancestor levels. - */ - for_each_online_cpu(cpu) - memcg_flush_lruvec_page_state(memcg, cpu); __mem_cgroup_free(memcg); } @@ -5415,7 +5345,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) struct mem_cgroup *parent = parent_mem_cgroup(memcg); struct memcg_vmstats_percpu *statc; long delta, v; - int i; + int i, nid; statc = per_cpu_ptr(memcg->vmstats_percpu, cpu); @@ -5463,6 +5393,36 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) if (parent) parent->vmstats.events_pending[i] += delta; } + + for_each_node_state(nid, N_MEMORY) { + struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid]; + struct mem_cgroup_per_node *ppn = NULL; + struct lruvec_stats_percpu *lstatc; + + if (parent) + ppn = parent->nodeinfo[nid]; + + lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu); + + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { + delta = pn->lruvec_stats.state_pending[i]; + if (delta) + pn->lruvec_stats.state_pending[i] = 0; + + v = READ_ONCE(lstatc->state[i]); + if (v != lstatc->state_prev[i]) { + delta += v - lstatc->state_prev[i]; + lstatc->state_prev[i] = v; + } + + if (!delta) + continue; + + pn->lruvec_stats.state[i] += delta; + if (ppn) + ppn->lruvec_stats.state_pending[i] += delta; + } + } } #ifdef CONFIG_MMU @@ -6396,6 +6356,8 @@ static int memory_numa_stat_show(struct seq_file *m, void *v) int i; struct mem_cgroup *memcg = 
mem_cgroup_from_seq(m); + cgroup_rstat_flush(memcg->css.cgroup); + for (i = 0; i < ARRAY_SIZE(memory_stats); i++) { int nid; From 5aeb4292e13d9b1b6c9e1da7f2629168a09109bc Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 21 Jul 2021 11:41:42 +1000 Subject: [PATCH 345/851] memcg: infrastructure to flush memcg stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment memcg stats are read in four contexts: 1. memcg stat user interfaces 2. dirty throttling 3. page fault 4. memory reclaim Currently the kernel flushes the stats for first two cases. Flushing the stats for remaining two cases may have performance impact. Always flushing the memcg stats on the page fault code path may negatively impact the performance of the applications. In addition flushing in the memory reclaim code path, though treated as slowpath, can become the source of contention for the global lock taken for stat flushing because when system or memcg is under memory pressure, many tasks may enter the reclaim path. This patch uses following mechanisms to solve these challenges: 1. Periodically flush the stats from root memcg every 2 seconds. This will time limit the out of sync stats. 2. Asynchronously flush the stats after fixed number of stat updates. In the worst case the stat can be out of sync by O(nr_cpus * BATCH) for 2 seconds. 3. For avoiding thundering herd to flush the stats particularly from the memory reclaim context, introduce memcg local spinlock and let only one flusher active at a time. This could have been done through cgroup_rstat_lock lock but that lock is used by other subsystem and for userspace reading memcg stats. So, it is better to keep flushers introduced by this patch decoupled from cgroup_rstat_lock. However we would have to use irqsafe version of rstat flush but that is fine as this code path will be flushing for whole tree and do the work for everyone. No one will be waiting for that worker.
Link: https://lkml.kernel.org/r/20210714013948.270662-2-shakeelb@google.com Signed-off-by: Shakeel Butt Cc: Hillf Danton Cc: Huang Ying Cc: Johannes Weiner Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memcontrol.h | 6 ++++++ mm/memcontrol.c | 34 ++++++++++++++++++++++++++++++++++ mm/vmscan.c | 6 ++++++ 3 files changed, 46 insertions(+) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 540a0539ba105..406058a0c480f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1013,6 +1013,8 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, return x; } +void mem_cgroup_flush_stats(void); + void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); @@ -1422,6 +1424,10 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, return node_page_state(lruvec_pgdat(lruvec), idx); } +static inline void mem_cgroup_flush_stats(void) +{ +} + static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d5efcf95758d1..456f5310ea59d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -103,6 +103,14 @@ static bool do_memsw_account(void) return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap; } +/* memcg and lruvec stats flushing */ +static void flush_memcg_stats_dwork(struct work_struct *w); +static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork); +static void flush_memcg_stats_work(struct work_struct *w); +static DECLARE_WORK(stats_flush_work, flush_memcg_stats_work); +static DEFINE_PER_CPU(unsigned int, stats_flush_threshold); +static DEFINE_SPINLOCK(stats_flush_lock); + #define THRESHOLDS_EVENTS_TARGET 128 #define SOFTLIMIT_EVENTS_TARGET 1024 
@@ -685,6 +693,8 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, /* Update lruvec */ __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val); + if (!(__this_cpu_inc_return(stats_flush_threshold) % MEMCG_CHARGE_BATCH)) + queue_work(system_unbound_wq, &stats_flush_work); } /** @@ -5248,6 +5258,10 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) /* Online state pins memcg ID, memcg ID pins CSS */ refcount_set(&memcg->id.ref, 1); css_get(css); + + if (unlikely(mem_cgroup_is_root(memcg))) + queue_delayed_work(system_unbound_wq, &stats_flush_dwork, + 2UL*HZ); return 0; } @@ -5339,6 +5353,26 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) memcg_wb_domain_size_changed(memcg); } +void mem_cgroup_flush_stats(void) +{ + if (!spin_trylock(&stats_flush_lock)) + return; + + cgroup_rstat_flush(root_mem_cgroup->css.cgroup); + spin_unlock(&stats_flush_lock); +} + +static void flush_memcg_stats_dwork(struct work_struct *w) +{ + mem_cgroup_flush_stats(); + queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ); +} + +static void flush_memcg_stats_work(struct work_struct *w) +{ + mem_cgroup_flush_stats(); +} + static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); diff --git a/mm/vmscan.c b/mm/vmscan.c index a7602f71ec04f..1cc05ab8ca159 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2893,6 +2893,12 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); again: + /* + * Flush the memory cgroup stats, so that we read accurate per-memcg + * lruvec stats for heuristics. 
+ */ + mem_cgroup_flush_stats(); + memset(&sc->nr, 0, sizeof(sc->nr)); nr_reclaimed = sc->nr_reclaimed; From 0f35f946f8f5c762afb98a73e5a095bc57d08565 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 21 Jul 2021 11:41:43 +1000 Subject: [PATCH 346/851] memcg-infrastructure-to-flush-memcg-stats-v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix sleep-in-wrong context bug Link: https://lkml.kernel.org/r/20210716212137.1391164-2-shakeelb@google.com Signed-off-by: Shakeel Butt Reported-by: Yang Yingliang Reported-by: Marek Szyprowski Cc: Stephen Rothwell Cc: Hillf Danton Cc: Huang Ying Cc: Johannes Weiner Cc: Michal Hocko Cc: Michal Koutný Cc: Muchun Song Cc: Roman Gushchin Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 456f5310ea59d..fb3da5e8b8d2f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5358,7 +5358,7 @@ void mem_cgroup_flush_stats(void) if (!spin_trylock(&stats_flush_lock)) return; - cgroup_rstat_flush(root_mem_cgroup->css.cgroup); + cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup); spin_unlock(&stats_flush_lock); } From 6aded2a191e73ace5505ea25c515746b37ee2a87 Mon Sep 17 00:00:00 2001 From: Yutian Yang Date: Wed, 21 Jul 2021 11:41:43 +1000 Subject: [PATCH 347/851] memcg: charge fs_context and legacy_fs_context This patch adds accounting flags to fs_context and legacy_fs_context allocation sites so that kernel could correctly charge these objects. We have written a PoC to demonstrate the effect of the missing-charging bugs. The PoC takes around 1,200MB unaccounted memory, while it is charged for only 362MB memory usage. We evaluate the PoC on QEMU x86_64 v5.2.90 + Linux kernel v5.10.19 + Debian buster. All the limitations including ulimits and sysctl variables are set as default. 
Specifically, the hard NOFILE limit and nr_open in sysctl are both 1,048,576. /*------------------------- POC code ----------------------------*/ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \ } while (0) #define STACK_SIZE (8 * 1024) #ifndef __NR_fsopen #define __NR_fsopen 430 #endif static inline int fsopen(const char *fs_name, unsigned int flags) { return syscall(__NR_fsopen, fs_name, flags); } static char thread_stack[512][STACK_SIZE]; int thread_fn(void* arg) { for (int i = 0; i< 800000; ++i) { int fsfd = fsopen("nfs", FSOPEN_CLOEXEC); if (fsfd == -1) { errExit("fsopen"); } } while(1); return 0; } int main(int argc, char *argv[]) { int thread_pid; for (int i = 0; i < 1; ++i) { thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE, \ SIGCHLD, NULL); } while(1); return 0; } /*-------------------------- end --------------------------------*/ Link: https://lkml.kernel.org/r/1626517201-24086-1-git-send-email-nglaive@gmail.com Signed-off-by: Yutian Yang Reviewed-by: Shakeel Butt Cc: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/fs_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fs_context.c b/fs/fs_context.c index de1985eae535f..b7e43a780a625 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -254,7 +254,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, struct fs_context *fc; int ret = -ENOMEM; - fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT); if (!fc) return ERR_PTR(-ENOMEM); @@ -649,7 +649,7 @@ const struct fs_context_operations legacy_fs_context_ops = { */ static int legacy_init_fs_context(struct fs_context *fc) { - fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); + fc->fs_private = 
kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT); if (!fc->fs_private) return -ENOMEM; fc->ops = &legacy_fs_context_ops; From b597c997b95f9e36b2c1884e4a5c1013d22e4c9e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 11:41:44 +1000 Subject: [PATCH 348/851] lazy tlb: introduce lazy mm refcount helper functions Patch series "shoot lazy tlbs", v4. On a 16-socket 192-core POWER8 system, a context switching benchmark with as many software threads as CPUs (so each switch will go in and out of idle), upstream can achieve a rate of about 1 million context switches per second. After this series it goes up to 118 million. This patch (of 4): Add explicit _lazy_tlb annotated functions for lazy mm refcounting. This makes lazy mm references more obvious, and allows explicit refcounting to be removed if it is not used. If a kernel thread's current lazy tlb mm happens to be the one it wants to use, then kthread_use_mm() cleverly transfers the mm refcount from the lazy tlb mm reference to the returned reference. If the lazy tlb mm reference is no longer identical to a normal reference, this trick does not work, so that is changed to be explicit about the two references. 
Link: https://lkml.kernel.org/r/20210605014216.446867-1-npiggin@gmail.com Link: https://lkml.kernel.org/r/20210605014216.446867-2-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Andy Lutomirski Cc: Anton Blanchard Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/arm/mach-rpc/ecard.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/mm/book3s64/radix_tlb.c | 4 ++-- fs/exec.c | 4 ++-- include/linux/sched/mm.h | 11 +++++++++++ kernel/cpu.c | 2 +- kernel/exit.c | 2 +- kernel/kthread.c | 11 +++++++---- kernel/sched/core.c | 15 ++++++++------- 9 files changed, 34 insertions(+), 19 deletions(-) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index 827b50f1c73e6..1b4a41aad7932 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -253,7 +253,7 @@ static int ecard_init_mm(void) current->mm = mm; current->active_mm = mm; activate_mm(active_mm, mm); - mmdrop(active_mm); + mmdrop_lazy_tlb(active_mm); ecard_init_pgtables(mm); return 0; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 447b78a87c8f2..a492170b5ba0b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1556,7 +1556,7 @@ void start_secondary(void *unused) if (IS_ENABLED(CONFIG_PPC32)) setup_kup(); - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index aefc100d79a7c..2710a61d7ef20 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -785,10 +785,10 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) if (current->active_mm == mm) { WARN_ON_ONCE(current->mm != NULL); /* Is a kernel thread and is using mm as the lazy tlb */ - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; switch_mm_irqs_off(mm, &init_mm, current); 
- mmdrop(mm); + mmdrop_lazy_tlb(mm); } /* diff --git a/fs/exec.c b/fs/exec.c index 38f63451b9282..17ddaad5462fe 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1026,9 +1026,9 @@ static int exec_mmap(struct mm_struct *mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); mm_update_next_owner(old_mm); mmput(old_mm); - return 0; + } else { + mmdrop_lazy_tlb(active_mm); } - mmdrop(active_mm); return 0; } diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index e24b1fe348e3b..bfd1baca52668 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,17 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +/* Helpers for lazy TLB mm refcounting */ +static inline void mmgrab_lazy_tlb(struct mm_struct *mm) +{ + mmgrab(mm); +} + +static inline void mmdrop_lazy_tlb(struct mm_struct *mm) +{ + mmdrop(mm); +} + /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. diff --git a/kernel/cpu.c b/kernel/cpu.c index 804b847912dc0..79882ce1f2b53 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -603,7 +603,7 @@ static int finish_cpu(unsigned int cpu) */ if (mm != &init_mm) idle->active_mm = &init_mm; - mmdrop(mm); + mmdrop_lazy_tlb(mm); return 0; } diff --git a/kernel/exit.c b/kernel/exit.c index 9a89e7f36acb4..3e9ec041a4e59 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -475,7 +475,7 @@ static void exit_mm(void) __set_current_state(TASK_RUNNING); mmap_read_lock(mm); } - mmgrab(mm); + mmgrab_lazy_tlb(mm); BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ task_lock(current); diff --git a/kernel/kthread.c b/kernel/kthread.c index 5b37a8567168b..e82a17863b098 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1350,14 +1350,14 @@ void kthread_use_mm(struct mm_struct *mm) WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(tsk->mm); + mmgrab(mm); + task_lock(tsk); /* Hold off tlb flush IPIs while switching mm's */ local_irq_disable(); active_mm = 
tsk->active_mm; - if (active_mm != mm) { - mmgrab(mm); + if (active_mm != mm) tsk->active_mm = mm; - } tsk->mm = mm; membarrier_update_current_mm(mm); switch_mm_irqs_off(active_mm, mm, tsk); @@ -1377,7 +1377,7 @@ void kthread_use_mm(struct mm_struct *mm) * mmdrop(), or explicitly with smp_mb(). */ if (active_mm != mm) - mmdrop(active_mm); + mmdrop_lazy_tlb(active_mm); else smp_mb(); @@ -1411,10 +1411,13 @@ void kthread_unuse_mm(struct mm_struct *mm) local_irq_disable(); tsk->mm = NULL; membarrier_update_current_mm(NULL); + mmgrab_lazy_tlb(mm); /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); local_irq_enable(); task_unlock(tsk); + + mmdrop(mm); } EXPORT_SYMBOL_GPL(kthread_unuse_mm); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d9ff40f46619..0963b62726c57 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4574,13 +4574,14 @@ static struct rq *finish_task_switch(struct task_struct *prev) * rq->curr, before returning to userspace, so provide them here: * * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly - * provided by mmdrop(), + * provided by mmdrop_lazy_tlb(), * - a sync_core for SYNC_CORE. 
*/ if (mm) { membarrier_mm_sync_core_before_usermode(mm); - mmdrop(mm); + mmdrop_lazy_tlb(mm); } + if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); @@ -4643,9 +4644,9 @@ context_switch(struct rq *rq, struct task_struct *prev, /* * kernel -> kernel lazy + transfer active - * user -> kernel lazy + mmgrab() active + * user -> kernel lazy + mmgrab_lazy_tlb() active * - * kernel -> user switch + mmdrop() active + * kernel -> user switch + mmdrop_lazy_tlb() active * user -> user switch */ if (!next->mm) { // to kernel @@ -4653,7 +4654,7 @@ context_switch(struct rq *rq, struct task_struct *prev, next->active_mm = prev->active_mm; if (prev->mm) // from user - mmgrab(prev->active_mm); + mmgrab_lazy_tlb(prev->active_mm); else prev->active_mm = NULL; } else { // to user @@ -4669,7 +4670,7 @@ context_switch(struct rq *rq, struct task_struct *prev, switch_mm_irqs_off(prev->active_mm, next->mm, next); if (!prev->mm) { // from kernel - /* will mmdrop() in finish_task_switch(). */ + /* will mmdrop_lazy_tlb() in finish_task_switch(). */ rq->prev_mm = prev->active_mm; prev->active_mm = NULL; } @@ -9057,7 +9058,7 @@ void __init sched_init(void) /* * The boot idle thread does lazy MMU switching as well: */ - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); enter_lazy_tlb(&init_mm, current); /* From 3ab58c3b0d479c783257ab164ccfd97f63375c79 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 11:41:46 +1000 Subject: [PATCH 349/851] lazy-tlb-introduce-lazy-mm-refcount-helper-functions-fix Fix a refcounting bug in kthread_use_mm (the mm reference is increased unconditionally now, but the lazy tlb refcount is still only dropped only if mm != active_mm). 
Link: https://lkml.kernel.org/r/1623125298.bx63h3mopj.astroid@bobo.none Signed-off-by: Nicholas Piggin Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/kthread.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index e82a17863b098..83ed75d531b4b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1350,6 +1350,11 @@ void kthread_use_mm(struct mm_struct *mm) WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(tsk->mm); + /* + * It's possible that tsk->active_mm == mm here, but we must + * still mmgrab(mm) and mmdrop_lazy_tlb(active_mm), because lazy + * mm may not have its own refcount (see mmgrab/drop_lazy_tlb()). + */ mmgrab(mm); task_lock(tsk); @@ -1374,12 +1379,9 @@ void kthread_use_mm(struct mm_struct *mm) * memory barrier after storing to tsk->mm, before accessing * user-space memory. A full memory barrier for membarrier * {PRIVATE,GLOBAL}_EXPEDITED is implicitly provided by - * mmdrop(), or explicitly with smp_mb(). + * mmdrop_lazy_tlb(). */ - if (active_mm != mm) - mmdrop_lazy_tlb(active_mm); - else - smp_mb(); + mmdrop_lazy_tlb(active_mm); to_kthread(tsk)->oldfs = force_uaccess_begin(); } From c764d0f4aa5a7abfd4c8b6d7ee80d162c13ee703 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 11:41:47 +1000 Subject: [PATCH 350/851] lazy tlb: allow lazy tlb mm refcounting to be configurable Add CONFIG_MMU_LAZY_TLB_REFCOUNT which enables refcounting of the lazy tlb mm when it is context switched. This can be disabled by architectures that don't require this refcounting if they clean up lazy tlb mms when the last refcount is dropped. Currently this is always enabled, which is what existing code does, so the patch is effectively a no-op. Rename rq->prev_mm to rq->prev_lazy_mm, because that's what it is.
Link: https://lkml.kernel.org/r/20210605014216.446867-3-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Andy Lutomirski Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/Kconfig | 4 ++++ include/linux/sched/mm.h | 13 +++++++++++-- kernel/sched/core.c | 22 ++++++++++++++++++---- kernel/sched/sched.h | 4 +++- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 129df498a8e12..9dddd38d2bf41 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -425,6 +425,10 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM irqs disabled over activate_mm. Architectures that do IPI based TLB shootdowns should enable this. +# Use normal mm refcounting for MMU_LAZY_TLB kernel thread references. +config MMU_LAZY_TLB_REFCOUNT + def_bool y + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index bfd1baca52668..29e4638ad1242 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -52,12 +52,21 @@ static inline void mmdrop(struct mm_struct *mm) /* Helpers for lazy TLB mm refcounting */ static inline void mmgrab_lazy_tlb(struct mm_struct *mm) { - mmgrab(mm); + if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) + mmgrab(mm); } static inline void mmdrop_lazy_tlb(struct mm_struct *mm) { - mmdrop(mm); + if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) { + mmdrop(mm); + } else { + /* + * mmdrop_lazy_tlb must provide a full memory barrier, see the + * membarrier comment finish_task_switch which relies on this. 
+ */ + smp_mb(); + } } /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0963b62726c57..cf2734cd31a92 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4515,7 +4515,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) __releases(rq->lock) { struct rq *rq = this_rq(); - struct mm_struct *mm = rq->prev_mm; + struct mm_struct *mm = NULL; long prev_state; /* @@ -4534,7 +4534,10 @@ static struct rq *finish_task_switch(struct task_struct *prev) current->comm, current->pid, preempt_count())) preempt_count_set(FORK_PREEMPT_COUNT); - rq->prev_mm = NULL; +#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT + mm = rq->prev_lazy_mm; + rq->prev_lazy_mm = NULL; +#endif /* * A task struct has one reference for the use as "current". @@ -4670,9 +4673,20 @@ context_switch(struct rq *rq, struct task_struct *prev, switch_mm_irqs_off(prev->active_mm, next->mm, next); if (!prev->mm) { // from kernel - /* will mmdrop_lazy_tlb() in finish_task_switch(). */ - rq->prev_mm = prev->active_mm; +#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT + /* Will mmdrop_lazy_tlb() in finish_task_switch(). */ + rq->prev_lazy_mm = prev->active_mm; prev->active_mm = NULL; +#else + /* + * Without MMU_LAZY_TLB_REFCOUNT there is no lazy + * tracking (because no rq->prev_lazy_mm) in + * finish_task_switch, so no mmdrop_lazy_tlb(), so no + * memory barrier for membarrier (see the membarrier + * comment in finish_task_switch()). Do it here. 
+ */ + smp_mb(); +#endif } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 14a41a243f7ba..1a13bb32eba2c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -967,7 +967,9 @@ struct rq { struct task_struct *idle; struct task_struct *stop; unsigned long next_balance; - struct mm_struct *prev_mm; +#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT + struct mm_struct *prev_lazy_mm; +#endif unsigned int clock_update_flags; u64 clock; From 631560d98f35f7db689705d719ca01770a00e30c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 21 Jul 2021 11:41:48 +1000 Subject: [PATCH 351/851] lazy-tlb-allow-lazy-tlb-mm-refcounting-to-be-configurable-fix fix comment Cc: Nicholas Piggin Cc: Andy Lutomirski Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/sched/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 29e4638ad1242..8a5d6db25430f 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -63,7 +63,8 @@ static inline void mmdrop_lazy_tlb(struct mm_struct *mm) } else { /* * mmdrop_lazy_tlb must provide a full memory barrier, see the - * membarrier comment finish_task_switch which relies on this. + * membarrier comment in finish_task_switch which relies on + * this. */ smp_mb(); } From 9f21f97ad67ee196a0649399bef0c5e1063014b0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 11:41:49 +1000 Subject: [PATCH 352/851] lazy-tlb-allow-lazy-tlb-mm-refcounting-to-be-configurable-fix-2 Explain the requirements for lazy tlb mm refcounting in the comment, to help with archs that may want to disable this by some means other than MMU_LAZY_TLB_SHOOTDOWN. 
Link: https://lkml.kernel.org/r/1623121605.j47gdpccep.astroid@bobo.none Signed-off-by: Nicholas Piggin Cc: Anton Blanchard Cc: Andy Lutomirski Cc: Randy Dunlap Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 9dddd38d2bf41..16e7a3eddb663 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -426,6 +426,16 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM shootdowns should enable this. # Use normal mm refcounting for MMU_LAZY_TLB kernel thread references. +# MMU_LAZY_TLB_REFCOUNT=n can improve the scalability of context switching +# to/from kernel threads when the same mm is running on a lot of CPUs (a large +# multi-threaded application), by reducing contention on the mm refcount. +# +# This can be disabled if the architecture ensures no CPUs are using an mm as a +# "lazy tlb" beyond its final refcount (i.e., by the time __mmdrop frees the mm +# or its kernel page tables). This could be arranged by arch_exit_mmap(), or +# final exit(2) TLB flush, for example. arch code must also ensure the +# _lazy_tlb variants of mmgrab/mmdrop are used when dropping the lazy reference +# to a kthread ->active_mm (non-arch code has been converted already). config MMU_LAZY_TLB_REFCOUNT def_bool y From e06787ae586cfdc7ccd52d4ebd4e65efab037f6c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 11:41:50 +1000 Subject: [PATCH 353/851] lazy tlb: shoot lazies, a non-refcounting lazy tlb option On big systems, the mm refcount can become highly contended when doing a lot of context switching with threaded applications (particularly switching between the idle thread and an application thread).
Abandoning lazy tlb slows switching down quite a bit in the important user->idle->user cases, so instead implement a non-refcounted scheme that causes __mmdrop() to IPI all CPUs in the mm_cpumask and shoot down any remaining lazy ones. Shootdown IPIs are some concern, but they have not been observed to be a big problem with this scheme (the powerpc implementation generated 314 additional interrupts on a 144 CPU system during a kernel compile). There are a number of strategies that could be employed to reduce IPIs if they turn out to be a problem for some workload. Link: https://lkml.kernel.org/r/20210605014216.446867-4-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Anton Blanchard Cc: Andy Lutomirski Cc: Randy Dunlap Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/Kconfig | 13 +++++++++++++ kernel/fork.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 16e7a3eddb663..57477e18e5a99 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -438,6 +438,19 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM # to a kthread ->active_mm (non-arch code has been converted already). config MMU_LAZY_TLB_REFCOUNT def_bool y + depends on !MMU_LAZY_TLB_SHOOTDOWN + +# Instead of refcounting the lazy mm struct for kernel thread references +# (which can cause contention with multi-threaded apps on large multiprocessor +# systems), this option causes __mmdrop to IPI all CPUs in the mm_cpumask and +# switch to init_mm if they were using the to-be-freed mm as the lazy tlb. To +# implement this, architectures must use _lazy_tlb variants of mm refcounting +# when releasing kernel thread mm references, and mm_cpumask must include at +# least all possible CPUs in which the mm might be lazy, at the time of the +# final mmdrop. mmgrab/mmdrop in arch/ code must be switched to _lazy_tlb +# postfix as necessary. 
+config MMU_LAZY_TLB_SHOOTDOWN + bool config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/kernel/fork.c b/kernel/fork.c index bc94b2cc59956..abed9591f82c0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -674,6 +674,53 @@ static void check_mm(struct mm_struct *mm) #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) +static void do_shoot_lazy_tlb(void *arg) +{ + struct mm_struct *mm = arg; + + if (current->active_mm == mm) { + WARN_ON_ONCE(current->mm); + current->active_mm = &init_mm; + switch_mm(mm, &init_mm, current); + } +} + +static void do_check_lazy_tlb(void *arg) +{ + struct mm_struct *mm = arg; + + WARN_ON_ONCE(current->active_mm == mm); +} + +static void shoot_lazy_tlbs(struct mm_struct *mm) +{ + if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) { + /* + * IPI overheads have not found to be expensive, but they could + * be reduced in a number of possible ways, for example (in + * roughly increasing order of complexity): + * - A batch of mms requiring IPIs could be gathered and freed + * at once. + * - CPUs could store their active mm somewhere that can be + * remotely checked without a lock, to filter out + * false-positives in the cpumask. + * - After mm_users or mm_count reaches zero, switching away + * from the mm could clear mm_cpumask to reduce some IPIs + * (some batching or delaying would help). + * - A delayed freeing and RCU-like quiescing sequence based on + * mm switching to avoid IPIs completely. + */ + on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1); + if (IS_ENABLED(CONFIG_DEBUG_VM)) + on_each_cpu(do_check_lazy_tlb, (void *)mm, 1); + } else { + /* + * In this case, lazy tlb mms are refounted and would not reach + * __mmdrop until all CPUs have switched away and mmdrop()ed. 
+ */ + } +} + /* * Called when the last reference to the mm * is dropped: either by a lazy thread or by @@ -683,6 +730,10 @@ void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); WARN_ON_ONCE(mm == current->mm); + + /* Ensure no CPUs are using this as their lazy tlb mm */ + shoot_lazy_tlbs(mm); + WARN_ON_ONCE(mm == current->active_mm); mm_free_pgd(mm); destroy_context(mm); From 6751361458ba615741d26920fefb10551659fe1c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 11:41:51 +1000 Subject: [PATCH 354/851] lazy-tlb-shoot-lazies-a-non-refcounting-lazy-tlb-option-fix Update the comment to be clearer, and account for the improvement to MMU_LAZY_TLB_REFCOUNT comment. Link: https://lkml.kernel.org/r/1623121901.mszkmmum0n.astroid@bobo.none Signed-off-by: Nicholas Piggin Cc: Anton Blanchard Cc: Andy Lutomirski Cc: Randy Dunlap Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/Kconfig | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 57477e18e5a99..1baac7bfdd441 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -440,15 +440,16 @@ config MMU_LAZY_TLB_REFCOUNT def_bool y depends on !MMU_LAZY_TLB_SHOOTDOWN -# Instead of refcounting the lazy mm struct for kernel thread references -# (which can cause contention with multi-threaded apps on large multiprocessor -# systems), this option causes __mmdrop to IPI all CPUs in the mm_cpumask and -# switch to init_mm if they were using the to-be-freed mm as the lazy tlb. To -# implement this, architectures must use _lazy_tlb variants of mm refcounting -# when releasing kernel thread mm references, and mm_cpumask must include at -# least all possible CPUs in which the mm might be lazy, at the time of the -# final mmdrop. mmgrab/mmdrop in arch/ code must be switched to _lazy_tlb -# postfix as necessary. +# This option allows MMU_LAZY_TLB_REFCOUNT=n. 
It ensures no CPUs are using an +# mm as a lazy tlb beyond its last reference count, by shooting down these +# users before the mm is deallocated. __mmdrop() first IPIs all CPUs that may +# be using the mm as a lazy tlb, so that they may switch themselves to using +# init_mm for their active mm. mm_cpumask(mm) is used to determine which CPUs +# may be using mm as a lazy tlb mm. +# +# To implement this, an arch must ensure mm_cpumask(mm) contains at least all +# possible CPUs in which the mm is lazy, and it must meet the requirements for +# MMU_LAZY_TLB_REFCOUNT=n (see above). config MMU_LAZY_TLB_SHOOTDOWN bool From d4f7b00d941858646cb95684e0465c55c0821147 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 11:41:52 +1000 Subject: [PATCH 355/851] powerpc/64s: enable MMU_LAZY_TLB_SHOOTDOWN On a 16-socket 192-core POWER8 system, a context switching benchmark with as many software threads as CPUs (so each switch will go in and out of idle), upstream can achieve a rate of about 1 million context switches per second. After this patch it goes up to 118 million. No real data for real world workloads unfortunately. I think it's always been a "known" cacheline, it just showed up badly on will-it-scale tests recently when Anton was doing a sweep of low hanging scalability issues on big systems. We have some very big systems running certain in-memory databases that get into very high contention conditions on mutexes that push context switch rates right up and with idle times pretty high, which would get a lot of parallel context switching between user and idle thread, we might be getting a bit of this contention there. It's not something at the top of profiles though. And on multi-threaded workloads like this, the normal refcounting of the user mm still has fundamental contention. It's tricky to get the change tested on these workloads (machine time is very limited and I can't drive the software). 
I suspect it could also show in things that do high net or disk IO rates (enough to need a lot of cores), and do some user processing steps along the way. You'd potentially get a lot of idle switching. This infrastructure could be beneficial to other architectures. The cacheline is going to bounce in the same situations on other archs, so I would say yes. Rik at one stage had some patches to try avoid it for x86 some years ago, I don't know what happened to those. The way powerpc has to maintain mm_cpumask for its TLB flushing makes it relatively easy to do this shootdown, and we decided the additional IPIs were less of a concern than the bouncing. Others have different concerns, but I tried to make it generic and add comments explaining what other archs can do, or possibly different ways it might be achieved. Link: https://lkml.kernel.org/r/20210605014216.446867-5-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Andy Lutomirski Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d6..53db06ba4223b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -254,6 +254,7 @@ config PPC select IRQ_FORCED_THREADING select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE + select MMU_LAZY_TLB_SHOOTDOWN if PPC_BOOK3S_64 select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH From d08236e77e7e45e1126259a87c3da27414e6e3ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 11:41:53 +1000 Subject: [PATCH 356/851] mmc: JZ4740: remove the flush_kernel_dcache_page call in jz4740_mmc_read_data Patch series "_kernel_dcache_page fixes and removal". 
While looking to convert the block layer away from kmap_atomic towards kmap_local_page and preferably the helpers that abstract it away I noticed that a few block drivers directly or implicitly call flush_kernel_dcache_page before kunmapping a page that has been written to. flush_kernel_dcache_page is documented to be used in such cases, but flush_dcache_page is actually required when the page could be in the page cache and mapped to userspace, which is pretty much always the case when kmapping an arbitrary page. Unfortunately the documentation doesn't exactly make that clear, which led to this misuse. And it turns out that only the copy_strings / copy_string_kernel in the exec code were actually correct users of flush_kernel_dcache_page, which is why I think we should just remove it and eat the very minor overhead in exec rather than confusing poor driver writers. This patch (of 6): MIPS now implements flush_kernel_dcache_page (as an alias to flush_dcache_page). Link: https://lkml.kernel.org/r/20210712060928.4161649-1-hch@lst.de Link: https://lkml.kernel.org/r/20210712060928.4161649-2-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds Cc: "James E.J. 
Bottomley" Cc: Russell King Cc: Guo Ren Cc: Thomas Bogendoerfer Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Helge Deller Cc: Yoshinori Sato Cc: Rich Felker Cc: Geoff Levand Cc: Paul Cercueil Cc: Ulf Hansson Cc: Alex Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/mmc/host/jz4740_mmc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index cb1a64a5c256f..80a2c270d502e 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -578,10 +578,6 @@ static bool jz4740_mmc_read_data(struct jz4740_mmc_host *host, } } data->bytes_xfered += miter->length; - - /* This can go away once MIPS implements - * flush_kernel_dcache_page */ - flush_dcache_page(miter->page); } sg_miter_stop(miter); From fcf25a2f046f62f54dd6f8e094b56649c6201925 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 11:41:54 +1000 Subject: [PATCH 357/851] mmc: mmc_spi: replace flush_kernel_dcache_page with flush_dcache_page Pages passed to block drivers can be mapped page cache pages, so we must use flush_dcache_page here instead of the more limited flush_kernel_dcache_page that is intended for highmem pages only. Link: https://lkml.kernel.org/r/20210712060928.4161649-3-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds Cc: Alex Shi Cc: Geoff Levand Cc: Greentime Hu Cc: Guo Ren Cc: Helge Deller Cc: "James E.J. 
Bottomley" Cc: Nick Hu Cc: Paul Cercueil Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Ulf Hansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/mmc/host/mmc_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 65c65bb5737fc..3d28a3d3001be 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -948,7 +948,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd, /* discard mappings */ if (direction == DMA_FROM_DEVICE) - flush_kernel_dcache_page(sg_page(sg)); + flush_dcache_page(sg_page(sg)); kunmap(sg_page(sg)); if (dma_dev) dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir); From 0dfbe021289f9d5c20e085f7f396ef8d865dfac6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 11:41:55 +1000 Subject: [PATCH 358/851] ps3disk: replace flush_kernel_dcache_page with flush_dcache_page Pages passed to block drivers can be mapped page cache pages, so we must use flush_dcache_page here instead of the more limited flush_kernel_dcache_page that is intended for highmem pages only. Link: https://lkml.kernel.org/r/20210712060928.4161649-4-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds Cc: Alex Shi Cc: Geoff Levand Cc: Greentime Hu Cc: Guo Ren Cc: Helge Deller Cc: "James E.J. 
Bottomley" Cc: Nick Hu Cc: Paul Cercueil Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Ulf Hansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/block/ps3disk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index f374ea2c67ceb..32bfb0487bdba 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -100,7 +100,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, else memcpy(buf, dev->bounce_buf+offset, size); offset += size; - flush_kernel_dcache_page(bvec.bv_page); + flush_dcache_page(bvec.bv_page); bvec_kunmap_irq(buf, &flags); i++; } From f4727054f2bcc3b9ebf5bf650b91cad5d516ba0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 11:41:56 +1000 Subject: [PATCH 359/851] scatterlist: replace flush_kernel_dcache_page with flush_dcache_page Pages used in scatterlist can be mapped page cache pages (and often are), so we must use flush_dcache_page here instead of the more limited flush_kernel_dcache_page that is intended for highmem pages only. Also remove the PageSlab check given that page_mapping_file as used by the flush_dcache_page implementations already contains that check. Link: https://lkml.kernel.org/r/20210712060928.4161649-5-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds Cc: Alex Shi Cc: Geoff Levand Cc: Greentime Hu Cc: Guo Ren Cc: Helge Deller Cc: "James E.J. 
Bottomley" Cc: Nick Hu Cc: Paul Cercueil Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Ulf Hansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/scatterlist.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 27efa61781538..627aa84f8bbd5 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -887,9 +887,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter) miter->__offset += miter->consumed; miter->__remaining -= miter->consumed; - if ((miter->__flags & SG_MITER_TO_SG) && - !PageSlab(miter->page)) - flush_kernel_dcache_page(miter->page); + if (miter->__flags & SG_MITER_TO_SG) + flush_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { WARN_ON_ONCE(preemptible()); From 6f1ea81c56089a6fe9b2ccbc05db5623fb25cbab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Jul 2021 11:41:57 +1000 Subject: [PATCH 360/851] mm: remove flush_kernel_dcache_page flush_kernel_dcache_page is a rather confusing interface that implements a subset of flush_dcache_page by not being able to properly handle page cache mapped pages. The only callers left are in the exec code as all other previous callers were incorrect as they could have dealt with page cache pages. Replace the calls to flush_kernel_dcache_page with calls to flush_dcache_page, which for all architectures does either exactly the same thing, can contains one or more of the following: 1) an optimization to defer the cache flush for page cache pages not mapped into userspace 2) additional flushing for mapped page cache pages if cache aliases are possible Link: https://lkml.kernel.org/r/20210712060928.4161649-7-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds Reviewed-by: Ira Weiny Cc: Alex Shi Cc: Geoff Levand Cc: Greentime Hu Cc: Guo Ren Cc: Helge Deller Cc: "James E.J. 
Bottomley" Cc: Nick Hu Cc: Paul Cercueil Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Ulf Hansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/core-api/cachetlb.rst | 86 ++++++++----------- .../translations/zh_CN/core-api/cachetlb.rst | 9 -- arch/arm/include/asm/cacheflush.h | 4 +- arch/arm/mm/flush.c | 33 ------- arch/arm/mm/nommu.c | 6 -- arch/csky/abiv1/cacheflush.c | 11 --- arch/csky/abiv1/inc/abi/cacheflush.h | 4 +- arch/mips/include/asm/cacheflush.h | 8 +- arch/nds32/include/asm/cacheflush.h | 3 +- arch/nds32/mm/cacheflush.c | 9 -- arch/parisc/include/asm/cacheflush.h | 8 +- arch/parisc/kernel/cache.c | 3 +- arch/sh/include/asm/cacheflush.h | 8 +- block/blk-map.c | 2 +- fs/exec.c | 6 +- include/linux/highmem.h | 5 +- tools/testing/scatterlist/linux/mm.h | 1 - 17 files changed, 51 insertions(+), 155 deletions(-) diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst index fe4290e267296..8aed9103e48a3 100644 --- a/Documentation/core-api/cachetlb.rst +++ b/Documentation/core-api/cachetlb.rst @@ -271,10 +271,15 @@ maps this page at its virtual address. ``void flush_dcache_page(struct page *page)`` - Any time the kernel writes to a page cache page, _OR_ - the kernel is about to read from a page cache page and - user space shared/writable mappings of this page potentially - exist, this routine is called. + This routines must be called when: + + a) the kernel did write to a page that is in the page cache page + and / or in high memory + b) the kernel is about to read from a page cache page and user space + shared/writable mappings of this page potentially exist. Note + that {get,pin}_user_pages{_fast} already call flush_dcache_page + on any page found in the user address space and thus driver + code rarely needs to take this into account. .. note:: @@ -284,38 +289,34 @@ maps this page at its virtual address. 
handling vfs symlinks in the page cache need not call this interface at all. - The phrase "kernel writes to a page cache page" means, - specifically, that the kernel executes store instructions - that dirty data in that page at the page->virtual mapping - of that page. It is important to flush here to handle - D-cache aliasing, to make sure these kernel stores are - visible to user space mappings of that page. - - The corollary case is just as important, if there are users - which have shared+writable mappings of this file, we must make - sure that kernel reads of these pages will see the most recent - stores done by the user. - - If D-cache aliasing is not an issue, this routine may - simply be defined as a nop on that architecture. - - There is a bit set aside in page->flags (PG_arch_1) as - "architecture private". The kernel guarantees that, - for pagecache pages, it will clear this bit when such - a page first enters the pagecache. - - This allows these interfaces to be implemented much more - efficiently. It allows one to "defer" (perhaps indefinitely) - the actual flush if there are currently no user processes - mapping this page. See sparc64's flush_dcache_page and - update_mmu_cache implementations for an example of how to go - about doing this. - - The idea is, first at flush_dcache_page() time, if - page->mapping->i_mmap is an empty tree, just mark the architecture - private page flag bit. Later, in update_mmu_cache(), a check is - made of this flag bit, and if set the flush is done and the flag - bit is cleared. + The phrase "kernel writes to a page cache page" means, specifically, + that the kernel executes store instructions that dirty data in that + page at the page->virtual mapping of that page. It is important to + flush here to handle D-cache aliasing, to make sure these kernel stores + are visible to user space mappings of that page. 
+ + The corollary case is just as important, if there are users which have + shared+writable mappings of this file, we must make sure that kernel + reads of these pages will see the most recent stores done by the user. + + If D-cache aliasing is not an issue, this routine may simply be defined + as a nop on that architecture. + + There is a bit set aside in page->flags (PG_arch_1) as "architecture + private". The kernel guarantees that, for pagecache pages, it will + clear this bit when such a page first enters the pagecache. + + This allows these interfaces to be implemented much more efficiently. + It allows one to "defer" (perhaps indefinitely) the actual flush if + there are currently no user processes mapping this page. See sparc64's + flush_dcache_page and update_mmu_cache implementations for an example + of how to go about doing this. + + The idea is, first at flush_dcache_page() time, if page_file_mapping() + returns a mapping, and mapping_mapped on that mapping returns %false, + just mark the architecture private page flag bit. Later, in + update_mmu_cache(), a check is made of this flag bit, and if set the + flush is done and the flag bit is cleared. .. important:: @@ -351,19 +352,6 @@ maps this page at its virtual address. architectures). For incoherent architectures, it should flush the cache of the page at vmaddr. - ``void flush_kernel_dcache_page(struct page *page)`` - - When the kernel needs to modify a user page is has obtained - with kmap, it calls this function after all modifications are - complete (but before kunmapping it) to bring the underlying - page up to date. It is assumed here that the user has no - incoherent cached copies (i.e. the original page was obtained - from a mechanism like get_user_pages()). The default - implementation is a nop and should remain so on all coherent - architectures. On incoherent architectures, this should flush - the kernel cache for page (using page_address(page)). 
- - ``void flush_icache_range(unsigned long start, unsigned long end)`` When the kernel stores into addresses that it will execute diff --git a/Documentation/translations/zh_CN/core-api/cachetlb.rst b/Documentation/translations/zh_CN/core-api/cachetlb.rst index 8376485a534d1..55827b8a7c535 100644 --- a/Documentation/translations/zh_CN/core-api/cachetlb.rst +++ b/Documentation/translations/zh_CN/core-api/cachetlb.rst @@ -298,15 +298,6 @@ HyperSparc cpu就是这样一个具有这种属性的cpu。 用。默认的实现是nop(对于所有相干的架构应该保持这样)。对于不一致性 的架构,它应该刷新vmaddr处的页面缓存。 - ``void flush_kernel_dcache_page(struct page *page)`` - - 当内核需要修改一个用kmap获得的用户页时,它会在所有修改完成后(但在 - kunmapping之前)调用这个函数,以使底层页面达到最新状态。这里假定用 - 户没有不一致性的缓存副本(即原始页面是从类似get_user_pages()的机制 - 中获得的)。默认的实现是一个nop,在所有相干的架构上都应该如此。在不 - 一致性的架构上,这应该刷新内核缓存中的页面(使用page_address(page))。 - - ``void flush_icache_range(unsigned long start, unsigned long end)`` 当内核存储到它将执行的地址中时(例如在加载模块时),这个函数被调用。 diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 2e24e765e6d3a..5e56288e343bb 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -291,6 +291,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { if ((cache_is_vivt() || cache_is_vipt_aliasing())) @@ -312,9 +313,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma, __flush_anon_page(vma, page, vmaddr); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -extern void flush_kernel_dcache_page(struct page *); - #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6d89db7895d14..7ff9feea13a6a 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -345,39 
+345,6 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); -/* - * Ensure cache coherency for the kernel mapping of this page. We can - * assume that the page is pinned via kmap. - * - * If the page only exists in the page cache and there are no user - * space mappings, this is a no-op since the page was already marked - * dirty at creation. Otherwise, we need to flush the dirty kernel - * cache lines directly. - */ -void flush_kernel_dcache_page(struct page *page) -{ - if (cache_is_vivt() || cache_is_vipt_aliasing()) { - struct address_space *mapping; - - mapping = page_mapping_file(page); - - if (!mapping || mapping_mapped(mapping)) { - void *addr; - - addr = page_address(page); - /* - * kmap_atomic() doesn't set the page virtual - * address for highmem pages, and - * kunmap_atomic() takes care of cache - * flushing already. - */ - if (!IS_ENABLED(CONFIG_HIGHMEM) || addr) - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - } - } -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - /* * Flush an anonymous page so that users of get_user_pages() * can safely access the data. 
The expected sequence is: diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 8b3d7191e2b88..2658f52903da6 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -166,12 +166,6 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); -void flush_kernel_dcache_page(struct page *page) -{ - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c index 07ff17ea33dee..fb91b069dc69f 100644 --- a/arch/csky/abiv1/cacheflush.c +++ b/arch/csky/abiv1/cacheflush.c @@ -56,17 +56,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, } } -void flush_kernel_dcache_page(struct page *page) -{ - struct address_space *mapping; - - mapping = page_mapping_file(page); - - if (!mapping || mapping_mapped(mapping)) - dcache_wbinv_all(); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 6cab7afae9626..ed62e2066ba76 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -14,12 +14,10 @@ extern void flush_dcache_page(struct page *); #define flush_cache_page(vma, page, pfn) cache_wbinv_all() #define flush_cache_dup_mm(mm) cache_wbinv_all() -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -extern void flush_kernel_dcache_page(struct page *); - #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { dcache_wbinv_all(); diff --git 
a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index d687b40b9fbbf..b3dc9c589442a 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -125,13 +125,7 @@ static inline void kunmap_noncoherent(void) kunmap_coherent(); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ - BUG_ON(cpu_has_dc_aliases && PageHighMem(page)); - flush_dcache_page(page); -} - +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 /* * For now flush_kernel_vmap_range and invalidate_kernel_vmap_range both do a * cache writeback and invalidate operation. diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 7d6824f7c0e8d..c2a222ebfa2af 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -36,8 +36,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr); -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -void flush_kernel_dcache_page(struct page *page); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *addr, int size); void invalidate_kernel_vmap_range(void *addr, int size); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index ad5344ef5d334..07aac65d1cab4 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -318,15 +318,6 @@ void flush_anon_page(struct vm_area_struct *vma, local_irq_restore(flags); } -void flush_kernel_dcache_page(struct page *page) -{ - unsigned long flags; - local_irq_save(flags); - cpu_dcache_wbinval_page((unsigned long)page_address(page)); - local_irq_restore(flags); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void flush_kernel_vmap_range(void *addr, int size) { unsigned long flags; diff --git 
a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 99663fc1f997f..eef0096db5f88 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -36,16 +36,12 @@ void flush_cache_all_local(void); void flush_cache_all(void); void flush_cache_mm(struct mm_struct *mm); -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE void flush_kernel_dcache_page_addr(void *addr); -static inline void flush_kernel_dcache_page(struct page *page) -{ - flush_kernel_dcache_page_addr(page_address(page)); -} #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *vaddr, int size); void invalidate_kernel_vmap_range(void *vaddr, int size); @@ -59,7 +55,7 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) #define flush_icache_page(vma,page) do { \ - flush_kernel_dcache_page(page); \ + flush_kernel_dcache_page_addr(page_address(page)); \ flush_kernel_icache_page(page_address(page)); \ } while (0) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 86a1a63563fd5..39e02227e2310 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -334,7 +334,7 @@ void flush_dcache_page(struct page *page) return; } - flush_kernel_dcache_page(page); + flush_kernel_dcache_page_addr(page_address(page)); if (!mapping) return; @@ -375,7 +375,6 @@ EXPORT_SYMBOL(flush_dcache_page); /* Defined in arch/parisc/kernel/pacache.S */ EXPORT_SYMBOL(flush_kernel_dcache_range_asm); -EXPORT_SYMBOL(flush_kernel_dcache_page_asm); EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index 4486a865ff62f..372afa82fee62 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -63,6 +63,8 
@@ static inline void flush_anon_page(struct vm_area_struct *vma, if (boot_cpu_data.dcache.n_aliases && PageAnon(page)) __flush_anon_page(page, vmaddr); } + +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { __flush_wback_region(addr, size); @@ -72,12 +74,6 @@ static inline void invalidate_kernel_vmap_range(void *addr, int size) __flush_invalidate_region(addr, size); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ - flush_dcache_page(page); -} - extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len); diff --git a/block/blk-map.c b/block/blk-map.c index 3743158ddaeb7..4639bc6b5c62f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -309,7 +309,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, static void bio_invalidate_vmalloc_pages(struct bio *bio) { -#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE if (bio->bi_private && !op_is_write(bio_op(bio))) { unsigned long i, len = 0; diff --git a/fs/exec.c b/fs/exec.c index 17ddaad5462fe..eb2a99793018d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -574,7 +574,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv, } if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); + flush_dcache_page(kmapped_page); kunmap(kmapped_page); put_arg_page(kmapped_page); } @@ -592,7 +592,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv, ret = 0; out: if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); + flush_dcache_page(kmapped_page); kunmap(kmapped_page); put_arg_page(kmapped_page); } @@ -634,7 +634,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) kaddr = kmap_atomic(page); flush_arg_page(bprm, pos & PAGE_MASK, page); memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy); - 
flush_kernel_dcache_page(page); + flush_dcache_page(page); kunmap_atomic(kaddr); put_arg_page(page); } diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d9a606a9fc64a..b4c49f9cc379e 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -130,10 +130,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page } #endif -#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ -} +#ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE static inline void flush_kernel_vmap_range(void *vaddr, int size) { } diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index f9a12005fcea8..16ec895bbe5ff 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -127,7 +127,6 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) #define kmemleak_free(a) #define PageSlab(p) (0) -#define flush_kernel_dcache_page(p) #define MAX_ERRNO 4095 From b3e52fb9b8fcc495873e09ee4af05c04fe632a8c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 21 Jul 2021 11:41:58 +1000 Subject: [PATCH 361/851] mm,do_huge_pmd_numa_page: remove unnecessary TLB flushing code Before commit c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling"), the TLB flushing is done in do_huge_pmd_numa_page() itself via flush_tlb_range(). But after commit c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling"), the TLB flushing is done in migrate_pages() as in the following code path anyway. do_huge_pmd_numa_page migrate_misplaced_page migrate_pages So now, the TLB flushing code in do_huge_pmd_numa_page() becomes unnecessary. So the code is deleted in this patch to simplify the code. This is only code cleanup, there's no visible performance difference. 
Link: https://lkml.kernel.org/r/20210720065529.716031-1-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Zi Yan Reviewed-by: Yang Shi Cc: Dan Carpenter Cc: Mel Gorman Cc: Christian Borntraeger Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Vasily Gorbik Cc: Paolo Bonzini Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/huge_memory.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index afff3ac870673..9f21e44c90306 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1440,32 +1440,6 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) goto out; } - /* - * Since we took the NUMA fault, we must have observed the !accessible - * bit. Make sure all other CPUs agree with that, to avoid them - * modifying the page we're about to migrate. - * - * Must be done under PTL such that we'll observe the relevant - * inc_tlb_flush_pending(). - * - * We are not sure a pending tlb flush here is for a huge page - * mapping or not. Hence use the tlb range variant - */ - if (mm_tlb_flush_pending(vma->vm_mm)) { - flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); - /* - * change_huge_pmd() released the pmd lock before - * invalidating the secondary MMUs sharing the primary - * MMU pagetables (with ->invalidate_range()). The - * mmu_notifier_invalidate_range_end() (which - * internally calls ->invalidate_range()) in - * change_pmd_range() will run after us, so we can't - * rely on it here and we need an explicit invalidate. 
- */ - mmu_notifier_invalidate_range(vma->vm_mm, haddr, - haddr + HPAGE_PMD_SIZE); - } - pmd = pmd_modify(oldpmd, vma->vm_page_prot); page = vm_normal_page_pmd(vma, haddr, pmd); if (!page) From d015e1cd3bba337a8d74257894db1773cf411747 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Wed, 21 Jul 2021 11:41:59 +1000 Subject: [PATCH 362/851] mm/mremap: fix memory account on do_munmap() failure mremap will account the delta between new_len and old_len in vma_to_resize, and then call move_vma when expanding an existing memory mapping. In function move_vma, there are two scenarios when calling do_munmap: 1. move_page_tables from old_addr to new_addr success 2. move_page_tables from old_addr to new_addr fail In first scenario, it should account old_len if do_munmap fail, because the delta has already been accounted. In second scenario, new_addr/new_len will assign to old_addr/old_len if move_page_table fail, so do_munmap is try to unmap new_addr actually, if do_munmap fail, it should account the new_len, because error code will be returned from move_vma, and delta will be unaccounted. What's more, because of new_len == old_len, so account old_len also is OK. In summary, account old_len will be correct if do_munmap fail.
Link: https://lkml.kernel.org/r/20210717101942.120607-1-chenwandun@huawei.com Fixes: 51df7bcb6151 ("mm/mremap: account memory on do_munmap() failure") Signed-off-by: Chen Wandun Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Kefeng Wang Cc: Wei Yongjun Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index 5989d39900204..badfe17ade1f0 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -686,7 +686,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) { /* OOM: unable to split vma, just get accounts right */ if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) - vm_acct_memory(new_len >> PAGE_SHIFT); + vm_acct_memory(old_len >> PAGE_SHIFT); excess = 0; } From d12f1d05b4c72ad7e93843ace7da4c6fe2bb2cac Mon Sep 17 00:00:00 2001 From: Ohhoon Kwon Date: Wed, 21 Jul 2021 11:42:00 +1000 Subject: [PATCH 363/851] mm: sparse: pass section_nr to section_mark_present Patch series "mm: sparse: remove __section_nr() function", v4. This patch (of 3): With CONFIG_SPARSEMEM_EXTREME enabled, __section_nr() which converts mem_section to section_nr could be costly since it iterates all section roots to check if the given mem_section is in its range. Since both callers of section_mark_present already know section_nr, let's also pass section_nr as well as mem_section in order to reduce costly translation. 
Link: https://lkml.kernel.org/r/20210707150212.855-1-ohoono.kwon@samsung.com Link: https://lkml.kernel.org/r/20210707150212.855-2-ohoono.kwon@samsung.com Signed-off-by: Ohhoon Kwon Acked-by: Mike Rapoport Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/sparse.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/sparse.c b/mm/sparse.c index 6326cdf36c4f2..8018ee7fcda52 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -187,10 +187,9 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, * those loops early. */ unsigned long __highest_present_section_nr; -static void section_mark_present(struct mem_section *ms) +static void __section_mark_present(struct mem_section *ms, + unsigned long section_nr) { - unsigned long section_nr = __section_nr(ms); - if (section_nr > __highest_present_section_nr) __highest_present_section_nr = section_nr; @@ -280,7 +279,7 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en if (!ms->section_mem_map) { ms->section_mem_map = sparse_encode_early_nid(nid) | SECTION_IS_ONLINE; - section_mark_present(ms); + __section_mark_present(ms, section); } } } @@ -934,7 +933,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn, ms = __nr_to_section(section_nr); set_section_nid(section_nr, nid); - section_mark_present(ms); + __section_mark_present(ms, section_nr); /* Align memmap to section boundary in the subsection case */ if (section_nr_to_pfn(section_nr) != start_pfn) From cdfcc31746c836f6b9a1b22d130b343a8072ef42 Mon Sep 17 00:00:00 2001 From: Ohhoon Kwon Date: Wed, 21 Jul 2021 11:42:01 +1000 Subject: [PATCH 364/851] mm: sparse: pass section_nr to find_memory_block With CONFIG_SPARSEMEM_EXTREME enabled, __section_nr() which converts mem_section to section_nr could be costly since it iterates all section roots to check if the given mem_section is in its range. 
On the other hand, __nr_to_section() which converts section_nr to mem_section can be done in O(1). Let's pass section_nr instead of mem_section ptr to find_memory_block() in order to reduce needless iterations. Link: https://lkml.kernel.org/r/20210707150212.855-3-ohoono.kwon@samsung.com Signed-off-by: Ohhoon Kwon Acked-by: Michal Hocko Acked-by: Mike Rapoport Reviewed-by: David Hildenbrand Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/pseries/hotplug-memory.c | 4 +--- drivers/base/memory.c | 4 ++-- include/linux/memory.h | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 377d852f5a9aa..d4f28ee4d5dce 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -211,13 +211,11 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb) static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) { unsigned long section_nr; - struct mem_section *mem_sect; struct memory_block *mem_block; section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); - mem_sect = __nr_to_section(section_nr); - mem_block = find_memory_block(mem_sect); + mem_block = find_memory_block(section_nr); return mem_block; } diff --git a/drivers/base/memory.c b/drivers/base/memory.c index aa31a21f33d7d..e3fd2dbf4eea8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -578,9 +578,9 @@ static struct memory_block *find_memory_block_by_id(unsigned long block_id) /* * Called under device_hotplug_lock. 
*/ -struct memory_block *find_memory_block(struct mem_section *section) +struct memory_block *find_memory_block(unsigned long section_nr) { - unsigned long block_id = memory_block_id(__section_nr(section)); + unsigned long block_id = memory_block_id(section_nr); return find_memory_block_by_id(block_id); } diff --git a/include/linux/memory.h b/include/linux/memory.h index 97e92e8b556a3..d9a0b61cd4329 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -90,7 +90,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size, void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); -extern struct memory_block *find_memory_block(struct mem_section *); +extern struct memory_block *find_memory_block(unsigned long section_nr); typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); From 22112dd93ec1236e8b1dacb2b5c76b89e95a2408 Mon Sep 17 00:00:00 2001 From: Ohhoon Kwon Date: Wed, 21 Jul 2021 11:42:02 +1000 Subject: [PATCH 365/851] mm: sparse: remove __section_nr() function As the last users of __section_nr() are gone, let's remove unused function __section_nr(). 
Link: https://lkml.kernel.org/r/20210707150212.855-4-ohoono.kwon@samsung.com Signed-off-by: Ohhoon Kwon Acked-by: Michal Hocko Acked-by: Mike Rapoport Reviewed-by: David Hildenbrand Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mmzone.h | 1 - mm/sparse.c | 26 -------------------------- 2 files changed, 27 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fcb535560028f..8827f4d081d43 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1342,7 +1342,6 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return NULL; return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; } -extern unsigned long __section_nr(struct mem_section *ms); extern size_t mem_section_usage_size(void); /* diff --git a/mm/sparse.c b/mm/sparse.c index 8018ee7fcda52..d85655242ed98 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -109,32 +109,6 @@ static inline int sparse_index_init(unsigned long section_nr, int nid) } #endif -#ifdef CONFIG_SPARSEMEM_EXTREME -unsigned long __section_nr(struct mem_section *ms) -{ - unsigned long root_nr; - struct mem_section *root = NULL; - - for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) { - root = __nr_to_section(root_nr * SECTIONS_PER_ROOT); - if (!root) - continue; - - if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT))) - break; - } - - VM_BUG_ON(!root); - - return (root_nr * SECTIONS_PER_ROOT) + (ms - root); -} -#else -unsigned long __section_nr(struct mem_section *ms) -{ - return (unsigned long)(ms - mem_section[0]); -} -#endif - /* * During early boot, before section_mem_map is used for an actual * mem_map, we use section_mem_map to store the section's NUMA From 847d7423f9a02ee2bb5e8b593ca8116446896cd7 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 21 Jul 2021 11:42:03 +1000 Subject: [PATCH 366/851] mm/sparse: set SECTION_NID_SHIFT to 6 Currently SECTION_NID_SHIFT is set to 3, which is incorrect because bit 3 and 4 
can be overlapped by sub-field for early NID, and can be unexpectedly set on NUMA systems. There are a few non-critical issues related to this: - Having SECTION_TAINT_ZONE_DEVICE set for wrong sections forces pfn_to_online_page() through the slow path, but doesn't actually break the kernel. - A kdump generation tool like makedumpfile uses this field to calculate the physical address to read. So wrong bits can make the tool access to wrong address and fail to create kdump. This can be avoided by the tool, so it's not critical. To fix it, set SECTION_NID_SHIFT to 6 which is the minimum number of available bits of section flag field. Link: https://lkml.kernel.org/r/20210707045548.810271-1-naoya.horiguchi@linux.dev Fixes: 1f90a3477df3 ("mm: teach pfn_to_online_page() about ZONE_DEVICE section collisions") Signed-off-by: Naoya Horiguchi Reported-by: Kazuhito Hagio Suggested-by: Dan Williams Acked-by: David Hildenbrand Cc: Oscar Salvador Cc: Wang Wensheng Cc: Rui Xiang Cc: Kazu Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8827f4d081d43..59bad25ce78e0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1364,7 +1364,7 @@ extern size_t mem_section_usage_size(void); #define SECTION_TAINT_ZONE_DEVICE (1UL<<4) #define SECTION_MAP_LAST_BIT (1UL<<5) #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) -#define SECTION_NID_SHIFT 3 +#define SECTION_NID_SHIFT 6 static inline struct page *__section_mem_map_addr(struct mem_section *section) { From 47c0cdaf47f272b177929e988598c541fb792fd1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 21 Jul 2021 11:42:04 +1000 Subject: [PATCH 367/851] include/linux/mmzone.h: avoid a warning in sparse memory support cppcheck warns that we're possibly losing information by shifting an int. 
It's a false positive, because we don't allow for a NUMA node ID that large, but if we ever change SECTION_NID_SHIFT, it could become a problem, and in any case this is usually a legitimate warning. Fix it by adding the necessary cast, which makes the compiler generate the right code. Link: https://lkml.kernel.org/r/YOya+aBZFFmC476e@casper.infradead.org Link: https://lkml.kernel.org/r/202107130348.6LsVT9Nc-lkp@intel.com Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/sparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/sparse.c b/mm/sparse.c index d85655242ed98..be7936e65b6a2 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -117,7 +117,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid) */ static inline unsigned long sparse_encode_early_nid(int nid) { - return (nid << SECTION_NID_SHIFT); + return ((unsigned long)nid << SECTION_NID_SHIFT); } static inline int sparse_early_nid(struct mem_section *section) From d8e8f5241ec13a3d768c51b4f2ba83bf125fe1ab Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 21 Jul 2021 11:42:05 +1000 Subject: [PATCH 368/851] mm/vmalloc: use batched page requests in bulk-allocator In case of simultaneous vmalloc allocations, for example it is 1GB and 12 CPUs my system is able to hit "BUG: soft lockup" for !CONFIG_PREEMPT kernel. 
[ 62.512621] RIP: 0010:__alloc_pages_bulk+0xa9f/0xbb0 [ 62.512628] Code: ff 8b 44 24 48 44 29 f8 83 f8 01 0f 84 ea fe ff ff e9 07 f6 ff ff 48 8b 44 24 60 48 89 28 e9 00 f9 ff ff fb 66 0f 1f 44 00 00 e8 fd ff ff 65 48 01 51 10 e9 3e fe ff ff 48 8b 44 24 78 4d 89 [ 62.512629] RSP: 0018:ffffa7bfc29ffd20 EFLAGS: 00000206 [ 62.512631] RAX: 0000000000000200 RBX: ffffcd5405421888 RCX: ffff8c36ffdeb928 [ 62.512632] RDX: 0000000000040000 RSI: ffffa896f06b2ff8 RDI: ffffcd5405421880 [ 62.512633] RBP: ffffcd5405421880 R08: 000000000000007d R09: ffffffffffffffff [ 62.512634] R10: ffffffff9d63c084 R11: 00000000ffffffff R12: ffff8c373ffaeb80 [ 62.512635] R13: ffff8c36ffdf65f8 R14: ffff8c373ffaeb80 R15: 0000000000040000 [ 62.512637] FS: 0000000000000000(0000) GS:ffff8c36ffdc0000(0000) knlGS:0000000000000000 [ 62.512638] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 62.512639] CR2: 000055c8e2fe8610 CR3: 0000000c13e10000 CR4: 00000000000006e0 [ 62.512641] Call Trace: [ 62.512646] __vmalloc_node_range+0x11c/0x2d0 [ 62.512649] ? full_fit_alloc_test+0x140/0x140 [test_vmalloc] [ 62.512654] __vmalloc_node+0x4b/0x70 [ 62.512656] ? fix_size_alloc_test+0x44/0x60 [test_vmalloc] [ 62.512659] fix_size_alloc_test+0x44/0x60 [test_vmalloc] [ 62.512662] test_func+0xe7/0x1f0 [test_vmalloc] [ 62.512666] ? fix_align_alloc_test+0x50/0x50 [test_vmalloc] [ 62.512668] kthread+0x11a/0x140 [ 62.512671] ? set_kthread_struct+0x40/0x40 [ 62.512672] ret_from_fork+0x22/0x30 To address this issue invoke a bulk-allocator many times until all pages are obtained, i.e. do batched page requests adding cond_resched() meanwhile to reschedule. Batched value is hard-coded and is 100 pages per call. 
Link: https://lkml.kernel.org/r/20210707182639.31282-1-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Hillf Danton Cc: Matthew Wilcox Cc: Mel Gorman Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d5cd528051496..24bc65f02d040 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2779,7 +2779,7 @@ EXPORT_SYMBOL_GPL(vmap_pfn); static inline unsigned int vm_area_alloc_pages(gfp_t gfp, int nid, - unsigned int order, unsigned long nr_pages, struct page **pages) + unsigned int order, unsigned int nr_pages, struct page **pages) { unsigned int nr_allocated = 0; @@ -2789,10 +2789,32 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * to fails, fallback to a single page allocator that is * more permissive. */ - if (!order) - nr_allocated = alloc_pages_bulk_array_node( - gfp, nid, nr_pages, pages); - else + if (!order) { + while (nr_allocated < nr_pages) { + unsigned int nr, nr_pages_request; + + /* + * A maximum allowed request is hard-coded and is 100 + * pages per call. That is done in order to prevent a + * long preemption off scenario in the bulk-allocator + * so the range is [1:100]. + */ + nr_pages_request = min(100U, nr_pages - nr_allocated); + + nr = alloc_pages_bulk_array_node(gfp, nid, + nr_pages_request, pages + nr_allocated); + + nr_allocated += nr; + cond_resched(); + + /* + * If zero or pages were obtained partly, + * fallback to a single page allocator. + */ + if (nr != nr_pages_request) + break; + } + } else /* * Compound pages required for remap_vmalloc_page if * high-order pages. 
From 30251f2812165a090a99c7f82b4f2149822e6b80 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 21 Jul 2021 11:42:06 +1000 Subject: [PATCH 369/851] mm/vmalloc: remove gfpflags_allow_blocking() check Get rid of gfpflags_allow_blocking() check from the vmalloc() path as it is supposed to be sleepable anyway. Thus remove it from the alloc_vmap_area() as well as from the vm_area_alloc_pages(). Link: https://lkml.kernel.org/r/20210707182639.31282-2-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Acked-by: Michal Hocko Cc: Mel Gorman Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Nicholas Piggin Cc: Hillf Danton Cc: Oleksiy Avramchenko Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 24bc65f02d040..5dcb65dd9bf31 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1479,6 +1479,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, int node, gfp_t gfp_mask) { struct vmap_area *va; + unsigned long freed; unsigned long addr; int purged = 0; int ret; @@ -1542,13 +1543,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, goto retry; } - if (gfpflags_allow_blocking(gfp_mask)) { - unsigned long freed = 0; - blocking_notifier_call_chain(&vmap_notify_list, 0, &freed); - if (freed > 0) { - purged = 0; - goto retry; - } + freed = 0; + blocking_notifier_call_chain(&vmap_notify_list, 0, &freed); + + if (freed > 0) { + purged = 0; + goto retry; } if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) @@ -2838,9 +2838,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, for (i = 0; i < (1U << order); i++) pages[nr_allocated + i] = page + i; - if (gfpflags_allow_blocking(gfp)) - cond_resched(); - + cond_resched(); nr_allocated += 1U << order; } From 39b68616c3703bacef64ca884e7c0949cac2cb16 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 21 Jul 2021 11:42:07 +1000 
Subject: [PATCH 370/851] lib/test_vmalloc.c: add a new 'nr_pages' parameter In order to simulate different fixed sizes for vmalloc allocation introduce a new parameter that sets number of pages to be allocated for the "fix_size_alloc_test" test. By default 1 page is used unless a different number is specified over the new parameter. Link: https://lkml.kernel.org/r/20210710194151.21370-1-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Cc: Mel Gorman Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Nicholas Piggin Cc: Hillf Danton Cc: Michal Hocko Cc: Oleksiy Avramchenko Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/test_vmalloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 01e9543de5664..e14993bc84d2d 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -35,6 +35,9 @@ __param(int, test_repeat_count, 1, __param(int, test_loop_count, 1000000, "Set test loop counter"); +__param(int, nr_pages, 0, + "Set number of pages for fix_size_alloc_test(default: 1)"); + __param(int, run_test_mask, INT_MAX, "Set tests specified in the mask.\n\n" "\t\tid: 1, name: fix_size_alloc_test\n" @@ -262,7 +265,7 @@ static int fix_size_alloc_test(void) int i; for (i = 0; i < test_loop_count; i++) { - ptr = vmalloc(3 * PAGE_SIZE); + ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE); if (!ptr) return -1; From 1ea396da10ad442bc814c60fd6a7ac627f97f869 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Wed, 21 Jul 2021 11:42:08 +1000 Subject: [PATCH 371/851] mm/vmalloc: fix wrong behavior in vread commit f608788cd2d6 ("mm/vmalloc: use rb_tree instead of list for vread() lookups") use rb_tree instead of list to speed up lookup, but function __find_vmap_area tries to find a vmap_area that includes the target address, if target address is smaller than the leftmost node in vmap_area_root, it will return NULL, then vread will read nothing.
This behavior is different from the primitive semantics. The correct way is to find the first vmap_area that is bigger than the target addr, that is what function find_vmap_area_exceed_addr does. Link: https://lkml.kernel.org/r/20210714015959.3204871-1-chenwandun@huawei.com Fixes: f608788cd2d6 ("mm/vmalloc: use rb_tree instead of list for vread() lookups") Signed-off-by: Chen Wandun Reported-by: Hulk Robot Cc: Serapheim Dimitropoulos Cc: Uladzislau Rezki (Sony) Cc: Kefeng Wang Cc: Wei Yongjun Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5dcb65dd9bf31..3824dc16ce1cb 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -787,6 +787,28 @@ unsigned long vmalloc_nr_pages(void) return atomic_long_read(&nr_vmalloc_pages); } +static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr) +{ + struct vmap_area *va = NULL; + struct rb_node *n = vmap_area_root.rb_node; + + while (n) { + struct vmap_area *tmp; + + tmp = rb_entry(n, struct vmap_area, rb_node); + if (tmp->va_end > addr) { + va = tmp; + if (tmp->va_start <= addr) + break; + + n = n->rb_left; + } else + n = n->rb_right; + } + + return va; +} + static struct vmap_area *__find_vmap_area(unsigned long addr) { struct rb_node *n = vmap_area_root.rb_node; @@ -3287,9 +3309,14 @@ long vread(char *buf, char *addr, unsigned long count) count = -(unsigned long) addr; spin_lock(&vmap_area_lock); - va = __find_vmap_area((unsigned long)addr); + va = find_vmap_area_exceed_addr((unsigned long)addr); if (!va) goto finished; + + /* no intersects with alive vmap_area */ + if ((unsigned long)addr + count <= va->va_start) + goto finished; + list_for_each_entry_from(va, &vmap_area_list, list) { if (!count) break; From 45c806303565f9c9d26eb9fee6cc66968c6b700e Mon Sep 17 00:00:00 2001 From: Woody Lin Date: Wed, 21 Jul 2021 11:42:09 +1000 Subject: [PATCH 372/851] mm/kasan: move
kasan.fault to mm/kasan/report.c Move the boot parameter 'kasan.fault' from hw_tags.c to report.c, so it can support all KASAN modes - generic, and both tag-based. Link: https://lkml.kernel.org/r/20210713010536.3161822-1-woodylin@google.com Signed-off-by: Woody Lin Reviewed-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/dev-tools/kasan.rst | 13 ++++++---- mm/kasan/hw_tags.c | 43 ------------------------------- mm/kasan/kasan.h | 1 - mm/kasan/report.c | 29 ++++++++++++++++++--- 4 files changed, 34 insertions(+), 52 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 83ec4a556c199..21dc03bc10a45 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -181,9 +181,16 @@ By default, KASAN prints a bug report only for the first invalid memory access. With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This effectively disables ``panic_on_warn`` for KASAN reports. +Alternatively, independent of ``panic_on_warn`` the ``kasan.fault=`` boot +parameter can be used to control panic and reporting behaviour: + +- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN + report or also panic the kernel (default: ``report``). The panic happens even + if ``kasan_multi_shot`` is enabled. + Hardware tag-based KASAN mode (see the section about various modes below) is intended for use in production as a security mitigation. Therefore, it supports -boot parameters that allow disabling KASAN or controlling its features. +additional boot parameters that allow disabling KASAN or controlling features: - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``). @@ -199,10 +206,6 @@ boot parameters that allow disabling KASAN or controlling its features. 
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack traces collection (default: ``on``). -- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN - report or also panic the kernel (default: ``report``). The panic happens even - if ``kasan_multi_shot`` is enabled. - Implementation details ---------------------- diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 4ea8c368b5b8b..51903639e55fa 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -37,16 +37,9 @@ enum kasan_arg_stacktrace { KASAN_ARG_STACKTRACE_ON, }; -enum kasan_arg_fault { - KASAN_ARG_FAULT_DEFAULT, - KASAN_ARG_FAULT_REPORT, - KASAN_ARG_FAULT_PANIC, -}; - static enum kasan_arg kasan_arg __ro_after_init; static enum kasan_arg_mode kasan_arg_mode __ro_after_init; static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init; -static enum kasan_arg_fault kasan_arg_fault __ro_after_init; /* Whether KASAN is enabled at all. */ DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled); @@ -59,9 +52,6 @@ EXPORT_SYMBOL_GPL(kasan_flag_async); /* Whether to collect alloc/free stack traces. */ DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace); -/* Whether to panic or print a report and disable tag checking on fault. */ -bool kasan_flag_panic __ro_after_init; - /* kasan=off/on */ static int __init early_kasan_flag(char *arg) { @@ -113,23 +103,6 @@ static int __init early_kasan_flag_stacktrace(char *arg) } early_param("kasan.stacktrace", early_kasan_flag_stacktrace); -/* kasan.fault=report/panic */ -static int __init early_kasan_fault(char *arg) -{ - if (!arg) - return -EINVAL; - - if (!strcmp(arg, "report")) - kasan_arg_fault = KASAN_ARG_FAULT_REPORT; - else if (!strcmp(arg, "panic")) - kasan_arg_fault = KASAN_ARG_FAULT_PANIC; - else - return -EINVAL; - - return 0; -} -early_param("kasan.fault", early_kasan_fault); - /* kasan_init_hw_tags_cpu() is called for each CPU. 
*/ void kasan_init_hw_tags_cpu(void) { @@ -197,22 +170,6 @@ void __init kasan_init_hw_tags(void) break; } - switch (kasan_arg_fault) { - case KASAN_ARG_FAULT_DEFAULT: - /* - * Default to no panic on report. - * Do nothing, kasan_flag_panic keeps its default value. - */ - break; - case KASAN_ARG_FAULT_REPORT: - /* Do nothing, kasan_flag_panic keeps its default value. */ - break; - case KASAN_ARG_FAULT_PANIC: - /* Enable panic on report. */ - kasan_flag_panic = true; - break; - } - pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index d739cdd1621ab..fa02c88b6948f 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -37,7 +37,6 @@ static inline bool kasan_async_mode_enabled(void) #endif -extern bool kasan_flag_panic __ro_after_init; extern bool kasan_flag_async __ro_after_init; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8fff1825b22cd..884a950c70265 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -39,6 +39,31 @@ static unsigned long kasan_flags; #define KASAN_BIT_REPORTED 0 #define KASAN_BIT_MULTI_SHOT 1 +enum kasan_arg_fault { + KASAN_ARG_FAULT_DEFAULT, + KASAN_ARG_FAULT_REPORT, + KASAN_ARG_FAULT_PANIC, +}; + +static enum kasan_arg_fault kasan_arg_fault __ro_after_init = KASAN_ARG_FAULT_DEFAULT; + +/* kasan.fault=report/panic */ +static int __init early_kasan_fault(char *arg) +{ + if (!arg) + return -EINVAL; + + if (!strcmp(arg, "report")) + kasan_arg_fault = KASAN_ARG_FAULT_REPORT; + else if (!strcmp(arg, "panic")) + kasan_arg_fault = KASAN_ARG_FAULT_PANIC; + else + return -EINVAL; + + return 0; +} +early_param("kasan.fault", early_kasan_fault); + bool kasan_save_enable_multi_shot(void) { return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); @@ -102,10 +127,8 @@ static void end_report(unsigned long *flags, unsigned long addr) panic_on_warn = 0; panic("panic_on_warn set ...\n"); } -#ifdef CONFIG_KASAN_HW_TAGS - if 
(kasan_flag_panic) + if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC) panic("kasan.fault=panic set ...\n"); -#endif kasan_enable_current(); } From 7301f670c66999247d4b2ea314128cccc731fb05 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Jul 2021 11:42:10 +1000 Subject: [PATCH 373/851] mm/page_alloc: always initialize memory map for the holes Patch series "mm: ensure consistency of memory map poisoning". Currently memory map allocation for FLATMEM case does not poison the struct pages regardless of CONFIG_PAGE_POISON setting. This happens because allocation of the memory map for FLATMEM and SPARSEMEM use different memblock functions and those that are used for SPARSEMEM case (namely memblock_alloc_try_nid_raw() and memblock_alloc_exact_nid_raw()) implicitly poison the allocated memory. Another side effect of this implicit poisoning is that early setup code that uses the same functions to allocate memory burns cycles for the memory poisoning even if it was not intended. These patches introduce memmap_alloc() wrapper that ensures that the memory map allocation is consistent for different memory models. This patch (of 4): Currently memory map for the holes is initialized only when SPARSEMEM memory model is used. Yet, even with FLATMEM there could be holes in the physical memory layout that have memory map entries. For instance, the memory reserved using e820 API on i386 or "reserved-memory" nodes in device tree would not appear in memblock.memory and hence the struct pages for such holes will be skipped during memory map initialization. These struct pages will be zeroed because the memory map for FLATMEM systems is allocated with memblock_alloc_node() that clears the allocated memory. While zeroed struct pages do not cause immediate problems, the correct behaviour is to initialize every page using __init_single_page(). Besides, enabling page poison for FLATMEM case will trigger PF_POISONED_CHECK() unless the memory map is properly initialized. 
Make sure init_unavailable_range() is called for both SPARSEMEM and FLATMEM so that struct pages representing memory holes would appear as PG_Reserved with any memory layout. Link: https://lkml.kernel.org/r/20210714123739.16493-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20210714123739.16493-2-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 856b175c15a4f..09d7d4babe95c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6641,7 +6641,6 @@ static void __meminit zone_init_free_lists(struct zone *zone) } } -#if !defined(CONFIG_FLATMEM) /* * Only struct pages that correspond to ranges defined by memblock.memory * are zeroed and initialized by going through __init_single_page() during @@ -6686,13 +6685,6 @@ static void __init init_unavailable_range(unsigned long spfn, pr_info("On node %d, zone %s: %lld pages in unavailable ranges", node, zone_names[zone], pgcnt); } -#else -static inline void init_unavailable_range(unsigned long spfn, - unsigned long epfn, - int zone, int node) -{ -} -#endif static void __init memmap_init_zone_range(struct zone *zone, unsigned long start_pfn, From 0eddadb4bd4301d112500bfb2bb1776b55d4949c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Jul 2021 11:42:11 +1000 Subject: [PATCH 374/851] microblaze: simplify pte_alloc_one_kernel() The microblaze's implementation of pte_alloc_one_kernel() used memblock_alloc_try_nid_raw() along with clear_page() to allocate a zeroed page during early setup. Replace calls of these functions with a call to memblock_alloc_try_nid() that already returns a zeroed page and respects the same allocation limits as memblock_alloc_try_nid_raw(). While at it, drop early_get_page() wrapper that was only used in pte_alloc_one_kernel(). 
Link: https://lkml.kernel.org/r/20210714123739.16493-3-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/microblaze/include/asm/pgtable.h | 2 -- arch/microblaze/mm/init.c | 12 ------------ arch/microblaze/mm/pgtable.c | 17 ++++++++--------- 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 71cd547655d9d..c136a01e467eb 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -443,8 +443,6 @@ extern int mem_init_done; asmlinkage void __init mmu_init(void); -void __init *early_get_page(void); - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index ab55c70380a5c..952f35b335b26 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -265,18 +265,6 @@ asmlinkage void __init mmu_init(void) dma_contiguous_reserve(memory_start + lowmem_size - 1); } -/* This is only called until mem_init is done. 
*/ -void __init *early_get_page(void) -{ - /* - * Mem start + kernel_tlb -> here is limit - * because of mem mapping from head.S - */ - return memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE, - MEMBLOCK_LOW_LIMIT, memory_start + kernel_tlb, - NUMA_NO_NODE); -} - void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 38ccb909bc9d9..c1833b159d3be 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -242,15 +243,13 @@ unsigned long iopa(unsigned long addr) __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - pte_t *pte; - if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - } else { - pte = (pte_t *)early_get_page(); - if (pte) - clear_page(pte); - } - return pte; + if (mem_init_done) + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + else + return memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + MEMBLOCK_LOW_LIMIT, + memory_start + kernel_tlb, + NUMA_NO_NODE); } void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) From e79c52ecdfe4cffa02ed65490fa7505c202373ee Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Jul 2021 11:42:12 +1000 Subject: [PATCH 375/851] mm: introduce memmap_alloc() to unify memory map allocation There are several places that allocate memory for the memory map: alloc_node_mem_map() for FLATMEM, sparse_buffer_init() and __populate_section_memmap() for SPARSEMEM. The memory allocated in the FLATMEM case is zeroed and it is never poisoned, regardless of CONFIG_PAGE_POISON setting. The memory allocated in the SPARSEMEM cases is not zeroed and it is implicitly poisoned inside memblock if CONFIG_PAGE_POISON is set. 
Introduce memmap_alloc() wrapper for memblock allocators that will be used for both FLATMEM and SPARSEMEM cases and will make memory map zeroing and poisoning consistent for different memory models. Link: https://lkml.kernel.org/r/20210714123739.16493-4-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/internal.h | 4 ++++ mm/page_alloc.c | 24 ++++++++++++++++++++++-- mm/sparse.c | 6 ++---- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 31ff935b2547d..57e28261a3b17 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -211,6 +211,10 @@ extern void zone_pcp_reset(struct zone *zone); extern void zone_pcp_disable(struct zone *zone); extern void zone_pcp_enable(struct zone *zone); +extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, + int nid, bool exact_nid); + #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 09d7d4babe95c..270de5a734ebe 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6747,6 +6747,26 @@ static void __init memmap_init(void) init_unavailable_range(hole_pfn, end_pfn, zone_id, nid); } +void __init *memmap_alloc(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, int nid, bool exact_nid) +{ + void *ptr; + + if (exact_nid) + ptr = memblock_alloc_exact_nid_raw(size, align, min_addr, + MEMBLOCK_ALLOC_ACCESSIBLE, + nid); + else + ptr = memblock_alloc_try_nid_raw(size, align, min_addr, + MEMBLOCK_ALLOC_ACCESSIBLE, + nid); + + if (ptr && size > 0) + page_init_poison(ptr, size); + + return ptr; +} + static int zone_batchsize(struct zone *zone) { #ifdef CONFIG_MMU @@ -7518,8 +7538,8 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) end = pgdat_end_pfn(pgdat); end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); - map = memblock_alloc_node(size, SMP_CACHE_BYTES, - pgdat->node_id); + map 
= memmap_alloc(size, SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT, + pgdat->node_id, false); if (!map) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); diff --git a/mm/sparse.c b/mm/sparse.c index be7936e65b6a2..6a0e7b692dcdc 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -435,8 +435,7 @@ struct page __init *__populate_section_memmap(unsigned long pfn, if (map) return map; - map = memblock_alloc_try_nid_raw(size, size, addr, - MEMBLOCK_ALLOC_ACCESSIBLE, nid); + map = memmap_alloc(size, size, addr, nid, false); if (!map) panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n", __func__, size, PAGE_SIZE, nid, &addr); @@ -463,8 +462,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) * and we want it to be properly aligned to the section size - this is * especially the case for VMEMMAP which maps memmap to PMDs */ - sparsemap_buf = memblock_alloc_exact_nid_raw(size, section_map_size(), - addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid); + sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; } From f3bc5f990528c6e263bf2f39fa1f2de956e25503 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Jul 2021 11:42:13 +1000 Subject: [PATCH 376/851] memblock: stop poisoning raw allocations Functions memblock_alloc_exact_nid_raw() and memblock_alloc_try_nid_raw() are intended for early memory allocation without overhead of zeroing the allocated memory. Since these functions were used to allocate the memory map, they have ended up with addition of a call to page_init_poison() that poisoned the allocated memory when CONFIG_PAGE_POISON was set. Since the memory map is allocated using a dedicated memmap_alloc() function that takes care of the poisoning, remove page poisoning from the memblock_alloc_*_raw() functions. 
Link: https://lkml.kernel.org/r/20210714123739.16493-5-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memblock.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index de7b553baa500..a69449bffc8d2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1491,18 +1491,12 @@ void * __init memblock_alloc_exact_nid_raw( phys_addr_t min_addr, phys_addr_t max_addr, int nid) { - void *ptr; - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); - ptr = memblock_alloc_internal(size, align, - min_addr, max_addr, nid, true); - if (ptr && size > 0) - page_init_poison(ptr, size); - - return ptr; + return memblock_alloc_internal(size, align, min_addr, max_addr, nid, + true); } /** @@ -1529,18 +1523,12 @@ void * __init memblock_alloc_try_nid_raw( phys_addr_t min_addr, phys_addr_t max_addr, int nid) { - void *ptr; - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); - ptr = memblock_alloc_internal(size, align, - min_addr, max_addr, nid, false); - if (ptr && size > 0) - page_init_poison(ptr, size); - - return ptr; + return memblock_alloc_internal(size, align, min_addr, max_addr, nid, + false); } /** From 8a11bab17eaef412bd29c060043ff42f948be774 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Wed, 21 Jul 2021 11:42:14 +1000 Subject: [PATCH 377/851] mm/page_alloc.c: fix 'zone_id' may be used uninitialized in this function warning When compiling with -Werror, cc1 will warn that 'zone_id' may be used uninitialized in this function. Initialize the zone_id as 0. It's safe to assume that if the code reaches this point it has at least one numa node with memory, so no need for an assertion before init_unavailable_range(). 
Link: https://lkml.kernel.org/r/20210716210336.1114114-1-npache@redhat.com Fixes: 122e093c1734 ("mm/page_alloc: fix memory map initialization for descending nodes") Signed-off-by: Nico Pache Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 270de5a734ebe..6efc1f079de16 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6714,7 +6714,7 @@ static void __init memmap_init(void) { unsigned long start_pfn, end_pfn; unsigned long hole_pfn = 0; - int i, j, zone_id, nid; + int i, j, zone_id = 0, nid; for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { struct pglist_data *node = NODE_DATA(nid); From 52606952594bee562cf93404d42b90a717d4144c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Jul 2021 11:42:15 +1000 Subject: [PATCH 378/851] mm/page_alloc: make alloc_node_mem_map() __init rather than __ref alloc_node_mem_map() is only ever called from free_area_init_node() that is an __init function. Make the actual alloc_node_mem_map() also __init and its stub version static inline. 
Link: https://lkml.kernel.org/r/20210716064124.31865-1-rppt@kernel.org Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6efc1f079de16..cc1dd3871aad7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7514,7 +7514,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat) } #ifdef CONFIG_FLATMEM -static void __ref alloc_node_mem_map(struct pglist_data *pgdat) +static void __init alloc_node_mem_map(struct pglist_data *pgdat) { unsigned long __maybe_unused start = 0; unsigned long __maybe_unused offset = 0; @@ -7560,7 +7560,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) #endif } #else -static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { } +static inline void alloc_node_mem_map(struct pglist_data *pgdat) { } #endif /* CONFIG_FLATMEM */ #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT From 2aae2a02d4b130b1ed9de4e99335951d9156dde2 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 21 Jul 2021 11:42:15 +1000 Subject: [PATCH 379/851] hugetlb: simplify prep_compound_gigantic_page ref count racing code Patch series "hugetlb: fix potential ref counting races". When Muchun Song brought up a potential issue with hugetlb ref counting[1], I started looking closer at the code. hugetlbfs is the only code with its own specialized compound page destructor and taking special action when ref counts drop to zero. Potential races happen in this unique handling of ref counts. The following patches address these races when creating and destroying hugetlb pages. These potential races have likely existed since the creation of hugetlbfs. They certainly have been around for more than 10 years. However, I am unaware of anyone actually hitting these races. It is VERY unlikely that anyone will actually hit these races, but they do exist. 
I could not think of an easy (or difficult) way to force these races. Therefore, testing consisted of adding code to randomly increase ref counts in strategic places. In this way, I was able to exercise all the race handling code paths. [1] https://lore.kernel.org/linux-mm/CAMZfGtVMn3daKrJwZMaVOGOaJU+B4dS--x_oPmGQMD=c=QNGEg@mail.gmail.com/ This patch (of 3): Code in prep_compound_gigantic_page waits for a rcu grace period if it notices a temporarily inflated ref count on a tail page. This was due to the identified potential race with speculative page cache references which could only last for a rcu grace period. This is overly complicated as this situation is VERY unlikely to ever happen. Instead, just quickly return an error. Also, only print a warning in prep_compound_gigantic_page instead of multiple callers. Link: https://lkml.kernel.org/r/20210710002441.167759-1-mike.kravetz@oracle.com Link: https://lkml.kernel.org/r/20210710002441.167759-2-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mina Almasry Cc: Muchun Song Cc: Naoya Horiguchi Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dfc940d5221dc..8f163fa66a15c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1657,16 +1657,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order) * cache adding could take a ref on a 'to be' tail page. * We need to respect any increased ref count, and only set * the ref count to zero if count is currently 1. If count - * is not 1, we call synchronize_rcu in the hope that a rcu - * grace period will cause ref count to drop and then retry. - * If count is still inflated on retry we return an error and - * must discard the pages. + * is not 1, we return an error and caller must discard the + * pages. 
*/ if (!page_ref_freeze(p, 1)) { - pr_info("HugeTLB unexpected inflated ref count on freshly allocated page\n"); - synchronize_rcu(); - if (!page_ref_freeze(p, 1)) - goto out_error; + pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); + goto out_error; } set_page_count(p, 0); set_compound_head(p, page); @@ -1830,7 +1826,6 @@ static struct page *alloc_fresh_huge_page(struct hstate *h, retry = true; goto retry; } - pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); return NULL; } } @@ -2828,8 +2823,8 @@ static void __init gather_bootmem_prealloc(void) prep_new_huge_page(h, page, page_to_nid(page)); put_page(page); /* add to the hugepage allocator */ } else { + /* VERY unlikely inflated ref count on a tail page */ free_gigantic_page(page, huge_page_order(h)); - pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); } /* From 4c137fc87a711f8e1ce4ee41c97c4d0333c2f9f4 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 21 Jul 2021 11:42:16 +1000 Subject: [PATCH 380/851] hugetlb: drop ref count earlier after page allocation When discussing the possibility of inflated page ref counts, Muchun Song pointed out this potential issue [1]. It is true that any code could potentially take a reference on a compound page after allocation and before it is converted to and put into use as a hugetlb page. Specifically, this could be done by any users of get_page_unless_zero. There are three areas of concern within hugetlb code. 1) When adding pages to the pool. In this case, new pages are allocated added to the pool by calling put_page to invoke the hugetlb destructor (free_huge_page). If there is an inflated ref count on the page, it will not be immediately added to the free list. It will only be added to the free list when the temporary ref count is dropped. This is deemed acceptable and will not be addressed. 2) A page is allocated for immediate use normally as a surplus page or migration target. 
In this case, the user of the page will also hold a reference. There is no issue as this is just like normal page ref counting. 3) A page is allocated and MUST be added to the free list to satisfy a reservation. One such example is gather_surplus_pages as pointed out by Muchun in [1]. More specifically, this case covers callers of enqueue_huge_page where the page reference count must be zero. This patch covers this third case. Three routines call enqueue_huge_page when the page reference count could potentially be inflated. They are: gather_surplus_pages, alloc_and_dissolve_huge_page and add_hugetlb_page. add_hugetlb_page is called on error paths when a huge page can not be freed due to the inability to allocate vmemmap pages. In this case, the temporarily inflated ref count is not an issue. When the ref is dropped the appropriate action will be taken. Instead of VM_BUG_ON if the ref count does not drop to zero, simply return. In gather_surplus_pages and alloc_and_dissolve_huge_page the caller expects a page (or pages) to be put on the free lists. In this case we must ensure there are no temporary ref counts. We do this by calling put_page_testzero() earlier and not using pages without a zero ref count. The temporary page flag (HPageTemporary) is used in such cases so that as soon as the inflated ref count is dropped the page will be freed. 
[1] https://lore.kernel.org/linux-mm/CAMZfGtVMn3daKrJwZMaVOGOaJU+B4dS--x_oPmGQMD=c=QNGEg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20210710002441.167759-3-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: Muchun Song Cc: Michal Hocko Cc: Oscar Salvador Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Mina Almasry Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 100 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 22 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8f163fa66a15c..843658b09ea56 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1072,6 +1072,8 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) int nid = page_to_nid(page); lockdep_assert_held(&hugetlb_lock); + VM_BUG_ON_PAGE(page_count(page), page); + list_move(&page->lru, &h->hugepage_freelists[nid]); h->free_huge_pages++; h->free_huge_pages_node[nid]++; @@ -1399,11 +1401,20 @@ static void add_hugetlb_page(struct hstate *h, struct page *page, SetHPageVmemmapOptimized(page); /* - * This page is now managed by the hugetlb allocator and has - * no users -- drop the last reference. + * This page is about to be managed by the hugetlb allocator and + * should have no users. Drop our reference, and check for others + * just in case. */ zeroed = put_page_testzero(page); - VM_BUG_ON_PAGE(!zeroed, page); + if (!zeroed) + /* + * It is VERY unlikely soneone else has taken a ref on + * the page. In this case, we simply return as the + * hugetlb destructor (free_huge_page) will be called + * when this other ref is dropped. + */ + return; + arch_clear_hugepage_flags(page); enqueue_huge_page(h, page); } @@ -2015,9 +2026,10 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) * Allocates a fresh surplus page from the page allocator. 
*/ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, - int nid, nodemask_t *nmask) + int nid, nodemask_t *nmask, bool zero_ref) { struct page *page = NULL; + bool retry = false; if (hstate_is_gigantic(h)) return NULL; @@ -2027,6 +2039,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, goto out_unlock; spin_unlock_irq(&hugetlb_lock); +retry: page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL); if (!page) return NULL; @@ -2044,11 +2057,35 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, spin_unlock_irq(&hugetlb_lock); put_page(page); return NULL; - } else { - h->surplus_huge_pages++; - h->surplus_huge_pages_node[page_to_nid(page)]++; } + if (zero_ref) { + /* + * Caller requires a page with zero ref count. + * We will drop ref count here. If someone else is holding + * a ref, the page will be freed when they drop it. Abuse + * temporary page flag to accomplish this. + */ + SetHPageTemporary(page); + if (!put_page_testzero(page)) { + /* + * Unexpected inflated ref count on freshly allocated + * huge. Retry once. 
+ */ + pr_info("HugeTLB unexpected inflated ref count on freshly allocated page\n"); + spin_unlock_irq(&hugetlb_lock); + if (retry) + return NULL; + + retry = true; + goto retry; + } + ClearHPageTemporary(page); + } + + h->surplus_huge_pages++; + h->surplus_huge_pages_node[page_to_nid(page)]++; + out_unlock: spin_unlock_irq(&hugetlb_lock); @@ -2090,7 +2127,7 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, nodemask_t *nodemask; nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask); - page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask); + page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false); mpol_cond_put(mpol); return page; @@ -2162,7 +2199,7 @@ static int gather_surplus_pages(struct hstate *h, long delta) spin_unlock_irq(&hugetlb_lock); for (i = 0; i < needed; i++) { page = alloc_surplus_huge_page(h, htlb_alloc_mask(h), - NUMA_NO_NODE, NULL); + NUMA_NO_NODE, NULL, true); if (!page) { alloc_ok = false; break; @@ -2203,24 +2240,20 @@ static int gather_surplus_pages(struct hstate *h, long delta) /* Free the needed pages to the hugetlb pool */ list_for_each_entry_safe(page, tmp, &surplus_list, lru) { - int zeroed; - if ((--needed) < 0) break; - /* - * This page is now managed by the hugetlb allocator and has - * no users -- drop the buddy allocator's reference. - */ - zeroed = put_page_testzero(page); - VM_BUG_ON_PAGE(!zeroed, page); + /* Add the page to the hugetlb allocator */ enqueue_huge_page(h, page); } free: spin_unlock_irq(&hugetlb_lock); - /* Free unnecessary surplus pages to the buddy allocator */ + /* + * Free unnecessary surplus pages to the buddy allocator. + * Pages have no ref count, call free_huge_page directly. 
+ */ list_for_each_entry_safe(page, tmp, &surplus_list, lru) - put_page(page); + free_huge_page(page); spin_lock_irq(&hugetlb_lock); return ret; @@ -2529,6 +2562,7 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, { gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; int nid = page_to_nid(old_page); + bool alloc_retry = false; struct page *new_page; int ret = 0; @@ -2539,9 +2573,30 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, * the pool. This simplifies and let us do most of the processing * under the lock. */ +alloc_retry: new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL); if (!new_page) return -ENOMEM; + /* + * If all goes well, this page will be directly added to the free + * list in the pool. For this the ref count needs to be zero. + * Attempt to drop now, and retry once if needed. It is VERY + * unlikely there is another ref on the page. + * + * If someone else has a reference to the page, it will be freed + * when they drop their ref. Abuse temporary page flag to accomplish + * this. Retry once if there is an inflated ref count. + */ + SetHPageTemporary(new_page); + if (!put_page_testzero(new_page)) { + if (alloc_retry) + return -EBUSY; + + alloc_retry = true; + goto alloc_retry; + } + ClearHPageTemporary(new_page); + __prep_new_huge_page(h, new_page); retry: @@ -2581,11 +2636,10 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, remove_hugetlb_page(h, old_page, false); /* - * Reference count trick is needed because allocator gives us - * referenced page but the pool requires pages with 0 refcount. + * Ref count on new page is already zero as it was dropped + * earlier. It can be directly added to the pool free list. 
*/ __prep_account_new_huge_page(h, nid); - page_ref_dec(new_page); enqueue_huge_page(h, new_page); /* @@ -2599,6 +2653,8 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, free_new: spin_unlock_irq(&hugetlb_lock); + /* Page has a zero ref count, but needs a ref to be freed */ + set_page_refcounted(new_page); update_and_free_page(h, new_page, false); return ret; From d6a5703a5d18d15ac2860c36625e6b826198d0ba Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 21 Jul 2021 11:42:17 +1000 Subject: [PATCH 381/851] hugetlb: before freeing hugetlb page set dtor to appropriate value When removing a hugetlb page from the pool the ref count is set to one (as the free page has no ref count) and compound page destructor is set to NULL_COMPOUND_DTOR. Since a subsequent call to free the hugetlb page will call __free_pages for non-gigantic pages and free_gigantic_page for gigantic pages the destructor is not used. However, consider the following race with code taking a speculative reference on the page: Thread 0 Thread 1 -------- -------- remove_hugetlb_page set_page_refcounted(page); set_compound_page_dtor(page, NULL_COMPOUND_DTOR); get_page_unless_zero(page) __update_and_free_page __free_pages(page, huge_page_order(h)); /* Note that __free_pages() will simply drop the reference to the page. */ put_page(page) __put_compound_page() destroy_compound_page NULL_COMPOUND_DTOR BUG: kernel NULL pointer dereference, address: 0000000000000000 To address this race, set the dtor to the normal compound page dtor for non-gigantic pages. The dtor for gigantic pages does not matter as gigantic pages are changed from a compound page to 'just a group of pages' before freeing. Hence, the destructor is not used. 
Link: https://lkml.kernel.org/r/20210710002441.167759-4-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Muchun Song Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mina Almasry Cc: Naoya Horiguchi Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 843658b09ea56..95714fb281503 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1370,8 +1370,28 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page, h->surplus_huge_pages_node[nid]--; } + /* + * Very subtle + * + * For non-gigantic pages set the destructor to the normal compound + * page dtor. This is needed in case someone takes an additional + * temporary ref to the page, and freeing is delayed until they drop + * their reference. + * + * For gigantic pages set the destructor to the null dtor. This + * destructor will never be called. Before freeing the gigantic + * page destroy_compound_gigantic_page will turn the compound page + * into a simple group of pages. After this the destructor does not + * apply. + * + * This handles the case where more than one ref is held when and + * after update_and_free_page is called. + */ set_page_refcounted(page); - set_compound_page_dtor(page, NULL_COMPOUND_DTOR); + if (hstate_is_gigantic(h)) + set_compound_page_dtor(page, NULL_COMPOUND_DTOR); + else + set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); h->nr_huge_pages--; h->nr_huge_pages_node[nid]--; From 67d239a6024f48167d4aac28dc2bb990a106ec08 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 21 Jul 2021 11:42:18 +1000 Subject: [PATCH 382/851] mm/numa: automatically generate node migration order Patch series "Migrate Pages in lieu of discard", v10. We're starting to see systems with more and more kinds of memory such as Intel's implementation of persistent memory. 
Let's say you have a system with some DRAM and some persistent memory. Today, once DRAM fills up, reclaim will start and some of the DRAM contents will be thrown out. Allocations will, at some point, start falling over to the slower persistent memory. That has two nasty properties. First, the newer allocations can end up in the slower persistent memory. Second, reclaimed data in DRAM are just discarded even if there are gobs of space in persistent memory that could be used. This set implements a solution to these problems. At the end of the reclaim process in shrink_page_list() just before the last page refcount is dropped, the page is migrated to persistent memory instead of being dropped. While I've talked about a DRAM/PMEM pairing, this approach would function in any environment where memory tiers exist. This is not perfect. It "strands" pages in slower memory and never brings them back to fast DRAM. Huang Ying has follow-on work which repurposes autonuma to promote hot pages back to DRAM. This is also all based on an upstream mechanism that allows persistent memory to be onlined and used as if it were volatile: http://lkml.kernel.org/r/20190124231441.37A4A305@viggo.jf.intel.com We have tested the patchset with the postgresql and pgbench. On a 2-socket server machine with DRAM and PMEM, the kernel with the patchset can improve the score of pgbench up to 22.1% compared with that of the DRAM only + disk case. This comes from the reduced disk read throughput (which reduces up to 70.8%). == Open Issues == * Memory policies and cpusets that, for instance, restrict allocations to DRAM can be demoted to PMEM whenever they opt in to this new mechanism. A cgroup-level API to opt-in or opt-out of these migrations will likely be required as a follow-on. * Could be more aggressive about where anon LRU scanning occurs since it no longer necessarily involves I/O. 
get_scan_count() for instance says: "If we have no swap space, do not bother scanning anon pages" This patch (of 9): Prepare for the kernel to auto-migrate pages to other memory nodes with a node migration table. This allows creating a single migration target for each NUMA node to enable the kernel to do NUMA page migrations instead of simply discarding colder pages. A node with no target is a "terminal node", so reclaim acts normally there. The migration target does not fundamentally _need_ to be a single node, but this implementation starts there to limit complexity. When memory fills up on a node, memory contents can be automatically migrated to another node. The biggest problems are knowing when to migrate and to where the migration should be targeted. The most straightforward way to generate the "to where" list would be to follow the page allocator fallback lists. Those lists already tell us where to look next if memory is full. It would also be logical to move memory in that order. But, the allocator fallback lists have a fatal flaw: most nodes appear in all the lists. This would potentially lead to migration cycles (A->B, B->A, A->B, ...). Instead of using the allocator fallback lists directly, keep a separate node migration ordering. But, reuse the same data used to generate page allocator fallback in the first place: find_next_best_node(). This means that the firmware data used to populate node distances essentially dictates the ordering for now. It should also be architecture-neutral since all NUMA architectures have a working find_next_best_node(). RCU is used to allow lock-less read of node_demotion[] and prevent demotion cycles from being observed. If multiple reads of node_demotion[] are performed, a single rcu_read_lock() must be held over all reads to ensure no cycles are observed. Details are as follows. === What does RCU provide?
=== Imagine a simple loop which walks down the demotion path looking for the last node: terminal_node = start_node; while (node_demotion[terminal_node] != NUMA_NO_NODE) { terminal_node = node_demotion[terminal_node]; } The initial values are: node_demotion[0] = 1; node_demotion[1] = NUMA_NO_NODE; and are updated to: node_demotion[0] = NUMA_NO_NODE; node_demotion[1] = 0; What guarantees that the cycle is not observed: node_demotion[0] = 1; node_demotion[1] = 0; and would loop forever? With RCU, a rcu_read_lock/unlock() can be placed around the loop. Since the write side does a synchronize_rcu(), the loop that observed the old contents is known to be complete before the synchronize_rcu() has completed. RCU, combined with disable_all_migrate_targets(), ensures that the old migration state is not visible by the time __set_migration_target_nodes() is called. === What does READ_ONCE() provide? === READ_ONCE() forbids the compiler from merging or reordering successive reads of node_demotion[]. This ensures that any updates are *eventually* observed. Consider the above loop again. The compiler could theoretically read the entirety of node_demotion[] into local storage (registers) and never go back to memory, and *permanently* observe bad values for node_demotion[]. Note: RCU does not provide any universal compiler-ordering guarantees: https://lore.kernel.org/lkml/20150921204327.GH4029@linux.vnet.ibm.com/ This code is unused for now. It will be called later in the series. 
Link: https://lkml.kernel.org/r/20210715055145.195411-1-ying.huang@intel.com Link: https://lkml.kernel.org/r/20210715055145.195411-2-ying.huang@intel.com Signed-off-by: Dave Hansen Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Reviewed-by: Zi Yan Reviewed-by: Oscar Salvador Cc: Michal Hocko Cc: Wei Xu Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Greg Thelen Cc: Keith Busch Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/internal.h | 5 ++ mm/migrate.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 2 +- 3 files changed, 222 insertions(+), 1 deletion(-) diff --git a/mm/internal.h b/mm/internal.h index 57e28261a3b17..cf3cb933eba3f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -543,12 +543,17 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, #ifdef CONFIG_NUMA extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); +extern int find_next_best_node(int node, nodemask_t *used_node_mask); #else static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, unsigned int order) { return NODE_RECLAIM_NOSCAN; } +static inline int find_next_best_node(int node, nodemask_t *used_node_mask) +{ + return NUMA_NO_NODE; +} #endif extern int hwpoison_filter(struct page *p); diff --git a/mm/migrate.c b/mm/migrate.c index 34a9ad3e0a4f9..b7a40ab47648a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1099,6 +1099,80 @@ static int __unmap_and_move(struct page *page, struct page *newpage, return rc; } + +/* + * node_demotion[] example: + * + * Consider a system with two sockets. Each socket has + * three classes of memory attached: fast, medium and slow. + * Each memory class is placed in its own NUMA node. The + * CPUs are placed in the node with the "fast" memory. The + * 6 NUMA nodes (0-5) might be split among the sockets like + * this: + * + * Socket A: 0, 1, 2 + * Socket B: 3, 4, 5 + * + * When Node 0 fills up, its memory should be migrated to + * Node 1. 
When Node 1 fills up, it should be migrated to + * Node 2. The migration path start on the nodes with the + * processors (since allocations default to this node) and + * fast memory, progress through medium and end with the + * slow memory: + * + * 0 -> 1 -> 2 -> stop + * 3 -> 4 -> 5 -> stop + * + * This is represented in the node_demotion[] like this: + * + * { 1, // Node 0 migrates to 1 + * 2, // Node 1 migrates to 2 + * -1, // Node 2 does not migrate + * 4, // Node 3 migrates to 4 + * 5, // Node 4 migrates to 5 + * -1} // Node 5 does not migrate + */ + +/* + * Writes to this array occur without locking. Cycles are + * not allowed: Node X demotes to Y which demotes to X... + * + * If multiple reads are performed, a single rcu_read_lock() + * must be held over all reads to ensure that no cycles are + * observed. + */ +static int node_demotion[MAX_NUMNODES] __read_mostly = + {[0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE}; + +/** + * next_demotion_node() - Get the next node in the demotion path + * @node: The starting node to lookup the next node + * + * @returns: node id for next memory node in the demotion path hierarchy + * from @node; NUMA_NO_NODE if @node is terminal. This does not keep + * @node online or guarantee that it *continues* to be the next demotion + * target. + */ +int next_demotion_node(int node) +{ + int target; + + /* + * node_demotion[] is updated without excluding this + * function from running. RCU doesn't provide any + * compiler barriers, so the READ_ONCE() is required + * to avoid compiler reordering or read merging. + * + * Make sure to use RCU over entire code blocks if + * node_demotion[] reads need to be consistent. + */ + rcu_read_lock(); + target = READ_ONCE(node_demotion[node]); + rcu_read_unlock(); + + return target; +} + /* * Obtain the lock on page, remove all ptes and migrate the page * to the newly allocated page in newpage. 
@@ -2982,3 +3056,145 @@ void migrate_vma_finalize(struct migrate_vma *migrate) } EXPORT_SYMBOL(migrate_vma_finalize); #endif /* CONFIG_DEVICE_PRIVATE */ + +/* Disable reclaim-based migration. */ +static void __disable_all_migrate_targets(void) +{ + int node; + + for_each_online_node(node) + node_demotion[node] = NUMA_NO_NODE; +} + +static void disable_all_migrate_targets(void) +{ + __disable_all_migrate_targets(); + + /* + * Ensure that the "disable" is visible across the system. + * Readers will see either a combination of before+disable + * state or disable+after. They will never see before and + * after state together. + * + * The before+after state together might have cycles and + * could cause readers to do things like loop until this + * function finishes. This ensures they can only see a + * single "bad" read and would, for instance, only loop + * once. + */ + synchronize_rcu(); +} + +/* + * Find an automatic demotion target for 'node'. + * Failing here is OK. It might just indicate + * being at the end of a chain. + */ +static int establish_migrate_target(int node, nodemask_t *used) +{ + int migration_target; + + /* + * Can not set a migration target on a + * node with it already set. + * + * No need for READ_ONCE() here since this + * in the write path for node_demotion[]. + * This should be the only thread writing. + */ + if (node_demotion[node] != NUMA_NO_NODE) + return NUMA_NO_NODE; + + migration_target = find_next_best_node(node, used); + if (migration_target == NUMA_NO_NODE) + return NUMA_NO_NODE; + + node_demotion[node] = migration_target; + + return migration_target; +} + +/* + * When memory fills up on a node, memory contents can be + * automatically migrated to another node instead of + * discarded at reclaim. + * + * Establish a "migration path" which will start at nodes + * with CPUs and will follow the priorities used to build the + * page allocator zonelists. + * + * The difference here is that cycles must be avoided. 
If + * node0 migrates to node1, then neither node1, nor anything + * node1 migrates to can migrate to node0. + * + * This function can run simultaneously with readers of + * node_demotion[]. However, it can not run simultaneously + * with itself. Exclusion is provided by memory hotplug events + * being single-threaded. + */ +static void __set_migration_target_nodes(void) +{ + nodemask_t next_pass = NODE_MASK_NONE; + nodemask_t this_pass = NODE_MASK_NONE; + nodemask_t used_targets = NODE_MASK_NONE; + int node; + + /* + * Avoid any oddities like cycles that could occur + * from changes in the topology. This will leave + * a momentary gap when migration is disabled. + */ + disable_all_migrate_targets(); + + /* + * Allocations go close to CPUs, first. Assume that + * the migration path starts at the nodes with CPUs. + */ + next_pass = node_states[N_CPU]; +again: + this_pass = next_pass; + next_pass = NODE_MASK_NONE; + /* + * To avoid cycles in the migration "graph", ensure + * that migration sources are not future targets by + * setting them in 'used_targets'. Do this only + * once per pass so that multiple source nodes can + * share a target node. + * + * 'used_targets' will become unavailable in future + * passes. This limits some opportunities for + * multiple source nodes to share a destination. + */ + nodes_or(used_targets, used_targets, this_pass); + for_each_node_mask(node, this_pass) { + int target_node = establish_migrate_target(node, &used_targets); + + if (target_node == NUMA_NO_NODE) + continue; + + /* + * Visit targets from this pass in the next pass. + * Eventually, every node will have been part of + * a pass, and will become set in 'used_targets'. + */ + node_set(target_node, next_pass); + } + /* + * 'next_pass' contains nodes which became migration + * targets in this pass. Make additional passes until + * no more migrations targets are available. 
+ */ + if (!nodes_empty(next_pass)) + goto again; +} + +/* + * For callers that do not hold get_online_mems() already. + */ +__maybe_unused // <- temporay to prevent warnings during bisects +static void set_migration_target_nodes(void) +{ + get_online_mems(); + __set_migration_target_nodes(); + put_online_mems(); +} diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cc1dd3871aad7..8f52abc922f27 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6156,7 +6156,7 @@ static int node_load[MAX_NUMNODES]; * * Return: node id of the found node or %NUMA_NO_NODE if no node is found. */ -static int find_next_best_node(int node, nodemask_t *used_node_mask) +int find_next_best_node(int node, nodemask_t *used_node_mask) { int n, val; int min_val = INT_MAX; From 7795e252befe998bf0b0ae203a0ed5957d97d29d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 21 Jul 2021 11:42:18 +1000 Subject: [PATCH 383/851] mm/migrate: update node demotion order on hotplug events Reclaim-based migration is attempting to optimize data placement in memory based on the system topology. If the system changes, so must the migration ordering. The implementation is conceptually simple and entirely unoptimized. On any memory or CPU hotplug event, assume that a node was added or removed and recalculate all migration targets. This ensures that the node_demotion[] array is always ready to be used in case the new reclaim mode is enabled. This recalculation is far from optimal, most glaringly that it does not even attempt to figure out whether the hotplug event would have some *actual* effect on the demotion order. But, given the expected paucity of hotplug events, this should be fine.
Link: https://lkml.kernel.org/r/20210715055145.195411-3-ying.huang@intel.com Signed-off-by: Dave Hansen Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Reviewed-by: Zi Yan Cc: Michal Hocko Cc: Wei Xu Cc: Oscar Salvador Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Greg Thelen Cc: Keith Busch Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/migrate.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index b7a40ab47648a..a40c391f9ca75 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -3057,6 +3058,7 @@ void migrate_vma_finalize(struct migrate_vma *migrate) EXPORT_SYMBOL(migrate_vma_finalize); #endif /* CONFIG_DEVICE_PRIVATE */ +#if defined(CONFIG_MEMORY_HOTPLUG) /* Disable reclaim-based migration. */ static void __disable_all_migrate_targets(void) { @@ -3191,10 +3193,96 @@ static void __set_migration_target_nodes(void) /* * For callers that do not hold get_online_mems() already. */ -__maybe_unused // <- temporay to prevent warnings during bisects static void set_migration_target_nodes(void) { get_online_mems(); __set_migration_target_nodes(); put_online_mems(); } + +/* + * React to hotplug events that might affect the migration targets + * like events that online or offline NUMA nodes. + * + * The ordering is also currently dependent on which nodes have + * CPUs. That means we need CPU on/offline notification too. + */ +static int migration_online_cpu(unsigned int cpu) +{ + set_migration_target_nodes(); + return 0; +} + +static int migration_offline_cpu(unsigned int cpu) +{ + set_migration_target_nodes(); + return 0; +} + +/* + * This leaves migrate-on-reclaim transiently disabled between + * the MEM_GOING_OFFLINE and MEM_OFFLINE events. 
This runs + * whether reclaim-based migration is enabled or not, which + * ensures that the user can turn reclaim-based migration at + * any time without needing to recalculate migration targets. + * + * These callbacks already hold get_online_mems(). That is why + * __set_migration_target_nodes() can be used as opposed to + * set_migration_target_nodes(). + */ +static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + switch (action) { + case MEM_GOING_OFFLINE: + /* + * Make sure there are not transient states where + * an offline node is a migration target. This + * will leave migration disabled until the offline + * completes and the MEM_OFFLINE case below runs. + */ + disable_all_migrate_targets(); + break; + case MEM_OFFLINE: + case MEM_ONLINE: + /* + * Recalculate the target nodes once the node + * reaches its final state (online or offline). + */ + __set_migration_target_nodes(); + break; + case MEM_CANCEL_OFFLINE: + /* + * MEM_GOING_OFFLINE disabled all the migration + * targets. Reenable them. + */ + __set_migration_target_nodes(); + break; + case MEM_GOING_ONLINE: + case MEM_CANCEL_ONLINE: + break; + } + + return notifier_from_errno(0); +} + +static int __init migrate_on_reclaim_init(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim", + migration_online_cpu, + migration_offline_cpu); + /* + * In the unlikely case that this fails, the automatic + * migration targets may become suboptimal for nodes + * where N_CPU changes. With such a small impact in a + * rare case, do not bother trying to do anything special. 
+ */ + WARN_ON(ret < 0); + + hotplug_memory_notifier(migrate_on_reclaim_callback, 100); + return 0; +} +late_initcall(migrate_on_reclaim_init); +#endif /* CONFIG_MEMORY_HOTPLUG */ From 6f517bc2e5f2f450023d56a2e6e6121e52b5c10b Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 21 Jul 2021 11:42:20 +1000 Subject: [PATCH 384/851] mm/migrate: enable returning precise migrate_pages() success count Under normal circumstances, migrate_pages() returns the number of pages migrated. In error conditions, it returns an error code. When returning an error code, there is no way to know how many pages were migrated or not migrated. Make migrate_pages() return how many pages are demoted successfully for all cases, including when encountering errors. Page reclaim behavior will depend on this in subsequent patches. Link: https://lkml.kernel.org/r/20210715055145.195411-4-ying.huang@intel.com Signed-off-by: Yang Shi Signed-off-by: Dave Hansen Signed-off-by: "Huang, Ying" Suggested-by: Oscar Salvador [optional parameter] Reviewed-by: Yang Shi Reviewed-by: Zi Yan Cc: Michal Hocko Cc: Wei Xu Cc: Dan Williams Cc: David Hildenbrand Cc: David Rientjes Cc: Greg Thelen Cc: Keith Busch Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/migrate.h | 5 +++-- mm/compaction.c | 2 +- mm/gup.c | 2 +- mm/memory-failure.c | 2 +- mm/memory_hotplug.c | 2 +- mm/mempolicy.c | 4 ++-- mm/migrate.c | 11 ++++++++--- mm/page_alloc.c | 2 +- 8 files changed, 18 insertions(+), 12 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 23dadf7aeba89..8ab88d46318ea 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -41,7 +41,8 @@ extern int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, - unsigned long private, enum migrate_mode mode, int reason); + unsigned long private, enum migrate_mode mode, int 
reason, + unsigned int *ret_succeeded); extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); @@ -56,7 +57,7 @@ extern int migrate_page_move_mapping(struct address_space *mapping, static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, - int reason) + int reason, unsigned int *ret_succeeded) { return -ENOSYS; } static inline struct page *alloc_migration_target(struct page *page, unsigned long private) diff --git a/mm/compaction.c b/mm/compaction.c index 621508e0ecd5d..61fb64f47a069 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2398,7 +2398,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc) err = migrate_pages(&cc->migratepages, compaction_alloc, compaction_free, (unsigned long)cc, cc->mode, - MR_COMPACTION); + MR_COMPACTION, NULL); trace_mm_compaction_migratepages(cc->nr_migratepages, err, &cc->migratepages); diff --git a/mm/gup.c b/mm/gup.c index 42b8b1fa65218..c4441fc4cfba2 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1772,7 +1772,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, if (!list_empty(&movable_page_list)) { ret = migrate_pages(&movable_page_list, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, - MR_LONGTERM_PIN); + MR_LONGTERM_PIN, NULL); if (ret && !list_empty(&movable_page_list)) putback_movable_pages(&movable_page_list); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index eefd823deb679..3eed65e56f93d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2093,7 +2093,7 @@ static int __soft_offline_page(struct page *page) if (isolate_page(hpage, &pagelist)) { ret = migrate_pages(&pagelist, alloc_migration_target, NULL, - (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE); + (unsigned long)&mtc, MIGRATE_SYNC, 
MR_MEMORY_FAILURE, NULL); if (!ret) { bool release = !huge; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 8cb75b26ea4ff..0c46458a3402f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1469,7 +1469,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (nodes_empty(nmask)) node_set(mtc.nid, nmask); ret = migrate_pages(&source, alloc_migration_target, NULL, - (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG); + (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG, NULL); if (ret) { list_for_each_entry(page, &source, lru) { if (__ratelimit(&migrate_rs)) { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e32360e902744..939eabcaf488f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1084,7 +1084,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, if (!list_empty(&pagelist)) { err = migrate_pages(&pagelist, alloc_migration_target, NULL, - (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL); + (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL); if (err) putback_movable_pages(&pagelist); } @@ -1338,7 +1338,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (!list_empty(&pagelist)) { WARN_ON_ONCE(flags & MPOL_MF_LAZY); nr_failed = migrate_pages(&pagelist, new_page, NULL, - start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND); + start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND, NULL); if (nr_failed) putback_movable_pages(&pagelist); } diff --git a/mm/migrate.c b/mm/migrate.c index a40c391f9ca75..35d34ef837eda 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1429,6 +1429,8 @@ static inline int try_split_thp(struct page *page, struct page **page2, * @mode: The migration mode that specifies the constraints for * page migration, if any. * @reason: The reason for page migration. + * @ret_succeeded: Set to the number of pages migrated successfully if + * the caller passes a non-NULL pointer. 
* * The function returns after 10 attempts or if no pages are movable any more * because the list has become empty or no retryable pages exist any more. @@ -1439,7 +1441,7 @@ static inline int try_split_thp(struct page *page, struct page **page2, */ int migrate_pages(struct list_head *from, new_page_t get_new_page, free_page_t put_new_page, unsigned long private, - enum migrate_mode mode, int reason) + enum migrate_mode mode, int reason, unsigned int *ret_succeeded) { int retry = 1; int thp_retry = 1; @@ -1594,6 +1596,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, if (!swapwrite) current->flags &= ~PF_SWAPWRITE; + if (ret_succeeded) + *ret_succeeded = nr_succeeded; + return rc; } @@ -1663,7 +1668,7 @@ static int do_move_pages_to_node(struct mm_struct *mm, }; err = migrate_pages(pagelist, alloc_migration_target, NULL, - (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL); + (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL); if (err) putback_movable_pages(pagelist); return err; @@ -2178,7 +2183,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, list_add(&page->lru, &migratepages); nr_remaining = migrate_pages(&migratepages, *new, NULL, node, - MIGRATE_ASYNC, MR_NUMA_MISPLACED); + MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL); if (nr_remaining) { if (!list_empty(&migratepages)) { list_del(&page->lru); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8f52abc922f27..015908d45d16f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8989,7 +8989,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, cc->nr_migratepages -= nr_reclaimed; ret = migrate_pages(&cc->migratepages, alloc_migration_target, - NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE); + NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL); /* * On -ENOMEM, migrate_pages() bails out right away. 
It is pointless From c2833334565856fa671a7caba350ec6f5188ae7a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 21 Jul 2021 11:42:20 +1000 Subject: [PATCH 385/851] mm/migrate: demote pages during reclaim This is mostly derived from a patch from Yang Shi: https://lore.kernel.org/linux-mm/1560468577-101178-10-git-send-email-yang.shi@linux.alibaba.com/ Add code to the reclaim path (shrink_page_list()) to "demote" data to another NUMA node instead of discarding the data. This always avoids the cost of I/O needed to read the page back in and sometimes avoids the writeout cost when the page is dirty. A second pass through shrink_page_list() will be made if any demotions fail. This essentially falls back to normal reclaim behavior in the case that demotions fail. Previous versions of this patch may have simply failed to reclaim pages which were eligible for demotion but were unable to be demoted in practice. For some cases, for example, MADV_PAGEOUT, the pages are always discarded instead of demoted to follow the kernel API definition, because MADV_PAGEOUT is defined as freeing the specified pages regardless of which tier they are in. Note: This just adds the start of infrastructure for migration. It is actually disabled next to the FIXME in can_demote_anon_pages().
Link: https://lkml.kernel.org/r/20210715055145.195411-5-ying.huang@intel.com Signed-off-by: Dave Hansen Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Reviewed-by: Wei Xu Reviewed-by: Oscar Salvador Cc: Michal Hocko Cc: Zi Yan Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Greg Thelen Cc: Keith Busch Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/migrate.h | 9 ++++ include/trace/events/migrate.h | 3 +- mm/vmscan.c | 85 ++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 8ab88d46318ea..326250996b4e0 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -28,6 +28,7 @@ enum migrate_reason { MR_NUMA_MISPLACED, MR_CONTIG_RANGE, MR_LONGTERM_PIN, + MR_DEMOTION, MR_TYPES }; @@ -167,6 +168,14 @@ struct migrate_vma { int migrate_vma_setup(struct migrate_vma *args); void migrate_vma_pages(struct migrate_vma *migrate); void migrate_vma_finalize(struct migrate_vma *migrate); +int next_demotion_node(int node); + +#else /* CONFIG_MIGRATION disabled: */ + +static inline int next_demotion_node(int node) +{ + return NUMA_NO_NODE; +} #endif /* CONFIG_MIGRATION */ diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 9fb2a3bbcdfb9..779f3fad9ecd5 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -21,7 +21,8 @@ EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \ EM( MR_NUMA_MISPLACED, "numa_misplaced") \ EM( MR_CONTIG_RANGE, "contig_range") \ - EMe(MR_LONGTERM_PIN, "longterm_pin") + EM( MR_LONGTERM_PIN, "longterm_pin") \ + EMe(MR_DEMOTION, "demotion") /* * First define the enums in the above macros to be exported to userspace diff --git a/mm/vmscan.c b/mm/vmscan.c index 1cc05ab8ca159..fb714722f2497 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,9 @@ struct scan_control { /* 
The file pages on the current node are dangerously low */ unsigned int file_is_tiny:1; + /* Always discard instead of demoting to lower tier memory */ + unsigned int no_demotion:1; + /* Allocation order */ s8 order; @@ -515,6 +519,17 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]); } +static bool can_demote_anon_pages(int nid, struct scan_control *sc) +{ + if (sc->no_demotion) + return false; + if (next_demotion_node(nid) == NUMA_NO_NODE) + return false; + + // FIXME: actually enable this later in the series + return false; +} + /* * This misses isolated pages which are not accounted for to save counters. * As the data only determines if reclaim or compaction continues, it is @@ -1267,6 +1282,49 @@ static void page_check_dirty_writeback(struct page *page, mapping->a_ops->is_dirty_writeback(page, dirty, writeback); } +static struct page *alloc_demote_page(struct page *page, unsigned long node) +{ + struct migration_target_control mtc = { + /* + * Allocate from 'node', or fail quickly and quietly. + * When this happens, 'page' will likely just be discarded + * instead of migrated. + */ + .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | + __GFP_THISNODE | __GFP_NOWARN | + __GFP_NOMEMALLOC | GFP_NOWAIT, + .nid = node + }; + + return alloc_migration_target(page, (unsigned long)&mtc); +} + +/* + * Take pages on @demote_list and attempt to demote them to + * another node. Pages which are not demoted are left on + * @demote_pages. 
+ */ +static unsigned int demote_page_list(struct list_head *demote_pages, + struct pglist_data *pgdat) +{ + int target_nid = next_demotion_node(pgdat->node_id); + unsigned int nr_succeeded; + int err; + + if (list_empty(demote_pages)) + return 0; + + if (target_nid == NUMA_NO_NODE) + return 0; + + /* Demotion ignores all cpuset and mempolicy settings */ + err = migrate_pages(demote_pages, alloc_demote_page, NULL, + target_nid, MIGRATE_ASYNC, MR_DEMOTION, + &nr_succeeded); + + return nr_succeeded; +} + /* * shrink_page_list() returns the number of reclaimed pages */ @@ -1278,12 +1336,16 @@ static unsigned int shrink_page_list(struct list_head *page_list, { LIST_HEAD(ret_pages); LIST_HEAD(free_pages); + LIST_HEAD(demote_pages); unsigned int nr_reclaimed = 0; unsigned int pgactivate = 0; + bool do_demote_pass; memset(stat, 0, sizeof(*stat)); cond_resched(); + do_demote_pass = can_demote_anon_pages(pgdat->node_id, sc); +retry: while (!list_empty(page_list)) { struct address_space *mapping; struct page *page; @@ -1432,6 +1494,17 @@ static unsigned int shrink_page_list(struct list_head *page_list, ; /* try to reclaim the page below */ } + /* + * Before reclaiming the page, try to relocate + * its contents to another node. + */ + if (do_demote_pass && + (thp_migration_supported() || !PageTransHuge(page))) { + list_add(&page->lru, &demote_pages); + unlock_page(page); + continue; + } + /* * Anonymous process memory has backing store? * Try to allocate it some swap space here. 
@@ -1683,6 +1756,17 @@ static unsigned int shrink_page_list(struct list_head *page_list, list_add(&page->lru, &ret_pages); VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); } + /* 'page_list' is always empty here */ + + /* Migrate pages selected for demotion */ + nr_reclaimed += demote_page_list(&demote_pages, pgdat); + /* Pages that could not be demoted are still in @demote_pages */ + if (!list_empty(&demote_pages)) { + /* Pages which failed to demoted go back on @page_list for retry: */ + list_splice_init(&demote_pages, page_list); + do_demote_pass = false; + goto retry; + } pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; @@ -2330,6 +2414,7 @@ unsigned long reclaim_pages(struct list_head *page_list) .may_writepage = 1, .may_unmap = 1, .may_swap = 1, + .no_demotion = 1, }; noreclaim_flag = memalloc_noreclaim_save(); From 89f387f714bc52b422ca50440efc5ee9ea59108b Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 21 Jul 2021 11:42:21 +1000 Subject: [PATCH 386/851] mm/vmscan: add page demotion counter Account the number of demoted pages. Add pgdemote_kswapd and pgdemote_direct VM counters showed in /proc/vmstat. 
[ daveh: - __count_vm_events() a bit, and made them look at the THP size directly rather than getting data from migrate_pages() ] Link: https://lkml.kernel.org/r/20210715055145.195411-6-ying.huang@intel.com Signed-off-by: Yang Shi Signed-off-by: Dave Hansen Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Reviewed-by: Wei Xu Cc: Michal Hocko Cc: Zi Yan Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Oscar Salvador Cc: Greg Thelen Cc: Keith Busch Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/vm_event_item.h | 2 ++ mm/vmscan.c | 5 +++++ mm/vmstat.c | 2 ++ 3 files changed, 9 insertions(+) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index ae0dd1948c2b0..a185cc75ff52a 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -33,6 +33,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGREUSE, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, + PGDEMOTE_KSWAPD, + PGDEMOTE_DIRECT, PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_DIRECT_THROTTLE, diff --git a/mm/vmscan.c b/mm/vmscan.c index fb714722f2497..5bf617f26160f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1322,6 +1322,11 @@ static unsigned int demote_page_list(struct list_head *demote_pages, target_nid, MIGRATE_ASYNC, MR_DEMOTION, &nr_succeeded); + if (current_is_kswapd()) + __count_vm_events(PGDEMOTE_KSWAPD, nr_succeeded); + else + __count_vm_events(PGDEMOTE_DIRECT, nr_succeeded); + return nr_succeeded; } diff --git a/mm/vmstat.c b/mm/vmstat.c index b0534e068166c..ec5a2e789dd2e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1217,6 +1217,8 @@ const char * const vmstat_text[] = { "pgreuse", "pgsteal_kswapd", "pgsteal_direct", + "pgdemote_kswapd", + "pgdemote_direct", "pgscan_kswapd", "pgscan_direct", "pgscan_direct_throttle", From 06c96d68999603288a2b3af3ac7ff3b2f8fcf5e6 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 21 Jul 2021 11:42:22 +1000 Subject: [PATCH 387/851] mm/vmscan: add helper for querying ability to age 
anonymous pages Anonymous pages are kept on their own LRU(s). These lists could theoretically always be scanned and maintained. But, without swap, there is currently nothing the kernel can *do* with the results of a scanned, sorted LRU for anonymous pages. A check for '!total_swap_pages' currently serves as a valid check as to whether anonymous LRUs should be maintained. However, another method will be added shortly: page demotion. Abstract out the 'total_swap_pages' checks into a helper, give it a logically significant name, and check for the possibility of page demotion. Link: https://lkml.kernel.org/r/20210715055145.195411-7-ying.huang@intel.com Signed-off-by: Dave Hansen Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Reviewed-by: Greg Thelen Cc: Michal Hocko Cc: Wei Xu Cc: Oscar Salvador Cc: Zi Yan Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Keith Busch Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmscan.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 5bf617f26160f..d8011800bbd84 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2730,6 +2730,21 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, } } +/* + * Anonymous LRU management is a waste if there is + * ultimately no way to reclaim the memory. + */ +static bool can_age_anon_pages(struct pglist_data *pgdat, + struct scan_control *sc) +{ + /* Aging the anon LRU is valuable if swap is present: */ + if (total_swap_pages > 0) + return true; + + /* Also valuable if anon pages can be demoted: */ + return can_demote_anon_pages(pgdat->node_id, sc); +} + static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; @@ -2839,7 +2854,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. 
*/ - if (total_swap_pages && inactive_is_low(lruvec, LRU_INACTIVE_ANON)) + if (can_age_anon_pages(lruvec_pgdat(lruvec), sc) && + inactive_is_low(lruvec, LRU_INACTIVE_ANON)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -3674,7 +3690,7 @@ static void age_active_anon(struct pglist_data *pgdat, struct mem_cgroup *memcg; struct lruvec *lruvec; - if (!total_swap_pages) + if (!can_age_anon_pages(pgdat, sc)) return; lruvec = mem_cgroup_lruvec(NULL, pgdat); From 77fd36297ff355f0b33f6909bbf2634c004eec62 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 21 Jul 2021 11:42:23 +1000 Subject: [PATCH 388/851] mm/vmscan: Consider anonymous pages without swap Reclaim anonymous pages if a migration path is available now that demotion provides a non-swap recourse for reclaiming anon pages. Note that this check is subtly different from the can_age_anon_pages() checks. This mechanism checks whether a specific page in a specific context can actually be reclaimed, given current swap space and cgroup limits. can_age_anon_pages() is a much simpler and more preliminary check which just says whether there is a possibility of future reclaim. 
Link: https://lkml.kernel.org/r/20210715055145.195411-8-ying.huang@intel.com Cc: Keith Busch Signed-off-by: Dave Hansen Signed-off-by: "Huang, Ying" Reviewed-by: Yang Shi Cc: Michal Hocko Cc: Zi Yan Cc: Wei Xu Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Greg Thelen Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmscan.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index d8011800bbd84..d610f3d520b03 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -521,7 +521,7 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, static bool can_demote_anon_pages(int nid, struct scan_control *sc) { - if (sc->no_demotion) + if (sc && sc->no_demotion) return false; if (next_demotion_node(nid) == NUMA_NO_NODE) return false; @@ -530,6 +530,31 @@ static bool can_demote_anon_pages(int nid, struct scan_control *sc) return false; } +static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg, + int nid, + struct scan_control *sc) +{ + if (memcg == NULL) { + /* + * For non-memcg reclaim, is there + * space in any swap device? + */ + if (get_nr_swap_pages() > 0) + return true; + } else { + /* Is the memcg below its swap limit? */ + if (mem_cgroup_get_nr_swap_pages(memcg) > 0) + return true; + } + + /* + * The page can not be swapped. + * + * Can it be reclaimed from this node via demotion? + */ + return can_demote_anon_pages(nid, sc); +} + /* * This misses isolated pages which are not accounted for to save counters. 
* As the data only determines if reclaim or compaction continues, it is @@ -541,7 +566,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone) nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE); - if (get_nr_swap_pages() > 0) + if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL)) nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON); @@ -2545,6 +2570,7 @@ enum scan_balance { static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long *nr) { + struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec); unsigned long anon_cost, file_cost, total_cost; int swappiness = mem_cgroup_swappiness(memcg); @@ -2555,7 +2581,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru; /* If we have no swap space, do not bother scanning anon pages. */ - if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { + if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) { scan_balance = SCAN_FILE; goto out; } @@ -2925,7 +2951,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, */ pages_for_compaction = compact_gap(sc->order); inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE); - if (get_nr_swap_pages() > 0) + if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc)) inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON); return inactive_lru_pages > pages_for_compaction; From 484e693413eb33849c33ac1d7dd8a6316bb94bf8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 21 Jul 2021 11:42:24 +1000 Subject: [PATCH 389/851] mm/vmscan: never demote for memcg reclaim Global reclaim aims to reduce the amount of memory used on a given node or set of nodes. Migrating pages to another node serves this purpose. memcg reclaim is different. 
Its goal is to reduce the total memory consumption of the entire memcg, across all nodes. Migration does not assist memcg reclaim because it just moves page contents between nodes rather than actually reducing memory consumption. Link: https://lkml.kernel.org/r/20210715055145.195411-9-ying.huang@intel.com Signed-off-by: Dave Hansen Signed-off-by: "Huang, Ying" Suggested-by: Yang Shi Reviewed-by: Yang Shi Cc: Michal Hocko Cc: Wei Xu Cc: Oscar Salvador Cc: Zi Yan Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Greg Thelen Cc: Keith Busch Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmscan.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index d610f3d520b03..a6f638607f9fd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -521,8 +521,13 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, static bool can_demote_anon_pages(int nid, struct scan_control *sc) { - if (sc && sc->no_demotion) - return false; + if (sc) { + if (sc->no_demotion) + return false; + /* It is pointless to do demotion in memcg reclaim */ + if (cgroup_reclaim(sc)) + return false; + } if (next_demotion_node(nid) == NUMA_NO_NODE) return false; From 5aba4142ec4fd4fcc23feea5f410834583775983 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 21 Jul 2021 11:42:25 +1000 Subject: [PATCH 390/851] mm/migrate: add sysfs interface to enable reclaim migration Some method is obviously needed to enable reclaim-based migration. Just like traditional autonuma, there will be some workloads that will benefit like workloads with more "static" configurations where hot pages stay hot and cold pages stay cold. If pages come and go from the hot and cold sets, the benefits of this approach will be more limited. The benefits are truly workload-based and *not* hardware-based. We do not believe that there is a viable threshold where certain hardware configurations should have this mechanism enabled while others do not. 
To be conservative, earlier work defaulted to disable reclaim- based migration and did not include a mechanism to enable it. This proposes add a new sysfs file /sys/kernel/mm/numa/demotion_enabled as a method to enable it. We are open to any alternative that allows end users to enable this mechanism or disable it if workload harm is detected (just like traditional autonuma). Once this is enabled page demotion may move data to a NUMA node that does not fall into the cpuset of the allocating process. This could be construed to violate the guarantees of cpusets. However, since this is an opt-in mechanism, the assumption is that anyone enabling it is content to relax the guarantees. Originally-by: Dave Hansen Link: https://lkml.kernel.org/r/20210715055145.195411-10-ying.huang@intel.com Signed-off-by: Huang Ying Cc: Michal Hocko Cc: Wei Xu Cc: Yang Shi Cc: Zi Yan Cc: David Rientjes Cc: Dan Williams Cc: David Hildenbrand Cc: Greg Thelen Cc: Keith Busch Cc: Oscar Salvador Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- .../ABI/testing/sysfs-kernel-mm-numa | 24 ++++++++ include/linux/mempolicy.h | 4 ++ mm/mempolicy.c | 61 +++++++++++++++++++ mm/vmscan.c | 5 +- 4 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm-numa diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-numa b/Documentation/ABI/testing/sysfs-kernel-mm-numa new file mode 100644 index 0000000000000..77e559d4ed800 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-mm-numa @@ -0,0 +1,24 @@ +What: /sys/kernel/mm/numa/ +Date: June 2021 +Contact: Linux memory management mailing list +Description: Interface for NUMA + +What: /sys/kernel/mm/numa/demotion_enabled +Date: June 2021 +Contact: Linux memory management mailing list +Description: Enable/disable demoting pages during reclaim + + Page migration during reclaim is intended for systems + with tiered memory configurations. 
These systems have + multiple types of memory with varied performance + characteristics instead of plain NUMA systems where + the same kind of memory is found at varied distances. + Allowing page migration during reclaim enables these + systems to migrate pages from fast tiers to slow tiers + when the fast tier is under pressure. This migration + is performed before swap. It may move data to a NUMA + node that does not fall into the cpuset of the + allocating process which might be construed to violate + the guarantees of cpusets. This should not be enabled + on systems which need strict cpuset location + guarantees. diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0aaf91b496e2f..4ca025e2a77ef 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -184,6 +184,8 @@ extern bool vma_migratable(struct vm_area_struct *vma); extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); +extern bool numa_demotion_enabled; + #else struct mempolicy {}; @@ -292,5 +294,7 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp) { return NULL; } + +#define numa_demotion_enabled false #endif /* CONFIG_NUMA */ #endif diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 939eabcaf488f..e675bfb856da7 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -3021,3 +3021,64 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) p += scnprintf(p, buffer + maxlen - p, ":%*pbl", nodemask_pr_args(&nodes)); } + +bool numa_demotion_enabled = false; + +#ifdef CONFIG_SYSFS +static ssize_t numa_demotion_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", + numa_demotion_enabled? 
"true" : "false"); +} + +static ssize_t numa_demotion_enabled_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) + numa_demotion_enabled = true; + else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) + numa_demotion_enabled = false; + else + return -EINVAL; + + return count; +} + +static struct kobj_attribute numa_demotion_enabled_attr = + __ATTR(demotion_enabled, 0644, numa_demotion_enabled_show, + numa_demotion_enabled_store); + +static struct attribute *numa_attrs[] = { + &numa_demotion_enabled_attr.attr, + NULL, +}; + +static const struct attribute_group numa_attr_group = { + .attrs = numa_attrs, +}; + +static int __init numa_init_sysfs(void) +{ + int err; + struct kobject *numa_kobj; + + numa_kobj = kobject_create_and_add("numa", mm_kobj); + if (!numa_kobj) { + pr_err("failed to create numa kobject\n"); + return -ENOMEM; + } + err = sysfs_create_group(numa_kobj, &numa_attr_group); + if (err) { + pr_err("failed to register numa group\n"); + goto delete_obj; + } + return 0; + +delete_obj: + kobject_put(numa_kobj); + return err; +} +subsys_initcall(numa_init_sysfs); +#endif diff --git a/mm/vmscan.c b/mm/vmscan.c index a6f638607f9fd..f83223d726811 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -521,6 +521,8 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, static bool can_demote_anon_pages(int nid, struct scan_control *sc) { + if (!numa_demotion_enabled) + return false; if (sc) { if (sc->no_demotion) return false; @@ -531,8 +533,7 @@ static bool can_demote_anon_pages(int nid, struct scan_control *sc) if (next_demotion_node(nid) == NUMA_NO_NODE) return false; - // FIXME: actually enable this later in the series - return false; + return true; } static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg, From 9a075d058ec477a89bbf71f99ff0264bad798bef Mon Sep 17 00:00:00 2001 From: Hui Su Date: Wed, 21 Jul 2021 11:42:25 +1000 Subject: 
[PATCH 391/851] mm/vmpressure: replace vmpressure_to_css() with vmpressure_to_memcg() We can get memcg directly from vmpr instead of vmpr->memcg->css->memcg, so add a new func helper vmpressure_to_memcg(). And no code will use vmpressure_to_css(), so delete it. Link: https://lkml.kernel.org/r/20210630112146.455103-1-suhui@zeku.com Signed-off-by: Hui Su Acked-by: Michal Hocko Acked-by: Chris Down Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/vmpressure.h | 2 +- mm/memcontrol.c | 4 ++-- mm/vmpressure.c | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 6d28bc433c1cf..6a2f51ebbfd35 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -37,7 +37,7 @@ extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); extern void vmpressure_init(struct vmpressure *vmpr); extern void vmpressure_cleanup(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); -extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); +extern struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr); extern int vmpressure_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, const char *args); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fb3da5e8b8d2f..c010164172dd7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -256,9 +256,9 @@ struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg) return &memcg->vmpressure; } -struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) +struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr) { - return &container_of(vmpr, struct mem_cgroup, vmpressure)->css; + return container_of(vmpr, struct mem_cgroup, vmpressure); } #ifdef CONFIG_MEMCG_KMEM diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 9b172561fded7..76518e4166dc9 100644 ---
a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -74,8 +74,7 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work) static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr) { - struct cgroup_subsys_state *css = vmpressure_to_css(vmpr); - struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct mem_cgroup *memcg = vmpressure_to_memcg(vmpr); memcg = parent_mem_cgroup(memcg); if (!memcg) From a8c53a27e831b8d58cb28fce9b45a70e57f4c8f8 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:26 +1000 Subject: [PATCH 392/851] mm/vmscan: remove the PageDirty check after MADV_FREE pages are page_ref_freezed Patch series "Cleanups for vmscan", v2. This series contains cleanups to remove unneeded return value, misleading setting and so on. Also this remove the PageDirty check after MADV_FREE pages are page_ref_freezed. More details can be found in the respective changelogs. This patch (of 4): If the MADV_FREE pages are redirtied before they could be reclaimed, put the pages back to anonymous LRU list by setting SwapBacked flag and the pages will be reclaimed in normal swapout way. But as Yu Zhao pointed out, "The page has only one reference left, which is from the isolation. After the caller puts the page back on lru and drops the reference, the page will be freed anyway. It doesn't matter which lru it goes." So we don't bother checking PageDirty here. [Yu Zhao's comment is also quoted in the code.] 
Link: https://lkml.kernel.org/r/20210717065911.61497-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20210717065911.61497-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Michal Hocko Cc: Jens Axboe Cc: Joonsoo Kim Cc: Alex Shi Cc: Alistair Popple Cc: Matthew Wilcox Cc: Minchan Kim Cc: David Hildenbrand Cc: Shaohua Li Cc: Hillf Danton Cc: Yu Zhao Cc: John Hubbard Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmscan.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index f83223d726811..60bba493706a1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1736,11 +1736,14 @@ static unsigned int shrink_page_list(struct list_head *page_list, /* follow __remove_mapping for reference */ if (!page_ref_freeze(page, 1)) goto keep_locked; - if (PageDirty(page)) { - page_ref_unfreeze(page, 1); - goto keep_locked; - } - + /* + * The page has only one reference left, which is + * from the isolation. After the caller puts the + * page back on lru and drops the reference, the + * page will be freed anyway. It doesn't matter + * which lru it goes. So we don't bother checking + * PageDirty here. + */ count_vm_event(PGLAZYFREED); count_memcg_page_event(page, PGLAZYFREED); } else if (!mapping || !__remove_mapping(mapping, page, true, From 88a4aa43cea5e6b06899addf33b13880009237b4 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:27 +1000 Subject: [PATCH 393/851] mm/vmscan: remove misleading setting to sc->priority The priority field of sc is used to control how many pages we should scan at once while we always traverse the list to shrink the pages in these functions. So these settings are unneeded and misleading. 
Link: https://lkml.kernel.org/r/20210717065911.61497-3-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Alex Shi Cc: Alistair Popple Cc: David Hildenbrand Cc: Hillf Danton Cc: Jens Axboe Cc: Johannes Weiner Cc: John Hubbard Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Michal Hocko Cc: Minchan Kim Cc: Shaohua Li Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmscan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 60bba493706a1..04d9721cce48e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1824,7 +1824,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, { struct scan_control sc = { .gfp_mask = GFP_KERNEL, - .priority = DEF_PRIORITY, .may_unmap = 1, }; struct reclaim_stat stat; @@ -2449,7 +2448,6 @@ unsigned long reclaim_pages(struct list_head *page_list) unsigned int noreclaim_flag; struct scan_control sc = { .gfp_mask = GFP_KERNEL, - .priority = DEF_PRIORITY, .may_writepage = 1, .may_unmap = 1, .may_swap = 1, From 0d7c26af7a938543988bb0864008318e9bbcbc36 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:28 +1000 Subject: [PATCH 394/851] mm/vmscan: remove unneeded return value of kswapd_run() The return value of kswapd_run() is unused now. Clean it up. 
Link: https://lkml.kernel.org/r/20210717065911.61497-4-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Michal Hocko Cc: Alex Shi Cc: Alistair Popple Cc: David Hildenbrand Cc: Hillf Danton Cc: Jens Axboe Cc: Johannes Weiner Cc: John Hubbard Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Minchan Kim Cc: Shaohua Li Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/swap.h | 2 +- mm/vmscan.c | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index f30d26b0f71db..ba52f3a3478e3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -408,7 +408,7 @@ static inline bool node_reclaim_enabled(void) extern void check_move_unevictable_pages(struct pagevec *pvec); -extern int kswapd_run(int nid); +extern void kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_SWAP diff --git a/mm/vmscan.c b/mm/vmscan.c index 04d9721cce48e..0eca5ea45e7a6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4430,23 +4430,20 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) * This kswapd start function will be called by init and node-hot-add. * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added. */ -int kswapd_run(int nid) +void kswapd_run(int nid) { pg_data_t *pgdat = NODE_DATA(nid); - int ret = 0; if (pgdat->kswapd) - return 0; + return; pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); if (IS_ERR(pgdat->kswapd)) { /* failure at boot is fatal */ BUG_ON(system_state < SYSTEM_RUNNING); pr_err("Failed to start kswapd on node %d\n", nid); - ret = PTR_ERR(pgdat->kswapd); pgdat->kswapd = NULL; } - return ret; } /* From b9413558da108cbe3ffffd6d13d92c93d62ebb97 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:29 +1000 Subject: [PATCH 395/851] mm/vmscan: add 'else' to remove check_pending label We could add 'else' to remove the somewhat odd check_pending label to make code core succinct. 
Link: https://lkml.kernel.org/r/20210717065911.61497-5-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Michal Hocko Cc: Alex Shi Cc: Alistair Popple Cc: David Hildenbrand Cc: Hillf Danton Cc: Jens Axboe Cc: Johannes Weiner Cc: John Hubbard Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Minchan Kim Cc: Shaohua Li Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmscan.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 0eca5ea45e7a6..7679cea707ab7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3574,18 +3574,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, * blocked waiting on the same lock. Instead, throttle for up to a * second before continuing. */ - if (!(gfp_mask & __GFP_FS)) { + if (!(gfp_mask & __GFP_FS)) wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, allow_direct_reclaim(pgdat), HZ); + else + /* Throttle until kswapd wakes the process */ + wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, + allow_direct_reclaim(pgdat)); - goto check_pending; - } - - /* Throttle until kswapd wakes the process */ - wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, - allow_direct_reclaim(pgdat)); - -check_pending: if (fatal_signal_pending(current)) return true; From 24056927149d4c7ad8f23ef9dbf03257a0cf2a31 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 21 Jul 2021 11:42:30 +1000 Subject: [PATCH 396/851] mm/mempolicy: add MPOL_PREFERRED_MANY for multiple preferred nodes Patch series "Introduce multi-preference mempolicy", v6. This patch series introduces the concept of the MPOL_PREFERRED_MANY mempolicy. This mempolicy mode can be used with either the set_mempolicy(2) or mbind(2) interfaces. Like the MPOL_PREFERRED interface, it allows an application to set a preference for nodes which will fulfil memory allocation requests. Unlike the MPOL_PREFERRED mode, it takes a set of nodes. 
Like the MPOL_BIND interface, it works over a set of nodes. Unlike MPOL_BIND, it will not cause a SIGSEGV or invoke the OOM killer if those preferred nodes are not available. Along with these patches are patches for libnuma, numactl, numademo, and memhog. They still need some polish, but can be found here: https://gitlab.com/bwidawsk/numactl/-/tree/prefer-many It allows new usage: `numactl -P 0,3,4` The goal of the new mode is to enable some use-cases when using tiered memory usage models which I've lovingly named 1a. The Hare - The interconnect is fast enough to meet bandwidth and latency requirements allowing preference to be given to all nodes with "fast" memory. 1b. The Indiscriminate Hare - An application knows it wants fast memory (or perhaps slow memory), but doesn't care which node it runs on. The application can prefer a set of nodes and then xpu bind to the local node (cpu, accelerator, etc). This reverses how nodes are chosen today where the kernel attempts to use local memory to the CPU whenever possible. This will attempt to use the local accelerator to the memory. 2. The Tortoise - The administrator (or the application itself) is aware it only needs slow memory, and so can prefer that. Much of this is almost achievable with the bind interface, but the bind interface suffers from an inability to fall back to another set of nodes if binding fails to all nodes in the nodemask. Like MPOL_BIND a nodemask is given. Inherently this removes ordering from the preference. : /* Set first two nodes as preferred in an 8 node system. */ : const unsigned long nodes = 0x3; : set_mempolicy(MPOL_PREFER_MANY, &nodes, 8); : /* Mimic interleave policy, but have fallback */ : const unsigned long nodes = 0xaa; : set_mempolicy(MPOL_PREFER_MANY, &nodes, 8); Some internal discussion took place around the interface. There are two alternatives which we have discussed, plus one I stuck in: 1. Ordered list of nodes.
Currently it's believed that the added complexity is not needed for expected usecases. 2. A flag for bind to allow falling back to other nodes. This confuses the notion of binding and is less flexible than the current solution. 3. Create flags or new modes that help with some ordering. This offers both a friendlier API as well as a solution for more customized usage. It's unknown if it's worth the complexity to support this. Here is sample code for how this might work: : // Prefer specific nodes for something wacky : set_mempolicy(MPOL_PREFER_MANY, 0x17c, 1024); : : // Default : set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_SOCKET, NULL, 0); : // which is the same as : set_mempolicy(MPOL_DEFAULT, NULL, 0); : : // The Hare : set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE, NULL, 0); : : // The Tortoise : set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE_REV, NULL, 0); : : // Prefer the fast memory of the first two sockets : set_mempolicy(MPOL_PREFER_MANY | MPOL_F_PREFER_ORDER_TYPE, -1, 2); : In v1, Andi Kleen brought up reusing MPOL_PREFERRED as the mode for the API. There wasn't consensus around this, so I've left the existing API as it was. I'm open to more feedback here, but my slight preference is to use a new API as it ensures if people are using it, they are entirely aware of what they're doing and not accidentally misusing the old interface. (In a similar way to how MPOL_LOCAL was introduced). In v1, Michal also brought up renaming this MPOL_PREFERRED_MASK. I'm equally fine with that change, but I hadn't heard much emphatic support for one way or another, so I've left that too. This patch (of 6): The NUMA APIs currently allow passing in a "preferred node" as a single bit set in a nodemask. If more than one bit is set, bits after the first are ignored. This single node is generally OK for location-based NUMA where memory being allocated will eventually be operated on by a single CPU.
However, in systems with multiple memory types, folks want to target a *type* of memory instead of a location. For instance, someone might want some high-bandwidth memory but do not care about the CPU next to which it is allocated. Or, they want a cheap, high capacity allocation and want to target all NUMA nodes which have persistent memory in volatile mode. In both of these cases, the application wants to target a *set* of nodes, but does not want strict MPOL_BIND behavior as that could lead to OOM killer or SIGSEGV. So add MPOL_PREFERRED_MANY policy to support the multiple preferred nodes requirement. This is not a pie-in-the-sky dream for an API. This was a response to a specific ask of more than one group at Intel. Specifically: 1. There are existing libraries that target memory types such as https://github.com/memkind/memkind. These are known to suffer from SIGSEGV's when memory is low on targeted memory "kinds" that span more than one node. The MCDRAM on a Xeon Phi in "Cluster on Die" mode is an example of this. 2. Volatile-use persistent memory users want to have a memory policy which is targeted at either "cheap and slow" (PMEM) or "expensive and fast" (DRAM). However, they do not want to experience allocation failures when the targeted type is unavailable. 3. Allocate-then-run. Generally, we let the process scheduler decide on which physical CPU to run a task. That location provides a default allocation policy, and memory availability is not generally considered when placing tasks. For situations where memory is valuable and constrained, some users want to allocate memory first, *then* allocate close compute resources to the allocation. This is the reverse of the normal (CPU) model. Accelerators such as GPUs that operate on core-mm-managed memory are interested in this model. 
A check is added in sanitize_mpol_flags() to not permit 'prefer_many' policy to be used for now, and will be removed in later patch after all implementations for 'prefer_many' are ready, as suggested by Michal Hocko. Link: https://lore.kernel.org/r/20200630212517.308045-4-ben.widawsky@intel.com Link: https://lkml.kernel.org/r/1626077374-81682-2-git-send-email-feng.tang@intel.com Signed-off-by: Feng Tang Signed-off-by: Ben Widawsky Co-developed-by: Ben Widawsky Signed-off-by: Dave Hansen Cc: Michal Hocko Cc: David Rientjes Cc: Andrea Arcangeli Cc: Mel Gorman Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Cc: Andi Kleen Cc: Dan Williams Cc: Huang Ying Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 44 ++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 19a00bc7fe865..046d0ccba4cd7 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -22,6 +22,7 @@ enum { MPOL_BIND, MPOL_INTERLEAVE, MPOL_LOCAL, + MPOL_PREFERRED_MANY, MPOL_MAX, /* always last member of enum */ }; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e675bfb856da7..dc7584f79edac 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -31,6 +31,9 @@ * but useful to set in a VMA when you have a non default * process policy. * + * preferred many Try a set of nodes first before normal fallback. This is + * similar to preferred without the special case. + * * default Allocate on the local node first, or when on a VMA * use the process policy. This is what Linux always did * in a NUMA aware kernel and still does by, ahem, default. 
@@ -207,6 +210,14 @@ static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) return 0; } +static int mpol_new_preferred_many(struct mempolicy *pol, const nodemask_t *nodes) +{ + if (nodes_empty(*nodes)) + return -EINVAL; + pol->nodes = *nodes; + return 0; +} + static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) @@ -408,6 +419,10 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { [MPOL_LOCAL] = { .rebind = mpol_rebind_default, }, + [MPOL_PREFERRED_MANY] = { + .create = mpol_new_preferred_many, + .rebind = mpol_rebind_preferred, + }, }; static int migrate_page_add(struct page *page, struct list_head *pagelist, @@ -900,6 +915,7 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes) case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: + case MPOL_PREFERRED_MANY: *nodes = p->nodes; break; case MPOL_LOCAL: @@ -1446,7 +1462,13 @@ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags) { *flags = *mode & MPOL_MODE_FLAGS; *mode &= ~MPOL_MODE_FLAGS; - if ((unsigned int)(*mode) >= MPOL_MAX) + + /* + * The check should be 'mode >= MPOL_MAX', but as 'prefer_many' + * is not fully implemented, don't permit it to be used for now, + * and the logic will be restored in following patch + */ + if ((unsigned int)(*mode) >= MPOL_PREFERRED_MANY) return -EINVAL; if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES)) return -EINVAL; @@ -1887,7 +1909,8 @@ nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) /* Return the node id preferred by the given mempolicy, or the given id */ static int policy_node(gfp_t gfp, struct mempolicy *policy, int nd) { - if (policy->mode == MPOL_PREFERRED) { + if (policy->mode == MPOL_PREFERRED || + policy->mode == MPOL_PREFERRED_MANY) { nd = first_node(policy->nodes); } else { /* @@ -1931,6 +1954,7 @@ unsigned int mempolicy_slab_node(void) switch (policy->mode) { case MPOL_PREFERRED: + case 
MPOL_PREFERRED_MANY: return first_node(policy->nodes); case MPOL_INTERLEAVE: @@ -2063,6 +2087,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask) mempolicy = current->mempolicy; switch (mempolicy->mode) { case MPOL_PREFERRED: + case MPOL_PREFERRED_MANY: case MPOL_BIND: case MPOL_INTERLEAVE: *mask = mempolicy->nodes; @@ -2173,10 +2198,12 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, * node and don't fall back to other nodes, as the cost of * remote accesses would likely offset THP benefits. * - * If the policy is interleave, or does not allow the current - * node in its nodemask, we allocate the standard way. + * If the policy is interleave or multiple preferred nodes, or + * does not allow the current node in its nodemask, we allocate + * the standard way. */ - if (pol->mode == MPOL_PREFERRED) + if ((pol->mode == MPOL_PREFERRED || + pol->mode == MPOL_PREFERRED_MANY)) hpage_node = first_node(pol->nodes); nmask = policy_nodemask(gfp, pol); @@ -2311,6 +2338,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) case MPOL_BIND: case MPOL_INTERLEAVE: case MPOL_PREFERRED: + case MPOL_PREFERRED_MANY: return !!nodes_equal(a->nodes, b->nodes); case MPOL_LOCAL: return true; @@ -2451,6 +2479,9 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long break; case MPOL_PREFERRED: + case MPOL_PREFERRED_MANY: + if (node_isset(curnid, pol->nodes)) + goto out; polnid = first_node(pol->nodes); break; @@ -2829,6 +2860,7 @@ static const char * const policy_modes[] = [MPOL_BIND] = "bind", [MPOL_INTERLEAVE] = "interleave", [MPOL_LOCAL] = "local", + [MPOL_PREFERRED_MANY] = "prefer (many)", }; @@ -2907,6 +2939,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) if (!nodelist) err = 0; goto out; + case MPOL_PREFERRED_MANY: case MPOL_BIND: /* * Insist on a nodelist @@ -2993,6 +3026,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) case MPOL_LOCAL: break; case MPOL_PREFERRED: + case 
MPOL_PREFERRED_MANY: case MPOL_BIND: case MPOL_INTERLEAVE: nodes = pol->nodes; From 2c72c3e8395b3465242eea93f3437e713b52c35e Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 21 Jul 2021 11:42:31 +1000 Subject: [PATCH 397/851] mm/mempolicy: add page allocation function for MPOL_PREFERRED_MANY policy The semantics of MPOL_PREFERRED_MANY are similar to those of MPOL_PREFERRED: it will first try to allocate memory from the preferred node(s), and fall back to all nodes in the system when the first try fails. Add a dedicated function for it, just like the 'interleave' policy. Link: https://lore.kernel.org/r/20200630212517.308045-9-ben.widawsky@intel.com Link: https://lkml.kernel.org/r/1626077374-81682-3-git-send-email-feng.tang@intel.com Signed-off-by: Feng Tang Signed-off-by: Ben Widawsky Co-developed-by: Ben Widawsky Suggested-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dan Williams Cc: Dave Hansen Cc: David Rientjes Cc: Huang Ying Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mempolicy.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index dc7584f79edac..fdfbe8538f6d0 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2153,6 +2153,25 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, return page; } +static struct page *alloc_page_preferred_many(gfp_t gfp, unsigned int order, + struct mempolicy *pol) +{ + struct page *page; + + /* + * This is a two pass approach. The first pass will only try the + * preferred nodes but skip the direct reclaim and allow the + * allocation to fail, while the second pass will try all the + * nodes in system. 
+ */ + page = __alloc_pages(((gfp | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM), + order, first_node(pol->nodes), &pol->nodes); + if (!page) + page = __alloc_pages(gfp, order, numa_node_id(), NULL); + + return page; +} + /** * alloc_pages_vma - Allocate a page for a VMA. * @gfp: GFP flags. From b409a0ed62c1e61bcad1009772225359794bc7fc Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 21 Jul 2021 11:42:32 +1000 Subject: [PATCH 398/851] mm/mempolicy: enable page allocation for MPOL_PREFERRED_MANY for general cases In order to support MPOL_PREFERRED_MANY which is used by set_mempolicy(2), mbind(2), enable both alloc_pages() and alloc_pages_vma() by using alloc_page_preferred_many(). Link: https://lkml.kernel.org/r/1626077374-81682-4-git-send-email-feng.tang@intel.com Signed-off-by: Ben Widawsky Signed-off-by: Feng Tang Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dan Williams Cc: Dave Hansen Cc: David Rientjes Cc: Huang Ying Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mempolicy.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index fdfbe8538f6d0..b700089835933 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2207,6 +2207,12 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, goto out; } + if (pol->mode == MPOL_PREFERRED_MANY) { + page = alloc_page_preferred_many(gfp, order, pol); + mpol_cond_put(pol); + goto out; + } + if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { int hpage_node = node; @@ -2286,6 +2292,8 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) */ if (pol->mode == MPOL_INTERLEAVE) page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); + else if (pol->mode == MPOL_PREFERRED_MANY) + page = alloc_page_preferred_many(gfp, order, pol); else page = __alloc_pages(gfp, order, policy_node(gfp, pol, numa_node_id()), From 
e52978e14b3966dce6717a3321566efce0c80011 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 21 Jul 2021 11:42:33 +1000 Subject: [PATCH 399/851] mm/hugetlb: add support for mempolicy MPOL_PREFERRED_MANY Implement the missing huge page allocation functionality while obeying the preferred node semantics. This is similar to the implementation for general page allocation, as it uses a fallback mechanism to try multiple preferred nodes first, and then all other nodes. [Thanks to 0day bot for catching the missing #ifdef CONFIG_NUMA issue] Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@intel.com Link: https://lkml.kernel.org/r/1626077374-81682-5-git-send-email-feng.tang@intel.com Signed-off-by: Ben Widawsky Signed-off-by: Feng Tang Co-developed-by: Feng Tang Suggested-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dan Williams Cc: Dave Hansen Cc: David Rientjes Cc: Huang Ying Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 25 +++++++++++++++++++++++++ mm/mempolicy.c | 3 ++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 95714fb281503..c6ba4fcf9da99 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1166,7 +1166,18 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, gfp_mask = htlb_alloc_mask(h); nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask); +#ifdef CONFIG_NUMA + if (mpol->mode == MPOL_PREFERRED_MANY) { + page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + if (page) + goto check_reserve; + /* Fallback to all nodes */ + nodemask = NULL; + } +#endif page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + +check_reserve: if (page && !avoid_reserve && vma_has_reserves(vma, chg)) { SetHPageRestoreReserve(page); h->resv_huge_pages--; @@ -2147,6 +2158,20 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, nodemask_t 
*nodemask; nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask); +#ifdef CONFIG_NUMA + if (mpol->mode == MPOL_PREFERRED_MANY) { + gfp_t gfp = (gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM; + + page = alloc_surplus_huge_page(h, gfp, nid, nodemask); + if (page) { + mpol_cond_put(mpol); + return page; + } + + /* Fallback to all nodes */ + nodemask = NULL; + } +#endif page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false); mpol_cond_put(mpol); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b700089835933..96664a9a54c41 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2054,7 +2054,8 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, huge_page_shift(hstate_vma(vma))); } else { nid = policy_node(gfp_flags, *mpol, numa_node_id()); - if ((*mpol)->mode == MPOL_BIND) + if ((*mpol)->mode == MPOL_BIND || + (*mpol)->mode == MPOL_PREFERRED_MANY) *nodemask = &(*mpol)->nodes; } return nid; From e302fbaef4cea2dfc0fd0b5717ca4767da7ba1af Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 21 Jul 2021 11:42:33 +1000 Subject: [PATCH 400/851] mm-hugetlb-add-support-for-mempolicy-mpol_preferred_many-fix fix it for "hugetlb: drop ref count earlier after page allocation" Cc: Ben Widawsky Cc: Feng Tang Cc: Michal Hocko Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c6ba4fcf9da99..ae1a39e11bcf4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2162,7 +2162,7 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, if (mpol->mode == MPOL_PREFERRED_MANY) { gfp_t gfp = (gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM; - page = alloc_surplus_huge_page(h, gfp, nid, nodemask); + page = alloc_surplus_huge_page(h, gfp, nid, nodemask, false); if (page) { mpol_cond_put(mpol); return page; From 5f387c0f4f464dbdac4509cb274ca832e19ef1cc Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 21 
Jul 2021 11:42:34 +1000 Subject: [PATCH 401/851] mm/hugetlb: fix compile warning for !CONFIG_NUMA build Stephen Rothwell reported the i386 build with CONFIG_NUMA=n will have a warning: mm/hugetlb.c: In function 'dequeue_huge_page_vma': mm/hugetlb.c:1180:1: warning: label 'check_reserve' defined but not used [-Wunused-label] 1180 | check_reserve: | ^~~~~~~~~~~~~ introduced by commit df178183cf05 ("mm/hugetlb: add support for mempolicy MPOL_PREFERRED_MANY") Link: https://lkml.kernel.org/r/20210719084752.GA51285@shbuild999.sh.intel.com Signed-off-by: Feng Tang Reported-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ae1a39e11bcf4..528947da65c8f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1177,7 +1177,9 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, #endif page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); +#ifdef CONFIG_NUMA check_reserve: +#endif if (page && !avoid_reserve && vma_has_reserves(vma, chg)) { SetHPageRestoreReserve(page); h->resv_huge_pages--; From 27bdb233ea48448fdef998b5f7de33e566813b96 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 21 Jul 2021 11:42:35 +1000 Subject: [PATCH 402/851] mm/mempolicy: advertise new MPOL_PREFERRED_MANY Adds a new mode to the existing mempolicy modes, MPOL_PREFERRED_MANY. MPOL_PREFERRED_MANY will be adequately documented in the internal admin-guide with this patch. Eventually, the man pages for mbind(2), get_mempolicy(2), set_mempolicy(2) and numactl(8) will also have text about this mode. Those shall contain the canonical reference. NUMA systems continue to become more prevalent. New technologies like PMEM make finer grain control over memory access patterns increasingly desirable. MPOL_PREFERRED_MANY allows userspace to specify a set of nodes that will be tried first when performing allocations. 
If those allocations fail, all remaining nodes will be tried. It's a straight forward API which solves many of the presumptive needs of system administrators wanting to optimize workloads on such machines. The mode will work either per VMA, or per thread. Link: https://lore.kernel.org/r/20200630212517.308045-13-ben.widawsky@intel.com Link: https://lkml.kernel.org/r/1626077374-81682-6-git-send-email-feng.tang@intel.com Signed-off-by: Ben Widawsky Signed-off-by: Feng Tang Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dan Williams Cc: Dave Hansen Cc: David Rientjes Cc: Huang Ying Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- .../admin-guide/mm/numa_memory_policy.rst | 16 ++++++++++++---- mm/mempolicy.c | 7 +------ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst index 067a90a1499c4..cd653561e5318 100644 --- a/Documentation/admin-guide/mm/numa_memory_policy.rst +++ b/Documentation/admin-guide/mm/numa_memory_policy.rst @@ -245,6 +245,14 @@ MPOL_INTERLEAVED address range or file. During system boot up, the temporary interleaved system default policy works in this mode. +MPOL_PREFERRED_MANY + This mode specifies that the allocation should be attempted from the + nodemask specified in the policy. If that allocation fails, the kernel + will search other nodes, in order of increasing distance from the first + set bit in the nodemask based on information provided by the platform + firmware. It is similar to MPOL_PREFERRED with the main exception that + is an error to have an empty nodemask. + NUMA memory policy supports the following optional mode flags: MPOL_F_STATIC_NODES @@ -253,10 +261,10 @@ MPOL_F_STATIC_NODES nodes changes after the memory policy has been defined. 
Without this flag, any time a mempolicy is rebound because of a - change in the set of allowed nodes, the node (Preferred) or - nodemask (Bind, Interleave) is remapped to the new set of - allowed nodes. This may result in nodes being used that were - previously undesired. + change in the set of allowed nodes, the preferred nodemask (Preferred + Many), preferred node (Preferred) or nodemask (Bind, Interleave) is + remapped to the new set of allowed nodes. This may result in nodes + being used that were previously undesired. With this flag, if the user-specified nodes overlap with the nodes allowed by the task's cpuset, then the memory policy is diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 96664a9a54c41..cc6636bdbea28 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1463,12 +1463,7 @@ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags) *flags = *mode & MPOL_MODE_FLAGS; *mode &= ~MPOL_MODE_FLAGS; - /* - * The check should be 'mode >= MPOL_MAX', but as 'prefer_many' - * is not fully implemented, don't permit it to be used for now, - * and the logic will be restored in following patch - */ - if ((unsigned int)(*mode) >= MPOL_PREFERRED_MANY) + if ((unsigned int)(*mode) >= MPOL_MAX) return -EINVAL; if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES)) return -EINVAL; From 3d73d8bd0b241ec1ef88e0b5db5a6eff7c02458b Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 21 Jul 2021 11:42:36 +1000 Subject: [PATCH 403/851] mm/mempolicy: unify the create() func for bind/interleave/prefer-many policies As they all do the same thing: sanity check and save nodemask info, create one mpol_new_nodemask() to reduce redundancy. 
Link: https://lkml.kernel.org/r/1626077374-81682-7-git-send-email-feng.tang@intel.com Signed-off-by: Feng Tang Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Ben Widawsky Cc: Dan Williams Cc: Dave Hansen Cc: David Rientjes Cc: Huang Ying Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mike Kravetz Cc: Randy Dunlap Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mempolicy.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index cc6636bdbea28..a0535b73697f3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -192,7 +192,7 @@ static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig, nodes_onto(*ret, tmp, *rel); } -static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes) +static int mpol_new_nodemask(struct mempolicy *pol, const nodemask_t *nodes) { if (nodes_empty(*nodes)) return -EINVAL; @@ -210,22 +210,6 @@ static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) return 0; } -static int mpol_new_preferred_many(struct mempolicy *pol, const nodemask_t *nodes) -{ - if (nodes_empty(*nodes)) - return -EINVAL; - pol->nodes = *nodes; - return 0; -} - -static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) -{ - if (nodes_empty(*nodes)) - return -EINVAL; - pol->nodes = *nodes; - return 0; -} - /* * mpol_set_nodemask is called after mpol_new() to set up the nodemask, if * any, for the new policy. 
mpol_new() has already validated the nodes @@ -405,7 +389,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { .rebind = mpol_rebind_default, }, [MPOL_INTERLEAVE] = { - .create = mpol_new_interleave, + .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, [MPOL_PREFERRED] = { @@ -413,14 +397,14 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { .rebind = mpol_rebind_preferred, }, [MPOL_BIND] = { - .create = mpol_new_bind, + .create = mpol_new_nodemask, .rebind = mpol_rebind_nodemask, }, [MPOL_LOCAL] = { .rebind = mpol_rebind_default, }, [MPOL_PREFERRED_MANY] = { - .create = mpol_new_preferred_many, + .create = mpol_new_nodemask, .rebind = mpol_rebind_preferred, }, }; From 70b9b22c3f47dd2dbd76eedc310cbb316e8eb03b Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Wed, 21 Jul 2021 11:42:37 +1000 Subject: [PATCH 404/851] mm/mempolicy: convert from atomic_t to refcount_t on mempolicy->refcnt refcount_t type and corresponding API can protect refcounters from accidental underflow and overflow and further use-after-free situations. Link: https://lkml.kernel.org/r/1626683671-64407-1-git-send-email-xiyuyang19@fudan.edu.cn Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Acked-by: Ben Widawsky Cc: Feng Tang Cc: Mike Kravetz Cc: Muchun Song Cc: Yanfei Xu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mempolicy.h | 5 +++-- mm/mempolicy.c | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 4ca025e2a77ef..0117e1ec7b1e1 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -6,6 +6,7 @@ #ifndef _LINUX_MEMPOLICY_H #define _LINUX_MEMPOLICY_H 1 +#include #include #include #include @@ -43,7 +44,7 @@ struct mm_struct; * to 1, representing the caller of mpol_dup(). 
*/ struct mempolicy { - atomic_t refcnt; + refcount_t refcnt; unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ nodemask_t nodes; /* interleave/bind/perfer */ @@ -94,7 +95,7 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol) static inline void mpol_get(struct mempolicy *pol) { if (pol) - atomic_inc(&pol->refcnt); + refcount_inc(&pol->refcnt); } extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a0535b73697f3..5fe6c4586f78e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -293,7 +293,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!policy) return ERR_PTR(-ENOMEM); - atomic_set(&policy->refcnt, 1); + refcount_set(&policy->refcnt, 1); policy->mode = mode; policy->flags = flags; @@ -303,7 +303,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, /* Slow path of a mpol destructor. 
*/ void __mpol_put(struct mempolicy *p) { - if (!atomic_dec_and_test(&p->refcnt)) + if (!refcount_dec_and_test(&p->refcnt)) return; kmem_cache_free(policy_cache, p); } @@ -2324,7 +2324,7 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) nodemask_t mems = cpuset_mems_allowed(current); mpol_rebind_policy(new, &mems); } - atomic_set(&new->refcnt, 1); + refcount_set(&new->refcnt, 1); return new; } @@ -2619,7 +2619,7 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, goto alloc_new; *mpol_new = *n->policy; - atomic_set(&mpol_new->refcnt, 1); + refcount_set(&mpol_new->refcnt, 1); sp_node_init(n_new, end, n->end, mpol_new); n->end = start; sp_insert(sp, n_new); From 15e6fb2883d5df5b010cee253ecab0b21c4230ce Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 21 Jul 2021 11:42:37 +1000 Subject: [PATCH 405/851] mm-mempolicy-convert-from-atomic_t-to-refcount_t-on-mempolicy-refcnt-fix fix warnings mm/mempolicy.c:125:42: warning: missing braces around initializer [-Wmissing-braces] 125 | static struct mempolicy default_policy = { | ^ mm/mempolicy.c:125:42: warning: missing braces around initializer [-Wmissing-braces] mm/mempolicy.c: In function 'numa_policy_init': mm/mempolicy.c:2815:32: warning: missing braces around initializer [-Wmissing-braces] 2815 | preferred_node_policy[nid] = (struct mempolicy) { | ^ mm/mempolicy.c:2815:32: warning: missing braces around initializer [-Wmissing-braces] Cc: Xiyu Yang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mempolicy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 5fe6c4586f78e..8fb4b12eddde1 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -123,7 +123,7 @@ enum zone_type policy_zone = 0; * run-time system-wide default policy => local allocation */ static struct mempolicy default_policy = { - .refcnt = ATOMIC_INIT(1), /* never free it */ + .refcnt = { ATOMIC_INIT(1), }, /* never free it */ .mode = 
MPOL_LOCAL, }; @@ -2813,7 +2813,7 @@ void __init numa_policy_init(void) for_each_node(nid) { preferred_node_policy[nid] = (struct mempolicy) { - .refcnt = ATOMIC_INIT(1), + .refcnt = { ATOMIC_INIT(1), }, .mode = MPOL_PREFERRED, .flags = MPOL_F_MOF | MPOL_F_MORON, .nodes = nodemask_of_node(nid), From b09c391e71454271d48eb954314cd991401f6330 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 21 Jul 2021 11:42:38 +1000 Subject: [PATCH 406/851] oom_kill: oom_score_adj broken for processes with small memory usage If you have a process with less than 1000 totalpages, the calculation: adj = (long)p->signal->oom_score_adj; ... adj *= totalpages / 1000; will always result in adj being zero no matter what oom_score_adj is, which could result in the wrong process being picked for killing. Fix by adding 1000 to totalpages before dividing. I ran across this trying to diagnose another problem where I set up a cgroup with a small amount of memory and couldn't get a test program to work right. I'm not sure this is quite right; to keep closer to the current behavior you could do: if (totalpages >= 1000) adj *= totalpages / 1000; but that would map 0-1999 to the same value. But this at least shows the issue. I can provide a test program that shows the issue, but I think it's pretty obvious from the code. Link: https://lkml.kernel.org/r/20210701125430.836308-1-minyard@acm.org Signed-off-by: Corey Minyard Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/oom_kill.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c729a4c4a1ace..a15ddf7fc3b83 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -233,8 +233,11 @@ long oom_badness(struct task_struct *p, unsigned long totalpages) mm_pgtables_bytes(p->mm) / PAGE_SIZE; task_unlock(p); - /* Normalize to oom_score_adj units */ - adj *= totalpages / 1000; + /* + * Normalize to oom_score_adj units. 
You should never + * multiply by zero here, or oom_score_adj will not work. + */ + adj *= (totalpages + 1000) / 1000; points += adj; return points; From 35a685c6c002dfc85b5952df444de70e08c15e37 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 21 Jul 2021 11:42:39 +1000 Subject: [PATCH 407/851] mm/thp: make ALLOC_SPLIT_PTLOCKS dependent on USE_SPLIT_PTE_PTLOCKS Split ptlocks need not be defined and allocated unless they are being used. ALLOC_SPLIT_PTLOCKS is inherently dependent on USE_SPLIT_PTE_PTLOCKS. This just makes it explicit and clear. While here drop the spinlock_t element from the struct page when USE_SPLIT_PTE_PTLOCKS is not enabled. Link: https://lkml.kernel.org/r/1621409586-5555-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Randy Dunlap # build-tested Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mm_types.h | 2 ++ include/linux/mm_types_task.h | 5 +++++ mm/memory.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 52bbd2b7cb465..f37abb2d222e8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -159,10 +159,12 @@ struct page { struct mm_struct *pt_mm; /* x86 pgds only */ atomic_t pt_frag_refcount; /* powerpc */ }; +#if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS spinlock_t *ptl; #else spinlock_t ptl; +#endif #endif }; struct { /* ZONE_DEVICE pages */ diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index c1bc6731125cb..1b222f8039d10 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -22,7 +22,12 @@ #define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) #define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) + +#if USE_SPLIT_PTE_PTLOCKS #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) +#else 
+#define ALLOC_SPLIT_PTLOCKS 0 +#endif /* * The per task VMA cache array: diff --git a/mm/memory.c b/mm/memory.c index 747a01d495f2c..db86558791f1d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5432,7 +5432,7 @@ long copy_huge_page_from_user(struct page *dst_page, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ -#if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS +#if ALLOC_SPLIT_PTLOCKS static struct kmem_cache *page_ptl_cachep; From ca83b8287c5760f8854b6fcf98d6e10d146ab8bd Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Wed, 21 Jul 2021 11:42:40 +1000 Subject: [PATCH 408/851] selftests: vm: add KSM merge test Patch series "add KSM selftests". Introduce selftests to validate the functionality of KSM. The tests are run on private anonymous pages. Since some KSM tunables are modified, their starting values are saved and restored after testing. At the start, run is set to 2 to ensure that only test pages will be merged (we assume that no applications make madvise syscalls in the background). If KSM config not enabled, all tests will be skipped. This patch (of 4): Add check_ksm_merge() function to check the basic merging feature of KSM. First, some number of identical pages are allocated and the MADV_MERGEABLE advice is given to merge these pages. Then, pages_shared and pages_sharing values are compared with the expected numbers using assert_ksm_pages_count() function. The number of pages can be changed using -p option. 
Link: https://lkml.kernel.org/r/cover.1626252248.git.zhansayabagdaulet@gmail.com Link: https://lkml.kernel.org/r/90287685c13300972ea84de93d1f3f900373f9fe.1626252248.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Cc: Shuah Khan Cc: Tyler Hicks Cc: Pavel Tatashin Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/ksm_tests.c | 306 ++++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests.sh | 16 ++ 4 files changed, 324 insertions(+) create mode 100644 tools/testing/selftests/vm/ksm_tests.c diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index f0fd80ef17dfc..b02eac613fdda 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -27,3 +27,4 @@ hmm-tests memfd_secret local_config.* split_huge_page_test +ksm_tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 521243770f26d..e6f22a801b71a 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -45,6 +45,7 @@ TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += split_huge_page_test +TEST_GEN_FILES += ksm_tests ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c new file mode 100644 index 0000000000000..d74d5aa34b167 --- /dev/null +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#include "../kselftest.h" + +#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/" +#define KSM_FP(s) (KSM_SYSFS_PATH s) +#define KSM_SCAN_LIMIT_SEC_DEFAULT 120 +#define KSM_PAGE_COUNT_DEFAULT 10l +#define 
KSM_PROT_STR_DEFAULT "rw" + +struct ksm_sysfs { + unsigned long max_page_sharing; + unsigned long merge_across_nodes; + unsigned long pages_to_scan; + unsigned long run; + unsigned long sleep_millisecs; + unsigned long stable_node_chains_prune_millisecs; + unsigned long use_zero_pages; +}; + +static int ksm_write_sysfs(const char *file_path, unsigned long val) +{ + FILE *f = fopen(file_path, "w"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fprintf(f, "%lu", val) < 0) { + perror("fprintf"); + return 1; + } + fclose(f); + + return 0; +} + +static int ksm_read_sysfs(const char *file_path, unsigned long *val) +{ + FILE *f = fopen(file_path, "r"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fscanf(f, "%lu", val) != 1) { + perror("fscanf"); + return 1; + } + fclose(f); + + return 0; +} + +static int str_to_prot(char *prot_str) +{ + int prot = 0; + + if ((strchr(prot_str, 'r')) != NULL) + prot |= PROT_READ; + if ((strchr(prot_str, 'w')) != NULL) + prot |= PROT_WRITE; + if ((strchr(prot_str, 'x')) != NULL) + prot |= PROT_EXEC; + + return prot; +} + +static void print_help(void) +{ + printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n"); + printf(" -a: specify the access protections of pages.\n" + " must be of the form [rwx].\n" + " Default: %s\n", KSM_PROT_STR_DEFAULT); + printf(" -p: specify the number of pages to test.\n" + " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT); + printf(" -l: limit the maximum running time (in seconds) for a test.\n" + " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT); + + exit(0); +} + +static void *allocate_memory(void *ptr, int prot, int mapping, char data, size_t map_size) +{ + void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0); + + if (!map_ptr) { + perror("mmap"); + return NULL; + } + memset(map_ptr, data, map_size); + if (mprotect(map_ptr, map_size, prot)) { + perror("mprotect"); + munmap(map_ptr, 
map_size); + return NULL; + } + + return map_ptr; +} + +static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout) +{ + struct timespec cur_time; + unsigned long cur_scan, init_scan; + + if (ksm_read_sysfs(KSM_FP("full_scans"), &init_scan)) + return 1; + cur_scan = init_scan; + + while (cur_scan < init_scan + scan_count) { + if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan)) + return 1; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) { + perror("clock_gettime"); + return 1; + } + if ((cur_time.tv_sec - start_time.tv_sec) > timeout) { + printf("Scan time limit exceeded\n"); + return 1; + } + } + + return 0; +} + +static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, int timeout) +{ + if (madvise(addr, size, MADV_MERGEABLE)) { + perror("madvise"); + return 1; + } + if (ksm_write_sysfs(KSM_FP("run"), 1)) + return 1; + + /* Since merging occurs only after 2 scans, make sure to get at least 2 full scans */ + if (ksm_do_scan(2, start_time, timeout)) + return 1; + + return 0; +} + +static bool assert_ksm_pages_count(long dupl_page_count) +{ + unsigned long max_page_sharing, pages_sharing, pages_shared; + + if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) || + ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) || + ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing)) + return false; + + /* + * Since there must be at least 2 pages for merging and 1 page can be + * shared with the limited number of pages (max_page_sharing), sometimes + * there are 'leftover' pages that cannot be merged. For example, if there + * are 11 pages and max_page_sharing = 10, then only 10 pages will be + * merged and the 11th page won't be affected. As a result, when the number + * of duplicate pages is divided by max_page_sharing and the remainder is 1, + * pages_shared and pages_sharing values will be equal between dupl_page_count + * and dupl_page_count - 1. 
+ */ + if (dupl_page_count % max_page_sharing == 1 || dupl_page_count % max_page_sharing == 0) { + if (pages_shared == dupl_page_count / max_page_sharing && + pages_sharing == pages_shared * (max_page_sharing - 1)) + return true; + } else { + if (pages_shared == (dupl_page_count / max_page_sharing + 1) && + pages_sharing == dupl_page_count - pages_shared) + return true; + } + + return false; +} + +static int ksm_save_def(struct ksm_sysfs *ksm_sysfs) +{ + if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) || + ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) || + ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) || + ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) || + ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) || + ksm_read_sysfs(KSM_FP("stable_node_chains_prune_millisecs"), + &ksm_sysfs->stable_node_chains_prune_millisecs) || + ksm_read_sysfs(KSM_FP("use_zero_pages"), &ksm_sysfs->use_zero_pages)) + return 1; + + return 0; +} + +static int ksm_restore(struct ksm_sysfs *ksm_sysfs) +{ + if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) || + ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) || + ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) || + ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) || + ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) || + ksm_write_sysfs(KSM_FP("stable_node_chains_prune_millisecs"), + ksm_sysfs->stable_node_chains_prune_millisecs) || + ksm_write_sysfs(KSM_FP("use_zero_pages"), ksm_sysfs->use_zero_pages)) + return 1; + + return 0; +} + +static int check_ksm_merge(int mapping, int prot, long page_count, int timeout, size_t page_size) +{ + void *map_ptr; + struct timespec start_time; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + /* fill pages with the same data and merge them */ + 
map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count); + if (!map_ptr) + return KSFT_FAIL; + + if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout)) + goto err_out; + + /* verify that the right number of pages are merged */ + if (assert_ksm_pages_count(page_count)) { + printf("OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_PASS; + } + +err_out: + printf("Not OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_FAIL; +} + +int main(int argc, char *argv[]) +{ + int ret, opt; + int prot = 0; + int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT; + long page_count = KSM_PAGE_COUNT_DEFAULT; + size_t page_size = sysconf(_SC_PAGESIZE); + struct ksm_sysfs ksm_sysfs_old; + + while ((opt = getopt(argc, argv, "ha:p:l:")) != -1) { + switch (opt) { + case 'a': + prot = str_to_prot(optarg); + break; + case 'p': + page_count = atol(optarg); + if (page_count <= 0) { + printf("The number of pages must be greater than 0\n"); + return KSFT_FAIL; + } + break; + case 'l': + ksm_scan_limit_sec = atoi(optarg); + if (ksm_scan_limit_sec <= 0) { + printf("Timeout value must be greater than 0\n"); + return KSFT_FAIL; + } + break; + case 'h': + print_help(); + break; + default: + return KSFT_FAIL; + } + } + + if (prot == 0) + prot = str_to_prot(KSM_PROT_STR_DEFAULT); + + if (access(KSM_SYSFS_PATH, F_OK)) { + printf("Config KSM not enabled\n"); + return KSFT_SKIP; + } + + if (ksm_save_def(&ksm_sysfs_old)) { + printf("Cannot save default tunables\n"); + return KSFT_FAIL; + } + + if (ksm_write_sysfs(KSM_FP("run"), 2) || + ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) || + ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) || + ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count)) + return KSFT_FAIL; + + ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, ksm_scan_limit_sec, + page_size); + + if (ksm_restore(&ksm_sysfs_old)) { + printf("Cannot restore default tunables\n"); + return KSFT_FAIL; + } + + return 
ret; +} diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index d09a6b71f1e9d..97b6f712134d7 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -377,6 +377,22 @@ else exitcode=1 fi +echo "-------------------------------------------------------" +echo "running KSM MADV_MERGEABLE test with 10 identical pages" +echo "-------------------------------------------------------" +./ksm_tests -p 10 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode exit $exitcode From 91c380466110d0bf9ebf835d113e665f394aeb35 Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Wed, 21 Jul 2021 11:42:41 +1000 Subject: [PATCH 409/851] selftests: vm: add KSM unmerge test Add check_ksm_unmerge() function to verify that KSM is properly unmerging shared pages. For this, two duplicate pages are merged first and then their contents are modified. Since they are not identical anymore, the pages must be unmerged and the number of merged pages has to be 0. 
The test is run as follows: ./ksm_tests -U Link: https://lkml.kernel.org/r/c0f55420440d704d5b094275b4365aa1b2ad46b5.1626252248.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Cc: Hugh Dickins Cc: Pavel Tatashin Cc: Shuah Khan Cc: Tyler Hicks Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/ksm_tests.c | 72 +++++++++++++++++++++-- tools/testing/selftests/vm/run_vmtests.sh | 18 +++++- 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index d74d5aa34b167..80302bb8f64cb 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -23,6 +23,11 @@ struct ksm_sysfs { unsigned long use_zero_pages; }; +enum ksm_test_name { + CHECK_KSM_MERGE, + CHECK_KSM_UNMERGE +}; + static int ksm_write_sysfs(const char *file_path, unsigned long val) { FILE *f = fopen(file_path, "w"); @@ -75,7 +80,12 @@ static int str_to_prot(char *prot_str) static void print_help(void) { - printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n"); + printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n"); + + printf("Supported :\n" + " -M (page merging)\n" + " -U (page unmerging)\n\n"); + printf(" -a: specify the access protections of pages.\n" " must be of the form [rwx].\n" " Default: %s\n", KSM_PROT_STR_DEFAULT); @@ -239,6 +249,46 @@ static int check_ksm_merge(int mapping, int prot, long page_count, int timeout, return KSFT_FAIL; } +static int check_ksm_unmerge(int mapping, int prot, int timeout, size_t page_size) +{ + void *map_ptr; + struct timespec start_time; + int page_count = 2; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + /* fill pages with the same data and merge them */ + map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count); + if (!map_ptr) + return KSFT_FAIL; + + if 
(ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout)) + goto err_out; + + /* change 1 byte in each of the 2 pages -- KSM must automatically unmerge them */ + memset(map_ptr, '-', 1); + memset(map_ptr + page_size, '+', 1); + + /* get at least 1 scan, so KSM can detect that the pages were modified */ + if (ksm_do_scan(1, start_time, timeout)) + goto err_out; + + /* check that unmerging was successful and 0 pages are currently merged */ + if (assert_ksm_pages_count(0)) { + printf("OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_PASS; + } + +err_out: + printf("Not OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_FAIL; +} + int main(int argc, char *argv[]) { int ret, opt; @@ -247,8 +297,9 @@ int main(int argc, char *argv[]) long page_count = KSM_PAGE_COUNT_DEFAULT; size_t page_size = sysconf(_SC_PAGESIZE); struct ksm_sysfs ksm_sysfs_old; + int test_name = CHECK_KSM_MERGE; - while ((opt = getopt(argc, argv, "ha:p:l:")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:MU")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -270,6 +321,11 @@ int main(int argc, char *argv[]) case 'h': print_help(); break; + case 'M': + break; + case 'U': + test_name = CHECK_KSM_UNMERGE; + break; default: return KSFT_FAIL; } @@ -294,8 +350,16 @@ int main(int argc, char *argv[]) ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count)) return KSFT_FAIL; - ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, ksm_scan_limit_sec, - page_size); + switch (test_name) { + case CHECK_KSM_MERGE: + ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, + ksm_scan_limit_sec, page_size); + break; + case CHECK_KSM_UNMERGE: + ret = check_ksm_unmerge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, + page_size); + break; + } if (ksm_restore(&ksm_sysfs_old)) { printf("Cannot restore default tunables\n"); diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 
97b6f712134d7..3a23c6b47da28 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -380,7 +380,23 @@ fi echo "-------------------------------------------------------" echo "running KSM MADV_MERGEABLE test with 10 identical pages" echo "-------------------------------------------------------" -./ksm_tests -p 10 +./ksm_tests -M -p 10 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +echo "------------------------" +echo "running KSM unmerge test" +echo "------------------------" +./ksm_tests -U ret_val=$? if [ $ret_val -eq 0 ]; then From 9ea881ab087f04c7fc92936d93be748780407d10 Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Wed, 21 Jul 2021 11:42:42 +1000 Subject: [PATCH 410/851] selftests: vm: add KSM zero page merging test Add check_ksm_zero_page_merge() function to test that empty pages are being handled properly. For this, several zero pages are allocated and merged using madvise. If use_zero_pages is enabled, the pages must be shared with the special kernel zero pages; otherwise, they are merged as usual duplicate pages. 
The test is run as follows: ./ksm_tests -Z Link: https://lkml.kernel.org/r/6d0caab00d4bdccf5e3791cb95cf6dfd5eb85e45.1626252248.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Cc: Hugh Dickins Cc: Pavel Tatashin Cc: Shuah Khan Cc: Tyler Hicks Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/ksm_tests.c | 70 ++++++++++++++++++++++- tools/testing/selftests/vm/run_vmtests.sh | 32 +++++++++++ 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 80302bb8f64cb..5843526471e13 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -12,6 +12,7 @@ #define KSM_SCAN_LIMIT_SEC_DEFAULT 120 #define KSM_PAGE_COUNT_DEFAULT 10l #define KSM_PROT_STR_DEFAULT "rw" +#define KSM_USE_ZERO_PAGES_DEFAULT false struct ksm_sysfs { unsigned long max_page_sharing; @@ -25,7 +26,8 @@ struct ksm_sysfs { enum ksm_test_name { CHECK_KSM_MERGE, - CHECK_KSM_UNMERGE + CHECK_KSM_UNMERGE, + CHECK_KSM_ZERO_PAGE_MERGE }; static int ksm_write_sysfs(const char *file_path, unsigned long val) @@ -80,10 +82,12 @@ static int str_to_prot(char *prot_str) static void print_help(void) { - printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n"); + printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n" + "[-z use_zero_pages]\n"); printf("Supported :\n" " -M (page merging)\n" + " -Z (zero pages merging)\n" " -U (page unmerging)\n\n"); printf(" -a: specify the access protections of pages.\n" @@ -93,6 +97,8 @@ static void print_help(void) " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT); printf(" -l: limit the maximum running time (in seconds) for a test.\n" " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT); + printf(" -z: change use_zero_pages tunable\n" + " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT); exit(0); } @@ -289,6 +295,50 @@ static int check_ksm_unmerge(int mapping, int prot, int 
timeout, size_t page_siz return KSFT_FAIL; } +static int check_ksm_zero_page_merge(int mapping, int prot, long page_count, int timeout, + bool use_zero_pages, size_t page_size) +{ + void *map_ptr; + struct timespec start_time; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + if (ksm_write_sysfs(KSM_FP("use_zero_pages"), use_zero_pages)) + return KSFT_FAIL; + + /* fill pages with zero and try to merge them */ + map_ptr = allocate_memory(NULL, prot, mapping, 0, page_size * page_count); + if (!map_ptr) + return KSFT_FAIL; + + if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout)) + goto err_out; + + /* + * verify that the right number of pages are merged: + * 1) if use_zero_pages is set to 1, empty pages are merged + * with the kernel zero page instead of with each other; + * 2) if use_zero_pages is set to 0, empty pages are not treated specially + * and merged as usual. + */ + if (use_zero_pages && !assert_ksm_pages_count(0)) + goto err_out; + else if (!use_zero_pages && !assert_ksm_pages_count(page_count)) + goto err_out; + + printf("OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_FAIL; +} + int main(int argc, char *argv[]) { int ret, opt; @@ -298,8 +348,9 @@ int main(int argc, char *argv[]) size_t page_size = sysconf(_SC_PAGESIZE); struct ksm_sysfs ksm_sysfs_old; int test_name = CHECK_KSM_MERGE; + bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT; - while ((opt = getopt(argc, argv, "ha:p:l:MU")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:MUZ")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -321,11 +372,20 @@ int main(int argc, char *argv[]) case 'h': print_help(); break; + case 'z': + if (strcmp(optarg, "0") == 0) + use_zero_pages = 0; + else + use_zero_pages = 1; + break; case 'M': break; case 'U': test_name = CHECK_KSM_UNMERGE; break; + 
case 'Z': + test_name = CHECK_KSM_ZERO_PAGE_MERGE; + break; default: return KSFT_FAIL; } @@ -359,6 +419,10 @@ int main(int argc, char *argv[]) ret = check_ksm_unmerge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, page_size); break; + case CHECK_KSM_ZERO_PAGE_MERGE: + ret = check_ksm_zero_page_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, + ksm_scan_limit_sec, use_zero_pages, page_size); + break; } if (ksm_restore(&ksm_sysfs_old)) { diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 3a23c6b47da28..9b4e444fc4ed0 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -409,6 +409,38 @@ else exitcode=1 fi +echo "----------------------------------------------------------" +echo "running KSM test with 10 zero pages and use_zero_pages = 0" +echo "----------------------------------------------------------" +./ksm_tests -Z -p 10 -z 0 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +echo "----------------------------------------------------------" +echo "running KSM test with 10 zero pages and use_zero_pages = 1" +echo "----------------------------------------------------------" +./ksm_tests -Z -p 10 -z 1 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode exit $exitcode From 398322d689a196e05322fac5940d172760d96652 Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Wed, 21 Jul 2021 11:42:42 +1000 Subject: [PATCH 411/851] selftests: vm: add KSM merging across nodes test Add check_ksm_numa_merge() function to test that pages in different NUMA nodes are being handled properly. First, two duplicate pages are allocated in two separate NUMA nodes using the libnuma library. 
Since there is one unique page in each node, with merge_across_nodes = 0, there won't be any shared pages. If merge_across_nodes is set to 1, the pages will be treated as usual duplicate pages and will be merged. If NUMA config is not enabled or the number of NUMA nodes is less than two, then the test is skipped. The test is run as follows: ./ksm_tests -N Link: https://lkml.kernel.org/r/071c17b5b04ebb0dfeba137acc495e5dd9d2a719.1626252248.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Cc: Hugh Dickins Cc: Pavel Tatashin Cc: Shuah Khan Cc: Tyler Hicks Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/Makefile | 2 + tools/testing/selftests/vm/ksm_tests.c | 88 ++++++++++++++++++++++- tools/testing/selftests/vm/run_vmtests.sh | 32 +++++++++ 3 files changed, 119 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e6f22a801b71a..d9605bd10f2de 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -146,6 +146,8 @@ $(OUTPUT)/hmm-tests: local_config.h # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. 
$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) +$(OUTPUT)/ksm_tests: LDLIBS += -lnuma + local_config.mk local_config.h: check_config.sh /bin/sh ./check_config.sh $(CC) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 5843526471e13..cdeb4a028538d 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "../kselftest.h" @@ -13,6 +14,7 @@ #define KSM_PAGE_COUNT_DEFAULT 10l #define KSM_PROT_STR_DEFAULT "rw" #define KSM_USE_ZERO_PAGES_DEFAULT false +#define KSM_MERGE_ACROSS_NODES_DEFAULT true struct ksm_sysfs { unsigned long max_page_sharing; @@ -27,7 +29,8 @@ struct ksm_sysfs { enum ksm_test_name { CHECK_KSM_MERGE, CHECK_KSM_UNMERGE, - CHECK_KSM_ZERO_PAGE_MERGE + CHECK_KSM_ZERO_PAGE_MERGE, + CHECK_KSM_NUMA_MERGE }; static int ksm_write_sysfs(const char *file_path, unsigned long val) @@ -83,11 +86,12 @@ static int str_to_prot(char *prot_str) static void print_help(void) { printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n" - "[-z use_zero_pages]\n"); + "[-z use_zero_pages] [-m merge_across_nodes]\n"); printf("Supported :\n" " -M (page merging)\n" " -Z (zero pages merging)\n" + " -N (merging of pages in different NUMA nodes)\n" " -U (page unmerging)\n\n"); printf(" -a: specify the access protections of pages.\n" @@ -99,6 +103,8 @@ static void print_help(void) " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT); printf(" -z: change use_zero_pages tunable\n" " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT); + printf(" -m: change merge_across_nodes tunable\n" + " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT); exit(0); } @@ -339,6 +345,68 @@ static int check_ksm_zero_page_merge(int mapping, int prot, long page_count, int return KSFT_FAIL; } +static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_across_nodes, + size_t page_size) +{ + void *numa1_map_ptr, *numa2_map_ptr; + struct 
timespec start_time; + int page_count = 2; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + if (numa_available() < 0) { + perror("NUMA support not enabled"); + return KSFT_SKIP; + } + if (numa_max_node() < 1) { + printf("At least 2 NUMA nodes must be available\n"); + return KSFT_SKIP; + } + if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes)) + return KSFT_FAIL; + + /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */ + numa1_map_ptr = numa_alloc_onnode(page_size, 0); + numa2_map_ptr = numa_alloc_onnode(page_size, 1); + if (!numa1_map_ptr || !numa2_map_ptr) { + perror("numa_alloc_onnode"); + return KSFT_FAIL; + } + + memset(numa1_map_ptr, '*', page_size); + memset(numa2_map_ptr, '*', page_size); + + /* try to merge the pages */ + if (ksm_merge_pages(numa1_map_ptr, page_size, start_time, timeout) || + ksm_merge_pages(numa2_map_ptr, page_size, start_time, timeout)) + goto err_out; + + /* + * verify that the right number of pages are merged: + * 1) if merge_across_nodes was enabled, 2 duplicate pages will be merged; + * 2) if merge_across_nodes = 0, there must be 0 merged pages, since there is + * only 1 unique page in each node and they can't be shared. 
+ */ + if (merge_across_nodes && !assert_ksm_pages_count(page_count)) + goto err_out; + else if (!merge_across_nodes && !assert_ksm_pages_count(0)) + goto err_out; + + numa_free(numa1_map_ptr, page_size); + numa_free(numa2_map_ptr, page_size); + printf("OK\n"); + return KSFT_PASS; + +err_out: + numa_free(numa1_map_ptr, page_size); + numa_free(numa2_map_ptr, page_size); + printf("Not OK\n"); + return KSFT_FAIL; +} + int main(int argc, char *argv[]) { int ret, opt; @@ -349,8 +417,9 @@ int main(int argc, char *argv[]) struct ksm_sysfs ksm_sysfs_old; int test_name = CHECK_KSM_MERGE; bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT; + bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; - while ((opt = getopt(argc, argv, "ha:p:l:z:MUZ")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:MUZN")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -378,6 +447,12 @@ int main(int argc, char *argv[]) else use_zero_pages = 1; break; + case 'm': + if (strcmp(optarg, "0") == 0) + merge_across_nodes = 0; + else + merge_across_nodes = 1; + break; case 'M': break; case 'U': @@ -386,6 +461,9 @@ int main(int argc, char *argv[]) case 'Z': test_name = CHECK_KSM_ZERO_PAGE_MERGE; break; + case 'N': + test_name = CHECK_KSM_NUMA_MERGE; + break; default: return KSFT_FAIL; } @@ -423,6 +501,10 @@ int main(int argc, char *argv[]) ret = check_ksm_zero_page_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, ksm_scan_limit_sec, use_zero_pages, page_size); break; + case CHECK_KSM_NUMA_MERGE: + ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, + merge_across_nodes, page_size); + break; } if (ksm_restore(&ksm_sysfs_old)) { diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 9b4e444fc4ed0..45e803af7c775 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -441,6 +441,38 @@ else exitcode=1 fi +echo 
"-------------------------------------------------------------" +echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1" +echo "-------------------------------------------------------------" +./ksm_tests -N -m 1 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +echo "-------------------------------------------------------------" +echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0" +echo "-------------------------------------------------------------" +./ksm_tests -N -m 0 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode exit $exitcode From 5cba6772fb76db64d4f43d33ca1627b5acf82daf Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:43 +1000 Subject: [PATCH 412/851] mm/vmstat: correct some wrong comments Patch series "Cleanup for vmstat". This series contains cleanups to remove unneeded return value, correct wrong comment and simplify the array size calculation. More details can be found in the respective changelogs. This patch (of 3): Correct wrong fls(mem+1) to fls(mem)+1 and remove the duplicated comment with quiet_vmstat(). 
Link: https://lkml.kernel.org/r/20210715122911.15700-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20210715122911.15700-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmstat.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index ec5a2e789dd2e..5bd621eadc482 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -204,7 +204,7 @@ int calculate_normal_threshold(struct zone *zone) * * Some sample thresholds: * - * Threshold Processors (fls) Zonesize fls(mem+1) + * Threshold Processors (fls) Zonesize fls(mem)+1 * ------------------------------------------------------------------ * 8 1 1 0.9-1 GB 4 * 16 2 2 0.9-1 GB 4 @@ -1875,11 +1875,6 @@ static void vmstat_update(struct work_struct *w) } } -/* - * Switch off vmstat processing and then fold all the remaining differentials - * until the diffs stay at zero. The function is used by NOHZ and can only be - * invoked when tick processing is not active. - */ /* * Check if the diffs for a certain cpu indicate that * an update is needed. From ae3573957c99cb123b73a5405b2e1fa1c086a2db Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:44 +1000 Subject: [PATCH 413/851] mm/vmstat: simplify the array size calculation We can replace the array_num * sizeof(array[0]) with sizeof(array) to simplify the code. Link: https://lkml.kernel.org/r/20210715122911.15700-3-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmstat.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 5bd621eadc482..c4634dc83916b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1891,17 +1891,15 @@ static bool need_update(int cpu) /* * The fast way of checking if there are any vmstat diffs. 
*/ - if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * - sizeof(pzstats->vm_stat_diff[0]))) + if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff))) return true; if (last_pgdat == zone->zone_pgdat) continue; last_pgdat = zone->zone_pgdat; n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu); - if (memchr_inv(n->vm_node_stat_diff, 0, NR_VM_NODE_STAT_ITEMS * - sizeof(n->vm_node_stat_diff[0]))) - return true; + if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff))) + return true; } return false; } From be8061271ca0b6466dff45fe8c6c76828be7f0ce Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:45 +1000 Subject: [PATCH 414/851] mm/vmstat: remove unneeded return value The return value of pagetypeinfo_showfree and pagetypeinfo_showblockcount are unused now. Remove them. Link: https://lkml.kernel.org/r/20210715122911.15700-4-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmstat.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index c4634dc83916b..13ff25d0d96a4 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1454,7 +1454,7 @@ static void pagetypeinfo_showfree_print(struct seq_file *m, } /* Print out the free pages at each order for each migatetype */ -static int pagetypeinfo_showfree(struct seq_file *m, void *arg) +static void pagetypeinfo_showfree(struct seq_file *m, void *arg) { int order; pg_data_t *pgdat = (pg_data_t *)arg; @@ -1466,8 +1466,6 @@ static int pagetypeinfo_showfree(struct seq_file *m, void *arg) seq_putc(m, '\n'); walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print); - - return 0; } static void pagetypeinfo_showblockcount_print(struct seq_file *m, @@ -1503,7 +1501,7 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m, } /* Print out the number of pageblocks for each migratetype */ -static int pagetypeinfo_showblockcount(struct 
seq_file *m, void *arg) +static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg) { int mtype; pg_data_t *pgdat = (pg_data_t *)arg; @@ -1514,8 +1512,6 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg) seq_putc(m, '\n'); walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showblockcount_print); - - return 0; } /* From c8542b499720d4cf26c573e7d07b964713563f05 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 21 Jul 2021 11:42:46 +1000 Subject: [PATCH 415/851] memory-hotplug.rst: remove locking details from admin-guide Patch series "memory-hotplug.rst: complete admin-guide overhaul", v3. This patch (of 2): We have the same content at Documentation/core-api/memory-hotplug.rst and it doesn't fit into the admin-guide. The documentation was accidentally duplicated when merging. Link: https://lkml.kernel.org/r/20210707073205.3835-1-david@redhat.com Link: https://lkml.kernel.org/r/20210707073205.3835-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Cc: Mike Kravetz Cc: Dave Hansen Cc: Matthew Wilcox Cc: Anshuman Khandual Cc: Muchun Song Cc: Pavel Tatashin Cc: Jonathan Corbet Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- .../admin-guide/mm/memory-hotplug.rst | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index c6bae2d771609..a783cf7c8e4c2 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -415,45 +415,6 @@ Need more implementation yet.... - Guard from remove if not yet. -Locking Internals -================= - -When adding/removing memory that uses memory block devices (i.e. ordinary RAM), -the device_hotplug_lock should be held to: - -- synchronize against online/offline requests (e.g. via sysfs).
This way, memory - block devices can only be accessed (.online/.state attributes) by user - space once memory has been fully added. And when removing memory, we - know nobody is in critical sections. -- synchronize against CPU hotplug and similar (e.g. relevant for ACPI and PPC) - -Especially, there is a possible lock inversion that is avoided using -device_hotplug_lock when adding memory and user space tries to online that -memory faster than expected: - -- device_online() will first take the device_lock(), followed by - mem_hotplug_lock -- add_memory_resource() will first take the mem_hotplug_lock, followed by - the device_lock() (while creating the devices, during bus_add_device()). - -As the device is visible to user space before taking the device_lock(), this -can result in a lock inversion. - -onlining/offlining of memory should be done via device_online()/ -device_offline() - to make sure it is properly synchronized to actions -via sysfs. Holding device_hotplug_lock is advised (to e.g. protect online_type) - -When adding/removing/onlining/offlining memory or adding/removing -heterogeneous/device memory, we should always hold the mem_hotplug_lock in -write mode to serialise memory hotplug (e.g. access to global/zone -variables). - -In addition, mem_hotplug_lock (in contrast to device_hotplug_lock) in read -mode allows for a quite efficient get_online_mems/put_online_mems -implementation, so code accessing memory can protect from that memory -vanishing. - - Future Work =========== From 99e3683cf70997fcac473370d39c1a99ce0f090c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 21 Jul 2021 11:42:47 +1000 Subject: [PATCH 416/851] memory-hotplug.rst: complete admin-guide overhaul The memory hot(un)plug documentation is outdated and incomplete. Most of the content dates back to 2007, so it's time for a major overhaul. Let's rewrite, reorganize and update most parts of the documentation. 
In addition to memory hot(un)plug, also add some details regarding ZONE_MOVABLE, with memory hotunplug being one of its main consumers. Drop the file history, that information can more reliably be had from the git log. The style of the document is also properly fixed that e.g., "restview" renders it cleanly now. In the future, we might add some more details about virt users like virtio-mem, the XEN balloon, the Hyper-V balloon and ppc64 dlpar. Link: https://lkml.kernel.org/r/20210707073205.3835-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Reviewed-by: Mike Rapoport Reviewed-by: Oscar Salvador Cc: Mike Kravetz Cc: Dave Hansen Cc: Matthew Wilcox Cc: Anshuman Khandual Cc: Muchun Song Cc: Pavel Tatashin Cc: Jonathan Corbet Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- .../admin-guide/mm/memory-hotplug.rst | 761 +++++++++++------- 1 file changed, 455 insertions(+), 306 deletions(-) diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index a783cf7c8e4c2..03dfbc9252529 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -1,427 +1,576 @@ .. _admin_guide_memory_hotplug: -============== -Memory Hotplug -============== +================== +Memory Hot(Un)Plug +================== -:Created: Jul 28 2007 -:Updated: Add some details about locking internals: Aug 20 2018 - -This document is about memory hotplug including how-to-use and current status. -Because Memory Hotplug is still under development, contents of this text will -be changed often. +This document describes generic Linux support for memory hot(un)plug with +a focus on System RAM, including ZONE_MOVABLE support. .. contents:: :local: -.. note:: +Introduction +============ - (1) x86_64's has special implementation for memory hotplug. - This text does not describe it. - (2) This text assumes that sysfs is mounted at ``/sys``. 
+Memory hot(un)plug allows for increasing and decreasing the size of physical +memory available to a machine at runtime. In the simplest case, it consists of +physically plugging or unplugging a DIMM at runtime, coordinated with the +operating system. +Memory hot(un)plug is used for various purposes: -Introduction -============ +- The physical memory available to a machine can be adjusted at runtime, up- or + downgrading the memory capacity. This dynamic memory resizing, sometimes + referred to as "capacity on demand", is frequently used with virtual machines + and logical partitions. + +- Replacing hardware, such as DIMMs or whole NUMA nodes, without downtime. One + example is replacing failing memory modules. -Purpose of memory hotplug -------------------------- +- Reducing energy consumption either by physically unplugging memory modules or + by logically unplugging (parts of) memory modules from Linux. -Memory Hotplug allows users to increase/decrease the amount of memory. -Generally, there are two purposes. +Further, the basic memory hot(un)plug infrastructure in Linux is nowadays also +used to expose persistent memory, other performance-differentiated memory and +reserved memory regions as ordinary system RAM to Linux. -(A) For changing the amount of memory. - This is to allow a feature like capacity on demand. -(B) For installing/removing DIMMs or NUMA-nodes physically. - This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc. +Linux only supports memory hot(un)plug on selected 64 bit architectures, such as +x86_64, arm64, ppc64, s390x and ia64. -(A) is required by highly virtualized environments and (B) is required by -hardware which supports memory power management. +Memory Hot(Un)Plug Granularity +------------------------------ -Linux memory hotplug is designed for both purpose. +Memory hot(un)plug in Linux uses the SPARSEMEM memory model, which divides the +physical memory address space into chunks of the same size: memory sections. 
The +size of a memory section is architecture dependent. For example, x86_64 uses +128 MiB and ppc64 uses 16 MiB. -Phases of memory hotplug +Memory sections are combined into chunks referred to as "memory blocks". The +size of a memory block is architecture dependent and corresponds to the smallest +granularity that can be hot(un)plugged. The default size of a memory block is +the same as memory section size, unless an architecture specifies otherwise. + +All memory blocks have the same size. + +Phases of Memory Hotplug ------------------------ -There are 2 phases in Memory Hotplug: +Memory hotplug consists of two phases: - 1) Physical Memory Hotplug phase - 2) Logical Memory Hotplug phase. +(1) Adding the memory to Linux +(2) Onlining memory blocks -The First phase is to communicate hardware/firmware and make/erase -environment for hotplugged memory. Basically, this phase is necessary -for the purpose (B), but this is good phase for communication between -highly virtualized environments too. +In the first phase, metadata, such as the memory map ("memmap") and page tables +for the direct mapping, is allocated and initialized, and memory blocks are +created; the latter also creates sysfs files for managing newly created memory +blocks. -When memory is hotplugged, the kernel recognizes new memory, makes new memory -management tables, and makes sysfs files for new memory's operation. +In the second phase, added memory is exposed to the page allocator. After this +phase, the memory is visible in memory statistics, such as free and total +memory, of the system. -If firmware supports notification of connection of new memory to OS, -this phase is triggered automatically. ACPI can notify this event. If not, -"probe" operation by system administration is used instead. -(see :ref:`memory_hotplug_physical_mem`). +Phases of Memory Hotunplug +-------------------------- -Logical Memory Hotplug phase is to change memory state into -available/unavailable for users. 
Amount of memory from user's view is -changed by this phase. The kernel makes all memory in it as free pages -when a memory range is available. +Memory hotunplug consists of two phases: -In this document, this phase is described as online/offline. +(1) Offlining memory blocks +(2) Removing the memory from Linux -Logical Memory Hotplug phase is triggered by write of sysfs file by system -administrator. For the hot-add case, it must be executed after Physical Hotplug -phase by hand. -(However, if you writes udev's hotplug scripts for memory hotplug, these -phases can be execute in seamless way.) +In the first phase, memory is "hidden" from the page allocator again, for +example, by migrating busy memory to other memory locations and removing all +relevant free pages from the page allocator. After this phase, the memory is no +longer visible in memory statistics of the system. -Unit of Memory online/offline operation ---------------------------------------- +In the second phase, the memory blocks are removed and metadata is freed. -Memory hotplug uses SPARSEMEM memory model which allows memory to be divided -into chunks of the same size. These chunks are called "sections". The size of -a memory section is architecture dependent. For example, power uses 16MiB, ia64 -uses 1GiB. +Memory Hotplug Notifications +============================ -Memory sections are combined into chunks referred to as "memory blocks". The -size of a memory block is architecture dependent and represents the logical -unit upon which memory online/offline operations are to be performed. The -default size of a memory block is the same as memory section size unless an -architecture specifies otherwise. (see :ref:`memory_hotplug_sysfs_files`.) +There are various ways how Linux is notified about memory hotplug events such +that it can start adding hotplugged memory.
This description is limited to +systems that support ACPI; mechanisms specific to other firmware interfaces or +virtual machines are not described. -To determine the size (in bytes) of a memory block please read this file:: +ACPI Notifications +------------------ - /sys/devices/system/memory/block_size_bytes +Platforms that support ACPI, such as x86_64, can support memory hotplug +notifications via ACPI. -Kernel Configuration -==================== +In general, a firmware supporting memory hotplug defines a memory class object +_HID "PNP0C80". When notified about hotplug of a new memory device, the ACPI +driver will hotplug the memory to Linux. -To use memory hotplug feature, kernel must be compiled with following -config options. +If the firmware supports hotplug of NUMA nodes, it defines an object _HID +"ACPI0004", "PNP0A05", or "PNP0A06". When notified about a hotplug event, all +assigned memory devices are added to Linux by the ACPI driver. -- For all memory hotplug: - - Memory model -> Sparse Memory (``CONFIG_SPARSEMEM``) - - Allow for memory hot-add (``CONFIG_MEMORY_HOTPLUG``) +Similarly, Linux can be notified about requests to hotunplug a memory device or +a NUMA node via ACPI. The ACPI driver will try offlining all relevant memory +blocks, and, if successful, hotunplug the memory from Linux. -- To enable memory removal, the following are also necessary: - - Allow for memory hot remove (``CONFIG_MEMORY_HOTREMOVE``) - - Page Migration (``CONFIG_MIGRATION``) +Manual Probing +-------------- -- For ACPI memory hotplug, the following are also necessary: - - Memory hotplug (under ACPI Support menu) (``CONFIG_ACPI_HOTPLUG_MEMORY``) - - This option can be kernel module. +On some architectures, the firmware may not be able to notify the operating +system about a memory hotplug event. Instead, the memory has to be manually +probed from user space.
-- As a related configuration, if your box has a feature of NUMA-node hotplug - via ACPI, then this option is necessary too. +The probe interface is located at:: - - ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu) - (``CONFIG_ACPI_CONTAINER``). + /sys/devices/system/memory/probe - This option can be kernel module too. +Only complete memory blocks can be probed. Individual memory blocks are probed +by providing the physical start address of the memory block:: + % echo addr > /sys/devices/system/memory/probe -.. _memory_hotplug_sysfs_files: +Which results in a memory block for the range [addr, addr + memory_block_size) +being created. -sysfs files for memory hotplug -============================== +.. note:: -All memory blocks have their device information in sysfs. Each memory block -is described under ``/sys/devices/system/memory`` as:: + Using the probe interface is discouraged as it is easy to crash the kernel, + because Linux cannot validate user input; this interface might be removed in + the future. - /sys/devices/system/memory/memoryXXX +Onlining and Offlining Memory Blocks +==================================== -where XXX is the memory block id. +After a memory block has been created, Linux has to be instructed to actually +make use of that memory: the memory block has to be "online". -For the memory block covered by the sysfs directory. It is expected that all -memory sections in this range are present and no memory holes exist in the -range. Currently there is no way to determine if there is a memory hole, but -the existence of one should not affect the hotplug capabilities of the memory -block. +Before a memory block can be removed, Linux has to stop using any memory part of +the memory block: the memory block has to be "offlined". -For example, assume 1GiB memory block size. 
A device for a memory starting at -0x100000000 is ``/sys/device/system/memory/memory4``:: +The Linux kernel can be configured to automatically online added memory blocks +and drivers automatically trigger offlining of memory blocks when trying +hotunplug of memory. Memory blocks can only be removed once offlining succeeded +and drivers may trigger offlining of memory blocks when attempting hotunplug of +memory. - (0x100000000 / 1Gib = 4) +Onlining Memory Blocks Manually +------------------------------- -This device covers address range [0x100000000 ... 0x140000000) +If auto-onlining of memory blocks isn't enabled, user-space has to manually +trigger onlining of memory blocks. Often, udev rules are used to automate this +task in user space. -Under each memory block, you can see 5 files: +Onlining of a memory block can be triggered via:: -- ``/sys/devices/system/memory/memoryXXX/phys_index`` -- ``/sys/devices/system/memory/memoryXXX/phys_device`` -- ``/sys/devices/system/memory/memoryXXX/state`` -- ``/sys/devices/system/memory/memoryXXX/removable`` -- ``/sys/devices/system/memory/memoryXXX/valid_zones`` + % echo online > /sys/devices/system/memory/memoryXXX/state -=================== ============================================================ -``phys_index`` read-only and contains memory block id, same as XXX. -``state`` read-write +Or alternatively:: - - at read: contains online/offline state of memory. - - at write: user can specify "online_kernel", + % echo 1 > /sys/devices/system/memory/memoryXXX/online - "online_movable", "online", "offline" command - which will be performed on all sections in the block. -``phys_device`` read-only: legacy interface only ever used on s390x to - expose the covered storage increment. -``removable`` read-only: legacy interface that indicated whether a memory - block was likely to be offlineable or not. Newer kernel - versions return "1" if and only if the kernel supports - memory offlining. 
-``valid_zones`` read-only: designed to show by which zone memory provided by - a memory block is managed, and to show by which zone memory - provided by an offline memory block could be managed when - onlining. - - The first column shows it`s default zone. - - "memory6/valid_zones: Normal Movable" shows this memoryblock - can be onlined to ZONE_NORMAL by default and to ZONE_MOVABLE - by online_movable. - - "memory7/valid_zones: Movable Normal" shows this memoryblock - can be onlined to ZONE_MOVABLE by default and to ZONE_NORMAL - by online_kernel. -=================== ============================================================ +The kernel will select the target zone automatically, usually defaulting to +``ZONE_NORMAL`` unless ``movable_node`` has been specified on the kernel +command line or if the memory block would intersect the ZONE_MOVABLE already. -.. note:: +One can explicitly request to associate an offline memory block with +ZONE_MOVABLE by:: - These directories/files appear after physical memory hotplug phase. + % echo online_movable > /sys/devices/system/memory/memoryXXX/state -If CONFIG_NUMA is enabled the memoryXXX/ directories can also be accessed -via symbolic links located in the ``/sys/devices/system/node/node*`` directories. +Or one can explicitly request a kernel zone (usually ZONE_NORMAL) by:: -For example:: + % echo online_kernel > /sys/devices/system/memory/memoryXXX/state - /sys/devices/system/node/node0/memory9 -> ../../memory/memory9 +In any case, if onlining succeeds, the state of the memory block is changed to +be "online". If it fails, the state of the memory block will remain unchanged +and the above commands will fail. -A backlink will also be created:: +Onlining Memory Blocks Automatically +------------------------------------ - /sys/devices/system/memory/memory9/node0 -> ../../node/node0 +The kernel can be configured to try auto-onlining of newly added memory blocks.
+If this feature is disabled, the memory blocks will stay offline until +explicitly onlined from user space. -.. _memory_hotplug_physical_mem: +The configured auto-online behavior can be observed via:: -Physical memory hot-add phase -============================= + % cat /sys/devices/system/memory/auto_online_blocks -Hardware(Firmware) Support --------------------------- +Auto-onlining can be enabled by writing ``online``, ``online_kernel`` or +``online_movable`` to that file, like:: -On x86_64/ia64 platform, memory hotplug by ACPI is supported. + % echo online > /sys/devices/system/memory/auto_online_blocks -In general, the firmware (ACPI) which supports memory hotplug defines -memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80, -Linux's ACPI handler does hot-add memory to the system and calls a hotplug udev -script. This will be done automatically. +Modifying the auto-online behavior will affect all subsequently added +memory blocks only. -But scripts for memory hotplug are not contained in generic udev package(now). -You may have to write it by yourself or online/offline memory by hand. -Please see :ref:`memory_hotplug_how_to_online_memory` and -:ref:`memory_hotplug_how_to_offline_memory`. +.. note:: -If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004", -"PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler -calls hotplug code for all of objects which are defined in it. -If memory device is found, memory hotplug code will be called. + In corner cases, auto-onlining can fail. The kernel won't retry. Note that + auto-onlining is not expected to fail in default configurations. -Notify memory hot-add event by hand ------------------------------------ +.. note:: -On some architectures, the firmware may not notify the kernel of a memory -hotplug event. Therefore, the memory "probe" interface is supported to -explicitly notify the kernel.
This interface depends on -CONFIG_ARCH_MEMORY_PROBE and can be configured on powerpc, sh, and x86 -if hotplug is supported, although for x86 this should be handled by ACPI -notification. + DLPAR on ppc64 ignores the ``offline`` setting and will still online added + memory blocks; if onlining fails, memory blocks are removed again. -Probe interface is located at:: +Offlining Memory Blocks +----------------------- - /sys/devices/system/memory/probe +In the current implementation, Linux's memory offlining will try migrating all +movable pages off the affected memory block. As most kernel allocations, such as +page tables, are unmovable, page migration can fail and, therefore, inhibit +memory offlining from succeeding. -You can tell the physical address of new memory to the kernel by:: +Having the memory provided by memory block managed by ZONE_MOVABLE significantly +increases memory offlining reliability; still, memory offlining can fail in +some corner cases. - % echo start_address_of_new_memory > /sys/devices/system/memory/probe +Further, memory offlining might retry for a long time (or even forever), until +aborted by the user. -Then, [start_address_of_new_memory, start_address_of_new_memory + -memory_block_size] memory range is hot-added. In this case, hotplug script is -not called (in current implementation). You'll have to online memory by -yourself. Please see :ref:`memory_hotplug_how_to_online_memory`. +Offlining of a memory block can be triggered via:: -Logical Memory hot-add phase -============================ + % echo offline > /sys/devices/system/memory/memoryXXX/state -State of memory ---------------- +Or alternatively:: -To see (online/offline) state of a memory block, read 'state' file:: + % echo 0 > /sys/devices/system/memory/memoryXXX/online - % cat /sys/device/system/memory/memoryXXX/state +If offlining succeeds, the state of the memory block is changed to be "offline". 
+If it fails, the state of the memory block will remain unchanged and the above +commands will fail, for example, via:: + bash: echo: write error: Device or resource busy -- If the memory block is online, you'll read "online". -- If the memory block is offline, you'll read "offline". +or via:: + bash: echo: write error: Invalid argument -.. _memory_hotplug_how_to_online_memory: +Observing the State of Memory Blocks +------------------------------------ -How to online memory --------------------- +The state (online/offline/going-offline) of a memory block can be observed +either via:: -When the memory is hot-added, the kernel decides whether or not to "online" -it according to the policy which can be read from "auto_online_blocks" file:: + % cat /sys/devices/system/memory/memoryXXX/state - % cat /sys/devices/system/memory/auto_online_blocks +Or alternatively (1/0) via:: -The default depends on the CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config -option. If it is disabled the default is "offline" which means the newly added -memory is not in a ready-to-use state and you have to "online" the newly added -memory blocks manually. Automatic onlining can be requested by writing "online" -to "auto_online_blocks" file:: + % cat /sys/devices/system/memory/memoryXXX/online - % echo online > /sys/devices/system/memory/auto_online_blocks +For an online memory block, the managing zone can be observed via:: -This sets a global policy and impacts all memory blocks that will subsequently -be hotplugged. Currently offline blocks keep their state. It is possible, under -certain circumstances, that some memory blocks will be added but will fail to -online. User space tools can check their "state" files -(``/sys/devices/system/memory/memoryXXX/state``) and try to online them manually.
+ % cat /sys/devices/system/memory/memoryXXX/valid_zones -If the automatic onlining wasn't requested, failed, or some memory block was -offlined it is possible to change the individual block's state by writing to the -"state" file:: +Configuring Memory Hot(Un)Plug +============================== - % echo online > /sys/devices/system/memory/memoryXXX/state +There are various ways how system administrators can configure memory +hot(un)plug and interact with memory blocks, especially, to online them. -This onlining will not change the ZONE type of the target memory block, -If the memory block doesn't belong to any zone an appropriate kernel zone -(usually ZONE_NORMAL) will be used unless movable_node kernel command line -option is specified when ZONE_MOVABLE will be used. +Memory Hot(Un)Plug Configuration via Sysfs +------------------------------------------ -You can explicitly request to associate it with ZONE_MOVABLE by:: +Some memory hot(un)plug properties can be configured or inspected via sysfs in:: - % echo online_movable > /sys/devices/system/memory/memoryXXX/state + /sys/devices/system/memory/ -.. note:: current limit: this memory block must be adjacent to ZONE_MOVABLE +The following files are currently defined: -Or you can explicitly request a kernel zone (usually ZONE_NORMAL) by:: +====================== ========================================================= +``auto_online_blocks`` read-write: set or get the default state of new memory + blocks; configure auto-onlining. - % echo online_kernel > /sys/devices/system/memory/memoryXXX/state + The default value depends on the + CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel configuration + option. -.. note:: current limit: this memory block must be adjacent to ZONE_NORMAL + See the ``state`` property of memory blocks for details. +``block_size_bytes`` read-only: the size in bytes of a memory block.
+``probe`` write-only: add (probe) selected memory blocks manually + from user space by supplying the physical start address. -An explicit zone onlining can fail (e.g. when the range is already within -and existing and incompatible zone already). + Availability depends on the CONFIG_ARCH_MEMORY_PROBE + kernel configuration option. +``uevent`` read-write: generic udev file for device subsystems. +====================== ========================================================= -After this, memory block XXX's state will be 'online' and the amount of -available memory will be increased. +.. note:: -This may be changed in future. + When the CONFIG_MEMORY_FAILURE kernel configuration option is enabled, two + additional files ``hard_offline_page`` and ``soft_offline_page`` are available + to trigger hwpoisoning of pages, for example, for testing purposes. Note that + this functionality is not really related to memory hot(un)plug or actual + offlining of memory blocks. -Logical memory remove -===================== +Memory Block Configuration via Sysfs +------------------------------------ -Memory offline and ZONE_MOVABLE -------------------------------- +Each memory block is represented as a memory block device that can be +onlined or offlined. All memory blocks have their device information located in +sysfs. Each present memory block is listed under +``/sys/devices/system/memory`` as:: -Memory offlining is more complicated than memory online. Because memory offline -has to make the whole memory block be unused, memory offline can fail if -the memory block includes memory which cannot be freed. + /sys/devices/system/memory/memoryXXX -In general, memory offline can use 2 techniques. +where XXX is the memory block id; the number of digits is variable. -(1) reclaim and free all memory in the memory block. -(2) migrate all pages in the memory block. +A present memory block indicates that some memory in the range is present; +however, a memory block might span memory holes. 
A memory block spanning memory +holes cannot be offlined. -In the current implementation, Linux's memory offline uses method (2), freeing -all pages in the memory block by page migration. But not all pages are -migratable. Under current Linux, migratable pages are anonymous pages and -page caches. For offlining a memory block by migration, the kernel has to -guarantee that the memory block contains only migratable pages. +For example, assume 1 GiB memory block size. A device for a memory starting at +0x100000000 is ``/sys/devices/system/memory/memory4``:: -Now, a boot option for making a memory block which consists of migratable pages -is supported. By specifying "kernelcore=" or "movablecore=" boot option, you can -create ZONE_MOVABLE...a zone which is just used for movable pages. -(See also Documentation/admin-guide/kernel-parameters.rst) + (0x100000000 / 1 GiB = 4) -Assume the system has "TOTAL" amount of memory at boot time, this boot option -creates ZONE_MOVABLE as following. +This device covers address range [0x100000000 ... 0x140000000) -1) When kernelcore=YYYY boot option is used, - Size of memory not for movable pages (not for offline) is YYYY. - Size of memory for movable pages (for offline) is TOTAL-YYYY. +The following files are currently defined: -2) When movablecore=ZZZZ boot option is used, - Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ. - Size of memory for movable pages (for offline) is ZZZZ. +=================== ============================================================ +``online`` read-write: simplified interface to trigger onlining / + offlining and to observe the state of a memory block. + When onlining, the zone is selected automatically. +``phys_device`` read-only: legacy interface only ever used on s390x to + expose the covered storage increment. +``phys_index`` read-only: the memory block id (XXX).
+``removable`` read-only: legacy interface that indicated whether a memory + block was likely to be offlineable or not. Nowadays, the + kernel returns ``1`` if and only if it supports memory + offlining. +``state`` read-write: advanced interface to trigger onlining / + offlining and to observe the state of a memory block. + + When writing, ``online``, ``offline``, ``online_kernel`` and + ``online_movable`` are supported. + + ``online_movable`` specifies onlining to ZONE_MOVABLE. + ``online_kernel`` specifies onlining to the default kernel + zone for the memory block, such as ZONE_NORMAL. + ``online`` lets the kernel select the zone automatically. + + When reading, ``online``, ``offline`` and ``going-offline`` + may be returned. +``uevent`` read-write: generic uevent file for devices. +``valid_zones`` read-only: when a block is online, shows the zone it + belongs to; when a block is offline, shows what zone will + manage it when the block will be onlined. + + For online memory blocks, ``DMA``, ``DMA32``, ``Normal``, + ``Movable`` and ``none`` may be returned. ``none`` indicates + that memory provided by a memory block is managed by + multiple zones or spans multiple nodes; such memory blocks + cannot be offlined. ``Movable`` indicates ZONE_MOVABLE. + Other values indicate a kernel zone. + + For offline memory blocks, the first column shows the + zone the kernel would select when onlining the memory block + right now without further specifying a zone. + + Availability depends on the CONFIG_MEMORY_HOTREMOVE + kernel configuration option. +=================== ============================================================ .. note:: - Unfortunately, there is no information to show which memory block belongs - to ZONE_MOVABLE. This is TBD. + If the CONFIG_NUMA kernel configuration option is enabled, the memoryXXX/ + directories can also be accessed via symbolic links located in the + ``/sys/devices/system/node/node*`` directories.
+ + For example:: + + /sys/devices/system/node/node0/memory9 -> ../../memory/memory9 + + A backlink will also be created:: + + /sys/devices/system/memory/memory9/node0 -> ../../node/node0 + +Command Line Parameters +----------------------- + +Some command line parameters affect memory hot(un)plug handling. The following +command line parameters are relevant: + +======================== ======================================================= +``memhp_default_state`` configure auto-onlining by essentially setting + ``/sys/devices/system/memory/auto_online_blocks``. +``movable_node`` configure automatic zone selection of the kernel. When + set, the kernel will default to ZONE_MOVABLE, unless + other zones can be kept contiguous. +======================== ======================================================= + +Module Parameters +------------------ - Memory offlining can fail when dissolving a free huge page on ZONE_MOVABLE - and the feature of freeing unused vmemmap pages associated with each hugetlb - page is enabled. +Instead of additional command line parameters or sysfs files, the +``memory_hotplug`` subsystem now provides a dedicated namespace for module +parameters. Module parameters can be set via the command line by prefixing +them with ``memory_hotplug.`` such as:: + + memory_hotplug.memmap_on_memory=1 + +and they can be observed (and some even modified at runtime) via:: + + /sys/module/memory_hotplug/parameters/ + +The following module parameters are currently defined: + +======================== ======================================================= +``memmap_on_memory`` read-write: Allocate memory for the memmap from the + added memory block itself. Even if enabled, actual + support depends on various other system properties and + should only be regarded as a hint whether the behavior + would be desired.
+ + While allocating the memmap from the memory block + itself makes memory hotplug less likely to fail and + keeps the memmap on the same NUMA node in any case, it + can fragment physical memory in a way that huge pages + in bigger granularity cannot be formed on hotplugged + memory. +======================== ======================================================= + +ZONE_MOVABLE +============ + +ZONE_MOVABLE is an important mechanism for more reliable memory offlining. +Further, having system RAM managed by ZONE_MOVABLE instead of one of the +kernel zones can increase the number of possible transparent huge pages and +dynamically allocated huge pages. + +Most kernel allocations are unmovable. Important examples include the memory +map (usually 1/64ths of memory), page tables, and kmalloc(). Such allocations +can only be served from the kernel zones. + +Most user space pages, such as anonymous memory, and page cache pages are +movable. Such allocations can be served from ZONE_MOVABLE and the kernel zones. + +Only movable allocations are served from ZONE_MOVABLE, resulting in unmovable +allocations being limited to the kernel zones. Without ZONE_MOVABLE, there is +absolutely no guarantee whether a memory block can be offlined successfully. + +Zone Imbalances +--------------- - This can happen when we have plenty of ZONE_MOVABLE memory, but not enough - kernel memory to allocate vmemmmap pages. We may even be able to migrate - huge page contents, but will not be able to dissolve the source huge page. - This will prevent an offline operation and is unfortunate as memory offlining - is expected to succeed on movable zones. Users that depend on memory hotplug - to succeed for movable zones should carefully consider whether the memory - savings gained from this feature are worth the risk of possibly not being - able to offline memory in certain situations. 
+Having too much system RAM managed by ZONE_MOVABLE is called a zone imbalance, +which can harm the system or degrade performance. As one example, the kernel +might crash because it runs out of free memory for unmovable allocations, +although there is still plenty of free memory left in ZONE_MOVABLE. + +Usually, MOVABLE:KERNEL ratios of up to 3:1 or even 4:1 are fine. Ratios of 63:1 +are definitely impossible due to the overhead for the memory map. + +Actual safe zone ratios depend on the workload. Extreme cases, like excessive +long-term pinning of pages, might not be able to deal with ZONE_MOVABLE at all. .. note:: - Techniques that rely on long-term pinnings of memory (especially, RDMA and - vfio) are fundamentally problematic with ZONE_MOVABLE and, therefore, memory - hot remove. Pinned pages cannot reside on ZONE_MOVABLE, to guarantee that - memory can still get hot removed - be aware that pinning can fail even if - there is plenty of free memory in ZONE_MOVABLE. In addition, using - ZONE_MOVABLE might make page pinning more expensive, because pages have to be - migrated off that zone first. -.. _memory_hotplug_how_to_offline_memory: + CMA memory part of a kernel zone essentially behaves like memory in + ZONE_MOVABLE and similar considerations apply, especially when combining + CMA with ZONE_MOVABLE. -How to offline memory ---------------------- +ZONE_MOVABLE Sizing Considerations +---------------------------------- -You can offline a memory block by using the same sysfs interface that was used -in memory onlining:: +We usually expect that a large portion of available system RAM will actually +be consumed by user space, either directly or indirectly via the page cache. In +the normal case, ZONE_MOVABLE can be used when allocating such pages just fine. - % echo offline > /sys/devices/system/memory/memoryXXX/state +With that in mind, it makes sense that we can have a big portion of system RAM +managed by ZONE_MOVABLE. 
However, there are some things to consider when using +ZONE_MOVABLE, especially when fine-tuning zone ratios: + +- Having a lot of offline memory blocks. Even offline memory blocks consume + memory for metadata and page tables in the direct map; having a lot of offline + memory blocks is not a typical case, though. + +- Memory ballooning without balloon compaction is incompatible with + ZONE_MOVABLE. Only some implementations, such as virtio-balloon and + pseries CMM, fully support balloon compaction. + + Further, the CONFIG_BALLOON_COMPACTION kernel configuration option might be + disabled. In that case, balloon inflation will only perform unmovable + allocations and silently create a zone imbalance, usually triggered by + inflation requests from the hypervisor. + +- Gigantic pages are unmovable, resulting in user space consuming a + lot of unmovable memory. + +- Huge pages are unmovable when an architecture does not support huge + page migration, resulting in a similar issue as with gigantic pages. + +- Page tables are unmovable. Excessive swapping, mapping extremely large + files or ZONE_DEVICE memory can be problematic, although only really relevant + in corner cases. When we manage a lot of user space memory that has been + swapped out or is served from a file/persistent memory/... we still need a lot + of page tables to manage that memory once user space accessed that memory. + +- In certain DAX configurations the memory map for the device memory will be + allocated from the kernel zones. + +- KASAN can have a significant memory overhead, for example, consuming 1/8th of + the total system memory size as (unmovable) tracking metadata. + +- Long-term pinning of pages. Techniques that rely on long-term pinnings + (especially, RDMA and vfio/mdev) are fundamentally problematic with + ZONE_MOVABLE, and therefore, memory offlining. Pinned pages cannot reside + on ZONE_MOVABLE as that would turn these pages unmovable.
Therefore, they + have to be migrated off that zone while pinning. Pinning a page can fail + even if there is plenty of free memory in ZONE_MOVABLE. + + In addition, using ZONE_MOVABLE might make page pinning more expensive, + because of the page migration overhead. + +By default, all the memory configured at boot time is managed by the kernel +zones and ZONE_MOVABLE is not used. + +To enable ZONE_MOVABLE to include the memory present at boot and to control the +ratio between movable and kernel zones there are two command line options: +``kernelcore=`` and ``movablecore=``. See +Documentation/admin-guide/kernel-parameters.rst for their description. + +Memory Offlining and ZONE_MOVABLE +--------------------------------- + +Even with ZONE_MOVABLE, there are some corner cases where offlining a memory +block might fail: + +- Memory blocks with memory holes; this applies to memory blocks present during + boot and can apply to memory blocks hotplugged via the XEN balloon and the + Hyper-V balloon. + +- Mixed NUMA nodes and mixed zones within a single memory block prevent memory + offlining; this applies to memory blocks present during boot only. + +- Special memory blocks prevented by the system from getting offlined. Examples + include any memory available during boot on arm64 or memory blocks spanning + the crashkernel area on s390x; this usually applies to memory blocks present + during boot only. + +- Memory blocks overlapping with CMA areas cannot be offlined, this applies to + memory blocks present during boot only. + +- Concurrent activity that operates on the same physical memory area, such as + allocating gigantic pages, can result in temporary offlining failures. + +- Out of memory when dissolving huge pages, especially when freeing unused + vmemmap pages associated with each hugetlb page is enabled. 
+ + Offlining code may be able to migrate huge page contents, but may not be able + to dissolve the source huge page because it fails allocating (unmovable) pages + for the vmemmap, because the system might not have free memory in the kernel + zones left. + + Users that depend on memory offlining to succeed for movable zones should + carefully consider whether the memory savings gained from this feature are + worth the risk of possibly not being able to offline memory in certain + situations. + +Further, when running into out of memory situations while migrating pages, or +when still encountering permanently unmovable pages within ZONE_MOVABLE +(-> BUG), memory offlining will keep retrying until it eventually succeeds. + +When offlining is triggered from user space, the offlining context can be +terminated by sending a fatal signal. A timeout based offlining can easily be +implemented via:: -If offline succeeds, the state of the memory block is changed to be "offline". -If it fails, some error core (like -EBUSY) will be returned by the kernel. -Even if a memory block does not belong to ZONE_MOVABLE, you can try to offline -it. If it doesn't contain 'unmovable' memory, you'll get success. - -A memory block under ZONE_MOVABLE is considered to be able to be offlined -easily. But under some busy state, it may return -EBUSY. Even if a memory -block cannot be offlined due to -EBUSY, you can retry offlining it and may be -able to offline it (or not). (For example, a page is referred to by some kernel -internal call and released soon.) - -Consideration: - Memory hotplug's design direction is to make the possibility of memory - offlining higher and to guarantee unplugging memory under any situation. But - it needs more work. Returning -EBUSY under some situation may be good because - the user can decide to retry more or not by himself. Currently, memory - offlining code does some amount of retry with 120 seconds timeout. 
- -Physical memory remove -====================== - -Need more implementation yet.... - - Notification completion of remove works by OS to firmware. - - Guard from remove if not yet. - - -Future Work -=========== - - - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like - sysctl or new control file. - - showing memory block and physical device relationship. - - test and make it better memory offlining. - - support HugeTLB page migration and offlining. - - memmap removing at memory offline. - - physical remove memory. + % timeout $TIMEOUT offline_block | failure_handling From 9aa271f41c9063aa7c5cc931a31c81cfd970c027 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Jul 2021 11:42:47 +1000 Subject: [PATCH 417/851] mm: remove pfn_valid_within() and CONFIG_HOLES_IN_ZONE Patch series "mm: remove pfn_valid_within() and CONFIG_HOLES_IN_ZONE". After recent updates to freeing unused parts of the memory map, no architecture can have holes in the memory map within a pageblock. This makes pfn_valid_within() check and CONFIG_HOLES_IN_ZONE configuration option redundant. The first patch removes them both in a mechanical way and the second patch simplifies memory_hotplug::test_pages_in_a_zone() that had pfn_valid_within() surrounded by more logic than simple if. This patch (of 2): After recent changes in freeing of the unused parts of the memory map and rework of pfn_valid() in arm and arm64 there are no architectures that can have holes in the memory map within a pageblock and so nothing can enable CONFIG_HOLES_IN_ZONE which guards non trivial implementation of pfn_valid_within(). With that, pfn_valid_within() is always hardwired to 1 and can be completely removed. Remove calls to pfn_valid_within() and CONFIG_HOLES_IN_ZONE. 
Link: https://lkml.kernel.org/r/20210713080035.7464-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20210713080035.7464-2-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/base/node.c | 2 -- include/linux/mmzone.h | 12 ------------ mm/Kconfig | 3 --- mm/compaction.c | 20 +++++++------------- mm/memory_hotplug.c | 4 ---- mm/page_alloc.c | 24 ++---------------------- mm/page_isolation.c | 7 +------ mm/page_owner.c | 14 +------------- 8 files changed, 11 insertions(+), 75 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 4a4ae868ad9f8..8ec6b7dfbb0f9 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -768,8 +768,6 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static int __ref get_nid_for_pfn(unsigned long pfn) { - if (!pfn_valid_within(pfn)) - return -1; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT if (system_state < SYSTEM_RUNNING) return early_pfn_to_nid(pfn); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 59bad25ce78e0..5c0318509f9e8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1524,18 +1524,6 @@ void sparse_init(void); #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ -/* - * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we - * need to check pfn validity within that MAX_ORDER_NR_PAGES block. - * pfn_valid_within() should be used in this case; we optimise this away - * when we have no holes within a MAX_ORDER_NR_PAGES block. 
- */ -#ifdef CONFIG_HOLES_IN_ZONE -#define pfn_valid_within(pfn) pfn_valid(pfn) -#else -#define pfn_valid_within(pfn) (1) -#endif - #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/mm/Kconfig b/mm/Kconfig index 5dc28e9205e00..1f9bd3371765b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -96,9 +96,6 @@ config HAVE_FAST_GUP depends on MMU bool -config HOLES_IN_ZONE - bool - # Don't discard allocated memory used to track "memory" and "reserved" memblocks # after early boot, so it can still be used to test for validity of memory. # Also, memblocks are updated with memory hot(un)plug. diff --git a/mm/compaction.c b/mm/compaction.c index 61fb64f47a069..79aaf21058da4 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -306,16 +306,14 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source, * is necessary for the block to be a migration source/target. */ do { - if (pfn_valid_within(pfn)) { - if (check_source && PageLRU(page)) { - clear_pageblock_skip(page); - return true; - } + if (check_source && PageLRU(page)) { + clear_pageblock_skip(page); + return true; + } - if (check_target && PageBuddy(page)) { - clear_pageblock_skip(page); - return true; - } + if (check_target && PageBuddy(page)) { + clear_pageblock_skip(page); + return true; } page += (1 << PAGE_ALLOC_COSTLY_ORDER); @@ -585,8 +583,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, break; nr_scanned++; - if (!pfn_valid_within(blockpfn)) - goto isolate_fail; /* * For compound pages such as THP and hugetlbfs, we can save @@ -885,8 +881,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, cond_resched(); } - if (!pfn_valid_within(low_pfn)) - goto isolate_fail; nr_scanned++; page = pfn_to_page(low_pfn); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0c46458a3402f..46872809f6688 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1308,10 +1308,6 @@ struct zone 
*test_pages_in_a_zone(unsigned long start_pfn, for (; pfn < sec_end_pfn && pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES) { i = 0; - /* This is just a CONFIG_HOLES_IN_ZONE check.*/ - while ((i < MAX_ORDER_NR_PAGES) && - !pfn_valid_within(pfn + i)) - i++; if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) continue; /* Check if we got outside of the zone */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 015908d45d16f..631187730eadb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -594,8 +594,6 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page) static int page_is_consistent(struct zone *zone, struct page *page) { - if (!pfn_valid_within(page_to_pfn(page))) - return 0; if (zone != page_zone(page)) return 0; @@ -1025,16 +1023,12 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn, if (order >= MAX_ORDER - 2) return false; - if (!pfn_valid_within(buddy_pfn)) - return false; - combined_pfn = buddy_pfn & pfn; higher_page = page + (combined_pfn - pfn); buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); higher_buddy = higher_page + (buddy_pfn - combined_pfn); - return pfn_valid_within(buddy_pfn) && - page_is_buddy(higher_page, higher_buddy, order + 1); + return page_is_buddy(higher_page, higher_buddy, order + 1); } /* @@ -1095,8 +1089,6 @@ static inline void __free_one_page(struct page *page, buddy_pfn = __find_buddy_pfn(pfn, order); buddy = page + (buddy_pfn - pfn); - if (!pfn_valid_within(buddy_pfn)) - goto done_merging; if (!page_is_buddy(page, buddy, order)) goto done_merging; /* @@ -1754,9 +1746,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, /* * Check that the whole (or subset of) a pageblock given by the interval of * [start_pfn, end_pfn) is valid and within the same zone, before scanning it - * with the migration of free compaction scanner. The scanners then need to - * use only pfn_valid_within() check for arches that allow holes within - * pageblocks. 
+ * with the migration of free compaction scanner. * * Return struct page pointer of start_pfn, or NULL if checks were not passed. * @@ -1872,8 +1862,6 @@ static inline void __init pgdat_init_report_one_done(void) */ static inline bool __init deferred_pfn_valid(unsigned long pfn) { - if (!pfn_valid_within(pfn)) - return false; if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn)) return false; return true; @@ -2520,11 +2508,6 @@ static int move_freepages(struct zone *zone, int pages_moved = 0; for (pfn = start_pfn; pfn <= end_pfn;) { - if (!pfn_valid_within(pfn)) { - pfn++; - continue; - } - page = pfn_to_page(pfn); if (!PageBuddy(page)) { /* @@ -8827,9 +8810,6 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, } for (; iter < pageblock_nr_pages - offset; iter++) { - if (!pfn_valid_within(pfn + iter)) - continue; - page = pfn_to_page(pfn + iter); /* diff --git a/mm/page_isolation.c b/mm/page_isolation.c index bddf788f45bff..471e3a13b5411 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -93,8 +93,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) buddy_pfn = __find_buddy_pfn(pfn, order); buddy = page + (buddy_pfn - pfn); - if (pfn_valid_within(buddy_pfn) && - !is_migrate_isolate_page(buddy)) { + if (!is_migrate_isolate_page(buddy)) { __isolate_free_page(page, order); isolated_page = true; } @@ -250,10 +249,6 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, struct page *page; while (pfn < end_pfn) { - if (!pfn_valid_within(pfn)) { - pfn++; - continue; - } page = pfn_to_page(pfn); if (PageBuddy(page)) /* diff --git a/mm/page_owner.c b/mm/page_owner.c index f51a57e92aa38..62402d22539b8 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -276,9 +276,6 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, pageblock_mt = get_pageblock_migratetype(page); for (; pfn < block_end_pfn; pfn++) { - if (!pfn_valid_within(pfn)) - continue; - /* The pageblock is online, 
no need to recheck. */ page = pfn_to_page(pfn); @@ -479,10 +476,6 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) continue; } - /* Check for holes within a MAX_ORDER area */ - if (!pfn_valid_within(pfn)) - continue; - page = pfn_to_page(pfn); if (PageBuddy(page)) { unsigned long freepage_order = buddy_order_unsafe(page); @@ -560,14 +553,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) block_end_pfn = min(block_end_pfn, end_pfn); for (; pfn < block_end_pfn; pfn++) { - struct page *page; + struct page *page = pfn_to_page(pfn); struct page_ext *page_ext; - if (!pfn_valid_within(pfn)) - continue; - - page = pfn_to_page(pfn); - if (page_zone(page) != zone) continue; From e784d66cd1e82e208d7ff491075a525bf09c2f49 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Jul 2021 11:42:49 +1000 Subject: [PATCH 418/851] mm: memory_hotplug: cleanup after removal of pfn_valid_within() When test_pages_in_a_zone() used pfn_valid_within() it had some logic surrounding pfn_valid_within() checks. Since pfn_valid_within() is gone, this logic can be removed. Link: https://lkml.kernel.org/r/20210713080035.7464-3-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "Rafael J.
Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory_hotplug.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 46872809f6688..a8c5405a7a709 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1298,7 +1298,7 @@ struct zone *test_pages_in_a_zone(unsigned long start_pfn, unsigned long pfn, sec_end_pfn; struct zone *zone = NULL; struct page *page; - int i; + for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1); pfn < end_pfn; pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) { @@ -1307,13 +1307,10 @@ struct zone *test_pages_in_a_zone(unsigned long start_pfn, continue; for (; pfn < sec_end_pfn && pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES) { - i = 0; - if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) - continue; /* Check if we got outside of the zone */ - if (zone && !zone_spans_pfn(zone, pfn + i)) + if (zone && !zone_spans_pfn(zone, pfn)) return NULL; - page = pfn_to_page(pfn + i); + page = pfn_to_page(pfn); if (zone && page_zone(page) != zone) return NULL; zone = page_zone(page); From 55de8dcbb9d608fe37b43a0206fece5f0561fc6d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 21 Jul 2021 11:42:49 +1000 Subject: [PATCH 419/851] mm/memory_hotplug: use "unsigned long" for PFN in zone_for_pfn_range() Patch series "mm/memory_hotplug: preparatory patches for new online policy and memory" These are all cleanups and one fix previously sent as part of [1]: [PATCH v1 00/12] mm/memory_hotplug: "auto-movable" online policy and memory groups. These patches make sense even without the other series, therefore I pulled them out to make the other series easier to digest. [1] https://lkml.kernel.org/r/20210607195430.48228-1-david@redhat.com This patch (of 4): Checkpatch complained on a follow-up patch that we are using "unsigned" here, which defaults to "unsigned int" and checkpatch is correct. 
As we will search for a fitting zone using the wrong pfn, we might end up onlining memory to one of the special kernel zones, such as ZONE_DMA, which can end badly as the onlined memory does not satisfy properties of these zones. Use "unsigned long" instead, just as we do in other places when handling PFNs. This can bite us once we have physical addresses in the range of multiple TB. Link: https://lkml.kernel.org/r/20210712124052.26491-2-david@redhat.com Fixes: e5e689302633 ("mm, memory_hotplug: display allowed zones in the preferred ordering") Signed-off-by: David Hildenbrand Reviewed-by: Pankaj Gupta Reviewed-by: Muchun Song Reviewed-by: Oscar Salvador Cc: David Hildenbrand Cc: Vitaly Kuznetsov Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Wei Yang Cc: Michal Hocko Cc: Dan Williams Cc: Anshuman Khandual Cc: Dave Hansen Cc: Vlastimil Babka Cc: Mike Rapoport Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Pavel Tatashin Cc: Heiko Carstens Cc: Michael Ellerman Cc: Catalin Marinas Cc: virtualization@lists.linux-foundation.org Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anton Blanchard Cc: Ard Biesheuvel Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dave Jiang Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jia He Cc: Joe Perches Cc: Kefeng Wang Cc: Laurent Dufour Cc: Michel Lespinasse Cc: Nathan Lynch Cc: Nicholas Piggin Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Pierre Morel Cc: "Rafael J. 
Wysocki" Cc: Rich Felker Cc: Scott Cheloha Cc: Sergei Trofimovich Cc: Thiago Jung Bauermann Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vishal Verma Cc: Will Deacon Cc: Yoshinori Sato Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memory_hotplug.h | 4 ++-- mm/memory_hotplug.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index a7fd2c3ccb777..d01b504ce06fe 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -339,8 +339,8 @@ extern void sparse_remove_section(struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); -extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, - unsigned long nr_pages); +extern struct zone *zone_for_pfn_range(int online_type, int nid, + unsigned long start_pfn, unsigned long nr_pages); extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); void arch_remove_linear_mapping(u64 start, u64 size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a8c5405a7a709..e53f31d3f63df 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -708,8 +708,8 @@ static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn return movable_node_enabled ? 
movable_zone : kernel_zone; } -struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, - unsigned long nr_pages) +struct zone *zone_for_pfn_range(int online_type, int nid, + unsigned long start_pfn, unsigned long nr_pages) { if (online_type == MMOP_ONLINE_KERNEL) return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages); From 726543a9e3d98cab21ca98166da23a94c7b90164 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 21 Jul 2021 11:42:51 +1000 Subject: [PATCH 420/851] mm/memory_hotplug: remove nid parameter from arch_remove_memory() The parameter is unused, let's remove it. Link: https://lkml.kernel.org/r/20210712124052.26491-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Catalin Marinas Acked-by: Michael Ellerman [powerpc] Acked-by: Heiko Carstens [s390] Reviewed-by: Pankaj Gupta Reviewed-by: Oscar Salvador Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Yoshinori Sato Cc: Rich Felker Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Pavel Tatashin Cc: Baoquan He Cc: Laurent Dufour Cc: Sergei Trofimovich Cc: Kefeng Wang Cc: Michel Lespinasse Cc: Christophe Leroy Cc: "Aneesh Kumar K.V" Cc: Thiago Jung Bauermann Cc: Joe Perches Cc: Pierre Morel Cc: Jia He Cc: Anton Blanchard Cc: Dan Williams Cc: Dave Jiang Cc: Jason Wang Cc: Len Brown Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Nathan Lynch Cc: Pankaj Gupta Cc: "Rafael J. Wysocki" Cc: "Rafael J. 
Wysocki" Cc: Scott Cheloha Cc: Vishal Verma Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/arm64/mm/mmu.c | 3 +-- arch/ia64/mm/init.c | 3 +-- arch/powerpc/mm/mem.c | 3 +-- arch/s390/mm/init.c | 3 +-- arch/sh/mm/init.c | 3 +-- arch/x86/mm/init_32.c | 3 +-- arch/x86/mm/init_64.c | 3 +-- include/linux/memory_hotplug.h | 3 +-- mm/memory_hotplug.c | 4 ++-- mm/memremap.c | 5 +---- 10 files changed, 11 insertions(+), 22 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d745865084488..af8ab553a2682 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1506,8 +1506,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 064a967a7b6e3..5c6da8d83c1ad 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -484,8 +484,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ad198b4392224..c3c4e31462eca 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -119,8 +119,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return rc; } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff 
--git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8ac710de1ab1b..d85bd7f5d8dc6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -306,8 +306,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return rc; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index ce26c7f8950a3..506784702430c 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -414,8 +414,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 74b78840182df..bd90b8fe81e45 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -801,8 +801,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start_pfn, nr_pages, params); } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ddeaba947eb3d..a6e11763763fb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1255,8 +1255,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true, NULL); } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> 
PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d01b504ce06fe..010a192298b53 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -130,8 +130,7 @@ static inline bool movable_node_is_enabled(void) return movable_node_enabled; } -extern void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap); +extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap); extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e53f31d3f63df..702a21a5516a8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1106,7 +1106,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) /* create memory block devices after memory was added */ ret = create_memory_block_devices(start, size, mhp_altmap.alloc); if (ret) { - arch_remove_memory(nid, start, size, NULL); + arch_remove_memory(start, size, NULL); goto error; } @@ -1885,7 +1885,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) mem_hotplug_begin(); - arch_remove_memory(nid, start, size, altmap); + arch_remove_memory(start, size, altmap); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { memblock_free(start, size); diff --git a/mm/memremap.c b/mm/memremap.c index 805d761740c42..e99944ab8eb3c 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -160,14 +160,11 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id) { struct range *range = &pgmap->ranges[range_id]; struct page *first_page; - int nid; /* make sure to access a memmap that was actually initialized */ first_page = pfn_to_page(pfn_first(pgmap, range_id)); /* pages are dead and unused, undo the arch mapping */ - nid = page_to_nid(first_page); - mem_hotplug_begin(); remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(range->start), 
PHYS_PFN(range_len(range))); @@ -175,7 +172,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id) __remove_pages(PHYS_PFN(range->start), PHYS_PFN(range_len(range)), NULL); } else { - arch_remove_memory(nid, range->start, range_len(range), + arch_remove_memory(range->start, range_len(range), pgmap_altmap(pgmap)); kasan_remove_zero_shadow(__va(range->start), range_len(range)); } From 24ce41d9f19898eeccbf4cccf876d5f1d66ed66a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 21 Jul 2021 11:42:51 +1000 Subject: [PATCH 421/851] mm/memory_hotplug: remove nid parameter from remove_memory() and friends There is only a single user remaining. We can simply lookup the nid only used for node offlining purposes when walking our memory blocks. We don't expect to remove multi-nid ranges; and if we'd ever do, we most probably don't care about removing multi-nid ranges that actually result in empty nodes. If ever required, we can detect the "multi-nid" scenario and simply try offlining all online nodes. Link: https://lkml.kernel.org/r/20210712124052.26491-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michael Ellerman (powerpc) Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Dan Williams Cc: Vishal Verma Cc: Dave Jiang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Nathan Lynch Cc: Laurent Dufour Cc: "Aneesh Kumar K.V" Cc: Scott Cheloha Cc: Anton Blanchard Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dave Hansen Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jia He Cc: Joe Perches Cc: Kefeng Wang Cc: Michal Hocko Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Pierre Morel Cc: "Rafael J. 
Wysocki" Cc: Rich Felker Cc: Sergei Trofimovich Cc: Thiago Jung Bauermann Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- .../platforms/pseries/hotplug-memory.c | 9 +++--- drivers/acpi/acpi_memhotplug.c | 7 +---- drivers/dax/kmem.c | 3 +- drivers/virtio/virtio_mem.c | 4 +-- include/linux/memory_hotplug.h | 10 +++---- mm/memory_hotplug.c | 28 +++++++++++-------- 6 files changed, 30 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index d4f28ee4d5dce..533cb1335b7f2 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -284,7 +284,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si { unsigned long block_sz, start_pfn; int sections_per_block; - int i, nid; + int i; start_pfn = base >> PAGE_SHIFT; @@ -295,10 +295,9 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si block_sz = pseries_memory_block_size(); sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; - nid = memory_add_physaddr_to_nid(base); for (i = 0; i < sections_per_block; i++) { - __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); + __remove_memory(base, MIN_MEMORY_BLOCK_SIZE); base += MIN_MEMORY_BLOCK_SIZE; } @@ -385,7 +384,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) block_sz = pseries_memory_block_size(); - __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, block_sz); put_device(&mem_block->dev); /* Update memory regions for memory remove */ @@ -658,7 +657,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); } else { 
lmb->flags |= DRCONF_MEM_ASSIGNED; diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 8cc195c4c8619..1d01d9414c407 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -239,19 +239,14 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) static void acpi_memory_remove_memory(struct acpi_memory_device *mem_device) { - acpi_handle handle = mem_device->device->handle; struct acpi_memory_info *info, *n; - int nid = acpi_get_node(handle); list_for_each_entry_safe(info, n, &mem_device->res_list, list) { if (!info->enabled) continue; - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(info->start_addr); - acpi_unbind_memory_blocks(info); - __remove_memory(nid, info->start_addr, info->length); + __remove_memory(info->start_addr, info->length); list_del(&info->list); kfree(info); } diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index ac231cc363595..99e0f60c4c266 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -156,8 +156,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax) if (rc) continue; - rc = remove_memory(dev_dax->target_node, range.start, - range_len(&range)); + rc = remove_memory(range.start, range_len(&range)); if (rc == 0) { release_resource(data->res[i]); kfree(data->res[i]); diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 09ed55de07d7d..774986695dc48 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -677,7 +677,7 @@ static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr, dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr, addr + size - 1); - rc = remove_memory(vm->nid, addr, size); + rc = remove_memory(addr, size); if (!rc) { atomic64_sub(size, &vm->offline_size); /* @@ -720,7 +720,7 @@ static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, "offlining and removing memory: 0x%llx - 0x%llx\n", addr, addr + size - 1); - rc = 
offline_and_remove_memory(vm->nid, addr, size); + rc = offline_and_remove_memory(addr, size); if (!rc) { atomic64_sub(size, &vm->offline_size); /* diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 010a192298b53..068e3dcf46904 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -292,9 +292,9 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {} extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); -extern int remove_memory(int nid, u64 start, u64 size); -extern void __remove_memory(int nid, u64 start, u64 size); -extern int offline_and_remove_memory(int nid, u64 start, u64 size); +extern int remove_memory(u64 start, u64 size); +extern void __remove_memory(u64 start, u64 size); +extern int offline_and_remove_memory(u64 start, u64 size); #else static inline void try_offline_node(int nid) {} @@ -304,12 +304,12 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages) return -EINVAL; } -static inline int remove_memory(int nid, u64 start, u64 size) +static inline int remove_memory(u64 start, u64 size) { return -EBUSY; } -static inline void __remove_memory(int nid, u64 start, u64 size) {} +static inline void __remove_memory(u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ extern void set_zone_contiguous(struct zone *zone); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 702a21a5516a8..d45c69d78b830 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1738,7 +1738,9 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) { int ret = !is_memblock_offlined(mem); + int *nid = arg; + *nid = mem->nid; if (unlikely(ret)) { phys_addr_t beginpa, endpa; @@ -1831,12 +1833,12 @@ void try_offline_node(int nid) } EXPORT_SYMBOL(try_offline_node); -static int __ref try_remove_memory(int nid, u64 
start, u64 size) +static int __ref try_remove_memory(u64 start, u64 size) { - int rc = 0; struct vmem_altmap mhp_altmap = {}; struct vmem_altmap *altmap = NULL; unsigned long nr_vmemmap_pages; + int rc = 0, nid = NUMA_NO_NODE; BUG_ON(check_hotplug_memory_range(start, size)); @@ -1844,8 +1846,12 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) * All memory blocks must be offlined before removing memory. Check * whether all memory blocks in question are offline and return error * if this is not the case. + * + * While at it, determine the nid. Note that if we'd have mixed nodes, + * we'd only try to offline the last determined one -- which is good + * enough for the cases we care about. */ - rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb); + rc = walk_memory_blocks(start, size, &nid, check_memblock_offlined_cb); if (rc) return rc; @@ -1894,7 +1900,8 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) release_mem_region_adjustable(start, size); - try_offline_node(nid); + if (nid != NUMA_NO_NODE) + try_offline_node(nid); mem_hotplug_done(); return 0; @@ -1902,7 +1909,6 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) /** * __remove_memory - Remove memory if every memory block is offline - * @nid: the node ID * @start: physical address of the region to remove * @size: size of the region to remove * @@ -1910,14 +1916,14 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) * and online/offline operations before this call, as required by * try_offline_node(). 
*/ -void __remove_memory(int nid, u64 start, u64 size) +void __remove_memory(u64 start, u64 size) { /* * trigger BUG() if some memory is not offlined prior to calling this * function */ - if (try_remove_memory(nid, start, size)) + if (try_remove_memory(start, size)) BUG(); } @@ -1925,12 +1931,12 @@ void __remove_memory(int nid, u64 start, u64 size) * Remove memory if every memory block is offline, otherwise return -EBUSY is * some memory is not offline */ -int remove_memory(int nid, u64 start, u64 size) +int remove_memory(u64 start, u64 size) { int rc; lock_device_hotplug(); - rc = try_remove_memory(nid, start, size); + rc = try_remove_memory(start, size); unlock_device_hotplug(); return rc; @@ -1990,7 +1996,7 @@ static int try_reonline_memory_block(struct memory_block *mem, void *arg) * unplugged all memory (so it's no longer in use) and want to offline + remove * that memory. */ -int offline_and_remove_memory(int nid, u64 start, u64 size) +int offline_and_remove_memory(u64 start, u64 size) { const unsigned long mb_count = size / memory_block_size_bytes(); uint8_t *online_types, *tmp; @@ -2026,7 +2032,7 @@ int offline_and_remove_memory(int nid, u64 start, u64 size) * This cannot fail as it cannot get onlined in the meantime. */ if (!rc) { - rc = try_remove_memory(nid, start, size); + rc = try_remove_memory(start, size); if (rc) pr_err("%s: Failed to remove memory: %d", __func__, rc); } From 2146b2ddfc85ce254ee0ba518695b8cd05aacddf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 21 Jul 2021 11:42:52 +1000 Subject: [PATCH 422/851] ACPI: memhotplug: memory resources cannot be enabled yet We allocate + initialize everything from scratch. In case enabling the device fails, we free all memory resources. Link: https://lkml.kernel.org/r/20210712124052.26491-5-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Rafael J.
Wysocki Reviewed-by: Oscar Salvador Reviewed-by: Pankaj Gupta Cc: Andy Lutomirski Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Cc: Anton Blanchard Cc: Ard Biesheuvel Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dan Williams Cc: Dave Hansen Cc: Dave Jiang Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Jia He Cc: Joe Perches Cc: Kefeng Wang Cc: Laurent Dufour Cc: Len Brown Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nathan Lynch Cc: Nicholas Piggin Cc: Pankaj Gupta Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Pierre Morel Cc: "Rafael J. Wysocki" Cc: Rich Felker Cc: Scott Cheloha Cc: Sergei Trofimovich Cc: Thiago Jung Bauermann Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/acpi/acpi_memhotplug.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 1d01d9414c407..eb4faf7c5cad0 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -182,10 +182,6 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) * (i.e. memory-hot-remove function) */ list_for_each_entry(info, &mem_device->res_list, list) { - if (info->enabled) { /* just sanity check...*/ - num_enabled++; - continue; - } /* * If the memory block size is zero, please ignore it. * Don't try to do the following memory hotplug flowchart. 
From 0a2817207a4fff0867816ac428e7d3a96b7f05bb Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Wed, 21 Jul 2021 11:42:53 +1000 Subject: [PATCH 423/851] mm/rmap: convert from atomic_t to refcount_t on anon_vma->refcount refcount_t type and corresponding API can protect refcounters from accidental underflow and overflow and further use-after-free situations. Link: https://lkml.kernel.org/r/1626665029-49104-1-git-send-email-xiyuyang19@fudan.edu.cn Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Cc: Alistair Popple Cc: Yang Shi Cc: Shakeel Butt Cc: Hugh Dickins Cc: Xiyu Yang Cc: Miaohe Lin Cc: Cc: Xin Tan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/rmap.h | 8 +++++--- mm/rmap.c | 14 +++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c976cc6de2574..38151efe1a593 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -12,6 +12,8 @@ #include #include +#include + /* * The anon_vma heads a list of private "related" vmas, to scan if * an anonymous page pointing to this anon_vma needs to be unmapped: @@ -36,7 +38,7 @@ struct anon_vma { * the reference is responsible for clearing up the * anon_vma if they are the last user on release */ - atomic_t refcount; + refcount_t refcount; /* * Count of child anon_vmas and VMAs which points to this anon_vma. 
@@ -100,14 +102,14 @@ enum ttu_flags { #ifdef CONFIG_MMU static inline void get_anon_vma(struct anon_vma *anon_vma) { - atomic_inc(&anon_vma->refcount); + refcount_inc(&anon_vma->refcount); } void __put_anon_vma(struct anon_vma *anon_vma); static inline void put_anon_vma(struct anon_vma *anon_vma) { - if (atomic_dec_and_test(&anon_vma->refcount)) + if (refcount_dec_and_test(&anon_vma->refcount)) __put_anon_vma(anon_vma); } diff --git a/mm/rmap.c b/mm/rmap.c index b9eb5c12f3fe1..7badd786e0957 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -88,7 +88,7 @@ static inline struct anon_vma *anon_vma_alloc(void) anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); if (anon_vma) { - atomic_set(&anon_vma->refcount, 1); + refcount_set(&anon_vma->refcount, 1); anon_vma->degree = 1; /* Reference for first vma */ anon_vma->parent = anon_vma; /* @@ -103,7 +103,7 @@ static inline struct anon_vma *anon_vma_alloc(void) static inline void anon_vma_free(struct anon_vma *anon_vma) { - VM_BUG_ON(atomic_read(&anon_vma->refcount)); + VM_BUG_ON(refcount_read(&anon_vma->refcount)); /* * Synchronize against page_lock_anon_vma_read() such that @@ -445,7 +445,7 @@ static void anon_vma_ctor(void *data) struct anon_vma *anon_vma = data; init_rwsem(&anon_vma->rwsem); - atomic_set(&anon_vma->refcount, 0); + refcount_set(&anon_vma->refcount, 0); anon_vma->rb_root = RB_ROOT_CACHED; } @@ -495,7 +495,7 @@ struct anon_vma *page_get_anon_vma(struct page *page) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - if (!atomic_inc_not_zero(&anon_vma->refcount)) { + if (!refcount_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; goto out; } @@ -554,7 +554,7 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page) } /* trylock failed, we got to sleep */ - if (!atomic_inc_not_zero(&anon_vma->refcount)) { + if (!refcount_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; goto out; } @@ -569,7 +569,7 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page) 
rcu_read_unlock(); anon_vma_lock_read(anon_vma); - if (atomic_dec_and_test(&anon_vma->refcount)) { + if (refcount_dec_and_test(&anon_vma->refcount)) { /* * Oops, we held the last refcount, release the lock * and bail -- can't simply use put_anon_vma() because @@ -2221,7 +2221,7 @@ void __put_anon_vma(struct anon_vma *anon_vma) struct anon_vma *root = anon_vma->root; anon_vma_free(anon_vma); - if (root != anon_vma && atomic_dec_and_test(&root->refcount)) + if (root != anon_vma && refcount_dec_and_test(&root->refcount)) anon_vma_free(root); } From bf2d49747ce91d90d771c9e13a73e3170e723a58 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:53 +1000 Subject: [PATCH 424/851] mm/zsmalloc.c: close race window between zs_pool_dec_isolated() and zs_unregister_migration() There is one possible race window between zs_pool_dec_isolated() and zs_unregister_migration() because wait_for_isolated_drain() checks the isolated count without holding class->lock and there is no order inside zs_pool_dec_isolated(). Thus the below race window could be possible: zs_pool_dec_isolated zs_unregister_migration check pool->destroying != 0 pool->destroying = true; smp_mb(); wait_for_isolated_drain() wait for pool->isolated_pages == 0 atomic_long_dec(&pool->isolated_pages); atomic_long_read(&pool->isolated_pages) == 0 Since we observe the pool->destroying (false) before atomic_long_dec() for pool->isolated_pages, waking pool->migration_wait up is missed. Fix this by ensuring that checking pool->destroying happens after the atomic_long_dec(&pool->isolated_pages).
Link: https://lkml.kernel.org/r/20210708115027.7557-1-linmiaohe@huawei.com Fixes: 701d678599d0 ("mm/zsmalloc.c: fix race condition in zs_destroy_pool") Signed-off-by: Miaohe Lin Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Henry Burns Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/zsmalloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 68e8831068f4b..b897ce3b399a1 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1830,10 +1830,11 @@ static inline void zs_pool_dec_isolated(struct zs_pool *pool) VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); atomic_long_dec(&pool->isolated_pages); /* - * There's no possibility of racing, since wait_for_isolated_drain() - * checks the isolated count under &class->lock after enqueuing - * on migration_wait. + * Checking pool->destroying must happen after atomic_long_dec() + * for pool->isolated_pages above. Paired with the smp_mb() in + * zs_unregister_migration(). */ + smp_mb__after_atomic(); if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) wake_up_all(&pool->migration_wait); } From 4fdd8bff9fb9ce3d53d6657f4548ddaf58e4192b Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 21 Jul 2021 11:42:54 +1000 Subject: [PATCH 425/851] mm/zsmalloc.c: combine two atomic ops in zs_pool_dec_isolated() atomic_long_dec_and_test() is equivalent to atomic_long_dec() and atomic_long_read() == 0. Use it to make code more succinct. 
Link: https://lkml.kernel.org/r/20210624123930.1769093-3-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/zsmalloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b897ce3b399a1..3e713ff6261ee 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1828,14 +1828,13 @@ static void putback_zspage_deferred(struct zs_pool *pool, static inline void zs_pool_dec_isolated(struct zs_pool *pool) { VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); - atomic_long_dec(&pool->isolated_pages); /* * Checking pool->destroying must happen after atomic_long_dec() * for pool->isolated_pages above. Paired with the smp_mb() in * zs_unregister_migration(). */ smp_mb__after_atomic(); - if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) + if (atomic_long_dec_and_test(&pool->isolated_pages) && pool->destroying) wake_up_all(&pool->migration_wait); } From 96591fdaf55d9cff9d00050c0f98926ae305f1b9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 21 Jul 2021 11:42:55 +1000 Subject: [PATCH 426/851] mm/highmem: Remove deprecated kmap_atomic kmap_atomic() is being deprecated in favor of kmap_local_page(). Replace the uses of kmap_atomic() within the highmem code. On profiling clear_huge_page() using ftrace an improvement of 62% was observed on the below setup. Setup:- Below data has been collected on Qualcomm's SM7250 SoC THP enabled (kernel v4.19.113) with only CPU-0(Cortex-A55) and CPU-7(Cortex-A76) switched on and set to max frequency, also DDR set to perf governor. 
FTRACE Data:- Base data:- Number of iterations: 48 Mean of allocation time: 349.5 us std deviation: 74.5 us v4 data:- Number of iterations: 48 Mean of allocation time: 131 us std deviation: 32.7 us The following simple userspace experiment to allocate 100MB(BUF_SZ) of pages and writing to it gave us a good insight, we observed an improvement of 42% in allocation and writing timings. ------------------------------------------------------------- Test code snippet ------------------------------------------------------------- clock_start(); buf = malloc(BUF_SZ); /* Allocate 100 MB of memory */ for(i=0; i < BUF_SZ_PAGES; i++) { *((int *)(buf + (i*PAGE_SIZE))) = 1; } clock_end(); ------------------------------------------------------------- Malloc test timings for 100MB anon allocation:- Base data:- Number of iterations: 100 Mean of allocation time: 31831 us std deviation: 4286 us v4 data:- Number of iterations: 100 Mean of allocation time: 18193 us std deviation: 4915 us Link: https://lkml.kernel.org/r/20210204073255.20769-2-prathu.baronia@oneplus.com Signed-off-by: Ira Weiny Signed-off-by: Prathu Baronia Cc: Ira Weiny Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/highmem.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b4c49f9cc379e..31ebee36f26ce 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -143,9 +143,9 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { - void *addr = kmap_atomic(page); + void *addr = kmap_local_page(page); clear_user_page(addr, vaddr, page); - kunmap_atomic(addr); + kunmap_local(addr); } #endif @@ -177,9 +177,9 @@ alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, static 
inline void clear_highpage(struct page *page) { - void *kaddr = kmap_atomic(page); + void *kaddr = kmap_local_page(page); clear_page(kaddr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); } #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE @@ -202,7 +202,7 @@ static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { - void *kaddr = kmap_atomic(page); + void *kaddr = kmap_local_page(page); unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); @@ -213,7 +213,7 @@ static inline void zero_user_segments(struct page *page, if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); - kunmap_atomic(kaddr); + kunmap_local(kaddr); for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } @@ -238,11 +238,11 @@ static inline void copy_user_highpage(struct page *to, struct page *from, { char *vfrom, *vto; - vfrom = kmap_atomic(from); - vto = kmap_atomic(to); + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); copy_user_page(vto, vfrom, vaddr, to); - kunmap_atomic(vto); - kunmap_atomic(vfrom); + kunmap_local(vto); + kunmap_local(vfrom); } #endif @@ -253,11 +253,11 @@ static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; - vfrom = kmap_atomic(from); - vto = kmap_atomic(to); + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); copy_page(vto, vfrom); - kunmap_atomic(vto); - kunmap_atomic(vfrom); + kunmap_local(vto); + kunmap_local(vfrom); } #endif From 523472ee196d857501412baa038692b0d502f2c4 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 21 Jul 2021 11:42:56 +1000 Subject: [PATCH 427/851] kfence: show cpu and timestamp in alloc/free info Record cpu and timestamp on allocations and frees, and show them in reports. Upon an error, this can help correlate earlier messages in the kernel log via allocation and free timestamps. 
Link: https://lkml.kernel.org/r/20210714175312.2947941-1-elver@google.com Suggested-by: Joern Engel Signed-off-by: Marco Elver Acked-by: Alexander Potapenko Acked-by: Joern Engel Cc: Yuanyuan Zhong Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/dev-tools/kfence.rst | 98 ++++++++++++++++-------------- mm/kfence/core.c | 3 + mm/kfence/kfence.h | 2 + mm/kfence/report.c | 19 ++++-- 4 files changed, 71 insertions(+), 51 deletions(-) diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst index fdf04e741ea57..0fbe3308bf37f 100644 --- a/Documentation/dev-tools/kfence.rst +++ b/Documentation/dev-tools/kfence.rst @@ -65,25 +65,27 @@ Error reports A typical out-of-bounds access looks like this:: ================================================================== - BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa3/0x22b + BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa6/0x234 - Out-of-bounds read at 0xffffffffb672efff (1B left of kfence-#17): - test_out_of_bounds_read+0xa3/0x22b - kunit_try_run_case+0x51/0x85 + Out-of-bounds read at 0xffff8c3f2e291fff (1B left of kfence-#72): + test_out_of_bounds_read+0xa6/0x234 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - kfence-#17 [0xffffffffb672f000-0xffffffffb672f01f, size=32, cache=kmalloc-32] allocated by task 507: - test_alloc+0xf3/0x25b - test_out_of_bounds_read+0x98/0x22b - kunit_try_run_case+0x51/0x85 + kfence-#72: 0xffff8c3f2e292000-0xffff8c3f2e29201f, size=32, cache=kmalloc-32 + + allocated by task 484 on cpu 0 at 32.919330s: + test_alloc+0xfe/0x738 + test_out_of_bounds_read+0x9b/0x234 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - CPU: 4 PID: 107 Comm: kunit_try_catch Not tainted 5.8.0-rc6+ #7 - Hardware name: QEMU Standard PC (i440FX + PIIX, 
1996), BIOS 1.13.0-1 04/01/2014 + CPU: 0 PID: 484 Comm: kunit_try_catch Not tainted 5.13.0-rc3+ #7 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 ================================================================== The header of the report provides a short summary of the function involved in @@ -96,30 +98,32 @@ Use-after-free accesses are reported as:: ================================================================== BUG: KFENCE: use-after-free read in test_use_after_free_read+0xb3/0x143 - Use-after-free read at 0xffffffffb673dfe0 (in kfence-#24): + Use-after-free read at 0xffff8c3f2e2a0000 (in kfence-#79): test_use_after_free_read+0xb3/0x143 - kunit_try_run_case+0x51/0x85 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - kfence-#24 [0xffffffffb673dfe0-0xffffffffb673dfff, size=32, cache=kmalloc-32] allocated by task 507: - test_alloc+0xf3/0x25b + kfence-#79: 0xffff8c3f2e2a0000-0xffff8c3f2e2a001f, size=32, cache=kmalloc-32 + + allocated by task 488 on cpu 2 at 33.871326s: + test_alloc+0xfe/0x738 test_use_after_free_read+0x76/0x143 - kunit_try_run_case+0x51/0x85 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - freed by task 507: + freed by task 488 on cpu 2 at 33.871358s: test_use_after_free_read+0xa8/0x143 - kunit_try_run_case+0x51/0x85 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - CPU: 4 PID: 109 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7 - Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 + CPU: 2 PID: 488 Comm: kunit_try_catch Tainted: G B 5.13.0-rc3+ #7 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 ================================================================== KFENCE 
also reports on invalid frees, such as double-frees:: @@ -127,30 +131,32 @@ KFENCE also reports on invalid frees, such as double-frees:: ================================================================== BUG: KFENCE: invalid free in test_double_free+0xdc/0x171 - Invalid free of 0xffffffffb6741000: + Invalid free of 0xffff8c3f2e2a4000 (in kfence-#81): test_double_free+0xdc/0x171 - kunit_try_run_case+0x51/0x85 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - kfence-#26 [0xffffffffb6741000-0xffffffffb674101f, size=32, cache=kmalloc-32] allocated by task 507: - test_alloc+0xf3/0x25b + kfence-#81: 0xffff8c3f2e2a4000-0xffff8c3f2e2a401f, size=32, cache=kmalloc-32 + + allocated by task 490 on cpu 1 at 34.175321s: + test_alloc+0xfe/0x738 test_double_free+0x76/0x171 - kunit_try_run_case+0x51/0x85 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - freed by task 507: + freed by task 490 on cpu 1 at 34.175348s: test_double_free+0xa8/0x171 - kunit_try_run_case+0x51/0x85 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - CPU: 4 PID: 111 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7 - Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 + CPU: 1 PID: 490 Comm: kunit_try_catch Tainted: G B 5.13.0-rc3+ #7 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 ================================================================== KFENCE also uses pattern-based redzones on the other side of an object's guard @@ -160,23 +166,25 @@ These are reported on frees:: ================================================================== BUG: KFENCE: memory corruption in test_kmalloc_aligned_oob_write+0xef/0x184 - Corrupted memory at 0xffffffffb6797ff9 [ 0xac . . 
. . . . ] (in kfence-#69): + Corrupted memory at 0xffff8c3f2e33aff9 [ 0xac . . . . . . ] (in kfence-#156): test_kmalloc_aligned_oob_write+0xef/0x184 - kunit_try_run_case+0x51/0x85 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - kfence-#69 [0xffffffffb6797fb0-0xffffffffb6797ff8, size=73, cache=kmalloc-96] allocated by task 507: - test_alloc+0xf3/0x25b + kfence-#156: 0xffff8c3f2e33afb0-0xffff8c3f2e33aff8, size=73, cache=kmalloc-96 + + allocated by task 502 on cpu 7 at 42.159302s: + test_alloc+0xfe/0x738 test_kmalloc_aligned_oob_write+0x57/0x184 - kunit_try_run_case+0x51/0x85 + kunit_try_run_case+0x61/0xa0 kunit_generic_run_threadfn_adapter+0x16/0x30 - kthread+0x137/0x160 + kthread+0x176/0x1b0 ret_from_fork+0x22/0x30 - CPU: 4 PID: 120 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7 - Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 + CPU: 7 PID: 502 Comm: kunit_try_catch Tainted: G B 5.13.0-rc3+ #7 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 ================================================================== For such errors, the address where the corruption occurred as well as the diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 575c685aa6422..7a97db8bc8e75 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -196,6 +197,8 @@ static noinline void metadata_update_state(struct kfence_metadata *meta, */ track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1); track->pid = task_pid_nr(current); + track->cpu = raw_smp_processor_id(); + track->ts_nsec = local_clock(); /* Same source as printk timestamps. 
*/ /* * Pairs with READ_ONCE() in diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h index 24065321ff8a7..c1f23c61e5f91 100644 --- a/mm/kfence/kfence.h +++ b/mm/kfence/kfence.h @@ -36,6 +36,8 @@ enum kfence_object_state { /* Alloc/free tracking information. */ struct kfence_track { pid_t pid; + int cpu; + u64 ts_nsec; int num_stack_entries; unsigned long stack_entries[KFENCE_STACK_DEPTH]; }; diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 2a319c21c939a..cbdd8d442d0bc 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -100,6 +101,13 @@ static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadat bool show_alloc) { const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track; + u64 ts_sec = track->ts_nsec; + unsigned long rem_nsec = do_div(ts_sec, NSEC_PER_SEC); + + /* Timestamp matches printk timestamp format. */ + seq_con_printf(seq, "%s by task %d on cpu %d at %lu.%06lus:\n", + show_alloc ? "allocated" : "freed", track->pid, + track->cpu, (unsigned long)ts_sec, rem_nsec / 1000); if (track->num_stack_entries) { /* Skip allocation/free internals stack. */ @@ -126,15 +134,14 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met return; } - seq_con_printf(seq, - "kfence-#%td [0x%p-0x%p" - ", size=%d, cache=%s] allocated by task %d:\n", - meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size, - (cache && cache->name) ? cache->name : "", meta->alloc_track.pid); + seq_con_printf(seq, "kfence-#%td: 0x%p-0x%p, size=%d, cache=%s\n\n", + meta - kfence_metadata, (void *)start, (void *)(start + size - 1), + size, (cache && cache->name) ? 
cache->name : ""); + kfence_print_stack(seq, meta, true); if (meta->state == KFENCE_OBJECT_FREED) { - seq_con_printf(seq, "\nfreed by task %d:\n", meta->free_track.pid); + seq_con_printf(seq, "\n"); kfence_print_stack(seq, meta, false); } } From 901a96ffa41cf6793a831a38ea5c08bc66086f35 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 21 Jul 2021 11:42:57 +1000 Subject: [PATCH 428/851] fs/buffer.c: add debug print for __getblk_gfp() stall problem Among syzbot's unresolved hung task reports, 18 out of 65 reports contain __getblk_gfp() line in the backtrace. Since there is a comment block that says that __getblk_gfp() will lock up the machine if try_to_free_buffers() attempt from grow_dev_page() is failing, let's start from checking whether syzbot is hitting that case. This change will be removed after the bug is fixed. Link: http://lkml.kernel.org/r/9b9fcdda-c347-53ee-fdbb-8a7d11cf430e@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Cc: Dmitry Vyukov Cc: Al Viro Cc: Mel Gorman Cc: Michal Hocko Cc: Andi Kleen Cc: Jan Kara Cc: Jeff Layton Cc: Cc: Tim Chen Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/buffer.c | 50 +++++++++++++++++++++++++++++++++++++++++-- include/linux/sched.h | 7 ++++++ lib/Kconfig.debug | 6 ++++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 6290c3afdba48..a09e2c864cc53 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -956,10 +956,20 @@ grow_dev_page(struct block_device *bdev, sector_t block, end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits, size); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x01; +#endif goto done; } - if (!try_to_free_buffers(page)) + if (!try_to_free_buffers(page)) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x02; +#endif goto failed; + } +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x04; +#endif } /* @@ -979,6 +989,9 @@ grow_dev_page(struct 
block_device *bdev, sector_t block, spin_unlock(&inode->i_mapping->private_lock); done: ret = (block < end_block) ? 1 : -ENXIO; +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x08; +#endif failed: unlock_page(page); put_page(page); @@ -1030,6 +1043,12 @@ __getblk_slow(struct block_device *bdev, sector_t block, return NULL; } +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_stamp = jiffies; + current->getblk_executed = 0; + current->getblk_bh_count = 0; + current->getblk_bh_state = 0; +#endif for (;;) { struct buffer_head *bh; int ret; @@ -1041,6 +1060,18 @@ __getblk_slow(struct block_device *bdev, sector_t block, ret = grow_buffers(bdev, block, size, gfp); if (ret < 0) return NULL; + +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + if (!time_after(jiffies, current->getblk_stamp + 3 * HZ)) + continue; + printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx\n", + current->comm, current->pid, current->getblk_executed, + current->getblk_bh_count, current->getblk_bh_state); + current->getblk_executed = 0; + current->getblk_bh_count = 0; + current->getblk_bh_state = 0; + current->getblk_stamp = jiffies; +#endif } } @@ -3187,6 +3218,11 @@ EXPORT_SYMBOL(sync_dirty_buffer); */ static inline int buffer_busy(struct buffer_head *bh) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x80; + current->getblk_bh_count = atomic_read(&bh->b_count); + current->getblk_bh_state = bh->b_state; +#endif return atomic_read(&bh->b_count) | (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock))); } @@ -3225,11 +3261,18 @@ int try_to_free_buffers(struct page *page) int ret = 0; BUG_ON(!PageLocked(page)); - if (PageWriteback(page)) + if (PageWriteback(page)) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x10; +#endif return 0; + } if (mapping == NULL) { /* can this still happen? 
*/ ret = drop_buffers(page, &buffers_to_free); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x20; +#endif goto out; } @@ -3253,6 +3296,9 @@ int try_to_free_buffers(struct page *page) if (ret) cancel_dirty_page(page); spin_unlock(&mapping->private_lock); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x40; +#endif out: if (buffers_to_free) { struct buffer_head *bh = buffers_to_free; diff --git a/include/linux/sched.h b/include/linux/sched.h index ec8d07d88641c..b36fe121da1d2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1400,6 +1400,13 @@ struct task_struct { struct llist_head kretprobe_instances; #endif +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + unsigned long getblk_stamp; + unsigned int getblk_executed; + unsigned int getblk_bh_count; + unsigned long getblk_bh_state; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5ddd575159fb8..b91d8239c5ad2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1807,6 +1807,12 @@ config IO_STRICT_DEVMEM menu "$(SRCARCH) Debugging" +config DEBUG_AID_FOR_SYZBOT + bool "Additional debug code for syzbot" + default n + help + This option is intended for testing by syzbot. + source "arch/$(SRCARCH)/Kconfig.debug" endmenu From c72a928c76741b9499fe4332ff53f36be774272e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 21 Jul 2021 11:42:58 +1000 Subject: [PATCH 429/851] fs/buffer.c: dump more info for __getblk_gfp() stall problem We need to dump more variables on top of "fs/buffer.c: add debug print for __getblk_gfp() stall problem". 
Link: http://lkml.kernel.org/r/12239545-7d8a-820f-48ba-952e2e98a05c@i-love.sakura.ne.jp Signed-off-by: Tetsuo Handa Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/buffer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index a09e2c864cc53..f5384cff7e0c5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1064,9 +1064,15 @@ __getblk_slow(struct block_device *bdev, sector_t block, #ifdef CONFIG_DEBUG_AID_FOR_SYZBOT if (!time_after(jiffies, current->getblk_stamp + 3 * HZ)) continue; - printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx\n", + printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx bdev_super_blocksize=%ld size=%u bdev_super_blocksize_bits=%d bdev_inode_blkbits=%d\n", current->comm, current->pid, current->getblk_executed, - current->getblk_bh_count, current->getblk_bh_state); + current->getblk_bh_count, current->getblk_bh_state, + IS_ERR_OR_NULL(bdev->bd_super) ? -1L : + bdev->bd_super->s_blocksize, size, + IS_ERR_OR_NULL(bdev->bd_super) ? -1 : + bdev->bd_super->s_blocksize_bits, + IS_ERR_OR_NULL(bdev->bd_inode) ? -1 : + bdev->bd_inode->i_blkbits); current->getblk_executed = 0; current->getblk_bh_count = 0; current->getblk_bh_state = 0; From a01918d6d9c00304d3a2fdcf857ab15f3e09dcff Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 21 Jul 2021 11:42:59 +1000 Subject: [PATCH 430/851] kernel/hung_task.c: Monitor killed tasks. syzbot's current top report is "no output from test machine" where the userspace process failed to spawn a new test process for 300 seconds for some reason. One of reasons which can result in this report is that an already spawned test process was unable to terminate (e.g. trapped at an unkillable retry loop due to some bug) after SIGKILL was sent to that process. Therefore, reporting when a thread is failing to terminate despite a fatal signal is pending would give us more useful information. 
In the context of syzbot's testing where there are only 2 CPUs in the target VM (which means that there are only a small number of threads and not so much memory) and threads get SIGKILL after 5 seconds from fork(), being unable to reach do_exit() within 10 seconds is likely a sign that something went wrong. Therefore, I would like to try this patch in linux-next.git for feasibility testing whether this patch helps find more bugs and reproducers for such bugs, by bringing "unable to terminate threads" reports out of "no output from test machine" reports. A potential bad effect of this patch is that kernel code becomes killable without addressing the root cause of being unable to terminate, because use of a killable wait will bypass both the TASK_UNINTERRUPTIBLE stall test and the SIGKILL after 5 seconds behavior, which will result in failing to detect the problem in real systems where SIGKILL won't be sent after 5 seconds when something went wrong. This version shares existing sysctl settings (e.g. check interval, timeout, whether to panic) used for detecting TASK_UNINTERRUPTIBLE threads. We will likely want to use different sysctl settings for monitoring killed threads. But let's start as linux-next.git patch without introducing new sysctl settings. We can add sysctl settings before sending to linux.git. Link: http://lkml.kernel.org/r/60d1d7f6-b201-3dcb-a51b-76a31bcfa919@i-love.sakura.ne.jp Signed-off-by: Tetsuo Handa Cc: Dmitry Vyukov Cc: Petr Mladek Cc: Ingo Molnar Cc: Peter Zijlstra Cc: "Paul E.
McKenney" Cc: Vitaly Kuznetsov Cc: Liu Chuansheng Cc: Valdis Kletnieks Cc: linux-kernel@vger.kernel.org Cc: Dmitry Vyukov Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/sched.h | 1 + kernel/hung_task.c | 44 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index b36fe121da1d2..183a1e0e6fb30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -990,6 +990,7 @@ struct task_struct { #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; unsigned long last_switch_time; + unsigned long killed_time; #endif /* Filesystem information: */ struct fs_struct *fs; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 9888e2bc8c767..8cc07e7f29aa1 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -145,6 +145,47 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) touch_nmi_watchdog(); } +static void check_killed_task(struct task_struct *t, unsigned long timeout) +{ + unsigned long stamp = t->killed_time; + + /* + * Ensure the task is not frozen. + * Also, skip vfork and any other user process that freezer should skip. + */ + if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) + return; + /* + * Skip threads which are already inside do_exit(), for exit_mm() etc. + * might take many seconds. 
+ */ + if (t->flags & PF_EXITING) + return; + if (!stamp) { + stamp = jiffies; + if (!stamp) + stamp++; + t->killed_time = stamp; + return; + } + if (time_is_after_jiffies(stamp + timeout * HZ)) + return; + trace_sched_process_hang(t); + if (sysctl_hung_task_panic) { + console_verbose(); + hung_task_call_panic = true; + } + /* + * This thread failed to terminate for more than + * sysctl_hung_task_timeout_secs seconds, complain: + */ + pr_err("INFO: task %s:%d can't die for more than %ld seconds.\n", + t->comm, t->pid, (jiffies - stamp) / HZ); + sched_show_task(t); + hung_task_show_lock = true; + touch_nmi_watchdog(); +} + /* * To avoid extending the RCU grace period for an unbounded amount of time, * periodically exit the critical section and enter a new one. @@ -196,6 +237,9 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) goto unlock; last_break = jiffies; } + /* Check threads which are about to terminate. */ + if (unlikely(fatal_signal_pending(t))) + check_killed_task(t, timeout); /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE) check_hung_task(t, timeout); From ada5874e03684e5e7297df10894c2531b3262a64 Mon Sep 17 00:00:00 2001 From: Feng Zhou Date: Wed, 21 Jul 2021 11:43:00 +1000 Subject: [PATCH 431/851] fs/proc/kcore.c: add mmap interface When we do the kernel monitor, use the DRGN (https://github.com/osandov/drgn) access to kernel data structures, found that the system calls a lot. DRGN is implemented by reading /proc/kcore. After looking at the kcore code, it is found that kcore does not implement mmap, resulting in frequent context switching triggered by read. Therefore, we want to add mmap interface to optimize performance. Since vmalloc and module areas will change with allocation and release, consistency cannot be guaranteed, so mmap interface only maps KCORE_TEXT and KCORE_RAM. The test results: 1. 
the default version of kcore real 11.00 user 8.53 sys 3.59 % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 99.64 128.578319 12 11168701 pread64 ... ------ ----------- ----------- --------- --------- ---------------- 100.00 129.042853 11193748 966 total 2. added kcore for the mmap interface real 6.44 user 7.32 sys 0.24 % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 32.94 0.130120 24 5317 315 futex 11.66 0.046077 21 2231 1 lstat 9.23 0.036449 177 206 mmap ... ------ ----------- ----------- --------- --------- ---------------- 100.00 0.395077 25435 971 total The test results show that the number of system calls and time consumption are significantly reduced. Link: https://lkml.kernel.org/r/20210704062208.7898-1-zhoufeng.zf@bytedance.com Co-developed-by: Ying Chen Signed-off-by: Ying Chen Signed-off-by: Feng Zhou Cc: Alexey Dobriyan Cc: Mike Rapoport Cc: Muchun Song Cc: Chengming Zhou Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/kcore.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 982e694aae77d..3f148759a5fd1 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -614,11 +614,84 @@ static int release_kcore(struct inode *inode, struct file *file) return 0; } +static vm_fault_t mmap_kcore_fault(struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct kcore_mmap_ops = { + .fault = mmap_kcore_fault, +}; + +static int mmap_kcore(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + u64 start, end, pfn; + int nphdr; + size_t data_offset; + size_t phdrs_len, notes_len; + struct kcore_list *m = NULL; + int ret = 0; + + down_read(&kclist_lock); + + get_kcore_size(&nphdr, &phdrs_len, ¬es_len, &data_offset); + + data_offset &= PAGE_MASK; + 
start = (u64)vma->vm_pgoff << PAGE_SHIFT; + if (start < data_offset) { + ret = -EINVAL; + goto out; + } + start = kc_offset_to_vaddr(start - data_offset); + end = start + size; + + list_for_each_entry(m, &kclist_head, list) { + if (start >= m->addr && end <= m->addr + m->size) + break; + } + + if (&m->list == &kclist_head) { + ret = -EINVAL; + goto out; + } + + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) { + ret = -EPERM; + goto out; + } + + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); + vma->vm_flags |= VM_MIXEDMAP; + vma->vm_ops = &kcore_mmap_ops; + + if (kern_addr_valid(start)) { + if (m->type == KCORE_RAM) + pfn = __pa(start) >> PAGE_SHIFT; + else if (m->type == KCORE_TEXT) + pfn = __pa_symbol(start) >> PAGE_SHIFT; + else { + ret = -EFAULT; + goto out; + } + + ret = remap_pfn_range(vma, vma->vm_start, pfn, size, + vma->vm_page_prot); + } else { + ret = -EFAULT; + } + +out: + up_read(&kclist_lock); + return ret; +} + static const struct proc_ops kcore_proc_ops = { .proc_read = read_kcore, .proc_open = open_kcore, .proc_release = release_kcore, .proc_lseek = default_llseek, + .proc_mmap = mmap_kcore, }; /* just remember that we have to update kcore */ From 53eec7dd47576bc4347d0864919820acd6271101 Mon Sep 17 00:00:00 2001 From: Ohhoon Kwon Date: Wed, 21 Jul 2021 11:43:01 +1000 Subject: [PATCH 432/851] connector: send event on write to /proc/[pid]/comm While comm change event via prctl has been reported to proc connector by 'commit f786ecba4158 ("connector: add comm change event report to proc connector")', connector listeners were missing comm changes by explicit writes on /proc/[pid]/comm. Let explicit writes on /proc/[pid]/comm report to proc connector. Link: https://lkml.kernel.org/r/20210701133458epcms1p68e9eb9bd0eee8903ba26679a37d9d960@epcms1p6 Signed-off-by: Ohhoon Kwon Cc: Ingo Molnar Cc: David S. Miller Cc: Christian Brauner Cc: Eric W. 
Biederman Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index e5b5f7709d48f..533d5836eb9a4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -95,6 +95,7 @@ #include #include #include +#include #include #include "internal.h" #include "fd.h" @@ -1674,8 +1675,10 @@ static ssize_t comm_write(struct file *file, const char __user *buf, if (!p) return -ESRCH; - if (same_thread_group(current, p)) + if (same_thread_group(current, p)) { set_task_comm(p, buffer); + proc_comm_connector(p); + } else count = -EINVAL; From 4bc73b47f90ddee2cc86dc0551e928116ac68860 Mon Sep 17 00:00:00 2001 From: Julius Hemanth Pitti Date: Wed, 21 Jul 2021 11:43:01 +1000 Subject: [PATCH 433/851] proc/sysctl: make protected_* world readable protected_* files have 600 permissions which prevents non-superuser from reading them. Container like "AWS greengrass" refuse to launch unless protected_hardlinks and protected_symlinks are set. When containers like these run with "userns-remap" or "--user" mapping container's root to non-superuser on host, they fail to run due to denied read access to these files. As these protections are hardly a secret, and do not possess any security risk, making them world readable. Though above greengrass usecase needs read access to only protected_hardlinks and protected_symlinks files, setting all other protected_* files to 644 to keep consistency. 
Link: http://lkml.kernel.org/r/20200709235115.56954-1-jpitti@cisco.com Fixes: 800179c9b8a1 ("fs: add link restrictions") Signed-off-by: Julius Hemanth Pitti Acked-by: Kees Cook Cc: Iurii Zaikin Cc: Luis Chamberlain Cc: Ingo Molnar Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/sysctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 272f4a272f8cf..82d6ff6d85cd5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -3275,7 +3275,7 @@ static struct ctl_table fs_table[] = { .procname = "protected_symlinks", .data = &sysctl_protected_symlinks, .maxlen = sizeof(int), - .mode = 0600, + .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, @@ -3284,7 +3284,7 @@ static struct ctl_table fs_table[] = { .procname = "protected_hardlinks", .data = &sysctl_protected_hardlinks, .maxlen = sizeof(int), - .mode = 0600, + .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, @@ -3293,7 +3293,7 @@ static struct ctl_table fs_table[] = { .procname = "protected_fifos", .data = &sysctl_protected_fifos, .maxlen = sizeof(int), - .mode = 0600, + .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, @@ -3302,7 +3302,7 @@ static struct ctl_table fs_table[] = { .procname = "protected_regular", .data = &sysctl_protected_regular, .maxlen = sizeof(int), - .mode = 0600, + .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, From 81da5c0e19f08786bf765bd9b9c87bdde0c0fa28 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Jul 2021 11:43:02 +1000 Subject: [PATCH 434/851] arch: Kconfig: fix spelling mistake "seperate" -> "separate" There is a spelling mistake in the Kconfig text. Fix it.
Link: https://lkml.kernel.org/r/20210704095207.37342-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index 1baac7bfdd441..4f7596092ad8f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -914,7 +914,7 @@ config HAVE_SOFTIRQ_ON_OWN_STACK bool help Architecture provides a function to run __do_softirq() on a - seperate stack. + separate stack. config PGTABLE_LEVELS int From 0d113c320f68e537065c1f745afbbf9481cf5f24 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 21 Jul 2021 11:43:03 +1000 Subject: [PATCH 435/851] math: make RATIONAL tristate Patch series "math: RATIONAL and RATIONAL_KUNIT_TEST improvements". This series makes the RATIONAL symbol tristate, so it is not forced builtin if all users are modular, and makes the RATIONAL_KUNIT_TEST depend on RATIONAL, to avoid enabling RATIONAL if there are no real users. This patch (of 2): All but one symbols that select RATIONAL are tristate, but RATIONAL itself is bool. Change it to tristate, so the rational fractions support code can be modular if no builtin code relies on it. Link: https://lkml.kernel.org/r/20210706100945.3803694-1-geert@linux-m68k.org Link: https://lkml.kernel.org/r/20210706100945.3803694-2-geert@linux-m68k.org Signed-off-by: Geert Uytterhoeven Reviewed-by: Andy Shevchenko Cc: Trent Piepho Cc: Colin Ian King Cc: Brendan Higgins Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/math/Kconfig | 2 +- lib/math/rational.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/math/Kconfig b/lib/math/Kconfig index f19bc9734fa7c..0634b428d0cb7 100644 --- a/lib/math/Kconfig +++ b/lib/math/Kconfig @@ -14,4 +14,4 @@ config PRIME_NUMBERS If unsure, say N. 
config RATIONAL - bool + tristate diff --git a/lib/math/rational.c b/lib/math/rational.c index c0ab51d8fbb98..ec59d426ea638 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * calculate best rational approximation for a given fraction @@ -106,3 +107,5 @@ void rational_best_approximation( } EXPORT_SYMBOL(rational_best_approximation); + +MODULE_LICENSE("GPL v2"); From 861e4188f2a9f6a2d7230204f018c486ac0b4e58 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 21 Jul 2021 11:43:04 +1000 Subject: [PATCH 436/851] math: RATIONAL_KUNIT_TEST should depend on RATIONAL instead of selecting it RATIONAL_KUNIT_TEST selects RATIONAL, thus enabling an optional feature the user may not want to have enabled. Fix this by making the test depend on RATIONAL instead. Link: https://lkml.kernel.org/r/20210706100945.3803694-3-geert@linux-m68k.org Fixes: b6c75c4afceb8bc0 ("lib/math/rational: add Kunit test cases") Signed-off-by: Geert Uytterhoeven Cc: Andy Shevchenko Cc: Brendan Higgins Cc: Colin Ian King Cc: Trent Piepho Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/Kconfig.debug | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b91d8239c5ad2..2193f8f9fd8f6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2466,8 +2466,7 @@ config SLUB_KUNIT_TEST config RATIONAL_KUNIT_TEST tristate "KUnit test for rational.c" if !KUNIT_ALL_TESTS - depends on KUNIT - select RATIONAL + depends on KUNIT && RATIONAL default KUNIT_ALL_TESTS help This builds the rational math unit test. From cc246e078476fe43b97e8d26c22cb1f1d816c738 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 21 Jul 2021 11:43:05 +1000 Subject: [PATCH 437/851] lib/string: optimized memcpy Patch series "lib/string: optimized mem* functions", v2. 
Rewrite the generic mem{cpy,move,set} so that memory is accessed with the widest size possible, but without doing unaligned accesses. This was originally posted as C string functions for RISC-V[1], but as there was no specific RISC-V code, it was proposed for the generic lib/string.c implementation. Tested on RISC-V and on x86_64 by undefining __HAVE_ARCH_MEM{CPY,SET,MOVE} and HAVE_EFFICIENT_UNALIGNED_ACCESS. This is the performance of memcpy() and memset() on a RISC-V machine with a 32 mbyte buffer: memcpy: original aligned: 75 Mb/s original unaligned: 75 Mb/s new aligned: 114 Mb/s new unaligned: 107 Mb/s memset: original aligned: 140 Mb/s original unaligned: 140 Mb/s new aligned: 241 Mb/s new unaligned: 241 Mb/s The size increase is negligible: $ scripts/bloat-o-meter vmlinux.orig vmlinux add/remove: 0/0 grow/shrink: 4/1 up/down: 427/-6 (421) Function old new delta memcpy 29 351 +322 memset 29 117 +88 strlcat 68 78 +10 strlcpy 50 57 +7 memmove 56 50 -6 Total: Before=8556964, After=8557385, chg +0.00% These functions will be used for RISC-V initially. [1] https://lore.kernel.org/linux-riscv/20210617152754.17960-1-mcroce@linux.microsoft.com/ The only architecture which will use all three functions will be riscv, while memmove() will be used by arc, h8300, hexagon, ia64, openrisc and parisc. Keep in mind that memmove() isn't anything special, it just calls memcpy() when possible (e.g. buffers not overlapping), and falls back to the byte by byte copy otherwise. In the future we can write two functions, one which copies forward and another one which copies backward, and call the right one depending on the buffers' positions. Then, we could alias memcpy() and memmove(), as proposed by Linus: https://bugzilla.redhat.com/show_bug.cgi?id=638477#c132 This patch (of 3): Rewrite the generic memcpy() to copy a word at a time, without generating unaligned accesses.
The procedure is made of three steps: First copy data one byte at time until the destination buffer is aligned to a long boundary. Then copy the data one long at time shifting the current and the next long to compose a long at every cycle. Finally, copy the remainder one byte at time. This is the improvement on RISC-V: original aligned: 75 Mb/s original unaligned: 75 Mb/s new aligned: 114 Mb/s new unaligned: 107 Mb/s and this the binary size increase according to bloat-o-meter: Function old new delta memcpy 36 324 +288 Link: https://lkml.kernel.org/r/20210702123153.14093-1-mcroce@linux.microsoft.com Link: https://lkml.kernel.org/r/20210702123153.14093-2-mcroce@linux.microsoft.com Signed-off-by: Matteo Croce Cc: Nick Kossifidis Cc: Guo Ren Cc: Christoph Hellwig Cc: David Laight Cc: Palmer Dabbelt Cc: Emil Renner Berthing Cc: Drew Fustini Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/string.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/lib/string.c b/lib/string.c index 77bd0b1d32967..1053044883197 100644 --- a/lib/string.c +++ b/lib/string.c @@ -33,6 +33,23 @@ #include #include +#define BYTES_LONG sizeof(long) +#define WORD_MASK (BYTES_LONG - 1) +#define MIN_THRESHOLD (BYTES_LONG * 2) + +/* convenience union to avoid cast between different pointer types */ +union types { + u8 *as_u8; + unsigned long *as_ulong; + uintptr_t as_uptr; +}; + +union const_types { + const u8 *as_u8; + const unsigned long *as_ulong; + uintptr_t as_uptr; +}; + #ifndef __HAVE_ARCH_STRNCASECMP /** * strncasecmp - Case insensitive, length-limited string comparison @@ -869,6 +886,13 @@ EXPORT_SYMBOL(memset64); #endif #ifndef __HAVE_ARCH_MEMCPY + +#ifdef __BIG_ENDIAN +#define MERGE_UL(h, l, d) ((h) << ((d) * 8) | (l) >> ((BYTES_LONG - (d)) * 8)) +#else +#define MERGE_UL(h, l, d) ((h) >> ((d) * 8) | (l) << ((BYTES_LONG - (d)) * 8)) +#endif + /** * memcpy - Copy one area of memory 
to another * @dest: Where to copy to @@ -880,14 +904,64 @@ EXPORT_SYMBOL(memset64); */ void *memcpy(void *dest, const void *src, size_t count) { - char *tmp = dest; - const char *s = src; + union const_types s = { .as_u8 = src }; + union types d = { .as_u8 = dest }; + int distance = 0; + + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + if (count < MIN_THRESHOLD) + goto copy_remainder; + + /* Copy a byte at time until destination is aligned. */ + for (; d.as_uptr & WORD_MASK; count--) + *d.as_u8++ = *s.as_u8++; + + distance = s.as_uptr & WORD_MASK; + } + + if (distance) { + unsigned long last, next; + /* + * s is distance bytes ahead of d, and d just reached + * the alignment boundary. Move s backward to word align it + * and shift data to compensate for distance, in order to do + * word-by-word copy. + */ + s.as_u8 -= distance; + + next = s.as_ulong[0]; + for (; count >= BYTES_LONG; count -= BYTES_LONG) { + last = next; + next = s.as_ulong[1]; + + d.as_ulong[0] = MERGE_UL(last, next, distance); + + d.as_ulong++; + s.as_ulong++; + } + + /* Restore s with the original offset. */ + s.as_u8 += distance; + } else { + /* + * If the source and dest lower bits are the same, do a simple + * 32/64 bit wide copy. + */ + for (; count >= BYTES_LONG; count -= BYTES_LONG) + *d.as_ulong++ = *s.as_ulong++; + } + +copy_remainder: while (count--) - *tmp++ = *s++; + *d.as_u8++ = *s.as_u8++; + return dest; } EXPORT_SYMBOL(memcpy); + +#undef MERGE_UL + #endif #ifndef __HAVE_ARCH_MEMMOVE From 2b30837aff7e2c04365df97ebc3e6bcc9118094f Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 21 Jul 2021 11:43:06 +1000 Subject: [PATCH 438/851] lib/string: optimized memmove When the destination buffer is before the source one, or when the buffers doesn't overlap, it's safe to use memcpy() instead, which is optimized to use a bigger data size possible. This "optimization" only covers a common case. 
In the future, proper code which does the same thing as memcpy() but copies backwards can be added. Link: https://lkml.kernel.org/r/20210702123153.14093-3-mcroce@linux.microsoft.com Signed-off-by: Matteo Croce Cc: Christoph Hellwig Cc: David Laight Cc: Drew Fustini Cc: Emil Renner Berthing Cc: Guo Ren Cc: Nick Desaulniers Cc: Nick Kossifidis Cc: Palmer Dabbelt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/string.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/lib/string.c b/lib/string.c index 1053044883197..4d3c589897e2b 100644 --- a/lib/string.c +++ b/lib/string.c @@ -975,19 +975,13 @@ EXPORT_SYMBOL(memcpy); */ void *memmove(void *dest, const void *src, size_t count) { - char *tmp; - const char *s; + if (dest < src || src + count <= dest) + return memcpy(dest, src, count); + + if (dest > src) { + const char *s = src + count; + char *tmp = dest + count; - if (dest <= src) { - tmp = dest; - s = src; - while (count--) - *tmp++ = *s++; - } else { - tmp = dest; - tmp += count; - s = src; - s += count; while (count--) *--tmp = *--s; } From c103070e9f2ee4203eeee3b9547f98f5a5ae413a Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 21 Jul 2021 11:43:06 +1000 Subject: [PATCH 439/851] lib/string: optimized memset The generic memset is defined as a byte-at-a-time write. This is always safe, but it's slower than a 4 byte or even 8 byte write. Write a generic memset which fills the data one byte at a time until the destination is aligned, then fills using the largest size allowed, and finally fills the remaining data one byte at a time.
On a RISC-V machine the speed goes from 140 Mb/s to 241 Mb/s, and this the binary size increase according to bloat-o-meter: Function old new delta memset 32 148 +116 Link: https://lkml.kernel.org/r/20210702123153.14093-4-mcroce@linux.microsoft.com Signed-off-by: Matteo Croce Cc: Christoph Hellwig Cc: David Laight Cc: Drew Fustini Cc: Emil Renner Berthing Cc: Guo Ren Cc: Nick Desaulniers Cc: Nick Kossifidis Cc: Palmer Dabbelt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/string.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/lib/string.c b/lib/string.c index 4d3c589897e2b..4fec38fc6e58d 100644 --- a/lib/string.c +++ b/lib/string.c @@ -810,10 +810,38 @@ EXPORT_SYMBOL(__sysfs_match_string); */ void *memset(void *s, int c, size_t count) { - char *xs = s; + union types dest = { .as_u8 = s }; + if (count >= MIN_THRESHOLD) { + unsigned long cu = (unsigned long)c; + + /* Compose an ulong with 'c' repeated 4/8 times */ +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER + cu *= 0x0101010101010101UL; +#else + cu |= cu << 8; + cu |= cu << 16; + /* Suppress warning on 32 bit machines */ + cu |= (cu << 16) << 16; +#endif + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) { + /* + * Fill the buffer one byte at time until + * the destination is word aligned. + */ + for (; count && dest.as_uptr & WORD_MASK; count--) + *dest.as_u8++ = c; + } + + /* Copy using the largest size allowed */ + for (; count >= BYTES_LONG; count -= BYTES_LONG) + *dest.as_ulong++ = cu; + } + + /* copy the remainder */ while (count--) - *xs++ = c; + *dest.as_u8++ = c; + return s; } EXPORT_SYMBOL(memset); From 2aa099d01e52394f0935e39077b849c1cfed258e Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Wed, 21 Jul 2021 11:43:07 +1000 Subject: [PATCH 440/851] lib/test: convert test_sort.c to use KUnit This follows up commit ebd09577be6c ("lib/test: convert lib/test_list_sort.c to use KUnit"). 
Converting this test to KUnit makes the test a bit shorter, standardizes how it reports pass/fail, and adds an easier way to run the test [1]. Like ebd09577be6c, this leaves the file and Kconfig option name the same, but slightly changes their dependencies (needs CONFIG_KUNIT). [1] Can be run via $ ./tools/testing/kunit/kunit.py run --kunitconfig /dev/stdin < Cc: Pravin Shedge Cc: Brendan Higgins Cc: David Gow Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/Kconfig.debug | 5 +++-- lib/test_sort.c | 40 +++++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2193f8f9fd8f6..ea630a43d3de2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2084,8 +2084,9 @@ config TEST_MIN_HEAP If unsure, say N. config TEST_SORT - tristate "Array-based sort test" - depends on DEBUG_KERNEL || m + tristate "Array-based sort test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS help This option enables the self-test function of 'sort()' at boot, or at module load time. 
diff --git a/lib/test_sort.c b/lib/test_sort.c index 52edbe10f2e51..be02e3a098cf5 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only + +#include + #include #include #include @@ -7,18 +10,17 @@ #define TEST_LEN 1000 -static int __init cmpint(const void *a, const void *b) +static int cmpint(const void *a, const void *b) { return *(int *)a - *(int *)b; } -static int __init test_sort_init(void) +static void test_sort(struct kunit *test) { - int *a, i, r = 1, err = -ENOMEM; + int *a, i, r = 1; - a = kmalloc_array(TEST_LEN, sizeof(*a), GFP_KERNEL); - if (!a) - return err; + a = kunit_kmalloc_array(test, TEST_LEN, sizeof(*a), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, a); for (i = 0; i < TEST_LEN; i++) { r = (r * 725861) % 6599; @@ -27,24 +29,20 @@ static int __init test_sort_init(void) sort(a, TEST_LEN, sizeof(*a), cmpint, NULL); - err = -EINVAL; for (i = 0; i < TEST_LEN-1; i++) - if (a[i] > a[i+1]) { - pr_err("test has failed\n"); - goto exit; - } - err = 0; - pr_info("test passed\n"); -exit: - kfree(a); - return err; + KUNIT_ASSERT_LE(test, a[i], a[i + 1]); } -static void __exit test_sort_exit(void) -{ -} +static struct kunit_case sort_test_cases[] = { + KUNIT_CASE(test_sort), + {} +}; + +static struct kunit_suite sort_test_suite = { + .name = "lib_sort", + .test_cases = sort_test_cases, +}; -module_init(test_sort_init); -module_exit(test_sort_exit); +kunit_test_suites(&sort_test_suite); MODULE_LICENSE("GPL"); From 610093b3d3184f2db24a89057246793d9257919a Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Wed, 21 Jul 2021 11:43:08 +1000 Subject: [PATCH 441/851] init/main.c: silence some -Wunused-parameter warnings There are a bunch of callbacks with unused arguments, go ahead and silence those so "make KCFLAGS=-W init/main.o" is a little quieter. 
Here's a little sample: init/main.c:182:43: warning: unused parameter 'str' [-Wunused-parameter] static int __init set_reset_devices(char *str) Link: https://lkml.kernel.org/r/20210519162341.1275452-1-ahalaney@redhat.com Signed-off-by: Andrew Halaney Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- init/main.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/init/main.c b/init/main.c index f5b8246e8aa1c..5339b8026cecb 100644 --- a/init/main.c +++ b/init/main.c @@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(static_key_initialized); unsigned int reset_devices; EXPORT_SYMBOL(reset_devices); -static int __init set_reset_devices(char *str) +static int __init set_reset_devices(char *str __always_unused) { reset_devices = 1; return 1; @@ -231,13 +231,13 @@ static bool __init obsolete_checksetup(char *line) unsigned long loops_per_jiffy = (1<<12); EXPORT_SYMBOL(loops_per_jiffy); -static int __init debug_kernel(char *str) +static int __init debug_kernel(char *str __always_unused) { console_loglevel = CONSOLE_LOGLEVEL_DEBUG; return 0; } -static int __init quiet_kernel(char *str) +static int __init quiet_kernel(char *str __always_unused) { console_loglevel = CONSOLE_LOGLEVEL_QUIET; return 0; @@ -470,7 +470,7 @@ static void __init setup_boot_config(void) get_boot_config_from_initrd(NULL, NULL); } -static int __init warn_bootconfig(char *str) +static int __init warn_bootconfig(char *str __always_unused) { pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n"); return 0; @@ -496,7 +496,8 @@ static void __init repair_env_string(char *param, char *val) /* Anything after -- gets handed straight to init. 
*/ static int __init set_init_arg(char *param, char *val, - const char *unused, void *arg) + const char *unused __always_unused, + void *arg __always_unused) { unsigned int i; @@ -521,7 +522,8 @@ static int __init set_init_arg(char *param, char *val, * unused parameters (modprobe will find them in /proc/cmdline). */ static int __init unknown_bootoption(char *param, char *val, - const char *unused, void *arg) + const char *unused __always_unused, + void *arg __always_unused) { size_t len = strlen(param); @@ -716,7 +718,8 @@ noinline void __ref rest_init(void) /* Check for early params. */ static int __init do_early_param(char *param, char *val, - const char *unused, void *arg) + const char *unused __always_unused, + void *arg __always_unused) { const struct obs_kernel_param *p; @@ -1334,8 +1337,10 @@ static const char *initcall_level_names[] __initdata = { "late", }; -static int __init ignore_unknown_bootoption(char *param, char *val, - const char *unused, void *arg) +static int __init ignore_unknown_bootoption(char *param __always_unused, + char *val __always_unused, + const char *unused __always_unused, + void *arg __always_unused) { return 0; } @@ -1473,7 +1478,7 @@ void __weak free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -static int __ref kernel_init(void *unused) +static int __ref kernel_init(void *unused __always_unused) { int ret; From dcbf87a39379c6e9a7ee812cb6ce4c0ea7df20bb Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Wed, 21 Jul 2021 11:43:09 +1000 Subject: [PATCH 442/851] nilfs2: fix memory leak in nilfs_sysfs_create_device_group Patch series "nilfs2: fix incorrect usage of kobject". This patchset from Nanyong Sun fixes memory leak issues and a NULL pointer dereference issue caused by incorrect usage of kobject in nilfs2 sysfs implementation. 
This patch (of 6): Reported by syzkaller: BUG: memory leak unreferenced object 0xffff888100ca8988 (size 8): comm "syz-executor.1", pid 1930, jiffies 4294745569 (age 18.052s) hex dump (first 8 bytes): 6c 6f 6f 70 31 00 ff ff loop1... backtrace: [<000000009d9e0ac4>] slab_alloc_node mm/slub.c:2972 [inline] [<000000009d9e0ac4>] slab_alloc mm/slub.c:2980 [inline] [<000000009d9e0ac4>] __kmalloc_track_caller+0x164/0x330 mm/slub.c:4644 [<00000000b1825477>] kstrdup+0x36/0x70 mm/util.c:60 [<00000000fa081499>] kstrdup_const+0x35/0x60 mm/util.c:83 [<0000000024d13570>] kvasprintf_const+0xf1/0x180 lib/kasprintf.c:48 [<0000000024b69715>] kobject_set_name_vargs+0x56/0x150 lib/kobject.c:289 [<000000003fedac3d>] kobject_add_varg lib/kobject.c:384 [inline] [<000000003fedac3d>] kobject_init_and_add+0xc9/0x150 lib/kobject.c:473 [<000000002795bd99>] nilfs_sysfs_create_device_group+0x150/0x7d0 fs/nilfs2/sysfs.c:986 [<00000000567fa12d>] init_nilfs+0xa21/0xea0 fs/nilfs2/the_nilfs.c:637 [<00000000082e7458>] nilfs_fill_super fs/nilfs2/super.c:1046 [inline] [<00000000082e7458>] nilfs_mount+0x7b4/0xe80 fs/nilfs2/super.c:1316 [<00000000adc3fd88>] legacy_get_tree+0x105/0x210 fs/fs_context.c:592 [<00000000a98c45b8>] vfs_get_tree+0x8e/0x2d0 fs/super.c:1498 [<00000000e96282d3>] do_new_mount fs/namespace.c:2905 [inline] [<00000000e96282d3>] path_mount+0xf9b/0x1990 fs/namespace.c:3235 [<000000003d2eb1b0>] do_mount+0xea/0x100 fs/namespace.c:3248 [<00000000e1ce771a>] __do_sys_mount fs/namespace.c:3456 [inline] [<00000000e1ce771a>] __se_sys_mount fs/namespace.c:3433 [inline] [<00000000e1ce771a>] __x64_sys_mount+0x14b/0x1f0 fs/namespace.c:3433 [<000000007c7f81e8>] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [<000000007c7f81e8>] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 [<00000000fd23ff06>] entry_SYSCALL_64_after_hwframe+0x44/0xae If kobject_init_and_add return with error, then the cleanup of kobject is needed because memory may be allocated in kobject_init_and_add without freeing. 
And the place of cleanup_dev_kobject should use kobject_put to free the memory associated with the kobject. As the section "Kobject removal" of "Documentation/core-api/kobject.rst" says, kobject_del() just makes the kobject "invisible", but it is not cleaned up. And no more cleanup will do after cleanup_dev_kobject, so kobject_put is needed here. Link: https://lkml.kernel.org/r/1625651306-10829-1-git-send-email-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/1625651306-10829-2-git-send-email-konishi.ryusuke@gmail.com Reported-by: Hulk Robot Link: https://lkml.kernel.org/r/20210629022556.3985106-2-sunnanyong@huawei.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/nilfs2/sysfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 68e8d61e28dd5..d2d8ea89937ae 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -986,7 +986,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL, "%s", sb->s_id); if (err) - goto free_dev_subgroups; + goto cleanup_dev_kobject; err = nilfs_sysfs_create_mounted_snapshots_group(nilfs); if (err) @@ -1023,9 +1023,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) nilfs_sysfs_delete_mounted_snapshots_group(nilfs); cleanup_dev_kobject: - kobject_del(&nilfs->ns_dev_kobj); - -free_dev_subgroups: + kobject_put(&nilfs->ns_dev_kobj); kfree(nilfs->ns_dev_subgroups); failed_create_device_group: From 5413f77a7ac2041e20da5b4a75e6f657e2380a1d Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Wed, 21 Jul 2021 11:43:10 +1000 Subject: [PATCH 443/851] nilfs2: fix NULL pointer in nilfs_##name##_attr_release In nilfs_##name##_attr_release, kobj->parent should not be referenced because it is a NULL pointer. 
The release() method of kobject is always called in kobject_put(kobj), in the implementation of kobject_put(), the kobj->parent will be assigned as NULL before call the release() method. So just use kobj to get the subgroups, which is more efficient and can fix a NULL pointer reference problem. Link: https://lkml.kernel.org/r/20210629022556.3985106-3-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-3-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/nilfs2/sysfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index d2d8ea89937ae..ec85ac53720d2 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -51,11 +51,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \ #define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \ static void nilfs_##name##_attr_release(struct kobject *kobj) \ { \ - struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \ - struct the_nilfs *nilfs = container_of(kobj->parent, \ - struct the_nilfs, \ - ns_##parent_name##_kobj); \ - subgroups = nilfs->ns_##parent_name##_subgroups; \ + struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \ + struct nilfs_sysfs_##parent_name##_subgroups, \ + sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ static struct kobj_type nilfs_##name##_ktype = { \ From ce7a4ae939b10e8560bbe86555fbfc7851a673d1 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Wed, 21 Jul 2021 11:43:11 +1000 Subject: [PATCH 444/851] nilfs2: fix memory leak in nilfs_sysfs_create_##name##_group If kobject_init_and_add return with error, kobject_put() is needed here to avoid memory leak, because kobject_init_and_add may return error without freeing the memory associated with the kobject it allocated. 
Link: https://lkml.kernel.org/r/20210629022556.3985106-4-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-4-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/nilfs2/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index ec85ac53720d2..6305e4ef7e39b 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -79,8 +79,8 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \ #name); \ if (err) \ - return err; \ - return 0; \ + kobject_put(kobj); \ + return err; \ } \ static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ { \ From 88dacc9839c677c3233e61488dc66dbe292fe07c Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Wed, 21 Jul 2021 11:43:11 +1000 Subject: [PATCH 445/851] nilfs2: fix memory leak in nilfs_sysfs_delete_##name##_group The kobject_put() should be used to cleanup the memory associated with the kobject instead of kobject_del. See the section "Kobject removal" of "Documentation/core-api/kobject.rst". 
Link: https://lkml.kernel.org/r/20210629022556.3985106-5-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-5-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/nilfs2/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 6305e4ef7e39b..d989e6500bd7a 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -84,7 +84,7 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ } \ static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ { \ - kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ + kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ } /************************************************************************ From 19c9f45d141b027bb268ef91a2504d0b64c9b7d9 Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Wed, 21 Jul 2021 11:43:12 +1000 Subject: [PATCH 446/851] nilfs2: fix memory leak in nilfs_sysfs_create_snapshot_group If kobject_init_and_add returns with error, kobject_put() is needed here to avoid memory leak, because kobject_init_and_add may return error without freeing the memory associated with the kobject it allocated. 
Link: https://lkml.kernel.org/r/20210629022556.3985106-6-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-6-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/nilfs2/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index d989e6500bd7a..5ba87573ad3bc 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -195,9 +195,9 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) } if (err) - return err; + kobject_put(&root->snapshot_kobj); - return 0; + return err; } void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) From 469e659276c490a4a91d567df1007b78941dd9cf Mon Sep 17 00:00:00 2001 From: Nanyong Sun Date: Wed, 21 Jul 2021 11:43:13 +1000 Subject: [PATCH 447/851] nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group kobject_put() should be used to cleanup the memory associated with the kobject instead of kobject_del(). See the section "Kobject removal" of "Documentation/core-api/kobject.rst". 
Link: https://lkml.kernel.org/r/20210629022556.3985106-7-sunnanyong@huawei.com Link: https://lkml.kernel.org/r/1625651306-10829-7-git-send-email-konishi.ryusuke@gmail.com Signed-off-by: Nanyong Sun Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/nilfs2/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 5ba87573ad3bc..62f8a7ac19c85 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -202,7 +202,7 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) { - kobject_del(&root->snapshot_kobj); + kobject_put(&root->snapshot_kobj); } /************************************************************************ From e018e9440e7e0204d292b9e2d16bde3a7dec2d87 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 21 Jul 2021 11:43:13 +1000 Subject: [PATCH 448/851] hfsplus: fix out-of-bounds warnings in __hfsplus_setxattr Fix the following out-of-bounds warnings by enclosing structure members file and finder into new struct info: fs/hfsplus/xattr.c:300:5: warning: 'memcpy' offset [65, 80] from the object at 'entry' is out of the bounds of referenced subobject 'user_info' with type 'struct DInfo' at offset 48 [-Warray-bounds] fs/hfsplus/xattr.c:313:5: warning: 'memcpy' offset [65, 80] from the object at 'entry' is out of the bounds of referenced subobject 'user_info' with type 'struct FInfo' at offset 48 [-Warray-bounds] Refactor the code by making it more "structured." Also, this helps with the ongoing efforts to enable -Warray-bounds and makes the code clearer and avoid confusing the compiler. Matthew said: : The offending line is this: : : - memcpy(&entry.file.user_info, value, : + memcpy(&entry.file.info, value, : file_finderinfo_len); : : what it's trying to do is copy two structs which are adjacent to each : other in a single call to memcpy(). 
gcc legitimately complains that : the memcpy to this struct overruns the bounds of the struct. What : Gustavo has done here is introduce a new struct that contains the two : structs, and now gcc is happy that the memcpy doesn't overrun the : length of this containing struct. Link: https://github.com/KSPP/linux/issues/109 Link: https://lkml.kernel.org/r/20210330145226.GA207011@embeddedor Signed-off-by: Gustavo A. R. Silva Reported-by: kernel test robot Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/hfsplus/catalog.c | 16 ++++++++-------- fs/hfsplus/dir.c | 4 ++-- fs/hfsplus/hfsplus_raw.h | 12 ++++++++---- fs/hfsplus/xattr.c | 18 ++++++++---------- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 35472cba750e1..9cdc6550b468e 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -124,7 +124,7 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, hfsplus_cat_set_perms(inode, &folder->permissions); if (inode == sbi->hidden_dir) /* invisible and namelocked */ - folder->user_info.frFlags = cpu_to_be16(0x5000); + folder->info.user.frFlags = cpu_to_be16(0x5000); return sizeof(*folder); } else { struct hfsplus_cat_file *file; @@ -142,14 +142,14 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, if (cnid == inode->i_ino) { hfsplus_cat_set_perms(inode, &file->permissions); if (S_ISLNK(inode->i_mode)) { - file->user_info.fdType = + file->info.user.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); - file->user_info.fdCreator = + file->info.user.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); } else { - file->user_info.fdType = + file->info.user.fdType = cpu_to_be32(sbi->type); - file->user_info.fdCreator = + file->info.user.fdCreator = cpu_to_be32(sbi->creator); } if (HFSPLUS_FLG_IMMUTABLE & @@ -158,11 +158,11 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); } else { - 
file->user_info.fdType = + file->info.user.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); - file->user_info.fdCreator = + file->info.user.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); - file->user_info.fdFlags = + file->info.user.fdFlags = cpu_to_be16(0x100); file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 84714bbccc123..135279a19b559 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -73,9 +73,9 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, goto fail; } cnid = be32_to_cpu(entry.file.id); - if (entry.file.user_info.fdType == + if (entry.file.info.user.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && - entry.file.user_info.fdCreator == + entry.file.info.user.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && HFSPLUS_SB(sb)->hidden_dir && (entry.file.create_date == diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 456e87aec7fd7..005a043bc7eed 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -260,8 +260,10 @@ struct hfsplus_cat_folder { __be32 access_date; __be32 backup_date; struct hfsplus_perm permissions; - struct DInfo user_info; - struct DXInfo finder_info; + struct { + struct DInfo user; + struct DXInfo finder; + } info; __be32 text_encoding; __be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. 
*/ } __packed; @@ -294,8 +296,10 @@ struct hfsplus_cat_file { __be32 access_date; __be32 backup_date; struct hfsplus_perm permissions; - struct FInfo user_info; - struct FXInfo finder_info; + struct { + struct FInfo user; + struct FXInfo finder; + } info; __be32 text_encoding; u32 reserved2; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index e2855ceefd394..b21811baab0f2 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -261,10 +261,8 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, struct hfs_find_data cat_fd; hfsplus_cat_entry entry; u16 cat_entry_flags, cat_entry_type; - u16 folder_finderinfo_len = sizeof(struct DInfo) + - sizeof(struct DXInfo); - u16 file_finderinfo_len = sizeof(struct FInfo) + - sizeof(struct FXInfo); + u16 folder_finderinfo_len = sizeof(entry.folder.info); + u16 file_finderinfo_len = sizeof(entry.file.info); if ((!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) || @@ -296,7 +294,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, sizeof(hfsplus_cat_entry)); if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) { if (size == folder_finderinfo_len) { - memcpy(&entry.folder.user_info, value, + memcpy(&entry.folder.info, value, folder_finderinfo_len); hfs_bnode_write(cat_fd.bnode, &entry, cat_fd.entryoffset, @@ -309,7 +307,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, } } else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) { if (size == file_finderinfo_len) { - memcpy(&entry.file.user_info, value, + memcpy(&entry.file.info, value, file_finderinfo_len); hfs_bnode_write(cat_fd.bnode, &entry, cat_fd.entryoffset, @@ -462,14 +460,14 @@ static ssize_t hfsplus_getxattr_finder_info(struct inode *inode, if (entry_type == HFSPLUS_FOLDER) { hfs_bnode_read(fd.bnode, folder_finder_info, fd.entryoffset + - offsetof(struct hfsplus_cat_folder, user_info), + offsetof(struct hfsplus_cat_folder, info.user), folder_rec_len); memcpy(value, folder_finder_info, folder_rec_len); res = folder_rec_len; } else 
if (entry_type == HFSPLUS_FILE) { hfs_bnode_read(fd.bnode, file_finder_info, fd.entryoffset + - offsetof(struct hfsplus_cat_file, user_info), + offsetof(struct hfsplus_cat_file, info.user), file_rec_len); memcpy(value, file_finder_info, file_rec_len); res = file_rec_len; @@ -630,14 +628,14 @@ static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry, len = sizeof(struct DInfo) + sizeof(struct DXInfo); hfs_bnode_read(fd.bnode, folder_finder_info, fd.entryoffset + - offsetof(struct hfsplus_cat_folder, user_info), + offsetof(struct hfsplus_cat_folder, info.user), len); found_bit = find_first_bit((void *)folder_finder_info, len*8); } else if (entry_type == HFSPLUS_FILE) { len = sizeof(struct FInfo) + sizeof(struct FXInfo); hfs_bnode_read(fd.bnode, file_finder_info, fd.entryoffset + - offsetof(struct hfsplus_cat_file, user_info), + offsetof(struct hfsplus_cat_file, info.user), len); found_bit = find_first_bit((void *)file_finder_info, len*8); } else { From 2934908ea4bbd97b9ce073c4f43a70b13e62450d Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Wed, 21 Jul 2021 11:43:14 +1000 Subject: [PATCH 449/851] pid: cleanup the stale comment mentioning pidmap_init(). pidmap_init() has already been replaced with pid_idr_init() in the commit 95846ecf9dac ("pid: replace pid bitmap implementation with IDR API"). Cleanup the stale comment which still mentions it. Link: https://lkml.kernel.org/r/20210714120713.19825-1-itazur@amazon.com Signed-off-by: Takahiro Itazuri Cc: Kuniyuki Iwashima Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/threads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/threads.h b/include/linux/threads.h index 18d5a74bcc3dd..c34173e6c5f18 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -38,7 +38,7 @@ * Define a minimum number of pids per cpu. Heuristically based * on original pid max of 32k for 32 cpus. 
Also, increase the * minimum settable value for pid_max on the running system based - * on similar defaults. See kernel/pid.c:pidmap_init() for details. + * on similar defaults. See kernel/pid.c:pid_idr_init() for details. */ #define PIDS_PER_CPU_DEFAULT 1024 #define PIDS_PER_CPU_MIN 8 From d8046a8e567c24ef37f9b1e9c189d8bc0d0ebfbc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 21 Jul 2021 11:43:15 +1000 Subject: [PATCH 450/851] prctl: allow to setup brk for et_dyn executables Keno Fischer reported that when a binary is loaded via ld-linux-x the prctl(PR_SET_MM_MAP) doesn't allow to setup brk value because it lies before mm:end_data. For example a test program shows | # ~/t | | start_code 401000 | end_code 401a15 | start_stack 7ffce4577dd0 | start_data 403e10 | end_data 40408c | start_brk b5b000 | sbrk(0) b5b000 and when executed via ld-linux | # /lib64/ld-linux-x86-64.so.2 ~/t | | start_code 7fc25b0a4000 | end_code 7fc25b0c4524 | start_stack 7fffcc6b2400 | start_data 7fc25b0ce4c0 | end_data 7fc25b0cff98 | start_brk 55555710c000 | sbrk(0) 55555710c000 This of course prevents criu from restoring such programs. Looking into how kernel operates with brk/start_brk inside brk() syscall I don't see any problem if we allow to setup brk/start_brk without checking for end_data. Even if someone passes some weird address here on purpose then the worst possible result will be an unexpected unmapping of existing vma (own vma, since prctl works with the caller's memory) but test for RLIMIT_DATA is still valid and a user won't be able to gain more memory in case of expanding VMAs via new values shipped with prctl call. Link: https://lkml.kernel.org/r/20210121221207.GB2174@grain Fixes: bbdc6076d2e5 ("binfmt_elf: move brk out of mmap when doing direct loader exec") Signed-off-by: Cyrill Gorcunov Reported-by: Keno Fischer Acked-by: Andrey Vagin Tested-by: Andrey Vagin Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Kirill Tkhai Cc: Eric W. 
Biederman Cc: Pavel Tikhomirov Cc: Alexander Mikhalitsyn Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/sys.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index ef1a78f5d71c7..6ec50924b5176 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1959,13 +1959,6 @@ static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map) error = -EINVAL; - /* - * @brk should be after @end_data in traditional maps. - */ - if (prctl_map->start_brk <= prctl_map->end_data || - prctl_map->brk <= prctl_map->end_data) - goto out; - /* * Neither we should allow to override limits if they set. */ From ace6e27b90197751ad3e6baadae3b8b065fd4e2d Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Wed, 21 Jul 2021 11:43:15 +1000 Subject: [PATCH 451/851] selftests/memfd: remove unused variable Commit 544029862cbb ("selftests/memfd: add tests for F_SEAL_FUTURE_WRITE seal") added an unused variable to mfd_assert_reopen_fd(). Delete the unused variable. Link: https://lkml.kernel.org/r/20210702045509.1517643-1-gthelen@google.com Fixes: 544029862cbb ("selftests/memfd: add tests for F_SEAL_FUTURE_WRITE seal") Signed-off-by: Greg Thelen Cc: Shuah Khan Cc: Michael Ellerman Cc: "Joel Fernandes (Google)" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/memfd/memfd_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 74baab83fec35..192a2899bae8f 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -56,7 +56,7 @@ static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) static int mfd_assert_reopen_fd(int fd_in) { - int r, fd; + int fd; char path[100]; sprintf(path, "/proc/self/fd/%d", fd_in); From 6178925933d00d1f9648424490df1ceaf5e5f236 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 5 Jun 2021 23:20:17 
-0400 Subject: [PATCH 452/851] mm: Convert get_page_unless_zero() to return bool atomic_add_unless() returns bool, so remove the widening casts to int in page_ref_add_unless() and get_page_unless_zero(). This causes gcc to produce slightly larger code in isolate_migratepages_block(), but it's not clear that it's worse code. Net +19 bytes of text. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Kirill A. Shutemov --- include/linux/mm.h | 2 +- include/linux/page_ref.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ca22e6e694a8..8dd65290bac0d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -755,7 +755,7 @@ static inline int put_page_testzero(struct page *page) * This can be called when MMU is off so it must not access * any of the virtual mappings. */ -static inline int get_page_unless_zero(struct page *page) +static inline bool get_page_unless_zero(struct page *page) { return page_ref_add_unless(page, 1, 0); } diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 7ad46f45df396..3a799de8ad526 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -161,9 +161,9 @@ static inline int page_ref_dec_return(struct page *page) return ret; } -static inline int page_ref_add_unless(struct page *page, int nr, int u) +static inline bool page_ref_add_unless(struct page *page, int nr, int u) { - int ret = atomic_add_unless(&page->_refcount, nr, u); + bool ret = atomic_add_unless(&page->_refcount, nr, u); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); From ca855b58c2dfe3f5c7c0a38adf980edced9e3314 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 6 Dec 2020 22:22:48 -0500 Subject: [PATCH 453/851] mm: Introduce struct folio A struct folio is a new abstraction to replace the venerable struct page. 
A function which takes a struct folio argument declares that it will operate on the entire (possibly compound) page, not just PAGE_SIZE bytes. In return, the caller guarantees that the pointer it is passing does not point to a tail page. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Mike Rapoport --- Documentation/core-api/mm-api.rst | 1 + include/linux/mm.h | 75 +++++++++++++++++++++++++++++++ include/linux/mm_types.h | 60 +++++++++++++++++++++++++ include/linux/page-flags.h | 28 ++++++++++++ 4 files changed, 164 insertions(+) diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index a42f9baddfbf8..2a94e6164f80f 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -95,6 +95,7 @@ More Memory Management Functions .. kernel-doc:: mm/mempolicy.c .. kernel-doc:: include/linux/mm_types.h :internal: +.. kernel-doc:: include/linux/page-flags.h .. kernel-doc:: include/linux/mm.h :internal: .. kernel-doc:: include/linux/mmzone.h diff --git a/include/linux/mm.h b/include/linux/mm.h index 8dd65290bac0d..c5f88f886af35 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -949,6 +949,20 @@ static inline unsigned int compound_order(struct page *page) return page[1].compound_order; } +/** + * folio_order - The allocation order of a folio. + * @folio: The folio. + * + * A folio is composed of 2^order pages. See get_order() for the definition + * of order. + * + * Return: The order of the folio. 
+ */ +static inline unsigned int folio_order(struct folio *folio) +{ + return compound_order(&folio->page); +} + static inline bool hpage_pincount_available(struct page *page) { /* @@ -1594,6 +1608,66 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +/** + * folio_nr_pages - The number of pages in the folio. + * @folio: The folio. + * + * Return: A number which is a power of two. + */ +static inline unsigned long folio_nr_pages(struct folio *folio) +{ + return compound_nr(&folio->page); +} + +/** + * folio_next - Move to the next physical folio. + * @folio: The folio we're currently operating on. + * + * If you have physically contiguous memory which may span more than + * one folio (eg a &struct bio_vec), use this function to move from one + * folio to the next. Do not use it if the memory is only virtually + * contiguous as the folios are almost certainly not adjacent to each + * other. This is the folio equivalent to writing ``page++``. + * + * Context: We assume that the folios are refcounted and/or locked at a + * higher level and do not adjust the reference counts. + * Return: The next struct folio. + */ +static inline struct folio *folio_next(struct folio *folio) +{ + return (struct folio *)folio_page(folio, folio_nr_pages(folio)); +} + +/** + * folio_shift - The size of the memory described by this folio. + * @folio: The folio. + * + * A folio represents a number of bytes which is a power-of-two in size. + * This function tells you which power-of-two the folio is. See also + * folio_size() and folio_order(). + * + * Context: The caller should have a reference on the folio to prevent + * it from being split. It is not necessary for the folio to be locked. + * Return: The base-2 logarithm of the size of this folio. + */ +static inline unsigned int folio_shift(struct folio *folio) +{ + return PAGE_SHIFT + folio_order(folio); +} + +/** + * folio_size - The number of bytes in a folio. + * @folio: The folio. 
+ * + * Context: The caller should have a reference on the folio to prevent + * it from being split. It is not necessary for the folio to be locked. + * Return: The number of bytes in this folio. + */ +static inline size_t folio_size(struct folio *folio) +{ + return PAGE_SIZE << folio_order(folio); +} + /* * Some inline functions in vmstat.h depend on page_zone() */ @@ -1699,6 +1773,7 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) #define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) +#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) /* * Flags passed to show_mem() and show_free_areas() to suppress output in diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 52bbd2b7cb465..5749448b486d3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -231,6 +231,66 @@ struct page { #endif } _struct_page_alignment; +/** + * struct folio - Represents a contiguous set of bytes. + * @flags: Identical to the page flags. + * @lru: Least Recently Used list; tracks how recently this folio was used. + * @mapping: The file this page belongs to, or refers to the anon_vma for + * anonymous memory. + * @index: Offset within the file, in units of pages. For anonymous memory, + * this is the index from the beginning of the mmap. + * @private: Filesystem per-folio data (see folio_attach_private()). + * Used for swp_entry_t if folio_test_swapcache(). + * @_mapcount: Do not access this member directly. Use folio_mapcount() to + * find out how many times this folio is mapped by userspace. + * @_refcount: Do not access this member directly. Use folio_ref_count() + * to find how many references there are to this folio. + * @memcg_data: Memory Control Group data. + * + * A folio is a physically, virtually and logically contiguous set + * of bytes. It is a power-of-two in size, and it is aligned to that + * same power-of-two. 
It is at least as large as %PAGE_SIZE. If it is + * in the page cache, it is at a file offset which is a multiple of that + * power-of-two. It may be mapped into userspace at an address which is + * at an arbitrary page offset, but its kernel virtual address is aligned + * to its size. + */ +struct folio { + /* private: don't document the anon union */ + union { + struct { + /* public: */ + unsigned long flags; + struct list_head lru; + struct address_space *mapping; + pgoff_t index; + void *private; + atomic_t _mapcount; + atomic_t _refcount; +#ifdef CONFIG_MEMCG + unsigned long memcg_data; +#endif + /* private: the union with struct page is transitional */ + }; + struct page page; + }; +}; + +static_assert(sizeof(struct page) == sizeof(struct folio)); +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) +FOLIO_MATCH(flags, flags); +FOLIO_MATCH(lru, lru); +FOLIO_MATCH(compound_head, lru); +FOLIO_MATCH(index, index); +FOLIO_MATCH(private, private); +FOLIO_MATCH(_mapcount, _mapcount); +FOLIO_MATCH(_refcount, _refcount); +#ifdef CONFIG_MEMCG +FOLIO_MATCH(memcg_data, memcg_data); +#endif +#undef FOLIO_MATCH + static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5922031ffab6e..70ede83455386 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -191,6 +191,34 @@ static inline unsigned long _compound_head(const struct page *page) #define compound_head(page) ((typeof(page))_compound_head(page)) +/** + * page_folio - Converts from page to folio. + * @p: The page. + * + * Every page is part of a folio. This function cannot be called on a + * NULL pointer. + * + * Context: No reference, nor lock is required on @p.
If the caller + * does not hold a reference, this call may race with a folio split, so + * it should re-check the folio still contains this page after gaining + * a reference on the folio. + * Return: The folio which contains this page. + */ +#define page_folio(p) (_Generic((p), \ + const struct page *: (const struct folio *)_compound_head(p), \ + struct page *: (struct folio *)_compound_head(p))) + +/** + * folio_page - Return a page from a folio. + * @folio: The folio. + * @n: The page number to return. + * + * @n is relative to the start of the folio. This function does not + * check that the page number lies within @folio; the caller is presumed + * to have a reference to the page. + */ +#define folio_page(folio, n) nth_page(&(folio)->page, n) + static __always_inline int PageTail(struct page *page) { return READ_ONCE(page->compound_head) & 1; From 6b474ab1d9c8cc969b57147b9a9e87f69502ecdc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 18 Jan 2021 07:40:36 -0500 Subject: [PATCH 454/851] mm: Add folio_pgdat(), folio_zone() and folio_zonenum() These are just convenience wrappers for callers with folios; pgdat and zone can be reached from tail pages as well as head pages. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index c5f88f886af35..6fdebffb44502 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1144,6 +1144,11 @@ static inline enum zone_type page_zonenum(const struct page *page) return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } +static inline enum zone_type folio_zonenum(const struct folio *folio) +{ + return page_zonenum(&folio->page); +} + #ifdef CONFIG_ZONE_DEVICE static inline bool is_zone_device_page(const struct page *page) { @@ -1559,6 +1564,16 @@ static inline pg_data_t *page_pgdat(const struct page *page) return NODE_DATA(page_to_nid(page)); } +static inline struct zone *folio_zone(const struct folio *folio) +{ + return page_zone(&folio->page); +} + +static inline pg_data_t *folio_pgdat(const struct folio *folio) +{ + return page_pgdat(&folio->page); +} + #ifdef SECTION_IN_PAGE_FLAGS static inline void set_page_section(struct page *page, unsigned long section) { From 5865040721643c7340df8976b1631a05f3678065 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 18 Jan 2021 08:14:00 -0500 Subject: [PATCH 455/851] mm/vmstat: Add functions to account folio statistics Allow page counters to be more readily modified by callers which have a folio. Name these wrappers with 'stat' instead of 'state' as requested by Linus here: https://lore.kernel.org/linux-mm/CAHk-=wj847SudR-kt+46fT3+xFFgiwpgThvm7DJWGdi4cVrbnQ@mail.gmail.com/ No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/vmstat.h | 107 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d6a6cf53b1278..241bd0f53fb97 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -415,6 +415,78 @@ static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } #endif /* CONFIG_SMP */ +static inline void __zone_stat_mod_folio(struct folio *folio, + enum zone_stat_item item, long nr) +{ + __mod_zone_page_state(folio_zone(folio), item, nr); +} + +static inline void __zone_stat_add_folio(struct folio *folio, + enum zone_stat_item item) +{ + __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); +} + +static inline void __zone_stat_sub_folio(struct folio *folio, + enum zone_stat_item item) +{ + __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); +} + +static inline void zone_stat_mod_folio(struct folio *folio, + enum zone_stat_item item, long nr) +{ + mod_zone_page_state(folio_zone(folio), item, nr); +} + +static inline void zone_stat_add_folio(struct folio *folio, + enum zone_stat_item item) +{ + mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); +} + +static inline void zone_stat_sub_folio(struct folio *folio, + enum zone_stat_item item) +{ + mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); +} + +static inline void __node_stat_mod_folio(struct folio *folio, + enum node_stat_item item, long nr) +{ + __mod_node_page_state(folio_pgdat(folio), item, nr); +} + +static inline void __node_stat_add_folio(struct folio *folio, + enum node_stat_item item) +{ + __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); +} + +static inline void __node_stat_sub_folio(struct folio *folio, + enum node_stat_item item) +{ + __mod_node_page_state(folio_pgdat(folio), 
item, -folio_nr_pages(folio)); +} + +static inline void node_stat_mod_folio(struct folio *folio, + enum node_stat_item item, long nr) +{ + mod_node_page_state(folio_pgdat(folio), item, nr); +} + +static inline void node_stat_add_folio(struct folio *folio, + enum node_stat_item item) +{ + mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); +} + +static inline void node_stat_sub_folio(struct folio *folio, + enum node_stat_item item) +{ + mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); +} + static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, int migratetype) { @@ -543,6 +615,24 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } +static inline void __lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) +{ + __mod_lruvec_page_state(&folio->page, idx, val); +} + +static inline void __lruvec_stat_add_folio(struct folio *folio, + enum node_stat_item idx) +{ + __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); +} + +static inline void __lruvec_stat_sub_folio(struct folio *folio, + enum node_stat_item idx) +{ + __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); +} + static inline void inc_lruvec_page_state(struct page *page, enum node_stat_item idx) { @@ -555,4 +645,21 @@ static inline void dec_lruvec_page_state(struct page *page, mod_lruvec_page_state(page, idx, -1); } +static inline void lruvec_stat_mod_folio(struct folio *folio, + enum node_stat_item idx, int val) +{ + mod_lruvec_page_state(&folio->page, idx, val); +} + +static inline void lruvec_stat_add_folio(struct folio *folio, + enum node_stat_item idx) +{ + lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); +} + +static inline void lruvec_stat_sub_folio(struct folio *folio, + enum node_stat_item idx) +{ + lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); +} #endif /* _LINUX_VMSTAT_H */ From 3c68c9bb347823808e0a52e3c363c47a504bb063 Mon 
Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Jan 2021 10:52:37 -0500 Subject: [PATCH 456/851] mm/debug: Add VM_BUG_ON_FOLIO() and VM_WARN_ON_ONCE_FOLIO() These are the folio equivalents of VM_BUG_ON_PAGE and VM_WARN_ON_ONCE_PAGE. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mmdebug.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 1935d4c72d104..d7285f8148a3b 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -22,6 +22,13 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_BUG_ON_FOLIO(cond, folio) \ + do { \ + if (unlikely(cond)) { \ + dump_page(&folio->page, "VM_BUG_ON_FOLIO(" __stringify(cond)")");\ + BUG(); \ + } \ + } while (0) #define VM_BUG_ON_VMA(cond, vma) \ do { \ if (unlikely(cond)) { \ @@ -47,6 +54,17 @@ void dump_mm(const struct mm_struct *mm); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(&folio->page, "VM_WARN_ON_ONCE_FOLIO(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) @@ -55,11 +73,13 @@ void dump_mm(const struct mm_struct *mm); #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) +#define VM_BUG_ON_FOLIO(cond, folio) VM_BUG_ON(cond) #define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond) #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) 
BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif From 25f5b0e994d9a90994a28377a426c5c8d0373a66 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 31 Mar 2021 10:39:55 -0400 Subject: [PATCH 457/851] mm: Add folio reference count functions These functions mirror their page reference counterparts. Also add the kernel-doc to the mm-api and correct the return type of page_ref_add_unless() to bool. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- Documentation/core-api/mm-api.rst | 1 + include/linux/page_ref.h | 88 ++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index 2a94e6164f80f..5c459ee2accee 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -98,4 +98,5 @@ More Memory Management Functions .. kernel-doc:: include/linux/page-flags.h .. kernel-doc:: include/linux/mm.h :internal: +.. kernel-doc:: include/linux/page_ref.h .. kernel-doc:: include/linux/mmzone.h diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 3a799de8ad526..717d53c9ddf1e 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -67,9 +67,31 @@ static inline int page_ref_count(const struct page *page) return atomic_read(&page->_refcount); } +/** + * folio_ref_count - The reference count on this folio. + * @folio: The folio. 
+ * + * The refcount is usually incremented by calls to folio_get() and + * decremented by calls to folio_put(). Some typical users of the + * folio refcount: + * + * - Each reference from a page table + * - The page cache + * - Filesystem private data + * - The LRU list + * - Pipes + * - Direct IO which references this page in the process address space + * + * Return: The number of references to this folio. + */ +static inline int folio_ref_count(const struct folio *folio) +{ + return page_ref_count(&folio->page); +} + static inline int page_count(const struct page *page) { - return atomic_read(&compound_head(page)->_refcount); + return folio_ref_count(page_folio(page)); } static inline void set_page_count(struct page *page, int v) @@ -79,6 +101,11 @@ static inline void set_page_count(struct page *page, int v) __page_ref_set(page, v); } +static inline void folio_set_count(struct folio *folio, int v) +{ + set_page_count(&folio->page, v); +} + /* * Setup the page count before being freed into the page allocator for * the first time (boot or memory hotplug) @@ -95,6 +122,11 @@ static inline void page_ref_add(struct page *page, int nr) __page_ref_mod(page, nr); } +static inline void folio_ref_add(struct folio *folio, int nr) +{ + page_ref_add(&folio->page, nr); +} + static inline void page_ref_sub(struct page *page, int nr) { atomic_sub(nr, &page->_refcount); @@ -102,6 +134,11 @@ static inline void page_ref_sub(struct page *page, int nr) __page_ref_mod(page, -nr); } +static inline void folio_ref_sub(struct folio *folio, int nr) +{ + page_ref_sub(&folio->page, nr); +} + static inline int page_ref_sub_return(struct page *page, int nr) { int ret = atomic_sub_return(nr, &page->_refcount); @@ -111,6 +148,11 @@ static inline int page_ref_sub_return(struct page *page, int nr) return ret; } +static inline int folio_ref_sub_return(struct folio *folio, int nr) +{ + return page_ref_sub_return(&folio->page, nr); +} + static inline void page_ref_inc(struct page *page) { 
atomic_inc(&page->_refcount); @@ -118,6 +160,11 @@ static inline void page_ref_inc(struct page *page) __page_ref_mod(page, 1); } +static inline void folio_ref_inc(struct folio *folio) +{ + page_ref_inc(&folio->page); +} + static inline void page_ref_dec(struct page *page) { atomic_dec(&page->_refcount); @@ -125,6 +172,11 @@ static inline void page_ref_dec(struct page *page) __page_ref_mod(page, -1); } +static inline void folio_ref_dec(struct folio *folio) +{ + page_ref_dec(&folio->page); +} + static inline int page_ref_sub_and_test(struct page *page, int nr) { int ret = atomic_sub_and_test(nr, &page->_refcount); @@ -134,6 +186,11 @@ static inline int page_ref_sub_and_test(struct page *page, int nr) return ret; } +static inline int folio_ref_sub_and_test(struct folio *folio, int nr) +{ + return page_ref_sub_and_test(&folio->page, nr); +} + static inline int page_ref_inc_return(struct page *page) { int ret = atomic_inc_return(&page->_refcount); @@ -143,6 +200,11 @@ static inline int page_ref_inc_return(struct page *page) return ret; } +static inline int folio_ref_inc_return(struct folio *folio) +{ + return page_ref_inc_return(&folio->page); +} + static inline int page_ref_dec_and_test(struct page *page) { int ret = atomic_dec_and_test(&page->_refcount); @@ -152,6 +214,11 @@ static inline int page_ref_dec_and_test(struct page *page) return ret; } +static inline int folio_ref_dec_and_test(struct folio *folio) +{ + return page_ref_dec_and_test(&folio->page); +} + static inline int page_ref_dec_return(struct page *page) { int ret = atomic_dec_return(&page->_refcount); @@ -161,6 +228,11 @@ static inline int page_ref_dec_return(struct page *page) return ret; } +static inline int folio_ref_dec_return(struct folio *folio) +{ + return page_ref_dec_return(&folio->page); +} + static inline bool page_ref_add_unless(struct page *page, int nr, int u) { bool ret = atomic_add_unless(&page->_refcount, nr, u); @@ -170,6 +242,11 @@ static inline bool page_ref_add_unless(struct page 
*page, int nr, int u) return ret; } +static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) +{ + return page_ref_add_unless(&folio->page, nr, u); +} + static inline int page_ref_freeze(struct page *page, int count) { int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); @@ -179,6 +256,11 @@ static inline int page_ref_freeze(struct page *page, int count) return ret; } +static inline int folio_ref_freeze(struct folio *folio, int count) +{ + return page_ref_freeze(&folio->page, count); +} + static inline void page_ref_unfreeze(struct page *page, int count) { VM_BUG_ON_PAGE(page_count(page) != 0, page); @@ -189,4 +271,8 @@ static inline void page_ref_unfreeze(struct page *page, int count) __page_ref_unfreeze(page, count); } +static inline void folio_ref_unfreeze(struct folio *folio, int count) +{ + page_ref_unfreeze(&folio->page, count); +} #endif From 532e83a85797b2efde486eed9c8d6c4cf9df59d2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 6 Dec 2020 23:04:57 -0500 Subject: [PATCH 458/851] mm: Add folio_put() If we know we have a folio, we can call folio_put() instead of put_page() and save the overhead of calling compound_head(). Also skips the devmap checks. This commit looks like it should be a no-op, but actually saves 684 bytes of text with the distro-derived config that I'm testing. Some functions grow a little while others shrink. I presume the compiler is making different inlining decisions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mm.h | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6fdebffb44502..6e25e8d5b98e5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -749,6 +749,11 @@ static inline int put_page_testzero(struct page *page) return page_ref_dec_and_test(page); } +static inline int folio_put_testzero(struct folio *folio) +{ + return put_page_testzero(&folio->page); +} + /* * Try to grab a ref unless the page has a refcount of zero, return false if * that is the case. @@ -1246,9 +1251,28 @@ static inline __must_check bool try_get_page(struct page *page) return true; } +/** + * folio_put - Decrement the reference count on a folio. + * @folio: The folio. + * + * If the folio's reference count reaches zero, the memory will be + * released back to the page allocator and may be used by another + * allocation immediately. Do not access the memory or the struct folio + * after calling folio_put() unless you can be sure that it wasn't the + * last reference. + * + * Context: May be called in process or interrupt context, but not in NMI + * context. May be called while holding a spinlock. + */ +static inline void folio_put(struct folio *folio) +{ + if (folio_put_testzero(folio)) + __put_page(&folio->page); +} + static inline void put_page(struct page *page) { - page = compound_head(page); + struct folio *folio = page_folio(page); /* * For devmap managed pages we need to catch refcount transition from @@ -1256,13 +1280,12 @@ static inline void put_page(struct page *page) * need to inform the device driver through callback. See * include/linux/memremap.h and HMM for details. 
*/ - if (page_is_devmap_managed(page)) { - put_devmap_managed_page(page); + if (page_is_devmap_managed(&folio->page)) { + put_devmap_managed_page(&folio->page); return; } - if (put_page_testzero(page)) - __put_page(page); + folio_put(folio); } /* From 43a839dc66bf4d46d7c0721b37006603b3d1c15a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 6 Dec 2020 23:04:57 -0500 Subject: [PATCH 459/851] mm: Add folio_get() If we know we have a folio, we can call folio_get() instead of get_page() and save the overhead of calling compound_head(). No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mm.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6e25e8d5b98e5..ea15bd7ae55ef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1223,18 +1223,26 @@ static inline bool is_pci_p2pdma_page(const struct page *page) } /* 127: arbitrary random number, small enough to assemble well */ -#define page_ref_zero_or_close_to_overflow(page) \ - ((unsigned int) page_ref_count(page) + 127u <= 127u) +#define folio_ref_zero_or_close_to_overflow(folio) \ + ((unsigned int) folio_ref_count(folio) + 127u <= 127u) + +/** + * folio_get - Increment the reference count on a folio. + * @folio: The folio. + * + * Context: May be called in any context, as long as you know that + * you have a refcount on the folio. If you do not already have one, + * folio_try_get() may be the right interface for you to use. 
+ */ +static inline void folio_get(struct folio *folio) +{ + VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio); + folio_ref_inc(folio); +} static inline void get_page(struct page *page) { - page = compound_head(page); - /* - * Getting a normal page or the head of a compound page - * requires to already have an elevated page->_refcount. - */ - VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); - page_ref_inc(page); + folio_get(page_folio(page)); } bool __must_check try_grab_page(struct page *page, unsigned int flags); From 3f6e3c58f2acfd58ecf244c26e820b7d1f8abe0e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 10 May 2021 16:33:22 -0400 Subject: [PATCH 460/851] mm: Add folio_try_get_rcu() This is the equivalent of page_cache_get_speculative(). Also add folio_ref_try_add_rcu (the equivalent of page_cache_add_speculative) and folio_get_unless_zero() (the equivalent of get_page_unless_zero()). The new kernel-doc attempts to explain from the user's point of view when to use folio_try_get_rcu() and when to use folio_get_unless_zero(), because there seems to be some confusion currently between the users of page_cache_get_speculative() and get_page_unless_zero(). Reimplement page_cache_add_speculative() and page_cache_get_speculative() as wrappers around the folio equivalents, but leave get_page_unless_zero() alone for now. This commit reduces text size by 3 bytes due to slightly different register allocation & instruction selections. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Acked-by: Kirill A. 
Shutemov Acked-by: Mike Rapoport --- include/linux/page_ref.h | 66 +++++++++++++++++++++++++++++++ include/linux/pagemap.h | 84 ++-------------------------------------- mm/filemap.c | 20 ++++++++++ 3 files changed, 90 insertions(+), 80 deletions(-) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 717d53c9ddf1e..2e677e6ad09fc 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -247,6 +247,72 @@ static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u) return page_ref_add_unless(&folio->page, nr, u); } +/** + * folio_try_get - Attempt to increase the refcount on a folio. + * @folio: The folio. + * + * If you do not already have a reference to a folio, you can attempt to + * get one using this function. It may fail if, for example, the folio + * has been freed since you found a pointer to it, or it is frozen for + * the purposes of splitting or migration. + * + * Return: True if the reference count was successfully incremented. + */ +static inline bool folio_try_get(struct folio *folio) +{ + return folio_ref_add_unless(folio, 1, 0); +} + +static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) +{ +#ifdef CONFIG_TINY_RCU + /* + * The caller guarantees the folio will not be freed from interrupt + * context, so (on !SMP) we only need preemption to be disabled + * and TINY_RCU does that for us. + */ +# ifdef CONFIG_PREEMPT_COUNT + VM_BUG_ON(!in_atomic() && !irqs_disabled()); +# endif + VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); + folio_ref_add(folio, count); +#else + if (unlikely(!folio_ref_add_unless(folio, count, 0))) { + /* Either the folio has been freed, or will be freed. */ + return false; + } +#endif + return true; +} + +/** + * folio_try_get_rcu - Attempt to increase the refcount on a folio. + * @folio: The folio. + * + * This is a version of folio_try_get() optimised for non-SMP kernels. 
+ * If you are still holding the rcu_read_lock() after looking up the + * page and know that the page cannot have its refcount decreased to + * zero in interrupt context, you can use this instead of folio_try_get(). + * + * Example users include get_user_pages_fast() (as pages are not unmapped + * from interrupt context) and the page cache lookups (as pages are not + * truncated from interrupt context). We also know that pages are not + * frozen in interrupt context for the purposes of splitting or migration. + * + * You can also use this function if you're holding a lock that prevents + * pages being frozen & removed; eg the i_pages lock for the page cache + * or the mmap_lock or page table lock for page tables. In this case, + * it will always succeed, and you could have used a plain folio_get(), + * but it's sometimes more convenient to have a common function called + * from both locked and RCU-protected contexts. + * + * Return: True if the reference count was successfully incremented. + */ +static inline bool folio_try_get_rcu(struct folio *folio) +{ + return folio_ref_try_add_rcu(folio, 1); +} + static inline int page_ref_freeze(struct page *page, int count) { int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ed02aa5222634..db1726b1bc1c1 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -172,91 +172,15 @@ static inline struct address_space *page_mapping_file(struct page *page) return page_mapping(page); } -/* - * speculatively take a reference to a page. - * If the page is free (_refcount == 0), then _refcount is untouched, and 0 - * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned. - * - * This function must be called inside the same rcu_read_lock() section as has - * been used to lookup the page in the pagecache radix-tree (or page table): - * this allows allocators to use a synchronize_rcu() to stabilize _refcount.
- * - * Unless an RCU grace period has passed, the count of all pages coming out - * of the allocator must be considered unstable. page_count may return higher - * than expected, and put_page must be able to do the right thing when the - * page has been finished with, no matter what it is subsequently allocated - * for (because put_page is what is used here to drop an invalid speculative - * reference). - * - * This is the interesting part of the lockless pagecache (and lockless - * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page) - * has the following pattern: - * 1. find page in radix tree - * 2. conditionally increment refcount - * 3. check the page is still in pagecache (if no, goto 1) - * - * Remove-side that cares about stability of _refcount (eg. reclaim) has the - * following (with the i_pages lock held): - * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg) - * B. remove page from pagecache - * C. free the page - * - * There are 2 critical interleavings that matter: - * - 2 runs before A: in this case, A sees elevated refcount and bails out - * - A runs before 2: in this case, 2 sees zero refcount and retries; - * subsequently, B will complete and 1 will find no page, causing the - * lookup to return NULL. - * - * It is possible that between 1 and 2, the page is removed then the exact same - * page is inserted into the same position in pagecache. That's OK: the - * old find_get_page using a lock could equally have run before or after - * such a re-insertion, depending on order that locks are granted. - * - * Lookups racing against pagecache insertion isn't a big problem: either 1 - * will find the page or it will not. Likewise, the old find_get_page could run - * either before the insertion or afterwards, depending on timing. 
- */ -static inline int __page_cache_add_speculative(struct page *page, int count) +static inline bool page_cache_add_speculative(struct page *page, int count) { -#ifdef CONFIG_TINY_RCU -# ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic() && !irqs_disabled()); -# endif - /* - * Preempt must be disabled here - we rely on rcu_read_lock doing - * this for us. - * - * Pagecache won't be truncated from interrupt context, so if we have - * found a page in the radix tree here, we have pinned its refcount by - * disabling preempt, and hence no need for the "speculative get" that - * SMP requires. - */ - VM_BUG_ON_PAGE(page_count(page) == 0, page); - page_ref_add(page, count); - -#else - if (unlikely(!page_ref_add_unless(page, count, 0))) { - /* - * Either the page has been freed, or will be freed. - * In either case, retry here and the caller should - * do the right thing (see comments above). - */ - return 0; - } -#endif VM_BUG_ON_PAGE(PageTail(page), page); - - return 1; -} - -static inline int page_cache_get_speculative(struct page *page) -{ - return __page_cache_add_speculative(page, 1); + return folio_ref_try_add_rcu((struct folio *)page, count); } -static inline int page_cache_add_speculative(struct page *page, int count) +static inline bool page_cache_get_speculative(struct page *page) { - return __page_cache_add_speculative(page, count); + return page_cache_add_speculative(page, 1); } /** diff --git a/mm/filemap.c b/mm/filemap.c index d1458ecf2f51e..e7ce8bb137e05 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1746,6 +1746,26 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, } EXPORT_SYMBOL(page_cache_prev_miss); +/* + * Lockless page cache protocol: + * On the lookup side: + * 1. Load the folio from i_pages + * 2. Increment the refcount if it's not zero + * 3. If the folio is not found by xas_reload(), put the refcount and retry + * + * On the removal side: + * A. Freeze the page (by zeroing the refcount if nobody else has a reference) + * B. 
Remove the page from i_pages + * C. Return the page to the page allocator + * + * This means that any page may have its reference count temporarily + * increased by a speculative page cache (or fast GUP) lookup as it can + * be allocated by another user before the RCU grace period expires. + * Because the refcount temporarily acquired here may end up being the + * last refcount on the page, any page allocation must be freeable by + * folio_put(). + */ + /* * mapping_get_entry - Get a page cache entry. * @mapping: the address_space to search From 8c02d1a1387610edb84cd5f5fcbc78adcd0591cf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 7 Dec 2020 15:42:09 -0500 Subject: [PATCH 461/851] mm: Add folio flag manipulation functions These new functions are the folio analogues of the various PageFlags functions. If CONFIG_DEBUG_VM_PGFLAGS is enabled, we check the folio is not a tail page at every invocation. This will also catch the PagePoisoned case as a poisoned page has every bit set, which would include PageTail. This saves 1684 bytes of text with the distro-derived config that I'm testing due to removing a double call to compound_head() in PageSwapCache(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/page-flags.h | 219 ++++++++++++++++++++++++++----------- 1 file changed, 156 insertions(+), 63 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 70ede83455386..ddb6606880860 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -143,6 +143,8 @@ enum pageflags { #endif __NR_PAGEFLAGS, + PG_readahead = PG_reclaim, + /* Filesystems */ PG_checked = PG_owner_priv_1, @@ -243,6 +245,15 @@ static inline void page_init_poison(struct page *page, size_t size) } #endif +static unsigned long *folio_flags(struct folio *folio, unsigned n) +{ + struct page *page = &folio->page; + + VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; +} + /* * Page flags policies wrt compound pages * @@ -287,36 +298,64 @@ static inline void page_init_poison(struct page *page, size_t size) VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ PF_POISONED_CHECK(&page[1]); }) +/* Which page is the flag stored in */ +#define FOLIO_PF_ANY 0 +#define FOLIO_PF_HEAD 0 +#define FOLIO_PF_ONLY_HEAD 0 +#define FOLIO_PF_NO_TAIL 0 +#define FOLIO_PF_NO_COMPOUND 0 +#define FOLIO_PF_SECOND 1 + /* * Macros to create function definitions for page flags */ #define TESTPAGEFLAG(uname, lname, policy) \ +static __always_inline bool folio_test_##lname(struct folio *folio) \ +{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int Page##uname(struct page *page) \ - { return test_bit(PG_##lname, &policy(page, 0)->flags); } +{ return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_set_##lname(struct folio *folio) \ +{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void SetPage##uname(struct page *page) 
\ - { set_bit(PG_##lname, &policy(page, 1)->flags); } +{ set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_clear_##lname(struct folio *folio) \ +{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void ClearPage##uname(struct page *page) \ - { clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_set_##lname(struct folio *folio) \ +{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __SetPage##uname(struct page *page) \ - { __set_bit(PG_##lname, &policy(page, 1)->flags); } +{ __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_clear_##lname(struct folio *folio) \ +{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __ClearPage##uname(struct page *page) \ - { __clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_set_##lname(struct folio *folio) \ +{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestSetPage##uname(struct page *page) \ - { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_clear_##lname(struct folio *folio) \ +{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestClearPage##uname(struct page *page) \ - { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_clear_bit(PG_##lname, 
&policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ @@ -332,29 +371,37 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) -#define TESTPAGEFLAG_FALSE(uname) \ +#define TESTPAGEFLAG_FALSE(uname, lname) \ +static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \ static inline int Page##uname(const struct page *page) { return 0; } -#define SETPAGEFLAG_NOOP(uname) \ +#define SETPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_set_##lname(struct folio *folio) { } \ static inline void SetPage##uname(struct page *page) { } -#define CLEARPAGEFLAG_NOOP(uname) \ +#define CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_clear_##lname(struct folio *folio) { } \ static inline void ClearPage##uname(struct page *page) { } -#define __CLEARPAGEFLAG_NOOP(uname) \ +#define __CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void __folio_clear_##lname(struct folio *folio) { } \ static inline void __ClearPage##uname(struct page *page) { } -#define TESTSETFLAG_FALSE(uname) \ +#define TESTSETFLAG_FALSE(uname, lname) \ +static inline bool folio_test_set_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestSetPage##uname(struct page *page) { return 0; } -#define TESTCLEARFLAG_FALSE(uname) \ +#define TESTCLEARFLAG_FALSE(uname, lname) \ +static inline bool folio_test_clear_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestClearPage##uname(struct page *page) { return 0; } -#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \ - SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname) +#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ + SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname) -#define TESTSCFLAG_FALSE(uname) \ - TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) +#define TESTSCFLAG_FALSE(uname, lname) \ + 
TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) @@ -410,8 +457,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND) +PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) + TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) #ifdef CONFIG_HIGHMEM /* @@ -420,22 +467,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #else -PAGEFLAG_FALSE(HighMem) +PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP -static __always_inline int PageSwapCache(struct page *page) +static __always_inline bool folio_test_swapcache(struct folio *folio) { -#ifdef CONFIG_THP_SWAP - page = compound_head(page); -#endif - return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags); + return folio_test_swapbacked(folio) && + test_bit(PG_swapcache, folio_flags(folio, 0)); +} +static __always_inline bool PageSwapCache(struct page *page) +{ + return folio_test_swapcache(page_folio(page)); } + SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) #else -PAGEFLAG_FALSE(SwapCache) +PAGEFLAG_FALSE(SwapCache, swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) @@ -447,14 +497,14 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) #else -PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked) - TESTSCFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) + TESTSCFLAG_FALSE(Mlocked, mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) #else 
-PAGEFLAG_FALSE(Uncached) +PAGEFLAG_FALSE(Uncached, uncached) #endif #ifdef CONFIG_MEMORY_FAILURE @@ -463,7 +513,7 @@ TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) extern bool take_page_off_buddy(struct page *page); #else -PAGEFLAG_FALSE(HWPoison) +PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif @@ -477,7 +527,7 @@ PAGEFLAG(Idle, idle, PF_ANY) #ifdef CONFIG_KASAN_HW_TAGS PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) #else -PAGEFLAG_FALSE(SkipKASanPoison) +PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison) #endif /* @@ -515,10 +565,14 @@ static __always_inline int PageMappingFlags(struct page *page) return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline int PageAnon(struct page *page) +static __always_inline bool folio_test_anon(struct folio *folio) +{ + return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; +} + +static __always_inline bool PageAnon(struct page *page) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; + return folio_test_anon(page_folio(page)); } static __always_inline int __PageMovable(struct page *page) @@ -534,30 +588,32 @@ static __always_inline int __PageMovable(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. 
*/ -static __always_inline int PageKsm(struct page *page) +static __always_inline bool folio_test_ksm(struct folio *folio) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == + return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } + +static __always_inline bool PageKsm(struct page *page) +{ + return folio_test_ksm(page_folio(page)); +} #else -TESTPAGEFLAG_FALSE(Ksm) +TESTPAGEFLAG_FALSE(Ksm, ksm) #endif u64 stable_page_flags(struct page *page); -static inline int PageUptodate(struct page *page) +static inline bool folio_test_uptodate(struct folio *folio) { - int ret; - page = compound_head(page); - ret = test_bit(PG_uptodate, &(page)->flags); + bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); /* - * Must ensure that the data we read out of the page is loaded - * _after_ we've loaded page->flags to check for PageUptodate. - * We can skip the barrier if the page is not uptodate, because + * Must ensure that the data we read out of the folio is loaded + * _after_ we've loaded folio->flags to check the uptodate bit. + * We can skip the barrier if the folio is not uptodate, because * we wouldn't be reading anything from it. * - * See SetPageUptodate() for the other side of the story. + * See folio_mark_uptodate() for the other side of the story. 
*/ if (ret) smp_rmb(); @@ -565,23 +621,36 @@ static inline int PageUptodate(struct page *page) return ret; } -static __always_inline void __SetPageUptodate(struct page *page) +static inline int PageUptodate(struct page *page) +{ + return folio_test_uptodate(page_folio(page)); +} + +static __always_inline void __folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); smp_wmb(); - __set_bit(PG_uptodate, &page->flags); + __set_bit(PG_uptodate, folio_flags(folio, 0)); } -static __always_inline void SetPageUptodate(struct page *page) +static __always_inline void folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); /* * Memory barrier must be issued before setting the PG_uptodate bit, - * so that all previous stores issued in order to bring the page - * uptodate are actually visible before PageUptodate becomes true. + * so that all previous stores issued in order to bring the folio + * uptodate are actually visible before folio_test_uptodate becomes true. 
*/ smp_wmb(); - set_bit(PG_uptodate, &page->flags); + set_bit(PG_uptodate, folio_flags(folio, 0)); +} + +static __always_inline void __SetPageUptodate(struct page *page) +{ + __folio_mark_uptodate((struct folio *)page); +} + +static __always_inline void SetPageUptodate(struct page *page) +{ + folio_mark_uptodate((struct folio *)page); } CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) @@ -606,6 +675,17 @@ static inline void set_page_writeback_keepwrite(struct page *page) __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) +/* Whether there are one or multiple pages in a folio */ +static inline bool folio_single(struct folio *folio) +{ + return !folio_test_head(folio); +} + +static inline bool folio_multi(struct folio *folio) +{ + return folio_test_head(folio); +} + static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); @@ -629,12 +709,15 @@ static inline void ClearPageCompound(struct page *page) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); int PageHeadHuge(struct page *page); +static inline bool folio_test_hugetlb(struct folio *folio) +{ + return PageHeadHuge(&folio->page); +} #else -TESTPAGEFLAG_FALSE(Huge) -TESTPAGEFLAG_FALSE(HeadHuge) +TESTPAGEFLAG_FALSE(Huge, hugetlb) +TESTPAGEFLAG_FALSE(HeadHuge, headhuge) #endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -650,6 +733,11 @@ static inline int PageTransHuge(struct page *page) return PageHead(page); } +static inline bool folio_test_transhuge(struct folio *folio) +{ + return folio_test_head(folio); +} + /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known @@ -723,12 +811,12 @@ static inline int PageTransTail(struct page *page) PAGEFLAG(DoubleMap, double_map, PF_SECOND) TESTSCFLAG(DoubleMap, double_map, PF_SECOND) #else -TESTPAGEFLAG_FALSE(TransHuge) 
-TESTPAGEFLAG_FALSE(TransCompound) -TESTPAGEFLAG_FALSE(TransCompoundMap) -TESTPAGEFLAG_FALSE(TransTail) -PAGEFLAG_FALSE(DoubleMap) - TESTSCFLAG_FALSE(DoubleMap) +TESTPAGEFLAG_FALSE(TransHuge, transhuge) +TESTPAGEFLAG_FALSE(TransCompound, transcompound) +TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) +TESTPAGEFLAG_FALSE(TransTail, transtail) +PAGEFLAG_FALSE(DoubleMap, double_map) + TESTSCFLAG_FALSE(DoubleMap, double_map) #endif /* @@ -903,6 +991,11 @@ static inline int page_has_private(struct page *page) return !!(page->flags & PAGE_FLAGS_PRIVATE); } +static inline bool folio_has_private(struct folio *folio) +{ + return page_has_private(&folio->page); +} + #undef PF_ANY #undef PF_HEAD #undef PF_ONLY_HEAD From 63984049e50aeeb18fce18f579fb14a1b2a137f6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 25 Feb 2021 09:47:41 -0500 Subject: [PATCH 462/851] mm/lru: Add folio LRU functions Handle arbitrary-order folios being added to the LRU. By definition, all pages being added to the LRU were already head or base pages, but call page_folio() on them anyway to get the type right and avoid the buried calls to compound_head(). Saves 783 bytes of kernel text; no functions grow. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Yu Zhao Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Kirill A. Shutemov Acked-by: Mike Rapoport --- Documentation/core-api/mm-api.rst | 1 + include/linux/mm_inline.h | 101 +++++++++++++++++++----------- include/trace/events/pagemap.h | 2 +- 3 files changed, 68 insertions(+), 36 deletions(-) diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index 5c459ee2accee..971149f5d2416 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -95,6 +95,7 @@ More Memory Management Functions .. kernel-doc:: mm/mempolicy.c .. kernel-doc:: include/linux/mm_types.h :internal: +.. kernel-doc:: include/linux/mm_inline.h .. 
kernel-doc:: include/linux/page-flags.h .. kernel-doc:: include/linux/mm.h :internal: diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 355ea1ee32bd7..d39537c5471b9 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -6,22 +6,28 @@ #include /** - * page_is_file_lru - should the page be on a file LRU or anon LRU? - * @page: the page to test - * - * Returns 1 if @page is a regular filesystem backed page cache page or a lazily - * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal - * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by - * functions that manipulate the LRU lists, to sort a page onto the right LRU - * list. + * folio_is_file_lru - Should the folio be on a file LRU or anon LRU? + * @folio: The folio to test. * * We would like to get this info without a page flag, but the state - * needs to survive until the page is last deleted from the LRU, which + * needs to survive until the folio is last deleted from the LRU, which * could be as far down as __page_cache_release. + * + * Return: An integer (not a boolean!) used to sort a folio onto the + * right LRU list and to account folios correctly. + * 1 if @folio is a regular filesystem backed page cache folio + * or a lazily freed anonymous folio (e.g. via MADV_FREE). + * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise + * ram or swap backed folio. 
*/ +static inline int folio_is_file_lru(struct folio *folio) +{ + return !folio_test_swapbacked(folio); +} + static inline int page_is_file_lru(struct page *page) { - return !PageSwapBacked(page); + return folio_is_file_lru(page_folio(page)); } static __always_inline void update_lru_size(struct lruvec *lruvec, @@ -39,69 +45,94 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, } /** - * __clear_page_lru_flags - clear page lru flags before releasing a page - * @page: the page that was on lru and now has a zero reference + * __folio_clear_lru_flags - Clear page lru flags before releasing a page. + * @folio: The folio that was on lru and now has a zero reference. */ -static __always_inline void __clear_page_lru_flags(struct page *page) +static __always_inline void __folio_clear_lru_flags(struct folio *folio) { - VM_BUG_ON_PAGE(!PageLRU(page), page); + VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio); - __ClearPageLRU(page); + __folio_clear_lru(folio); /* this shouldn't happen, so leave the flags to bad_page() */ - if (PageActive(page) && PageUnevictable(page)) + if (folio_test_active(folio) && folio_test_unevictable(folio)) return; - __ClearPageActive(page); - __ClearPageUnevictable(page); + __folio_clear_active(folio); + __folio_clear_unevictable(folio); +} + +static __always_inline void __clear_page_lru_flags(struct page *page) +{ + __folio_clear_lru_flags(page_folio(page)); } /** - * page_lru - which LRU list should a page be on? - * @page: the page to test + * folio_lru_list - Which LRU list should a folio be on? + * @folio: The folio to test. * - * Returns the LRU list a page should be on, as an index + * Return: The LRU list a folio should be on, as an index * into the array of LRU lists. 
*/ -static __always_inline enum lru_list page_lru(struct page *page) +static __always_inline enum lru_list folio_lru_list(struct folio *folio) { enum lru_list lru; - VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); - if (PageUnevictable(page)) + if (folio_test_unevictable(folio)) return LRU_UNEVICTABLE; - lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; - if (PageActive(page)) + lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; + if (folio_test_active(folio)) lru += LRU_ACTIVE; return lru; } +static __always_inline +void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) +{ + enum lru_list lru = folio_lru_list(folio); + + update_lru_size(lruvec, lru, folio_zonenum(folio), + folio_nr_pages(folio)); + list_add(&folio->lru, &lruvec->lists[lru]); +} + static __always_inline void add_page_to_lru_list(struct page *page, struct lruvec *lruvec) { - enum lru_list lru = page_lru(page); + lruvec_add_folio(lruvec, page_folio(page)); +} - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add(&page->lru, &lruvec->lists[lru]); +static __always_inline +void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) +{ + enum lru_list lru = folio_lru_list(folio); + + update_lru_size(lruvec, lru, folio_zonenum(folio), + folio_nr_pages(folio)); + list_add_tail(&folio->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, struct lruvec *lruvec) { - enum lru_list lru = page_lru(page); + lruvec_add_folio_tail(lruvec, page_folio(page)); +} - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add_tail(&page->lru, &lruvec->lists[lru]); +static __always_inline +void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) +{ + list_del(&folio->lru); + update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio), + 
-folio_nr_pages(folio)); } static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec) { - list_del(&page->lru); - update_lru_size(lruvec, page_lru(page), page_zonenum(page), - -thp_nr_pages(page)); + lruvec_del_folio(lruvec, page_folio(page)); } #endif diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 1d28431e85bdc..92ad176210ff5 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -41,7 +41,7 @@ TRACE_EVENT(mm_lru_insertion, TP_fast_assign( __entry->page = page; __entry->pfn = page_to_pfn(page); - __entry->lru = page_lru(page); + __entry->lru = folio_lru_list(page_folio(page)); __entry->flags = trace_pagemap_flags(page); ), From f1f9854412318729f0875709b52d369316e6b9ee Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 11 Jan 2021 10:04:40 -0500 Subject: [PATCH 463/851] mm: Handle per-folio private data Add folio_get_private() which mirrors page_private() -- ie folio private data is the same as page private data. The only difference is that these return a void * instead of an unsigned long, which matches the majority of users. Turn attach_page_private() into folio_attach_private() and reimplement attach_page_private() as a wrapper. No filesystem which uses page private data currently supports compound pages, so we're free to define the rules. attach_page_private() may only be called on a head page; if you want to add private data to a tail page, you can call set_page_private() directly (and shouldn't increment the page refcount! That should be done when adding private data to the head page / folio). This saves 813 bytes of text with the distro-derived config that I'm testing due to removing the calls to compound_head() in get_page() & put_page(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/mm_types.h | 11 +++++++++ include/linux/pagemap.h | 48 ++++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5749448b486d3..f38ab53fcf7c8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -309,6 +309,12 @@ static inline atomic_t *compound_pincount_ptr(struct page *page) #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) +/* + * page_private can be used on tail pages. However, PagePrivate is only + * checked by the VM on the head page. So page_private on the tail pages + * should be used for data that's ancillary to the head page (eg attaching + * buffer heads to tail pages after attaching buffer heads to the head page) + */ #define page_private(page) ((page)->private) static inline void set_page_private(struct page *page, unsigned long private) @@ -316,6 +322,11 @@ static inline void set_page_private(struct page *page, unsigned long private) page->private = private; } +static inline void *folio_get_private(struct folio *folio) +{ + return folio->private; +} + struct page_frag_cache { void * va; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index db1726b1bc1c1..3279c731ee047 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -184,42 +184,52 @@ static inline bool page_cache_get_speculative(struct page *page) } /** - * attach_page_private - Attach private data to a page. - * @page: Page to attach data to. - * @data: Data to attach to page. + * folio_attach_private - Attach private data to a folio. + * @folio: Folio to attach data to. + * @data: Data to attach to folio. * - * Attaching private data to a page increments the page's reference count. 
- * The data must be detached before the page will be freed. + * Attaching private data to a folio increments the page's reference count. + * The data must be detached before the folio will be freed. */ -static inline void attach_page_private(struct page *page, void *data) +static inline void folio_attach_private(struct folio *folio, void *data) { - get_page(page); - set_page_private(page, (unsigned long)data); - SetPagePrivate(page); + folio_get(folio); + folio->private = data; + folio_set_private(folio); } /** - * detach_page_private - Detach private data from a page. - * @page: Page to detach data from. + * folio_detach_private - Detach private data from a folio. + * @folio: Folio to detach data from. * - * Removes the data that was previously attached to the page and decrements + * Removes the data that was previously attached to the folio and decrements * the refcount on the page. * - * Return: Data that was attached to the page. + * Return: Data that was attached to the folio. */ -static inline void *detach_page_private(struct page *page) +static inline void *folio_detach_private(struct folio *folio) { - void *data = (void *)page_private(page); + void *data = folio_get_private(folio); - if (!PagePrivate(page)) + if (!folio_test_private(folio)) return NULL; - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); + folio_clear_private(folio); + folio->private = NULL; + folio_put(folio); return data; } +static inline void attach_page_private(struct page *page, void *data) +{ + folio_attach_private(page_folio(page), data); +} + +static inline void *detach_page_private(struct page *page) +{ + return folio_detach_private(page_folio(page)); +} + #ifdef CONFIG_NUMA extern struct page *__page_cache_alloc(gfp_t gfp); #else From 67431f80c18a45d74097d8a0bc0f3268c94e7113 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Jan 2021 23:39:21 -0500 Subject: [PATCH 464/851] mm/filemap: Add folio_index(), folio_file_page() and 
folio_contains() folio_index() is the equivalent of page_index() for folios. folio_file_page() is the equivalent of find_subpage(). folio_contains() is the equivalent of thp_contains(). No changes to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 56 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 3279c731ee047..f7c165b5991f0 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -386,6 +386,62 @@ static inline bool thp_contains(struct page *head, pgoff_t index) return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); } +#define swapcache_index(folio) __page_file_index(&(folio)->page) + +/** + * folio_index - File index of a folio. + * @folio: The folio. + * + * For a folio which is either in the page cache or the swap cache, + * return its index within the address_space it belongs to. If you know + * the page is definitely in the page cache, you can look at the folio's + * index directly. + * + * Return: The index (offset in units of pages) of a folio in its file. + */ +static inline pgoff_t folio_index(struct folio *folio) +{ + if (unlikely(folio_test_swapcache(folio))) + return swapcache_index(folio); + return folio->index; +} + +/** + * folio_file_page - The page for a particular index. + * @folio: The folio which contains this index. + * @index: The index we want to look up. + * + * Sometimes after looking up a folio in the page cache, we need to + * obtain the specific page for an index (eg a page fault). + * + * Return: The page containing the file data for this index. 
+ */ +static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (folio_test_hugetlb(folio)) + return &folio->page; + return folio_page(folio, index & (folio_nr_pages(folio) - 1)); +} + +/** + * folio_contains - Does this folio contain this index? + * @folio: The folio. + * @index: The page index within the file. + * + * Context: The caller should have the page locked in order to prevent + * (eg) shmem from moving the page between the page cache and swap cache + * and changing its index in the middle of the operation. + * Return: true or false. + */ +static inline bool folio_contains(struct folio *folio, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (folio_test_hugetlb(folio)) + return folio->index == index; + return index - folio_index(folio) < folio_nr_pages(folio); +} + /* * Given the page we found in the page cache, return the page corresponding * to this index in the file From b7cf2ff5130937c9702862044ada5c4412f0f972 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 21 Mar 2021 16:24:31 -0400 Subject: [PATCH 465/851] mm/filemap: Add folio_next_index() This helper returns the page index of the next folio in the file (ie the end of this folio, plus one). No changes to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f7c165b5991f0..bd0e7e91bfd40 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -406,6 +406,17 @@ static inline pgoff_t folio_index(struct folio *folio) return folio->index; } +/** + * folio_next_index - Get the index of the next folio. 
+ * @folio: The current folio. + * + * Return: The index of the folio which follows this folio in the file. + */ +static inline pgoff_t folio_next_index(struct folio *folio) +{ + return folio->index + folio_nr_pages(folio); +} + /** * folio_file_page - The page for a particular index. * @folio: The folio which contains this index. From b12454fd08e2d788792ee805852a0d68e30efb3c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 24 Dec 2020 07:25:19 -0500 Subject: [PATCH 466/851] mm/filemap: Add folio_pos() and folio_file_pos() These are just wrappers around page_offset() and page_file_offset() respectively. No change to generated code. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells --- include/linux/pagemap.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bd0e7e91bfd40..aa71fa82d6bef 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -562,6 +562,27 @@ static inline loff_t page_file_offset(struct page *page) return ((loff_t)page_index(page)) << PAGE_SHIFT; } +/** + * folio_pos - Returns the byte position of this folio in its file. + * @folio: The folio. + */ +static inline loff_t folio_pos(struct folio *folio) +{ + return page_offset(&folio->page); +} + +/** + * folio_file_pos - Returns the byte position of this folio in its file. + * @folio: The folio. + * + * This differs from folio_pos() for folios which belong to a swap file. + * NFS is the only filesystem today which needs to use folio_file_pos(). 
+ */ +static inline loff_t folio_file_pos(struct folio *folio) +{ + return page_file_offset(&folio->page); +} + extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, unsigned long address); From 4480cfeb324694ab640a9e51d0bb2c06713134a5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 10 Dec 2020 10:55:05 -0500 Subject: [PATCH 467/851] mm/util: Add folio_mapping() and folio_file_mapping() These are the folio equivalent of page_mapping() and page_file_mapping(). Add an out-of-line page_mapping() wrapper around folio_mapping() in order to prevent the page_folio() call from bloating every caller of page_mapping(). Adjust page_file_mapping() and page_mapping_file() to use folios internally. Rename __page_file_mapping() to swapcache_mapping() and change it to take a folio. This ends up saving 122 bytes of text overall. folio_mapping() is 45 bytes shorter than page_mapping() was, but the new page_mapping() wrapper is 30 bytes. The major reduction is a few bytes less in dozens of nfs functions (which call page_file_mapping()). Most of these appear to be a slight change in gcc's register allocation decisions, which allow: 48 8b 56 08 mov 0x8(%rsi),%rdx 48 8d 42 ff lea -0x1(%rdx),%rax 83 e2 01 and $0x1,%edx 48 0f 44 c6 cmove %rsi,%rax to become: 48 8b 46 08 mov 0x8(%rsi),%rax 48 8d 78 ff lea -0x1(%rax),%rdi a8 01 test $0x1,%al 48 0f 44 fe cmove %rsi,%rdi for a reduction of a single byte. Once the NFS client is converted to use folios, this entire sequence will disappear. Also add folio_mapping() documentation. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells --- Documentation/core-api/mm-api.rst | 2 ++ include/linux/mm.h | 14 ------------- include/linux/pagemap.h | 35 +++++++++++++++++++++++++++++-- include/linux/swap.h | 6 ++++++ mm/Makefile | 2 +- mm/folio-compat.c | 13 ++++++++++++ mm/swapfile.c | 8 +++---- mm/util.c | 30 +++++++++++++++----------- 8 files changed, 77 insertions(+), 33 deletions(-) create mode 100644 mm/folio-compat.c diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst index 971149f5d2416..395835f9289f1 100644 --- a/Documentation/core-api/mm-api.rst +++ b/Documentation/core-api/mm-api.rst @@ -101,3 +101,5 @@ More Memory Management Functions :internal: .. kernel-doc:: include/linux/page_ref.h .. kernel-doc:: include/linux/mmzone.h +.. kernel-doc:: mm/util.c + :functions: folio_mapping diff --git a/include/linux/mm.h b/include/linux/mm.h index ea15bd7ae55ef..cc8c2e09a34bf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1754,19 +1754,6 @@ void page_address_init(void); extern void *page_rmapping(struct page *page); extern struct anon_vma *page_anon_vma(struct page *page); -extern struct address_space *page_mapping(struct page *page); - -extern struct address_space *__page_file_mapping(struct page *); - -static inline -struct address_space *page_file_mapping(struct page *page) -{ - if (unlikely(PageSwapCache(page))) - return __page_file_mapping(page); - - return page->mapping; -} - extern pgoff_t __page_file_index(struct page *page); /* @@ -1781,7 +1768,6 @@ static inline pgoff_t page_index(struct page *page) } bool page_mapped(struct page *page); -struct address_space *page_mapping(struct page *page); /* * Return true only if the page has been allocated with diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index aa71fa82d6bef..a0925a89ba116 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -162,14 +162,45 @@ static inline void 
filemap_nr_thps_dec(struct address_space *mapping) void release_pages(struct page **pages, int nr); +struct address_space *page_mapping(struct page *); +struct address_space *folio_mapping(struct folio *); +struct address_space *swapcache_mapping(struct folio *); + +/** + * folio_file_mapping - Find the mapping this folio belongs to. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Folios in the swap cache return the mapping of the + * swap file or swap device where the data is stored. This is different + * from the mapping returned by folio_mapping(). The only reason to + * use it is if, like NFS, you return 0 from ->activate_swapfile. + * + * Do not call this for folios which aren't in the page cache or swap cache. + */ +static inline struct address_space *folio_file_mapping(struct folio *folio) +{ + if (unlikely(folio_test_swapcache(folio))) + return swapcache_mapping(folio); + + return folio->mapping; +} + +static inline struct address_space *page_file_mapping(struct page *page) +{ + return folio_file_mapping(page_folio(page)); +} + /* * For file cache pages, return the address_space, otherwise return NULL */ static inline struct address_space *page_mapping_file(struct page *page) { - if (unlikely(PageSwapCache(page))) + struct folio *folio = page_folio(page); + + if (unlikely(folio_test_swapcache(folio))) return NULL; - return page_mapping(page); + return folio_mapping(folio); } static inline bool page_cache_add_speculative(struct page *page, int count) diff --git a/include/linux/swap.h b/include/linux/swap.h index 6f5a43251593c..3d3d85354026f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -320,6 +320,12 @@ struct vma_swap_readahead { #endif }; +static inline swp_entry_t folio_swap_entry(struct folio *folio) +{ + swp_entry_t entry = { .val = page_private(&folio->page) }; + return entry; +} + /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, 
unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); diff --git a/mm/Makefile b/mm/Makefile index e3436741d5391..d7488bcbbb2b8 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -46,7 +46,7 @@ mmu-$(CONFIG_MMU) += process_vm_access.o endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ - maccess.o page-writeback.o \ + maccess.o page-writeback.o folio-compat.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o percpu.o slab_common.o \ diff --git a/mm/folio-compat.c b/mm/folio-compat.c new file mode 100644 index 0000000000000..5e107aa30a623 --- /dev/null +++ b/mm/folio-compat.c @@ -0,0 +1,13 @@ +/* + * Compatibility functions which bloat the callers too much to make inline. + * All of the callers of these functions should be converted to use folios + * eventually. + */ + +#include <linux/pagemap.h> + +struct address_space *page_mapping(struct page *page) +{ + return folio_mapping(page_folio(page)); +} +EXPORT_SYMBOL(page_mapping); diff --git a/mm/swapfile.c b/mm/swapfile.c index 1e07d1c776f2a..3a6c094310daa 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3528,13 +3528,13 @@ struct swap_info_struct *page_swap_info(struct page *page) } /* - * out-of-line __page_file_ methods to avoid include hell. + * out-of-line methods to avoid include hell. 
*/ -struct address_space *__page_file_mapping(struct page *page) +struct address_space *swapcache_mapping(struct folio *folio) { - return page_swap_info(page)->swap_file->f_mapping; + return page_swap_info(&folio->page)->swap_file->f_mapping; } -EXPORT_SYMBOL_GPL(__page_file_mapping); +EXPORT_SYMBOL_GPL(swapcache_mapping); pgoff_t __page_file_index(struct page *page) { diff --git a/mm/util.c b/mm/util.c index 9043d03750a73..1cde6218d6d14 100644 --- a/mm/util.c +++ b/mm/util.c @@ -686,30 +686,36 @@ struct anon_vma *page_anon_vma(struct page *page) return __page_rmapping(page); } -struct address_space *page_mapping(struct page *page) +/** + * folio_mapping - Find the mapping where this folio is stored. + * @folio: The folio. + * + * For folios which are in the page cache, return the mapping that this + * page belongs to. Folios in the swap cache return the swap mapping + * this page is stored in (which is different from the mapping for the + * swap file or swap device where the data is stored). + * + * You can call this for folios which aren't in the swap cache or page + * cache and it will return NULL. 
+ */ +struct address_space *folio_mapping(struct folio *folio) { struct address_space *mapping; - page = compound_head(page); - /* This happens if someone calls flush_dcache_page on slab page */ - if (unlikely(PageSlab(page))) + if (unlikely(folio_test_slab(folio))) return NULL; - if (unlikely(PageSwapCache(page))) { - swp_entry_t entry; - - entry.val = page_private(page); - return swap_address_space(entry); - } + if (unlikely(folio_test_swapcache(folio))) + return swap_address_space(folio_swap_entry(folio)); - mapping = page->mapping; + mapping = folio->mapping; if ((unsigned long)mapping & PAGE_MAPPING_ANON) return NULL; return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS); } -EXPORT_SYMBOL(page_mapping); +EXPORT_SYMBOL(folio_mapping); /* Slow path of page_mapcount() for compound pages */ int __page_mapcount(struct page *page) From 0235f13bc03d178b948bc529cdca69daf0d67739 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 7 Dec 2020 15:44:35 -0500 Subject: [PATCH 468/851] mm/filemap: Add folio_unlock() Convert unlock_page() to call folio_unlock(). By using a folio we avoid a call to compound_head(). This shortens the function from 39 bytes to 25 and removes 4 instructions on x86-64. Because we still have unlock_page(), it's a net increase of 16 bytes of text for the kernel as a whole, but any path that uses folio_unlock() will execute 4 fewer instructions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 3 ++- mm/filemap.c | 29 ++++++++++++----------------- mm/folio-compat.c | 6 ++++++ 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a0925a89ba116..a13edc7a29161 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -658,7 +658,8 @@ extern int __lock_page_killable(struct page *page); extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags); -extern void unlock_page(struct page *page); +void unlock_page(struct page *page); +void folio_unlock(struct folio *folio); /* * Return true if the page was successfully locked diff --git a/mm/filemap.c b/mm/filemap.c index e7ce8bb137e05..f936dd963331e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1435,29 +1435,24 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem #endif /** - * unlock_page - unlock a locked page - * @page: the page + * folio_unlock - Unlock a locked folio. + * @folio: The folio. * - * Unlocks the page and wakes up sleepers in wait_on_page_locked(). - * Also wakes sleepers in wait_on_page_writeback() because the wakeup - * mechanism between PageLocked pages and PageWriteback pages is shared. - * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. + * Unlocks the folio and wakes up any thread sleeping on the page lock. * - * Note that this depends on PG_waiters being the sign bit in the byte - * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to - * clear the PG_locked bit and test PG_waiters at the same time fairly - * portably (architectures that do LL/SC can test any bit, while x86 can - * test the sign bit). + * Context: May be called from interrupt or process context. May not be + * called from NMI context. 
*/ -void unlock_page(struct page *page) +void folio_unlock(struct folio *folio) { + /* Bit 7 allows x86 to check the byte's sign bit */ BUILD_BUG_ON(PG_waiters != 7); - page = compound_head(page); - VM_BUG_ON_PAGE(!PageLocked(page), page); - if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) - wake_up_page_bit(page, PG_locked); + BUILD_BUG_ON(PG_locked > 7); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0))) + wake_up_page_bit(&folio->page, PG_locked); } -EXPORT_SYMBOL(unlock_page); +EXPORT_SYMBOL(folio_unlock); /** * end_page_private_2 - Clear PG_private_2 and release any waiters diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 5e107aa30a623..91b3d00a92f74 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -11,3 +11,9 @@ struct address_space *page_mapping(struct page *page) return folio_mapping(page_folio(page)); } EXPORT_SYMBOL(page_mapping); + +void unlock_page(struct page *page) +{ + return folio_unlock(page_folio(page)); +} +EXPORT_SYMBOL(unlock_page); From 2896ce5ea108e681462dae13512e640a3c59aa79 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 1 Mar 2021 19:38:25 -0500 Subject: [PATCH 469/851] mm/filemap: Add folio_lock() This is like lock_page() but for use by callers who know they have a folio. Convert __lock_page() to be __folio_lock(). This saves one call to compound_head() per contended call to lock_page(). Saves 455 bytes of text; mostly from improved register allocation and inlining decisions. __folio_lock is 59 bytes while __lock_page was 79. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 24 +++++++++++++++++++----- mm/filemap.c | 29 +++++++++++++++-------------- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a13edc7a29161..c3673c55125bf 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -653,7 +653,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, return true; } -extern void __lock_page(struct page *page); +void __folio_lock(struct folio *folio); extern int __lock_page_killable(struct page *page); extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, @@ -661,13 +661,24 @@ extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, void unlock_page(struct page *page); void folio_unlock(struct folio *folio); +static inline bool folio_trylock(struct folio *folio) +{ + return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0))); +} + /* * Return true if the page was successfully locked */ static inline int trylock_page(struct page *page) { - page = compound_head(page); - return (likely(!test_and_set_bit_lock(PG_locked, &page->flags))); + return folio_trylock(page_folio(page)); +} + +static inline void folio_lock(struct folio *folio) +{ + might_sleep(); + if (!folio_trylock(folio)) + __folio_lock(folio); } /* @@ -675,9 +686,12 @@ static inline int trylock_page(struct page *page) */ static inline void lock_page(struct page *page) { + struct folio *folio; might_sleep(); - if (!trylock_page(page)) - __lock_page(page); + + folio = page_folio(page); + if (!folio_trylock(folio)) + __folio_lock(folio); } /* diff --git a/mm/filemap.c b/mm/filemap.c index f936dd963331e..0c8b5a043e200 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1187,7 +1187,7 @@ static void wake_up_page(struct 
page *page, int bit) */ enum behavior { EXCLUSIVE, /* Hold ref to page and take the bit when woken, like - * __lock_page() waiting on then setting PG_locked. + * __folio_lock() waiting on then setting PG_locked. */ SHARED, /* Hold ref to page and check the bit when woken, like * wait_on_page_writeback() waiting on PG_writeback. @@ -1578,17 +1578,16 @@ void page_endio(struct page *page, bool is_write, int err) EXPORT_SYMBOL_GPL(page_endio); /** - * __lock_page - get a lock on the page, assuming we need to sleep to get it - * @__page: the page to lock + * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it. + * @folio: The folio to lock */ -void __lock_page(struct page *__page) +void __folio_lock(struct folio *folio) { - struct page *page = compound_head(__page); - wait_queue_head_t *q = page_waitqueue(page); - wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, + wait_queue_head_t *q = page_waitqueue(&folio->page); + wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); } -EXPORT_SYMBOL(__lock_page); +EXPORT_SYMBOL(__folio_lock); int __lock_page_killable(struct page *__page) { @@ -1663,10 +1662,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, return 0; } } else { - __lock_page(page); + __folio_lock(page_folio(page)); } - return 1; + return 1; } /** @@ -2837,7 +2836,9 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, struct file **fpin) { - if (trylock_page(page)) + struct folio *folio = page_folio(page); + + if (folio_trylock(folio)) return 1; /* @@ -2850,7 +2851,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, *fpin = maybe_unlock_mmap_for_io(vmf, *fpin); if (vmf->flags & FAULT_FLAG_KILLABLE) { - if (__lock_page_killable(page)) { + if (__lock_page_killable(&folio->page)) { /* * We didn't have the right flags to drop the mmap_lock, * but 
all fault_handlers only check for fatal signals @@ -2862,11 +2863,11 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, return 0; } } else - __lock_page(page); + __folio_lock(folio); + return 1; } - /* * Synchronous readahead happens when we don't even find a page in the page * cache at all. We don't want to perform IO under the mmap sem, so if we have From a7a9dbf5c6586f6ca0df57f57538cce1e4e77e2e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Dec 2020 00:07:31 -0500 Subject: [PATCH 470/851] mm/filemap: Add folio_lock_killable() This is like lock_page_killable() but for use by callers who know they have a folio. Convert __lock_page_killable() to be __folio_lock_killable(). This saves one call to compound_head() per contended call to lock_page_killable(). __folio_lock_killable() is 19 bytes smaller than __lock_page_killable() was. filemap_fault() shrinks by 74 bytes and __lock_page_or_retry() shrinks by 71 bytes. That's a total of 164 bytes of text saved. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Acked-by: Mike Rapoport --- include/linux/pagemap.h | 15 ++++++++++----- mm/filemap.c | 17 +++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c3673c55125bf..88727c74e0594 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -654,7 +654,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, } void __folio_lock(struct folio *folio); -extern int __lock_page_killable(struct page *page); +int __folio_lock_killable(struct folio *folio); extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags); @@ -694,6 +694,14 @@ static inline void lock_page(struct page *page) __folio_lock(folio); } +static inline int folio_lock_killable(struct folio *folio) +{ + might_sleep(); + if (!folio_trylock(folio)) + return __folio_lock_killable(folio); + return 0; +} + /* * lock_page_killable is like lock_page but can be interrupted by fatal * signals. 
It returns 0 if it locked the page and -EINTR if it was @@ -701,10 +709,7 @@ static inline void lock_page(struct page *page) */ static inline int lock_page_killable(struct page *page) { - might_sleep(); - if (!trylock_page(page)) - return __lock_page_killable(page); - return 0; + return folio_lock_killable(page_folio(page)); } /* diff --git a/mm/filemap.c b/mm/filemap.c index 0c8b5a043e200..45983e98cf236 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1589,14 +1589,13 @@ void __folio_lock(struct folio *folio) } EXPORT_SYMBOL(__folio_lock); -int __lock_page_killable(struct page *__page) +int __folio_lock_killable(struct folio *folio) { - struct page *page = compound_head(__page); - wait_queue_head_t *q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, + wait_queue_head_t *q = page_waitqueue(&folio->page); + return wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_KILLABLE, EXCLUSIVE); } -EXPORT_SYMBOL_GPL(__lock_page_killable); +EXPORT_SYMBOL_GPL(__folio_lock_killable); int __lock_page_async(struct page *page, struct wait_page_queue *wait) { @@ -1638,6 +1637,8 @@ int __lock_page_async(struct page *page, struct wait_page_queue *wait) int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { + struct folio *folio = page_folio(page); + if (fault_flag_allow_retry_first(flags)) { /* * CAUTION! 
In this case, mmap_lock is not released @@ -1656,13 +1657,13 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, if (flags & FAULT_FLAG_KILLABLE) { int ret; - ret = __lock_page_killable(page); + ret = __folio_lock_killable(folio); if (ret) { mmap_read_unlock(mm); return 0; } } else { - __folio_lock(page_folio(page)); + __folio_lock(folio); } return 1; @@ -2851,7 +2852,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page, *fpin = maybe_unlock_mmap_for_io(vmf, *fpin); if (vmf->flags & FAULT_FLAG_KILLABLE) { - if (__lock_page_killable(&folio->page)) { + if (__folio_lock_killable(folio)) { /* * We didn't have the right flags to drop the mmap_lock, * but all fault_handlers only check for fatal signals From e3700f8b6abeca88e6fb6bb7d6d9482d35554822 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 30 Dec 2020 17:58:40 -0500 Subject: [PATCH 471/851] mm/filemap: Add __folio_lock_async() There aren't any actual callers of lock_page_async(), so remove it. Convert filemap_update_page() to call __folio_lock_async(). __folio_lock_async() is 21 bytes smaller than __lock_page_async(), but the real savings come from using a folio in filemap_update_page(), shrinking it from 515 bytes to 404 bytes, saving 111 bytes. The text shrinks by 132 bytes in total. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- fs/io_uring.c | 2 +- include/linux/pagemap.h | 17 ----------------- mm/filemap.c | 31 ++++++++++++++++--------------- 3 files changed, 17 insertions(+), 33 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0cac361bf6b8d..c83763135b48a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3149,7 +3149,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) } /* - * This is our waitqueue callback handler, registered through lock_page_async() + * This is our waitqueue callback handler, registered through __folio_lock_async() * when we initially tried to do the IO with the iocb armed our waitqueue. * This gets called when the page is unlocked, and we generally expect that to * happen when the page IO is completed and the page is now uptodate. This will diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 88727c74e0594..6f631a3e42dce 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -655,7 +655,6 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, void __folio_lock(struct folio *folio); int __folio_lock_killable(struct folio *folio); -extern int __lock_page_async(struct page *page, struct wait_page_queue *wait); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags); void unlock_page(struct page *page); @@ -712,22 +711,6 @@ static inline int lock_page_killable(struct page *page) return folio_lock_killable(page_folio(page)); } -/* - * lock_page_async - Lock the page, unless this would block. If the page - * is already locked, then queue a callback when the page becomes unlocked. - * This callback can then retry the operation. - * - * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page - * was already locked and the callback defined in 'wait' was queued. 
- */ -static inline int lock_page_async(struct page *page, - struct wait_page_queue *wait) -{ - if (!trylock_page(page)) - return __lock_page_async(page, wait); - return 0; -} - /* * lock_page_or_retry - Lock the page, unless this would block and the * caller indicated that it can handle a retry. diff --git a/mm/filemap.c b/mm/filemap.c index 45983e98cf236..554c6150eda06 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1597,18 +1597,18 @@ int __folio_lock_killable(struct folio *folio) } EXPORT_SYMBOL_GPL(__folio_lock_killable); -int __lock_page_async(struct page *page, struct wait_page_queue *wait) +static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) { - struct wait_queue_head *q = page_waitqueue(page); + struct wait_queue_head *q = page_waitqueue(&folio->page); int ret = 0; - wait->page = page; + wait->page = &folio->page; wait->bit_nr = PG_locked; spin_lock_irq(&q->lock); __add_wait_queue_entry_tail(q, &wait->wait); - SetPageWaiters(page); - ret = !trylock_page(page); + folio_set_waiters(folio); + ret = !folio_trylock(folio); /* * If we were successful now, we know we're still on the * waitqueue as we're still under the lock. 
This means it's @@ -2381,41 +2381,42 @@ static int filemap_update_page(struct kiocb *iocb, struct address_space *mapping, struct iov_iter *iter, struct page *page) { + struct folio *folio = page_folio(page); int error; - if (!trylock_page(page)) { + if (!folio_trylock(folio)) { if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) return -EAGAIN; if (!(iocb->ki_flags & IOCB_WAITQ)) { - put_and_wait_on_page_locked(page, TASK_KILLABLE); + put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE); return AOP_TRUNCATED_PAGE; } - error = __lock_page_async(page, iocb->ki_waitq); + error = __folio_lock_async(folio, iocb->ki_waitq); if (error) return error; } - if (!page->mapping) + if (!folio->mapping) goto truncated; error = 0; - if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page)) + if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page)) goto unlock; error = -EAGAIN; if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ)) goto unlock; - error = filemap_read_page(iocb->ki_filp, mapping, page); + error = filemap_read_page(iocb->ki_filp, mapping, &folio->page); if (error == AOP_TRUNCATED_PAGE) - put_page(page); + folio_put(folio); return error; truncated: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return AOP_TRUNCATED_PAGE; unlock: - unlock_page(page); + folio_unlock(folio); return error; } From 35e305eee20d02172fc24acd382ab52c93490dd4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 4 Mar 2021 10:21:02 -0500 Subject: [PATCH 472/851] mm/filemap: Add folio_wait_locked() Also add folio_wait_locked_killable(). Turn wait_on_page_locked() and wait_on_page_locked_killable() into wrappers. This eliminates a call to compound_head() from each call-site, reducing text size by 193 bytes for me. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 26 ++++++++++++++++++-------- mm/filemap.c | 4 ++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 6f631a3e42dce..03fea8bbfd8e9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -733,23 +733,33 @@ extern void wait_on_page_bit(struct page *page, int bit_nr); extern int wait_on_page_bit_killable(struct page *page, int bit_nr); /* - * Wait for a page to be unlocked. + * Wait for a folio to be unlocked. * - * This must be called with the caller "holding" the page, - * ie with increased "page->count" so that the page won't + * This must be called with the caller "holding" the folio, + * ie with increased "page->count" so that the folio won't * go away during the wait.. */ +static inline void folio_wait_locked(struct folio *folio) +{ + if (folio_test_locked(folio)) + wait_on_page_bit(&folio->page, PG_locked); +} + +static inline int folio_wait_locked_killable(struct folio *folio) +{ + if (!folio_test_locked(folio)) + return 0; + return wait_on_page_bit_killable(&folio->page, PG_locked); +} + static inline void wait_on_page_locked(struct page *page) { - if (PageLocked(page)) - wait_on_page_bit(compound_head(page), PG_locked); + folio_wait_locked(page_folio(page)); } static inline int wait_on_page_locked_killable(struct page *page) { - if (!PageLocked(page)) - return 0; - return wait_on_page_bit_killable(compound_head(page), PG_locked); + return folio_wait_locked_killable(page_folio(page)); } int put_and_wait_on_page_locked(struct page *page, int state); diff --git a/mm/filemap.c b/mm/filemap.c index 554c6150eda06..50f901f25d82c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1649,9 +1649,9 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, mmap_read_unlock(mm); if (flags & FAULT_FLAG_KILLABLE) - 
wait_on_page_locked_killable(page); + folio_wait_locked_killable(folio); else - wait_on_page_locked(page); + folio_wait_locked(folio); return 0; } if (flags & FAULT_FLAG_KILLABLE) { From d42e083e5eff0141ab954dee7fd4e39169eb85db Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 18 Mar 2021 21:39:45 -0400 Subject: [PATCH 473/851] mm/filemap: Add __folio_lock_or_retry() Convert __lock_page_or_retry() to __folio_lock_or_retry(). This actually saves 4 bytes in the only caller of lock_page_or_retry() (due to better register allocation) and saves the 14 byte cost of calling page_folio() in __folio_lock_or_retry() for a total saving of 18 bytes. Also use a bool for the return type. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Reviewed-by: William Kucharski Acked-by: Mike Rapoport --- include/linux/pagemap.h | 11 +++++++---- mm/filemap.c | 20 +++++++++----------- mm/memory.c | 8 ++++---- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 03fea8bbfd8e9..626dbccbfb90a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -655,7 +655,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page, void __folio_lock(struct folio *folio); int __folio_lock_killable(struct folio *folio); -extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, +bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, unsigned int flags); void unlock_page(struct page *page); void folio_unlock(struct folio *folio); @@ -716,13 +716,16 @@ static inline int lock_page_killable(struct page *page) * caller indicated that it can handle a retry. * * Return value and mmap_lock implications depend on flags; see - * __lock_page_or_retry(). + * __folio_lock_or_retry(). 
*/ -static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, +static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { + struct folio *folio; might_sleep(); - return trylock_page(page) || __lock_page_or_retry(page, mm, flags); + + folio = page_folio(page); + return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags); } /* diff --git a/mm/filemap.c b/mm/filemap.c index 50f901f25d82c..270f1e7b67c1c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1625,48 +1625,46 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) /* * Return values: - * 1 - page is locked; mmap_lock is still held. - * 0 - page is not locked. + * true - folio is locked; mmap_lock is still held. + * false - folio is not locked. * mmap_lock has been released (mmap_read_unlock(), unless flags had both * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in * which case mmap_lock is still held. * * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1 - * with the page locked and the mmap_lock unperturbed. + * with the folio locked and the mmap_lock unperturbed. */ -int __lock_page_or_retry(struct page *page, struct mm_struct *mm, +bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, unsigned int flags) { - struct folio *folio = page_folio(page); - if (fault_flag_allow_retry_first(flags)) { /* * CAUTION! In this case, mmap_lock is not released * even though return 0. 
*/ if (flags & FAULT_FLAG_RETRY_NOWAIT) - return 0; + return false; mmap_read_unlock(mm); if (flags & FAULT_FLAG_KILLABLE) folio_wait_locked_killable(folio); else folio_wait_locked(folio); - return 0; + return false; } if (flags & FAULT_FLAG_KILLABLE) { - int ret; + bool ret; ret = __folio_lock_killable(folio); if (ret) { mmap_read_unlock(mm); - return 0; + return false; } } else { __folio_lock(folio); } - return 1; + return true; } /** diff --git a/mm/memory.c b/mm/memory.c index 747a01d495f2c..2f111f9b3dbc6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4248,7 +4248,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults). * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). * If mmap_lock is released, vma may become invalid (for example * by other thread calling munmap()). */ @@ -4489,7 +4489,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) * concurrent faults). * * The mmap_lock may have been released depending on flags and our return value. - * See filemap_fault() and __lock_page_or_retry(). + * See filemap_fault() and __folio_lock_or_retry(). */ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { @@ -4593,7 +4593,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). 
*/ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -4749,7 +4749,7 @@ static inline void mm_account_fault(struct pt_regs *regs, * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our - * return value. See filemap_fault() and __lock_page_or_retry(). + * return value. See filemap_fault() and __folio_lock_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs) From 6ac258a39fe42ccd8442770b2a74d165093aa6af Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Dec 2020 01:25:39 -0500 Subject: [PATCH 474/851] mm/swap: Add folio_rotate_reclaimable() Convert rotate_reclaimable_page() to folio_rotate_reclaimable(). This eliminates all five of the calls to compound_head() in this function, saving 75 bytes at the cost of adding 15 bytes to its one caller, end_page_writeback(). We also save 36 bytes from pagevec_move_tail_fn() due to using folios there. Net 96 bytes savings. Also move its declaration to mm/internal.h as it's only used by filemap.c. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Acked-by: Kirill A. 
Shutemov Acked-by: Mike Rapoport --- include/linux/swap.h | 1 - mm/filemap.c | 3 ++- mm/internal.h | 1 + mm/page_io.c | 4 ++-- mm/swap.c | 30 ++++++++++++++++-------------- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 3d3d85354026f..8394716a002b5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -371,7 +371,6 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); -extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); diff --git a/mm/filemap.c b/mm/filemap.c index 270f1e7b67c1c..8a97d89204001 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1529,8 +1529,9 @@ void end_page_writeback(struct page *page) * ever page writeback. */ if (PageReclaim(page)) { + struct folio *folio = page_folio(page); ClearPageReclaim(page); - rotate_reclaimable_page(page); + folio_rotate_reclaimable(folio); } /* diff --git a/mm/internal.h b/mm/internal.h index 31ff935b2547d..1a8851b730310 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -35,6 +35,7 @@ void page_writeback_init(void); vm_fault_t do_swap_page(struct vm_fault *vmf); +void folio_rotate_reclaimable(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); diff --git a/mm/page_io.c b/mm/page_io.c index c493ce9ebcf50..d597bc6e6e451 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -38,7 +38,7 @@ void end_swap_bio_write(struct bio *bio) * Also print a dire warning that things will go BAD (tm) * very quickly. 
* - * Also clear PG_reclaim to avoid rotate_reclaimable_page() + * Also clear PG_reclaim to avoid folio_rotate_reclaimable() */ set_page_dirty(page); pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n", @@ -317,7 +317,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, * temporary failure if the system has limited * memory for allocating transmit buffers. * Mark the page dirty and avoid - * rotate_reclaimable_page but rate-limit the + * folio_rotate_reclaimable but rate-limit the * messages but do not flag PageError like * the normal direct-to-bio case as it could * be temporary. diff --git a/mm/swap.c b/mm/swap.c index 19600430e536b..095a5ec6f986c 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -228,11 +228,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec) { - if (!PageUnevictable(page)) { - del_page_from_lru_list(page, lruvec); - ClearPageActive(page); - add_page_to_lru_list_tail(page, lruvec); - __count_vm_events(PGROTATED, thp_nr_pages(page)); + struct folio *folio = page_folio(page); + + if (!folio_test_unevictable(folio)) { + lruvec_del_folio(lruvec, folio); + folio_clear_active(folio); + lruvec_add_folio_tail(lruvec, folio); + __count_vm_events(PGROTATED, folio_nr_pages(folio)); } } @@ -249,23 +251,23 @@ static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page) } /* - * Writeback is about to end against a page which has been marked for immediate - * reclaim. If it still appears to be reclaimable, move it to the tail of the - * inactive list. + * Writeback is about to end against a folio which has been marked for + * immediate reclaim. If it still appears to be reclaimable, move it + * to the tail of the inactive list. * - * rotate_reclaimable_page() must disable IRQs, to prevent nasty races. + * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races. 
*/ -void rotate_reclaimable_page(struct page *page) +void folio_rotate_reclaimable(struct folio *folio) { - if (!PageLocked(page) && !PageDirty(page) && - !PageUnevictable(page) && PageLRU(page)) { + if (!folio_test_locked(folio) && !folio_test_dirty(folio) && + !folio_test_unevictable(folio) && folio_test_lru(folio)) { struct pagevec *pvec; unsigned long flags; - get_page(page); + folio_get(folio); local_lock_irqsave(&lru_rotate.lock, flags); pvec = this_cpu_ptr(&lru_rotate.pvec); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) pagevec_lru_move_fn(pvec, pagevec_move_tail_fn); local_unlock_irqrestore(&lru_rotate.lock, flags); } From 545c0f03d3dd47480a5f52afda98c904df499221 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Mar 2021 15:21:55 -0500 Subject: [PATCH 475/851] mm/filemap: Add folio_end_writeback() Add an end_page_writeback() wrapper function for users that are not yet converted to folios. folio_end_writeback() is less than half the size of end_page_writeback() at just 105 bytes compared to 228 bytes, due to removing all the compound_head() calls. The 30 byte wrapper function makes this a net saving of 93 bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 3 ++- mm/filemap.c | 43 ++++++++++++++++++++--------------------- mm/folio-compat.c | 6 ++++++ 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 626dbccbfb90a..66a0191785507 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -768,7 +768,8 @@ static inline int wait_on_page_locked_killable(struct page *page) int put_and_wait_on_page_locked(struct page *page, int state); void wait_on_page_writeback(struct page *page); int wait_on_page_writeback_killable(struct page *page); -extern void end_page_writeback(struct page *page); +void end_page_writeback(struct page *page); +void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); void __set_page_dirty(struct page *, struct address_space *, int warn); diff --git a/mm/filemap.c b/mm/filemap.c index 8a97d89204001..a56878a553e4f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1175,11 +1175,11 @@ static void wake_up_page_bit(struct page *page, int bit_nr) spin_unlock_irqrestore(&q->lock, flags); } -static void wake_up_page(struct page *page, int bit) +static void folio_wake(struct folio *folio, int bit) { - if (!PageWaiters(page)) + if (!folio_test_waiters(folio)) return; - wake_up_page_bit(page, bit); + wake_up_page_bit(&folio->page, bit); } /* @@ -1516,39 +1516,38 @@ int wait_on_page_private_2_killable(struct page *page) EXPORT_SYMBOL(wait_on_page_private_2_killable); /** - * end_page_writeback - end writeback against a page - * @page: the page + * folio_end_writeback - End writeback against a folio. + * @folio: The folio. */ -void end_page_writeback(struct page *page) +void folio_end_writeback(struct folio *folio) { /* - * TestClearPageReclaim could be used here but it is an atomic - * operation and overkill in this particular case. 
Failing to - * shuffle a page marked for immediate reclaim is too mild to - * justify taking an atomic operation penalty at the end of - * ever page writeback. + * folio_test_clear_reclaim() could be used here but it is an + * atomic operation and overkill in this particular case. Failing + * to shuffle a folio marked for immediate reclaim is too mild + * a gain to justify taking an atomic operation penalty at the + * end of every folio writeback. */ - if (PageReclaim(page)) { - struct folio *folio = page_folio(page); - ClearPageReclaim(page); + if (folio_test_reclaim(folio)) { + folio_clear_reclaim(folio); folio_rotate_reclaimable(folio); } /* - * Writeback does not hold a page reference of its own, relying + * Writeback does not hold a folio reference of its own, relying * on truncation to wait for the clearing of PG_writeback. - * But here we must make sure that the page is not freed and - * reused before the wake_up_page(). + * But here we must make sure that the folio is not freed and + * reused before the folio_wake(). 
*/ - get_page(page); - if (!test_clear_page_writeback(page)) + folio_get(folio); + if (!test_clear_page_writeback(&folio->page)) BUG(); smp_mb__after_atomic(); - wake_up_page(page, PG_writeback); - put_page(page); + folio_wake(folio, PG_writeback); + folio_put(folio); } -EXPORT_SYMBOL(end_page_writeback); +EXPORT_SYMBOL(folio_end_writeback); /* * After completing I/O on a page, call this routine to update the page diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 91b3d00a92f74..526843d03d582 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -17,3 +17,9 @@ void unlock_page(struct page *page) return folio_unlock(page_folio(page)); } EXPORT_SYMBOL(unlock_page); + +void end_page_writeback(struct page *page) +{ + return folio_end_writeback(page_folio(page)); +} +EXPORT_SYMBOL(end_page_writeback); From ec086dcf8229d4fa257dc941dd1dcb9886889059 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 4 Mar 2021 11:09:17 -0500 Subject: [PATCH 476/851] mm/writeback: Add folio_wait_writeback() wait_on_page_writeback_killable() only has one caller, so convert it to call folio_wait_writeback_killable(). For the wait_on_page_writeback() callers, add a compatibility wrapper around folio_wait_writeback(). Turning PageWriteback() into folio_test_writeback() eliminates a call to compound_head() which saves 8 bytes and 15 bytes in the two functions. Unfortunately, that is more than offset by adding the wait_on_page_writeback compatibility wrapper for a net increase in text of 7 bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Acked-by: Mike Rapoport --- fs/afs/write.c | 9 ++++---- include/linux/pagemap.h | 3 ++- mm/folio-compat.c | 6 ++++++ mm/page-writeback.c | 48 ++++++++++++++++++++++++++++------------- 4 files changed, 46 insertions(+), 20 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 3104b62c20826..fb7d5c1cabde8 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -839,7 +839,8 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) { - struct page *page = thp_head(vmf->page); + struct folio *folio = page_folio(vmf->page); + struct page *page = &folio->page; struct file *file = vmf->vma->vm_file; struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); @@ -859,7 +860,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) goto out; #endif - if (wait_on_page_writeback_killable(page)) + if (folio_wait_writeback_killable(folio)) goto out; if (lock_page_killable(page) < 0) @@ -869,8 +870,8 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) * details the portion of the page we need to write back and we might * need to redirty the page if there's a problem. 
*/ - if (wait_on_page_writeback_killable(page) < 0) { - unlock_page(page); + if (folio_wait_writeback_killable(folio) < 0) { + folio_unlock(folio); goto out; } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 66a0191785507..0c5f53368fe98 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -767,7 +767,8 @@ static inline int wait_on_page_locked_killable(struct page *page) int put_and_wait_on_page_locked(struct page *page, int state); void wait_on_page_writeback(struct page *page); -int wait_on_page_writeback_killable(struct page *page); +void folio_wait_writeback(struct folio *folio); +int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 526843d03d582..41275dac7a92a 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -23,3 +23,9 @@ void end_page_writeback(struct page *page) return folio_end_writeback(page_folio(page)); } EXPORT_SYMBOL(end_page_writeback); + +void wait_on_page_writeback(struct page *page) +{ + return folio_wait_writeback(page_folio(page)); +} +EXPORT_SYMBOL_GPL(wait_on_page_writeback); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 9f63548f247c3..c2c00e1533ad9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2830,33 +2830,51 @@ int __test_set_page_writeback(struct page *page, bool keep_write) } EXPORT_SYMBOL(__test_set_page_writeback); -/* - * Wait for a page to complete writeback +/** + * folio_wait_writeback - Wait for a folio to finish writeback. + * @folio: The folio to wait for. + * + * If the folio is currently being written back to storage, wait for the + * I/O to complete. + * + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. 
+ * If the folio is not locked, writeback may start again after writeback + * has finished. */ -void wait_on_page_writeback(struct page *page) +void folio_wait_writeback(struct folio *folio) { - while (PageWriteback(page)) { - trace_wait_on_page_writeback(page, page_mapping(page)); - wait_on_page_bit(page, PG_writeback); + while (folio_test_writeback(folio)) { + trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); + wait_on_page_bit(&folio->page, PG_writeback); } } -EXPORT_SYMBOL_GPL(wait_on_page_writeback); +EXPORT_SYMBOL_GPL(folio_wait_writeback); -/* - * Wait for a page to complete writeback. Returns -EINTR if we get a - * fatal signal while waiting. +/** + * folio_wait_writeback_killable - Wait for a folio to finish writeback. + * @folio: The folio to wait for. + * + * If the folio is currently being written back to storage, wait for the + * I/O to complete or a fatal signal to arrive. + * + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. + * Return: 0 on success, -EINTR if we get a fatal signal while waiting. */ -int wait_on_page_writeback_killable(struct page *page) +int folio_wait_writeback_killable(struct folio *folio) { - while (PageWriteback(page)) { - trace_wait_on_page_writeback(page, page_mapping(page)); - if (wait_on_page_bit_killable(page, PG_writeback)) + while (folio_test_writeback(folio)) { + trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); + if (wait_on_page_bit_killable(&folio->page, PG_writeback)) return -EINTR; } return 0; } -EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable); +EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); /** * wait_for_stable_page() - wait for writeback to finish, if necessary. 
From f9b3e3eb6c9e8090a27d2d46349f674022308195 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 4 Mar 2021 11:25:25 -0500 Subject: [PATCH 477/851] mm/writeback: Add folio_wait_stable() Move wait_for_stable_page() into the folio compatibility file. folio_wait_stable() avoids a call to compound_head() and is 14 bytes smaller than wait_for_stable_page() was. The net text size grows by 16 bytes as a result of this patch. We can also remove thp_head() as this was the last user. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells --- include/linux/huge_mm.h | 15 --------------- include/linux/pagemap.h | 1 + mm/folio-compat.c | 6 ++++++ mm/page-writeback.c | 24 ++++++++++++++---------- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f123e15d966e8..f280f33ff2237 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -250,15 +250,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, return NULL; } -/** - * thp_head - Head page of a transparent huge page. - * @page: Any page (tail, head or regular) found in the page cache. - */ -static inline struct page *thp_head(struct page *page) -{ - return compound_head(page); -} - /** * thp_order - Order of a transparent huge page. * @page: Head page of a transparent huge page. 
@@ -336,12 +327,6 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) -static inline struct page *thp_head(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return page; -} - static inline unsigned int thp_order(struct page *page) { VM_BUG_ON_PGFLAGS(PageTail(page), page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0c5f53368fe98..96b62a2331fbd 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -772,6 +772,7 @@ int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); +void folio_wait_stable(struct folio *folio); void __set_page_dirty(struct page *, struct address_space *, int warn); int __set_page_dirty_nobuffers(struct page *page); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 41275dac7a92a..3c83f03b80d7f 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -29,3 +29,9 @@ void wait_on_page_writeback(struct page *page) return folio_wait_writeback(page_folio(page)); } EXPORT_SYMBOL_GPL(wait_on_page_writeback); + +void wait_for_stable_page(struct page *page) +{ + return folio_wait_stable(page_folio(page)); +} +EXPORT_SYMBOL_GPL(wait_for_stable_page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c2c00e1533ad9..a078e9786cc4e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2877,17 +2877,21 @@ int folio_wait_writeback_killable(struct folio *folio) EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); /** - * wait_for_stable_page() - wait for writeback to finish, if necessary. - * @page: The page to wait on. + * folio_wait_stable() - wait for writeback to finish, if necessary. + * @folio: The folio to wait on. 
* - * This function determines if the given page is related to a backing device - * that requires page contents to be held stable during writeback. If so, then - * it will wait for any pending writeback to complete. + * This function determines if the given folio is related to a backing + * device that requires folio contents to be held stable during writeback. + * If so, then it will wait for any pending writeback to complete. + * + * Context: Sleeps. Must be called in process context and with + * no spinlocks held. Caller should hold a reference on the folio. + * If the folio is not locked, writeback may start again after writeback + * has finished. */ -void wait_for_stable_page(struct page *page) +void folio_wait_stable(struct folio *folio) { - page = thp_head(page); - if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) - wait_on_page_writeback(page); + if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) + folio_wait_writeback(folio); } -EXPORT_SYMBOL_GPL(wait_for_stable_page); +EXPORT_SYMBOL_GPL(folio_wait_stable); From ee44f674fd1cb531e7908a2cfa02579f34f5a19d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 4 Mar 2021 12:02:54 -0500 Subject: [PATCH 478/851] mm/filemap: Add folio_wait_bit() Rename wait_on_page_bit() to folio_wait_bit(). We must always wait on the folio, otherwise we won't be woken up due to the tail page hashing to a different bucket from the head page. This commit shrinks the kernel by 770 bytes, mostly due to moving the page waitqueue lookup into folio_wait_bit_common(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- include/linux/pagemap.h | 10 +++--- mm/filemap.c | 77 +++++++++++++++++++---------------------- mm/page-writeback.c | 4 +-- 3 files changed, 43 insertions(+), 48 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 96b62a2331fbd..7eb02baf6f9f9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -729,11 +729,11 @@ static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm, } /* - * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc., + * This is exported only for folio_wait_locked/folio_wait_writeback, etc., * and should not be used directly. */ -extern void wait_on_page_bit(struct page *page, int bit_nr); -extern int wait_on_page_bit_killable(struct page *page, int bit_nr); +void folio_wait_bit(struct folio *folio, int bit_nr); +int folio_wait_bit_killable(struct folio *folio, int bit_nr); /* * Wait for a folio to be unlocked. @@ -745,14 +745,14 @@ extern int wait_on_page_bit_killable(struct page *page, int bit_nr); static inline void folio_wait_locked(struct folio *folio) { if (folio_test_locked(folio)) - wait_on_page_bit(&folio->page, PG_locked); + folio_wait_bit(folio, PG_locked); } static inline int folio_wait_locked_killable(struct folio *folio) { if (!folio_test_locked(folio)) return 0; - return wait_on_page_bit_killable(&folio->page, PG_locked); + return folio_wait_bit_killable(folio, PG_locked); } static inline void wait_on_page_locked(struct page *page) diff --git a/mm/filemap.c b/mm/filemap.c index a56878a553e4f..4b0288cfc1655 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1102,7 +1102,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, * * So update the flags atomically, and wake up the waiter * afterwards to avoid any races. This store-release pairs - * with the load-acquire in wait_on_page_bit_common(). 
+ * with the load-acquire in folio_wait_bit_common(). */ smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN); wake_up_state(wait->private, mode); @@ -1183,7 +1183,7 @@ static void folio_wake(struct folio *folio, int bit) } /* - * A choice of three behaviors for wait_on_page_bit_common(): + * A choice of three behaviors for folio_wait_bit_common(): */ enum behavior { EXCLUSIVE, /* Hold ref to page and take the bit when woken, like @@ -1198,16 +1198,16 @@ enum behavior { }; /* - * Attempt to check (or get) the page bit, and mark us done + * Attempt to check (or get) the folio flag, and mark us done * if successful. */ -static inline bool trylock_page_bit_common(struct page *page, int bit_nr, +static inline bool folio_trylock_flag(struct folio *folio, int bit_nr, struct wait_queue_entry *wait) { if (wait->flags & WQ_FLAG_EXCLUSIVE) { - if (test_and_set_bit(bit_nr, &page->flags)) + if (test_and_set_bit(bit_nr, &folio->flags)) return false; - } else if (test_bit(bit_nr, &page->flags)) + } else if (test_bit(bit_nr, &folio->flags)) return false; wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE; @@ -1217,9 +1217,10 @@ static inline bool trylock_page_bit_common(struct page *page, int bit_nr, /* How many times do we accept lock stealing from under a waiter? 
*/ int sysctl_page_lock_unfairness = 5; -static inline int wait_on_page_bit_common(wait_queue_head_t *q, - struct page *page, int bit_nr, int state, enum behavior behavior) +static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, + int state, enum behavior behavior) { + wait_queue_head_t *q = page_waitqueue(&folio->page); int unfairness = sysctl_page_lock_unfairness; struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; @@ -1228,8 +1229,8 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, unsigned long pflags; if (bit_nr == PG_locked && - !PageUptodate(page) && PageWorkingset(page)) { - if (!PageSwapBacked(page)) { + !folio_test_uptodate(folio) && folio_test_workingset(folio)) { + if (!folio_test_swapbacked(folio)) { delayacct_thrashing_start(); delayacct = true; } @@ -1239,7 +1240,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, init_wait(wait); wait->func = wake_page_function; - wait_page.page = page; + wait_page.page = &folio->page; wait_page.bit_nr = bit_nr; repeat: @@ -1254,7 +1255,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, * Do one last check whether we can get the * page bit synchronously. * - * Do the SetPageWaiters() marking before that + * Do the folio_set_waiters() marking before that * to let any waker we _just_ missed know they * need to wake us up (otherwise they'll never * even go to the slow case that looks at the @@ -1265,8 +1266,8 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, * lock to avoid races. */ spin_lock_irq(&q->lock); - SetPageWaiters(page); - if (!trylock_page_bit_common(page, bit_nr, wait)) + folio_set_waiters(folio); + if (!folio_trylock_flag(folio, bit_nr, wait)) __add_wait_queue_entry_tail(q, wait); spin_unlock_irq(&q->lock); @@ -1276,10 +1277,10 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, * see whether the page bit testing has already * been done by the wake function. 
* - * We can drop our reference to the page. + * We can drop our reference to the folio. */ if (behavior == DROP) - put_page(page); + folio_put(folio); /* * Note that until the "finish_wait()", or until @@ -1316,7 +1317,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, * * And if that fails, we'll have to retry this all. */ - if (unlikely(test_and_set_bit(bit_nr, &page->flags))) + if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0)))) goto repeat; wait->flags |= WQ_FLAG_DONE; @@ -1325,7 +1326,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, /* * If a signal happened, this 'finish_wait()' may remove the last - * waiter from the wait-queues, but the PageWaiters bit will remain + * waiter from the wait-queues, but the folio waiters bit will remain * set. That's ok. The next wakeup will take care of it, and trying * to do it here would be difficult and prone to races. */ @@ -1356,19 +1357,17 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, return wait->flags & WQ_FLAG_WOKEN ? 
0 : -EINTR; } -void wait_on_page_bit(struct page *page, int bit_nr) +void folio_wait_bit(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); - wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); + folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); } -EXPORT_SYMBOL(wait_on_page_bit); +EXPORT_SYMBOL(folio_wait_bit); -int wait_on_page_bit_killable(struct page *page, int bit_nr) +int folio_wait_bit_killable(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED); + return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED); } -EXPORT_SYMBOL(wait_on_page_bit_killable); +EXPORT_SYMBOL(folio_wait_bit_killable); /** * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked @@ -1385,11 +1384,8 @@ EXPORT_SYMBOL(wait_on_page_bit_killable); */ int put_and_wait_on_page_locked(struct page *page, int state) { - wait_queue_head_t *q; - - page = compound_head(page); - q = page_waitqueue(page); - return wait_on_page_bit_common(q, page, PG_locked, state, DROP); + return folio_wait_bit_common(page_folio(page), PG_locked, state, + DROP); } /** @@ -1483,9 +1479,10 @@ EXPORT_SYMBOL(end_page_private_2); */ void wait_on_page_private_2(struct page *page) { - page = compound_head(page); - while (PagePrivate2(page)) - wait_on_page_bit(page, PG_private_2); + struct folio *folio = page_folio(page); + + while (folio_test_private_2(folio)) + folio_wait_bit(folio, PG_private_2); } EXPORT_SYMBOL(wait_on_page_private_2); @@ -1502,11 +1499,11 @@ EXPORT_SYMBOL(wait_on_page_private_2); */ int wait_on_page_private_2_killable(struct page *page) { + struct folio *folio = page_folio(page); int ret = 0; - page = compound_head(page); - while (PagePrivate2(page)) { - ret = wait_on_page_bit_killable(page, PG_private_2); + while (folio_test_private_2(folio)) { + ret = folio_wait_bit_killable(folio, PG_private_2); if 
(ret < 0) break; } @@ -1583,16 +1580,14 @@ EXPORT_SYMBOL_GPL(page_endio); */ void __folio_lock(struct folio *folio) { - wait_queue_head_t *q = page_waitqueue(&folio->page); - wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_UNINTERRUPTIBLE, + folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE, EXCLUSIVE); } EXPORT_SYMBOL(__folio_lock); int __folio_lock_killable(struct folio *folio) { - wait_queue_head_t *q = page_waitqueue(&folio->page); - return wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_KILLABLE, + return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE, EXCLUSIVE); } EXPORT_SYMBOL_GPL(__folio_lock_killable); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a078e9786cc4e..b34278d05395f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2846,7 +2846,7 @@ void folio_wait_writeback(struct folio *folio) { while (folio_test_writeback(folio)) { trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); - wait_on_page_bit(&folio->page, PG_writeback); + folio_wait_bit(folio, PG_writeback); } } EXPORT_SYMBOL_GPL(folio_wait_writeback); @@ -2868,7 +2868,7 @@ int folio_wait_writeback_killable(struct folio *folio) { while (folio_test_writeback(folio)) { trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); - if (wait_on_page_bit_killable(&folio->page, PG_writeback)) + if (folio_wait_bit_killable(folio, PG_writeback)) return -EINTR; } From 3e638af9e1da9fa5fa2dc4f2bdab14f2be863987 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Jan 2021 17:14:48 -0500 Subject: [PATCH 479/851] mm/filemap: Add folio_wake_bit() Convert wake_up_page_bit() to folio_wake_bit(). All callers have a folio, so use it directly. Saves 66 bytes of text in end_page_private_2(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells Acked-by: Mike Rapoport --- mm/filemap.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 4b0288cfc1655..3d504bf235346 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1121,14 +1121,14 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, return (flags & WQ_FLAG_EXCLUSIVE) != 0; } -static void wake_up_page_bit(struct page *page, int bit_nr) +static void folio_wake_bit(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(page); + wait_queue_head_t *q = page_waitqueue(&folio->page); struct wait_page_key key; unsigned long flags; wait_queue_entry_t bookmark; - key.page = page; + key.page = &folio->page; key.bit_nr = bit_nr; key.page_match = 0; @@ -1163,7 +1163,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr) * page waiters. */ if (!waitqueue_active(q) || !key.page_match) { - ClearPageWaiters(page); + folio_clear_waiters(folio); /* * It's possible to miss clearing Waiters here, when we woke * our page waiters, but the hashed waitqueue has waiters for @@ -1179,7 +1179,7 @@ static void folio_wake(struct folio *folio, int bit) { if (!folio_test_waiters(folio)) return; - wake_up_page_bit(&folio->page, bit); + folio_wake_bit(folio, bit); } /* @@ -1446,7 +1446,7 @@ void folio_unlock(struct folio *folio) BUILD_BUG_ON(PG_locked > 7); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0))) - wake_up_page_bit(&folio->page, PG_locked); + folio_wake_bit(folio, PG_locked); } EXPORT_SYMBOL(folio_unlock); @@ -1463,11 +1463,12 @@ EXPORT_SYMBOL(folio_unlock); */ void end_page_private_2(struct page *page) { - page = compound_head(page); - VM_BUG_ON_PAGE(!PagePrivate2(page), page); - clear_bit_unlock(PG_private_2, &page->flags); - wake_up_page_bit(page, PG_private_2); - put_page(page); + 
struct folio *folio = page_folio(page); + + VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio); + clear_bit_unlock(PG_private_2, folio_flags(folio, 0)); + folio_wake_bit(folio, PG_private_2); + folio_put(folio); } EXPORT_SYMBOL(end_page_private_2); From 9ee05b20d86bc0ab655f183cc5a02100895eec83 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 16 Jan 2021 11:22:14 -0500 Subject: [PATCH 480/851] mm/filemap: Convert page wait queues to be folios Reinforce that page flags are actually in the head page by changing the type from page to folio. Increases the size of cachefiles by two bytes, but the kernel core is unchanged in size. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Jeff Layton Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: David Howells --- fs/cachefiles/rdwr.c | 16 ++++++++-------- include/linux/pagemap.h | 8 ++++---- mm/filemap.c | 38 +++++++++++++++++++------------------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 8ffc40e84a594..fcf4f3b729233 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -25,20 +25,20 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, struct cachefiles_object *object; struct fscache_retrieval *op = monitor->op; struct wait_page_key *key = _key; - struct page *page = wait->private; + struct folio *folio = wait->private; ASSERT(key); _enter("{%lu},%u,%d,{%p,%u}", monitor->netfs_page->index, mode, sync, - key->page, key->bit_nr); + key->folio, key->bit_nr); - if (key->page != page || key->bit_nr != PG_locked) + if (key->folio != folio || key->bit_nr != PG_locked) return 0; - _debug("--- monitor %p %lx ---", page, page->flags); + _debug("--- monitor %p %lx ---", folio, folio->flags); - if (!PageUptodate(page) && !PageError(page)) { + if (!folio_test_uptodate(folio) && !folio_test_error(folio)) { /* unlocked, not uptodate and 
not erronous? */ _debug("page probably truncated"); } @@ -107,7 +107,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, put_page(backpage2); INIT_LIST_HEAD(&monitor->op_link); - add_page_wait_queue(backpage, &monitor->monitor); + folio_add_wait_queue(page_folio(backpage), &monitor->monitor); if (trylock_page(backpage)) { ret = -EIO; @@ -294,7 +294,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, get_page(backpage); monitor->back_page = backpage; monitor->monitor.private = backpage; - add_page_wait_queue(backpage, &monitor->monitor); + folio_add_wait_queue(page_folio(backpage), &monitor->monitor); monitor = NULL; /* but the page may have been read before the monitor was installed, so @@ -548,7 +548,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, get_page(backpage); monitor->back_page = backpage; monitor->monitor.private = backpage; - add_page_wait_queue(backpage, &monitor->monitor); + folio_add_wait_queue(page_folio(backpage), &monitor->monitor); monitor = NULL; /* but the page may have been read before the monitor was diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7eb02baf6f9f9..c8e74d67b01fb 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -629,13 +629,13 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, } struct wait_page_key { - struct page *page; + struct folio *folio; int bit_nr; int page_match; }; struct wait_page_queue { - struct page *page; + struct folio *folio; int bit_nr; wait_queue_entry_t wait; }; @@ -643,7 +643,7 @@ struct wait_page_queue { static inline bool wake_page_match(struct wait_page_queue *wait_page, struct wait_page_key *key) { - if (wait_page->page != key->page) + if (wait_page->folio != key->folio) return false; key->page_match = 1; @@ -803,7 +803,7 @@ int wait_on_page_private_2_killable(struct page *page); /* * Add an arbitrary waiter to a page's wait queue */ -extern void 
add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); +void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); /* * Fault everything in given userspace address range in. diff --git a/mm/filemap.c b/mm/filemap.c index 3d504bf235346..ac8c9253cfedd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1019,11 +1019,11 @@ EXPORT_SYMBOL(__page_cache_alloc); */ #define PAGE_WAIT_TABLE_BITS 8 #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) -static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; +static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; -static wait_queue_head_t *page_waitqueue(struct page *page) +static wait_queue_head_t *folio_waitqueue(struct folio *folio) { - return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; + return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)]; } void __init pagecache_init(void) @@ -1031,7 +1031,7 @@ void __init pagecache_init(void) int i; for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) - init_waitqueue_head(&page_wait_table[i]); + init_waitqueue_head(&folio_wait_table[i]); page_writeback_init(); } @@ -1086,10 +1086,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, */ flags = wait->flags; if (flags & WQ_FLAG_EXCLUSIVE) { - if (test_bit(key->bit_nr, &key->page->flags)) + if (test_bit(key->bit_nr, &key->folio->flags)) return -1; if (flags & WQ_FLAG_CUSTOM) { - if (test_and_set_bit(key->bit_nr, &key->page->flags)) + if (test_and_set_bit(key->bit_nr, &key->folio->flags)) return -1; flags |= WQ_FLAG_DONE; } @@ -1123,12 +1123,12 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, static void folio_wake_bit(struct folio *folio, int bit_nr) { - wait_queue_head_t *q = page_waitqueue(&folio->page); + wait_queue_head_t *q = folio_waitqueue(folio); struct wait_page_key key; unsigned long flags; wait_queue_entry_t bookmark; - key.page = &folio->page; + key.folio = 
folio; key.bit_nr = bit_nr; key.page_match = 0; @@ -1220,7 +1220,7 @@ int sysctl_page_lock_unfairness = 5; static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, int state, enum behavior behavior) { - wait_queue_head_t *q = page_waitqueue(&folio->page); + wait_queue_head_t *q = folio_waitqueue(folio); int unfairness = sysctl_page_lock_unfairness; struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; @@ -1240,7 +1240,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, init_wait(wait); wait->func = wake_page_function; - wait_page.page = &folio->page; + wait_page.folio = folio; wait_page.bit_nr = bit_nr; repeat: @@ -1389,23 +1389,23 @@ int put_and_wait_on_page_locked(struct page *page, int state) } /** - * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue - * @page: Page defining the wait queue of interest + * folio_add_wait_queue - Add an arbitrary waiter to a folio's wait queue + * @folio: Folio defining the wait queue of interest * @waiter: Waiter to add to the queue * - * Add an arbitrary @waiter to the wait queue for the nominated @page. + * Add an arbitrary @waiter to the wait queue for the nominated @folio. 
*/ -void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) +void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter) { - wait_queue_head_t *q = page_waitqueue(page); + wait_queue_head_t *q = folio_waitqueue(folio); unsigned long flags; spin_lock_irqsave(&q->lock, flags); __add_wait_queue_entry_tail(q, waiter); - SetPageWaiters(page); + folio_set_waiters(folio); spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL_GPL(add_page_wait_queue); +EXPORT_SYMBOL_GPL(folio_add_wait_queue); #ifndef clear_bit_unlock_is_negative_byte @@ -1595,10 +1595,10 @@ EXPORT_SYMBOL_GPL(__folio_lock_killable); static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) { - struct wait_queue_head *q = page_waitqueue(&folio->page); + struct wait_queue_head *q = folio_waitqueue(folio); int ret = 0; - wait->page = &folio->page; + wait->folio = folio; wait->bit_nr = PG_locked; spin_lock_irq(&q->lock); From f65dbd37b720e4897a51f6cfe989928e83a51159 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:16 +0300 Subject: [PATCH 481/851] fs: add generic helper for filling statx attribute flags The immutable and append-only properties on an inode are published on the inode's i_flags and enforced by the VFS. Create a helper to fill the corresponding STATX_ATTR_ flags in the kstat structure from the inode's i_flags. Only orangefs was converted to use this helper. Other filesystems could use it in the future. 
Suggested-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/orangefs/inode.c | 7 +------ fs/stat.c | 18 ++++++++++++++++++ include/linux/fs.h | 1 + include/linux/stat.h | 4 ++++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 16ac617df7d7f..c1bb4c4b5d672 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -882,12 +882,7 @@ int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, if (!(request_mask & STATX_SIZE)) stat->result_mask &= ~STATX_SIZE; - stat->attributes_mask = STATX_ATTR_IMMUTABLE | - STATX_ATTR_APPEND; - if (inode->i_flags & S_IMMUTABLE) - stat->attributes |= STATX_ATTR_IMMUTABLE; - if (inode->i_flags & S_APPEND) - stat->attributes |= STATX_ATTR_APPEND; + generic_fill_statx_attr(inode, stat); } return ret; } diff --git a/fs/stat.c b/fs/stat.c index 1fa38bdec1a68..28d2020ba1f42 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -59,6 +59,24 @@ void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode, } EXPORT_SYMBOL(generic_fillattr); +/** + * generic_fill_statx_attr - Fill in the statx attributes from the inode flags + * @inode: Inode to use as the source + * @stat: Where to fill in the attribute flags + * + * Fill in the STATX_ATTR_* flags in the kstat structure for properties of the + * inode that are published on i_flags and enforced by the VFS. 
+ */ +void generic_fill_statx_attr(struct inode *inode, struct kstat *stat) +{ + if (inode->i_flags & S_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (inode->i_flags & S_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + stat->attributes_mask |= KSTAT_ATTR_VFS_FLAGS; +} +EXPORT_SYMBOL(generic_fill_statx_attr); + /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216c..ae6c6c34db944 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3355,6 +3355,7 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *); +void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); diff --git a/include/linux/stat.h b/include/linux/stat.h index fff27e6038141..7df06931f25d8 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -34,6 +34,10 @@ struct kstat { STATX_ATTR_ENCRYPTED | \ STATX_ATTR_VERITY \ )/* Attrs corresponding to FS_*_FL flags */ +#define KSTAT_ATTR_VFS_FLAGS \ + (STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND \ + ) /* Attrs corresponding to S_* flags that are enforced by the VFS */ u64 ino; dev_t dev; dev_t rdev; From 9b2544fb07decc689f3fffba73c01e9819f87cab Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:17 +0300 Subject: [PATCH 482/851] ovl: pass ovl_fs to ovl_check_setxattr() Instead of passing the overlay dentry. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 10 +++++----- fs/overlayfs/dir.c | 6 ++++-- fs/overlayfs/namei.c | 2 +- fs/overlayfs/overlayfs.h | 6 +++--- fs/overlayfs/util.c | 7 +++---- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 2846b943e80c1..3fa68a5cc16ea 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -331,8 +331,8 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, return ERR_PTR(err); } -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, - struct dentry *lower, struct dentry *upper) +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, + struct dentry *upper) { const struct ovl_fh *fh = NULL; int err; @@ -351,7 +351,7 @@ int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, /* * Do not fail when upper doesn't support xattrs. */ - err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf, + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); kfree(fh); @@ -526,13 +526,13 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. 
*/ if (c->origin) { - err = ovl_set_origin(ofs, c->dentry, c->lowerpath.dentry, temp); + err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); if (err) return err; } if (c->metacopy) { - err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY, + err = ovl_check_setxattr(ofs, temp, OVL_XATTR_METACOPY, NULL, 0, -EOPNOTSUPP); if (err) return err; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 93efe7048a771..258434567a343 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -233,9 +233,10 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr) static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, int xerr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); int err; - err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); @@ -1043,6 +1044,7 @@ static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir) static int ovl_set_redirect(struct dentry *dentry, bool samedir) { int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); const char *redirect = ovl_dentry_get_redirect(dentry); bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir); @@ -1053,7 +1055,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry), + err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, redirect, strlen(redirect), -EXDEV); if (!err) { diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 210cd6f66e28e..da063b18b4195 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -811,7 +811,7 @@ static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, if (err) return err; - err = ovl_set_origin(ofs, dentry, lower, upper); + err = ovl_set_origin(ofs, lower, upper); if (!err) err = ovl_set_impure(dentry->d_parent, 
upper->d_parent); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6ec73db4bf9e6..e5dabf7ef339a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -320,7 +320,7 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags); bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry); bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry, enum ovl_xattr ox); -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, +int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, const void *value, size_t size, int xerr); int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); @@ -561,8 +561,8 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, int ovl_set_attr(struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, - struct dentry *lower, struct dentry *upper); +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, + struct dentry *upper); /* export.c */ extern const struct export_operations ovl_export_operations; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index b9d03627f364c..81b8f135445a8 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -600,12 +600,11 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), }; -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, +int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, const void *value, size_t size, int xerr) { int err; - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; if (ofs->noxattr) return xerr; @@ -623,6 +622,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); int err; if 
(ovl_test_flag(OVL_IMPURE, d_inode(dentry))) @@ -632,8 +632,7 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) * Do not fail when upper doesn't support xattrs. * Upper inodes won't have origin nor redirect xattr anyway. */ - err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, - "y", 1, 0); + err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0); if (!err) ovl_set_flag(OVL_IMPURE, d_inode(dentry)); From 7d79bbeb09e493c88e254ae74c888b8ccc34c7b7 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:18 +0300 Subject: [PATCH 483/851] ovl: copy up sync/noatime fileattr flags When a lower file has sync/noatime fileattr flags, the behavior of overlayfs post copy up is inconsistent. Immediately after copy up, ovl inode still has the S_SYNC/S_NOATIME inode flags copied from lower inode, so vfs code still treats the ovl inode as sync/noatime. After ovl inode evict or mount cycle, the ovl inode does not have these inode flags anymore. To fix this inconsistency, try to copy the fileattr flags on copy up if the upper fs supports the fileattr_set() method. This gives consistent behavior post copy up regardless of inode eviction from cache. We cannot copy up the immutable/append-only inode flags in a similar manner, because immutable/append-only inodes cannot be linked and because overlayfs will not be able to set overlay.* xattr on the upper inodes. Those flags will be addressed by a followup patch. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 51 ++++++++++++++++++++++++++++++++++------ fs/overlayfs/inode.c | 44 ++++++++++++++++++++++++---------- fs/overlayfs/overlayfs.h | 15 +++++++++++- 3 files changed, 89 insertions(+), 21 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 3fa68a5cc16ea..daf2afa603d31 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,31 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, return error; } +static int ovl_copy_fileattr(struct path *old, struct path *new) +{ + struct fileattr oldfa = { .flags_valid = true }; + struct fileattr newfa = { .flags_valid = true }; + int err; + + err = ovl_real_fileattr_get(old, &oldfa); + if (err) + return err; + + err = ovl_real_fileattr_get(new, &newfa); + if (err) + return err; + + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); + newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; + newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); + + BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); + newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK; + newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK); + + return ovl_real_fileattr_set(new, &newfa); +} + static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old, struct path *new, loff_t len) { @@ -493,20 +519,21 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) { struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct inode *inode = d_inode(c->dentry); + struct path upperpath, datapath; int err; + ovl_path_upper(c->dentry, &upperpath); + if (WARN_ON(upperpath.dentry != NULL)) + return -EIO; + + upperpath.dentry = temp; + /* * Copy up data first and then xattrs. Writing data after * xattrs will remove security.capability xattr automatically. 
*/ if (S_ISREG(c->stat.mode) && !c->metacopy) { - struct path upperpath, datapath; - - ovl_path_upper(c->dentry, &upperpath); - if (WARN_ON(upperpath.dentry != NULL)) - return -EIO; - upperpath.dentry = temp; - ovl_path_lowerdata(c->dentry, &datapath); err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size); @@ -518,6 +545,16 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) if (err) return err; + if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) { + /* + * Copy the fileattr inode flags that are the source of already + * copied i_flags + */ + err = ovl_copy_fileattr(&c->lowerpath, &upperpath); + if (err) + return err; + } + /* * Store identifier of lower inode in upper inode xattr to * allow lookup of the copy up origin inode. diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 5e828a1c98a8c..b288843e6b422 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -503,16 +503,14 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Introducing security_inode_fileattr_get/set() hooks would solve this issue * properly. 
*/ -static int ovl_security_fileattr(struct dentry *dentry, struct fileattr *fa, +static int ovl_security_fileattr(struct path *realpath, struct fileattr *fa, bool set) { - struct path realpath; struct file *file; unsigned int cmd; int err; - ovl_path_real(dentry, &realpath); - file = dentry_open(&realpath, O_RDONLY, current_cred()); + file = dentry_open(realpath, O_RDONLY, current_cred()); if (IS_ERR(file)) return PTR_ERR(file); @@ -527,11 +525,22 @@ static int ovl_security_fileattr(struct dentry *dentry, struct fileattr *fa, return err; } +int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa) +{ + int err; + + err = ovl_security_fileattr(realpath, fa, true); + if (err) + return err; + + return vfs_fileattr_set(&init_user_ns, realpath->dentry, fa); +} + int ovl_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - struct dentry *upperdentry; + struct path upperpath; const struct cred *old_cred; int err; @@ -541,12 +550,10 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, err = ovl_copy_up(dentry); if (!err) { - upperdentry = ovl_dentry_upper(dentry); + ovl_path_real(dentry, &upperpath); old_cred = ovl_override_creds(inode->i_sb); - err = ovl_security_fileattr(dentry, fa, true); - if (!err) - err = vfs_fileattr_set(&init_user_ns, upperdentry, fa); + err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); ovl_copyflags(ovl_inode_real(inode), inode); } @@ -555,17 +562,28 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, return err; } +int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) +{ + int err; + + err = ovl_security_fileattr(realpath, fa, false); + if (err) + return err; + + return vfs_fileattr_get(realpath->dentry, fa); +} + int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - struct dentry *realdentry = ovl_dentry_real(dentry); + struct path realpath; const 
struct cred *old_cred; int err; + ovl_path_real(dentry, &realpath); + old_cred = ovl_override_creds(inode->i_sb); - err = ovl_security_fileattr(dentry, fa, false); - if (!err) - err = vfs_fileattr_get(realdentry, fa); + err = ovl_real_fileattr_get(&realpath, fa); revert_creds(old_cred); return err; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e5dabf7ef339a..2cbebe06d9adb 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -518,9 +518,20 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) i_size_write(to, i_size_read(from)); } +/* vfs inode flags copied from real to ovl inode */ +#define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) + +/* + * fileattr flags copied from lower to upper inode on copy up. + * We cannot copy immutable/append-only flags, because that would prevevnt + * linking temp inode to upper dir. + */ +#define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL) +#define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME) + static inline void ovl_copyflags(struct inode *from, struct inode *to) { - unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME; + unsigned int mask = OVL_COPY_I_FLAGS_MASK; inode_set_flags(to, from->i_flags & mask, mask); } @@ -548,6 +559,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); extern const struct file_operations ovl_file_operations; int __init ovl_aio_request_cache_init(void); void ovl_aio_request_cache_destroy(void); +int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa); +int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa); int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); int ovl_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa); From 3bff98b5ef9c5f6c98e7e76180e2f5522f336e7a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:19 +0300 Subject: [PATCH 484/851] ovl: 
consistent behavior for immutable/append-only inodes When a lower file has immutable/append-only fileattr flags, the behavior of overlayfs post copy up is inconsistent. Immediately after copy up, ovl inode still has the S_IMMUTABLE/S_APPEND inode flags copied from lower inode, so vfs code still treats the ovl inode as immutable/append-only. After ovl inode evict or mount cycle, the ovl inode does not have these inode flags anymore. We cannot copy up the immutable and append-only fileattr flags, because immutable/append-only inodes cannot be linked and because overlayfs will not be able to set overlay.* xattr on the upper inodes. Instead, if any of the fileattr flags of interest exist on the lower inode, we store them in overlay.protattr xattr on the upper inode and we read the flags from xattr on lookup and on fileattr_get(). This gives consistent behavior post copy up regardless of inode eviction from cache. When user sets new fileattr flags, we update or remove the overlay.protattr xattr. Storing immutable/append-only fileattr flags in an xattr instead of upper fileattr also solves other non-standard behavior issues - overlayfs can now copy up children of "ovl-immutable" directories and lower aliases of "ovl-immutable" hardlinks. 
Reported-by: Chengguang Xu Link: https://lore.kernel.org/linux-unionfs/20201226104618.239739-1-cgxu519@mykernel.net/ Link: https://lore.kernel.org/linux-unionfs/20210210190334.1212210-5-amir73il@gmail.com/ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 17 +++++++- fs/overlayfs/inode.c | 50 +++++++++++++++++++++-- fs/overlayfs/overlayfs.h | 13 +++++- fs/overlayfs/util.c | 85 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index daf2afa603d31..9d8ebf0e72375 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -131,7 +131,8 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, return error; } -static int ovl_copy_fileattr(struct path *old, struct path *new) +static int ovl_copy_fileattr(struct inode *inode, struct path *old, + struct path *new) { struct fileattr oldfa = { .flags_valid = true }; struct fileattr newfa = { .flags_valid = true }; @@ -145,6 +146,18 @@ static int ovl_copy_fileattr(struct path *old, struct path *new) if (err) return err; + /* + * We cannot set immutable and append-only flags on upper inode, + * because we would not be able to link upper inode to upper dir + * not set overlay private xattr on upper inode. + * Store these flags in overlay.protattr xattr instead. 
+ */ + if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { + err = ovl_set_protattr(inode, new->dentry, &oldfa); + if (err) + return err; + } + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); @@ -550,7 +563,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) * Copy the fileattr inode flags that are the source of already * copied i_flags */ - err = ovl_copy_fileattr(&c->lowerpath, &upperpath); + err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath); if (err) return err; } diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b288843e6b422..37300e972a398 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -162,7 +162,8 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, enum ovl_path_type type; struct path realpath; const struct cred *old_cred; - bool is_dir = S_ISDIR(dentry->d_inode->i_mode); + struct inode *inode = d_inode(dentry); + bool is_dir = S_ISDIR(inode->i_mode); int fsid = 0; int err; bool metacopy_blocks = false; @@ -175,6 +176,9 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, if (err) goto out; + /* Report the effective immutable/append-only STATX flags */ + generic_fill_statx_attr(inode, stat); + /* * For non-dir or same fs, we use st_ino of the copy up origin. * This guaranties constant st_dev/st_ino across copy up. 
@@ -542,6 +546,7 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, struct inode *inode = d_inode(dentry); struct path upperpath; const struct cred *old_cred; + unsigned int flags; int err; err = ovl_want_write(dentry); @@ -553,15 +558,49 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, ovl_path_real(dentry, &upperpath); old_cred = ovl_override_creds(inode->i_sb); - err = ovl_real_fileattr_set(&upperpath, fa); + /* + * Store immutable/append-only flags in xattr and clear them + * in upper fileattr (in case they were set by older kernel) + * so children of "ovl-immutable" directories lower aliases of + * "ovl-immutable" hardlinks could be copied up. + * Clear xattr when flags are cleared. + */ + err = ovl_set_protattr(inode, upperpath.dentry, fa); + if (!err) + err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); - ovl_copyflags(ovl_inode_real(inode), inode); + + /* + * Merge real inode flags with inode flags read from + * overlay.protattr xattr + */ + flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK; + + BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); + flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; + inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); } ovl_drop_write(dentry); out: return err; } +/* Convert inode protection flags to fileattr flags */ +static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa) +{ + BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL); + BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); + + if (inode->i_flags & S_APPEND) { + fa->flags |= FS_APPEND_FL; + fa->fsx_xflags |= FS_XFLAG_APPEND; + } + if (inode->i_flags & S_IMMUTABLE) { + fa->flags |= FS_IMMUTABLE_FL; + fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; + } +} + int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) { int err; @@ -584,6 +623,7 @@ int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) old_cred = ovl_override_creds(inode->i_sb); err = 
ovl_real_fileattr_get(&realpath, fa); + ovl_fileattr_prot_flags(inode, fa); revert_creds(old_cred); return err; @@ -1136,6 +1176,10 @@ struct inode *ovl_get_inode(struct super_block *sb, } } + /* Check for immutable/append-only inode flags in xattr */ + if (upperdentry) + ovl_check_protattr(inode, upperdentry); + if (inode->i_state & I_NEW) unlock_new_inode(inode); out: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 2cbebe06d9adb..2433cc030c870 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -34,6 +34,7 @@ enum ovl_xattr { OVL_XATTR_NLINK, OVL_XATTR_UPPER, OVL_XATTR_METACOPY, + OVL_XATTR_PROTATTR, }; enum ovl_inode_flag { @@ -520,14 +521,22 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* vfs inode flags copied from real to ovl inode */ #define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) +/* vfs inode flags read from overlay.protattr xattr to ovl inode */ +#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE) /* * fileattr flags copied from lower to upper inode on copy up. - * We cannot copy immutable/append-only flags, because that would prevevnt - * linking temp inode to upper dir. + * We cannot copy up immutable/append-only flags, because that would prevent + * linking temp inode to upper dir, so we store them in xattr instead. 
*/ #define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL) #define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME) +#define OVL_PROT_FS_FLAGS_MASK (FS_APPEND_FL | FS_IMMUTABLE_FL) +#define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE) + +void ovl_check_protattr(struct inode *inode, struct dentry *upper); +int ovl_set_protattr(struct inode *inode, struct dentry *upper, + struct fileattr *fa); static inline void ovl_copyflags(struct inode *from, struct inode *to) { diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 81b8f135445a8..f48284a2a8960 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -585,6 +586,7 @@ bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry, #define OVL_XATTR_NLINK_POSTFIX "nlink" #define OVL_XATTR_UPPER_POSTFIX "upper" #define OVL_XATTR_METACOPY_POSTFIX "metacopy" +#define OVL_XATTR_PROTATTR_POSTFIX "protattr" #define OVL_XATTR_TAB_ENTRY(x) \ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ @@ -598,6 +600,7 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK), OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER), OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), }; int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, @@ -639,6 +642,88 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) return err; } + +#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */ + +void ovl_check_protattr(struct inode *inode, struct dentry *upper) +{ + struct ovl_fs *ofs = OVL_FS(inode->i_sb); + u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK; + char buf[OVL_PROTATTR_MAX+1]; + int res, n; + + res = ovl_do_getxattr(ofs, upper, OVL_XATTR_PROTATTR, buf, + OVL_PROTATTR_MAX); + if (res < 0) + return; + + /* + * Initialize inode flags from overlay.protattr xattr and upper inode + * flags. 
If upper inode has those fileattr flags set (i.e. from old + * kernel), we do not clear them on ovl_get_inode(), but we will clear + * them on next fileattr_set(). + */ + for (n = 0; n < res; n++) { + if (buf[n] == 'a') + iflags |= S_APPEND; + else if (buf[n] == 'i') + iflags |= S_IMMUTABLE; + else + break; + } + + if (!res || n < res) { + pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n", + upper, res); + } else { + inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK); + } +} + +int ovl_set_protattr(struct inode *inode, struct dentry *upper, + struct fileattr *fa) +{ + struct ovl_fs *ofs = OVL_FS(inode->i_sb); + char buf[OVL_PROTATTR_MAX]; + int len = 0, err = 0; + u32 iflags = 0; + + BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX); + + if (fa->flags & FS_APPEND_FL) { + buf[len++] = 'a'; + iflags |= S_APPEND; + } + if (fa->flags & FS_IMMUTABLE_FL) { + buf[len++] = 'i'; + iflags |= S_IMMUTABLE; + } + + /* + * Do not allow to set protection flags when upper doesn't support + * xattrs, because we do not set those fileattr flags on upper inode. + * Remove xattr if it exists and all protection flags are cleared. + */ + if (len) { + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR, + buf, len, -EPERM); + } else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) { + err = ovl_do_removexattr(ofs, upper, OVL_XATTR_PROTATTR); + if (err == -EOPNOTSUPP || err == -ENODATA) + err = 0; + } + if (err) + return err; + + inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK); + + /* Mask out the fileattr flags that should not be set in upper inode */ + fa->flags &= ~OVL_PROT_FS_FLAGS_MASK; + fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK; + + return 0; +} + /** * Caller must hold a reference to inode to prevent it from being freed while * it is marked inuse. 
From 70882a5de80241311505042a6237e54dff436e49 Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:45 +0200 Subject: [PATCH 485/851] ovl: disable decoding null uuid with redirect_dir Currently decoding origin with lower null uuid is not allowed unless user opted-in to one of the new features that require following the lower inode of non-dir upper (index, xino, metacopy). Now we add redirect_dir too to that feature list. Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b01d4147520d3..97ea35fdd933f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1600,7 +1600,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * lower inode of non-dir upper. */ if (!ofs->config.index && !ofs->config.metacopy && - ofs->config.xino != OVL_XINO_ON && + !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON && uuid_is_null(uuid)) return false; From 00aebaed71f696d62478c9df77531edc99576a40 Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:46 +0200 Subject: [PATCH 486/851] ovl: add ovl_allow_offline_changes() helper Allows to check whether any of extended features are enabled Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 12 ++++++++++++ fs/overlayfs/super.c | 4 +--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 2433cc030c870..e9b3e7880fc01 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -263,6 +263,18 @@ static inline bool ovl_open_flags_need_copy_up(int flags) return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } +static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs) +{ + /* + * To avoid regressions in existing setups 
with overlay lower offline + * changes, we allow lower changes only if none of the new features + * are used. + */ + return (!ofs->config.index && !ofs->config.metacopy && + !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON); +} + + /* util.c */ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 97ea35fdd933f..178daa5e82c9d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1599,9 +1599,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * user opted-in to one of the new features that require following the * lower inode of non-dir upper. */ - if (!ofs->config.index && !ofs->config.metacopy && - !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON && - uuid_is_null(uuid)) + if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid)) return false; for (i = 0; i < ofs->numfs; i++) { From c69fba27a80a126cb8a5bac10a73106652aa6d79 Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:47 +0200 Subject: [PATCH 487/851] ovl: do not set overlay.opaque for new directories Enable optimizations only if user opted-in for any of extended features. If optimization is enabled, it breaks existing use case when a lower layer directory appears after directory was created on a merged layer. If overlay.opaque is applied, new files on lower layer are not visible. 
Consider the following scenario: - /lower and /upper are mounted to /merged - directory /merged/new-dir is created with a file test1 - overlay is unmounted - directory /lower/new-dir is created with a file test2 - overlay is mounted again If opaque is applied by default, file test2 is not going to be visible without explicitly clearing the overlay.opaque attribute Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 258434567a343..9154222883e64 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -321,6 +321,7 @@ static bool ovl_type_origin(struct dentry *dentry) static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct ovl_cattr *attr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *udir = upperdir->d_inode; struct dentry *newdentry; @@ -339,7 +340,8 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (IS_ERR(newdentry)) goto out_unlock; - if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) { + if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) && + !ovl_allow_offline_changes(ofs)) { /* Setting opaque here is just an optimization, allow to fail */ ovl_set_opaque(dentry, newdentry); } From fcb7f373684d2c543b0914352f1b57ced181e384 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 26 Apr 2021 18:20:21 +0300 Subject: [PATCH 488/851] ovl: skip stale entries in merge dir cache iteration On the first getdents call, ovl_iterate() populates the readdir cache with a list of entries, but for upper entries with origin lower inode, p->ino remains zero. Following getdents calls traverse the readdir cache list and call ovl_cache_update_ino() for entries with zero p->ino to lookup the entry in the overlay and return d_ino that is consistent with st_ino. 
If the upper file was unlinked between the first getdents call and the getdents call that lists the file entry, ovl_cache_update_ino() will not find the entry and fall back to setting d_ino to the upper real st_ino, which is inconsistent with how this object was presented to users. Instead of listing a stale entry with inconsistent d_ino, simply skip the stale entry, which is better for users. xfstest overlay/077 is failing without this patch. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/fstests/CAOQ4uxgR_cLnC_vdU5=seP3fwqVkuZM_-WfD6maFTMbMYq=a9w@mail.gmail.com/ Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index e8ad2c2c77dd7..150fdf3bc68d4 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -481,6 +481,8 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) } this = lookup_one_len(p->name, dir, p->len); if (IS_ERR_OR_NULL(this) || !this->d_inode) { + /* Mark a stale entry */ + p->is_whiteout = true; if (IS_ERR(this)) { err = PTR_ERR(this); this = NULL; @@ -776,6 +778,9 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) if (err) goto out; } + } + /* ovl_cache_update_ino() sets is_whiteout on stale entry */ + if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; } From 84f8d8964db78fea85c8ec9a84ec421f8e11fc4c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 27 Apr 2021 13:28:26 +0300 Subject: [PATCH 489/851] ovl: relax lookup error on mismatch origin ftype We get occasional reports of lookup errors due to mismatched origin ftype from users that re-format a lower squashfs image. Commit 13c6ad0f45fd ("ovl: document lower modification caveats") tries to discourage the practice of re-formatting lower layers and describes the expected behavior as undefined. 
Commit b0e0f69731cd ("ovl: restrict lower null uuid for "xino=auto"") limits the configurations in which origin file handles are followed. In addition to these measures, change the behavior in case of detecting a mismatch origin ftype in lookup to issue a warning, not follow origin, but not fail the lookup operation either. That should make overall more users happy without any big consequences. Link: https://lore.kernel.org/linux-unionfs/CAOQ4uxgPq9E9xxwU2CDyHy-_yCZZeymg+3n+-6AqkGGE1YtwvQ@mail.gmail.com/ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index da063b18b4195..1a9b515fc45d4 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -392,7 +392,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); dput(origin); - return -EIO; + return -ESTALE; } static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, From 444e085783c6b68381febac54d7218e63c0335fe Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 24 Apr 2021 22:03:15 +0800 Subject: [PATCH 490/851] ovl: skip checking lower file's i_writecount on truncate It is possible that a directory tree is shared between multiple overlay instances as a lower layer. In this case when one instance executes a file residing on the lower layer, the other instance denies a truncate(2) call on this file. This only happens for truncate(2) and not for open(2) with the O_TRUNC flag. Fix this interference and inconsistency by removing the preliminary i_writecount check before copy-up. This means that unlike on normal filesystems truncate(argv[0]) will now succeed. If this ever causes a regression in a real world use case this needs to be revisited. 
One way to fix this properly would be to keep a correct i_writecount in the overlay inode, but that is difficult due to memory mapping code only dealing with the real file/inode. Signed-off-by: Chengguang Xu Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.rst | 3 +++ fs/overlayfs/inode.c | 6 ------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 455ca86eb4fce..7da6c30ed596a 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -427,6 +427,9 @@ b) If a file residing on a lower layer is opened for read-only and then memory mapped with MAP_SHARED, then subsequent changes to the file are not reflected in the memory mapping. +c) If a file residing on a lower layer is being executed, then opening that +file for write or truncating the file will not be denied with ETXTBSY. + The following options allow overlayfs to act more like a standards compliant filesystem: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 37300e972a398..8aa370e8143a7 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -33,12 +33,6 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, goto out; if (attr->ia_valid & ATTR_SIZE) { - struct inode *realinode = d_inode(ovl_dentry_real(dentry)); - - err = -ETXTBSY; - if (atomic_read(&realinode->i_writecount) < 0) - goto out_drop_write; - /* Truncate should trigger data copy up as well */ full_copy_up = true; } From 97f7de13a76b62f8d27c302dcbbd58bddc4b08c8 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 10 Mar 2021 10:09:25 +0800 Subject: [PATCH 491/851] ovl: update ctime when changing fileattr Currently we keep size, mode and times of overlay inode as the same as upper inode, so we should update ctime when changing file attributes as well. 
Signed-off-by: Chengguang Xu Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 8aa370e8143a7..7d52e5ef2ac74 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -573,6 +573,9 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); + + /* Update ctime */ + ovl_copyattr(ovl_inode_real(inode), inode); } ovl_drop_write(dentry); out: From d5cf6cb1a46515eb2c4a02b6f4b51386fd53dfca Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 22 Apr 2021 22:58:32 -0400 Subject: [PATCH 492/851] mm/filemap: Add folio private_2 functions end_page_private_2() becomes folio_end_private_2(), wait_on_page_private_2() becomes folio_wait_private_2() and wait_on_page_private_2_killable() becomes folio_wait_private_2_killable(). Adjust the fscache equivalents to call page_folio() before calling these functions to avoid adding wrappers. Ends up costing 1 byte of text in ceph & netfs, but the core shrinks by three calls to page_folio(). Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Kirill A. 
Shutemov --- include/linux/netfs.h | 6 +++--- include/linux/pagemap.h | 6 +++--- mm/filemap.c | 41 ++++++++++++++++++----------------------- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 9062adfa2fb9d..fad8c6209eddf 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -55,7 +55,7 @@ static inline void set_page_fscache(struct page *page) */ static inline void end_page_fscache(struct page *page) { - end_page_private_2(page); + folio_end_private_2(page_folio(page)); } /** @@ -66,7 +66,7 @@ static inline void end_page_fscache(struct page *page) */ static inline void wait_on_page_fscache(struct page *page) { - wait_on_page_private_2(page); + folio_wait_private_2(page_folio(page)); } /** @@ -82,7 +82,7 @@ static inline void wait_on_page_fscache(struct page *page) */ static inline int wait_on_page_fscache_killable(struct page *page) { - return wait_on_page_private_2_killable(page); + return folio_wait_private_2_killable(page_folio(page)); } enum netfs_read_source { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c8e74d67b01fb..edf58a581bce6 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -796,9 +796,9 @@ static inline void set_page_private_2(struct page *page) SetPagePrivate2(page); } -void end_page_private_2(struct page *page); -void wait_on_page_private_2(struct page *page); -int wait_on_page_private_2_killable(struct page *page); +void folio_end_private_2(struct folio *folio); +void folio_wait_private_2(struct folio *folio); +int folio_wait_private_2_killable(struct folio *folio); /* * Add an arbitrary waiter to a page's wait queue diff --git a/mm/filemap.c b/mm/filemap.c index ac8c9253cfedd..1ef737cf63fa0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1451,56 +1451,51 @@ void folio_unlock(struct folio *folio) EXPORT_SYMBOL(folio_unlock); /** - * end_page_private_2 - Clear PG_private_2 and release any waiters - * @page: The page + * 
folio_end_private_2 - Clear PG_private_2 and wake any waiters. + * @folio: The folio. * - * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for - * this. The page ref held for PG_private_2 being set is released. + * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for + * it. The folio reference held for PG_private_2 being set is released. * - * This is, for example, used when a netfs page is being written to a local - * disk cache, thereby allowing writes to the cache for the same page to be + * This is, for example, used when a netfs folio is being written to a local + * disk cache, thereby allowing writes to the cache for the same folio to be * serialised. */ -void end_page_private_2(struct page *page) +void folio_end_private_2(struct folio *folio) { - struct folio *folio = page_folio(page); - VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio); clear_bit_unlock(PG_private_2, folio_flags(folio, 0)); folio_wake_bit(folio, PG_private_2); folio_put(folio); } -EXPORT_SYMBOL(end_page_private_2); +EXPORT_SYMBOL(folio_end_private_2); /** - * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page - * @page: The page to wait on + * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio. + * @folio: The folio to wait on. * - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page. + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio. */ -void wait_on_page_private_2(struct page *page) +void folio_wait_private_2(struct folio *folio) { - struct folio *folio = page_folio(page); - while (folio_test_private_2(folio)) folio_wait_bit(folio, PG_private_2); } -EXPORT_SYMBOL(wait_on_page_private_2); +EXPORT_SYMBOL(folio_wait_private_2); /** - * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page - * @page: The page to wait on + * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio. + * @folio: The folio to wait on. 
* - * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a * fatal signal is received by the calling task. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. */ -int wait_on_page_private_2_killable(struct page *page) +int folio_wait_private_2_killable(struct folio *folio) { - struct folio *folio = page_folio(page); int ret = 0; while (folio_test_private_2(folio)) { @@ -1511,7 +1506,7 @@ int wait_on_page_private_2_killable(struct page *page) return ret; } -EXPORT_SYMBOL(wait_on_page_private_2_killable); +EXPORT_SYMBOL(folio_wait_private_2_killable); /** * folio_end_writeback - End writeback against a folio. From 7a4ae325b7467b321957e70251165885ba1c58e4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 14:24:30 -0400 Subject: [PATCH 493/851] fs/netfs: Add folio fscache functions Match the page writeback functions by adding folio_start_fscache(), folio_end_fscache(), folio_wait_fscache() and folio_wait_fscache_killable(). Remove set_page_private_2(). Also rewrite the kernel-doc to describe when to use the function rather than what the function does, and include the kernel-doc in the appropriate rst file. Saves 31 bytes of text in netfs_rreq_unlock() due to set_page_fscache() calling page_folio() once instead of three times. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Acked-by: Kirill A. 
Shutemov Acked-by: Mike Rapoport --- Documentation/filesystems/netfs_library.rst | 2 + include/linux/netfs.h | 75 +++++++++++++-------- include/linux/pagemap.h | 16 ----- 3 files changed, 50 insertions(+), 43 deletions(-) diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index 57a6418478186..bb68d39f03b78 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -524,3 +524,5 @@ Note that these methods are passed a pointer to the cache resource structure, not the read request structure as they could be used in other situations where there isn't a read request structure as well, such as writing dirty data to the cache. + +.. kernel-doc:: include/linux/netfs.h diff --git a/include/linux/netfs.h b/include/linux/netfs.h index fad8c6209eddf..113b5fa9280cf 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -22,6 +22,7 @@ * Overload PG_private_2 to give us PG_fscache - this is used to indicate that * a page is currently backed by a local disk cache */ +#define folio_test_fscache(folio) folio_test_private_2(folio) #define PageFsCache(page) PagePrivate2((page)) #define SetPageFsCache(page) SetPagePrivate2((page)) #define ClearPageFsCache(page) ClearPagePrivate2((page)) @@ -29,57 +30,77 @@ #define TestClearPageFsCache(page) TestClearPagePrivate2((page)) /** - * set_page_fscache - Set PG_fscache on a page and take a ref - * @page: The page. + * folio_start_fscache - Start an fscache write on a folio. + * @folio: The folio. * - * Set the PG_fscache (PG_private_2) flag on a page and take the reference - * needed for the VM to handle its lifetime correctly. This sets the flag and - * takes the reference unconditionally, so care must be taken not to set the - * flag again if it's already set. + * Call this function before writing a folio to a local cache. Starting a + * second write before the first one finishes is not allowed. 
*/ -static inline void set_page_fscache(struct page *page) +static inline void folio_start_fscache(struct folio *folio) { - set_page_private_2(page); + VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); + folio_get(folio); + folio_set_private_2(folio); } /** - * end_page_fscache - Clear PG_fscache and release any waiters - * @page: The page - * - * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers - * waiting for this. The page ref held for PG_private_2 being set is released. + * folio_end_fscache - End an fscache write on a folio. + * @folio: The folio. * - * This is, for example, used when a netfs page is being written to a local - * disk cache, thereby allowing writes to the cache for the same page to be - * serialised. + * Call this function after the folio has been written to the local cache. + * This will wake any sleepers waiting on this folio. */ -static inline void end_page_fscache(struct page *page) +static inline void folio_end_fscache(struct folio *folio) { - folio_end_private_2(page_folio(page)); + folio_end_private_2(folio); } /** - * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page - * @page: The page to wait on + * folio_wait_fscache - Wait for an fscache write on this folio to end. + * @folio: The folio. * - * Wait for PG_fscache (aka PG_private_2) to be cleared on a page. + * If this folio is currently being written to a local cache, wait for + * the write to finish. Another write may start after this one finishes, + * unless the caller holds the folio lock. */ -static inline void wait_on_page_fscache(struct page *page) +static inline void folio_wait_fscache(struct folio *folio) { - folio_wait_private_2(page_folio(page)); + folio_wait_private_2(folio); } /** - * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page - * @page: The page to wait on + * folio_wait_fscache_killable - Wait for an fscache write on this folio to end. + * @folio: The folio. 
* - * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a - * fatal signal is received by the calling task. + * If this folio is currently being written to a local cache, wait + * for the write to finish or for a fatal signal to be received. + * Another write may start after this one finishes, unless the caller + * holds the folio lock. * * Return: * - 0 if successful. * - -EINTR if a fatal signal was encountered. */ +static inline int folio_wait_fscache_killable(struct folio *folio) +{ + return folio_wait_private_2_killable(folio); +} + +static inline void set_page_fscache(struct page *page) +{ + folio_start_fscache(page_folio(page)); +} + +static inline void end_page_fscache(struct page *page) +{ + folio_end_private_2(page_folio(page)); +} + +static inline void wait_on_page_fscache(struct page *page) +{ + folio_wait_private_2(page_folio(page)); +} + static inline int wait_on_page_fscache_killable(struct page *page) { return folio_wait_private_2_killable(page_folio(page)); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index edf58a581bce6..08f40e004d97e 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -780,22 +780,6 @@ int __set_page_dirty_no_writeback(struct page *page); void page_endio(struct page *page, bool is_write, int err); -/** - * set_page_private_2 - Set PG_private_2 on a page and take a ref - * @page: The page. - * - * Set the PG_private_2 flag on a page and take the reference needed for the VM - * to handle its lifetime correctly. This sets the flag and takes the - * reference unconditionally, so care must be taken not to set the flag again - * if it's already set. 
- */ -static inline void set_page_private_2(struct page *page) -{ - page = compound_head(page); - get_page(page); - SetPagePrivate2(page); -} - void folio_end_private_2(struct folio *folio); void folio_wait_private_2(struct folio *folio); int folio_wait_private_2_killable(struct folio *folio); From 043656facf6e5abd2f0578c94c503f7472361987 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 12 Apr 2021 16:45:17 -0400 Subject: [PATCH 494/851] mm: Add folio_mapped() This function is the equivalent of page_mapped(). It is slightly shorter as we do not need to handle the PageTail() case. Reimplement page_mapped() as a wrapper around folio_mapped(). folio_mapped() is 13 bytes smaller than page_mapped(), but the page_mapped() wrapper is 30 bytes, for a net increase of 17 bytes of text. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Kirill A. Shutemov Acked-by: Mike Rapoport --- include/linux/mm.h | 1 + include/linux/mm_types.h | 6 ++++++ mm/folio-compat.c | 6 ++++++ mm/util.c | 29 ++++++++++++++++------------- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index cc8c2e09a34bf..8b0c7ed84f683 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1768,6 +1768,7 @@ static inline pgoff_t page_index(struct page *page) } bool page_mapped(struct page *page); +bool folio_mapped(struct folio *folio); /* * Return true only if the page has been allocated with diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f38ab53fcf7c8..1066afc9a06de 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -291,6 +291,12 @@ FOLIO_MATCH(memcg_data, memcg_data); #endif #undef FOLIO_MATCH +static inline atomic_t *folio_mapcount_ptr(struct folio *folio) +{ + struct page *tail = &folio->page + 1; + return &tail->compound_mapcount; +} + static inline atomic_t 
*compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 3c83f03b80d7f..7044fcc8a8aa8 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -35,3 +35,9 @@ void wait_for_stable_page(struct page *page) return folio_wait_stable(page_folio(page)); } EXPORT_SYMBOL_GPL(wait_for_stable_page); + +bool page_mapped(struct page *page) +{ + return folio_mapped(page_folio(page)); +} +EXPORT_SYMBOL(page_mapped); diff --git a/mm/util.c b/mm/util.c index 1cde6218d6d14..e8c12350b3ebb 100644 --- a/mm/util.c +++ b/mm/util.c @@ -652,28 +652,31 @@ void *page_rmapping(struct page *page) return __page_rmapping(page); } -/* - * Return true if this page is mapped into pagetables. - * For compound page it returns true if any subpage of compound page is mapped. +/** + * folio_mapped - Is this folio mapped into userspace? + * @folio: The folio. + * + * Return: True if any page in this folio is referenced by user page tables. */ -bool page_mapped(struct page *page) +bool folio_mapped(struct folio *folio) { - int i; + int i, nr; - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) >= 0; - page = compound_head(page); - if (atomic_read(compound_mapcount_ptr(page)) >= 0) + if (folio_single(folio)) + return atomic_read(&folio->_mapcount) >= 0; + if (atomic_read(folio_mapcount_ptr(folio)) >= 0) return true; - if (PageHuge(page)) + if (folio_test_hugetlb(folio)) return false; - for (i = 0; i < compound_nr(page); i++) { - if (atomic_read(&page[i]._mapcount) >= 0) + + nr = folio_nr_pages(folio); + for (i = 0; i < nr; i++) { + if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0) return true; } return false; } -EXPORT_SYMBOL(page_mapped); +EXPORT_SYMBOL(folio_mapped); struct anon_vma *page_anon_vma(struct page *page) { From e004ebf606763a60dc930279e4a40f9a707c2220 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Jun 2021 09:27:29 -0400 Subject: [PATCH 495/851] mm: Add 
folio_nid() This is the folio equivalent of page_to_nid(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b0c7ed84f683..797dd44344429 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1428,6 +1428,11 @@ static inline int page_to_nid(const struct page *page) } #endif +static inline int folio_nid(const struct folio *folio) +{ + return page_to_nid(&folio->page); +} + #ifdef CONFIG_NUMA_BALANCING static inline int cpu_pid_to_cpupid(int cpu, int pid) { From 69d2395ca8efee4799216940ceffec1217aef314 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 13:40:11 -0400 Subject: [PATCH 496/851] mm/memcg: Remove 'page' parameter to mem_cgroup_charge_statistics() The last use of 'page' was removed by commit 468c398233da ("mm: memcontrol: switch to native NR_ANON_THPS counter"), so we can now remove the parameter from the function. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Michal Hocko Acked-by: Johannes Weiner --- mm/memcontrol.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ae1f5d0cb5810..ee892daecb8bc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -831,7 +831,6 @@ static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event) } static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, - struct page *page, int nr_pages) { /* pagein of a big page is an event. 
So, ignore page size */ @@ -5692,9 +5691,9 @@ static int mem_cgroup_move_account(struct page *page, ret = 0; local_irq_disable(); - mem_cgroup_charge_statistics(to, page, nr_pages); + mem_cgroup_charge_statistics(to, nr_pages); memcg_check_events(to, page); - mem_cgroup_charge_statistics(from, page, -nr_pages); + mem_cgroup_charge_statistics(from, -nr_pages); memcg_check_events(from, page); local_irq_enable(); out_unlock: @@ -6715,7 +6714,7 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, commit_charge(page, memcg); local_irq_disable(); - mem_cgroup_charge_statistics(memcg, page, nr_pages); + mem_cgroup_charge_statistics(memcg, nr_pages); memcg_check_events(memcg, page); local_irq_enable(); out: @@ -7006,7 +7005,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) commit_charge(newpage, memcg); local_irq_save(flags); - mem_cgroup_charge_statistics(memcg, newpage, nr_pages); + mem_cgroup_charge_statistics(memcg, nr_pages); memcg_check_events(memcg, newpage); local_irq_restore(flags); } @@ -7236,7 +7235,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * only synchronisation we have for updating the per-CPU variables. */ VM_BUG_ON(!irqs_disabled()); - mem_cgroup_charge_statistics(memcg, page, -nr_entries); + mem_cgroup_charge_statistics(memcg, -nr_entries); memcg_check_events(memcg, page); css_put(&memcg->css); From 8dcfca3ceccad061046a76c43dbace3087755685 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 14:51:26 -0400 Subject: [PATCH 497/851] mm/memcg: Use the node id in mem_cgroup_update_tree() By using the node id in mem_cgroup_update_tree(), we can delete soft_limit_tree_from_page() and mem_cgroup_page_nodeinfo(). Saves 42 bytes of kernel text on my config. 
Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: Christoph Hellwig --- mm/memcontrol.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ee892daecb8bc..d57ff5c5d330a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -451,28 +451,12 @@ ino_t page_cgroup_ino(struct page *page) return ino; } -static struct mem_cgroup_per_node * -mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page) -{ - int nid = page_to_nid(page); - - return memcg->nodeinfo[nid]; -} - static struct mem_cgroup_tree_per_node * soft_limit_tree_node(int nid) { return soft_limit_tree.rb_tree_per_node[nid]; } -static struct mem_cgroup_tree_per_node * -soft_limit_tree_from_page(struct page *page) -{ - int nid = page_to_nid(page); - - return soft_limit_tree.rb_tree_per_node[nid]; -} - static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, struct mem_cgroup_tree_per_node *mctz, unsigned long new_usage_in_excess) @@ -543,13 +527,13 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg) return excess; } -static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) +static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) { unsigned long excess; struct mem_cgroup_per_node *mz; struct mem_cgroup_tree_per_node *mctz; - mctz = soft_limit_tree_from_page(page); + mctz = soft_limit_tree_node(nid); if (!mctz) return; /* @@ -557,7 +541,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) * because their event counter is not touched. 
*/ for (; memcg; memcg = parent_mem_cgroup(memcg)) { - mz = mem_cgroup_page_nodeinfo(memcg, page); + mz = memcg->nodeinfo[nid]; excess = soft_limit_excess(memcg); /* * We have to update the tree if mz is on RB-tree or @@ -884,7 +868,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) MEM_CGROUP_TARGET_SOFTLIMIT); mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) - mem_cgroup_update_tree(memcg, page); + mem_cgroup_update_tree(memcg, page_to_nid(page)); } } From afb628cd888cf2e6eb8ecabfa992fa2518caee2c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Jun 2021 08:57:02 -0400 Subject: [PATCH 498/851] mm/memcg: Remove soft_limit_tree_node() Opencode this one-line function in its three callers. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: Christoph Hellwig --- mm/memcontrol.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d57ff5c5d330a..f70e33d691aa6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -451,12 +451,6 @@ ino_t page_cgroup_ino(struct page *page) return ino; } -static struct mem_cgroup_tree_per_node * -soft_limit_tree_node(int nid) -{ - return soft_limit_tree.rb_tree_per_node[nid]; -} - static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, struct mem_cgroup_tree_per_node *mctz, unsigned long new_usage_in_excess) @@ -533,7 +527,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) struct mem_cgroup_per_node *mz; struct mem_cgroup_tree_per_node *mctz; - mctz = soft_limit_tree_node(nid); + mctz = soft_limit_tree.rb_tree_per_node[nid]; if (!mctz) return; /* @@ -572,7 +566,7 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for_each_node(nid) { mz = memcg->nodeinfo[nid]; - mctz = soft_limit_tree_node(nid); + mctz = soft_limit_tree.rb_tree_per_node[nid]; if (mctz) mem_cgroup_remove_exceeded(mz, mctz); } @@ -3420,7 
+3414,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, if (order > 0) return 0; - mctz = soft_limit_tree_node(pgdat->node_id); + mctz = soft_limit_tree.rb_tree_per_node[pgdat->node_id]; /* * Do not even bother to check the largest node if the root From 5eafda8b0c4d0623fa7cfd4e69bfc12b183bd208 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Jun 2021 09:05:47 -0400 Subject: [PATCH 499/851] mm/memcg: Convert memcg_check_events to take a node ID memcg_check_events only uses the page's nid, so call page_to_nid in the callers to make the interface easier to understand. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Michal Hocko Reviewed-by: Christoph Hellwig --- mm/memcontrol.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f70e33d691aa6..1a049bfa0e0a8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -851,7 +851,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, * Check events in order. * */ -static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) +static void memcg_check_events(struct mem_cgroup *memcg, int nid) { /* threshold event is triggered in finer grain than soft limit */ if (unlikely(mem_cgroup_event_ratelimit(memcg, @@ -862,7 +862,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) MEM_CGROUP_TARGET_SOFTLIMIT); mem_cgroup_threshold(memcg); if (unlikely(do_softlimit)) - mem_cgroup_update_tree(memcg, page_to_nid(page)); + mem_cgroup_update_tree(memcg, nid); } } @@ -5578,7 +5578,7 @@ static int mem_cgroup_move_account(struct page *page, struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; unsigned int nr_pages = compound ? 
thp_nr_pages(page) : 1; - int ret; + int nid, ret; VM_BUG_ON(from == to); VM_BUG_ON_PAGE(PageLRU(page), page); @@ -5667,12 +5667,13 @@ static int mem_cgroup_move_account(struct page *page, __unlock_page_memcg(from); ret = 0; + nid = page_to_nid(page); local_irq_disable(); mem_cgroup_charge_statistics(to, nr_pages); - memcg_check_events(to, page); + memcg_check_events(to, nid); mem_cgroup_charge_statistics(from, -nr_pages); - memcg_check_events(from, page); + memcg_check_events(from, nid); local_irq_enable(); out_unlock: unlock_page(page); @@ -6693,7 +6694,7 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, local_irq_disable(); mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, page); + memcg_check_events(memcg, page_to_nid(page)); local_irq_enable(); out: return ret; @@ -6801,7 +6802,7 @@ struct uncharge_gather { unsigned long nr_memory; unsigned long pgpgout; unsigned long nr_kmem; - struct page *dummy_page; + int nid; }; static inline void uncharge_gather_clear(struct uncharge_gather *ug) @@ -6825,7 +6826,7 @@ static void uncharge_batch(const struct uncharge_gather *ug) local_irq_save(flags); __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_memory); - memcg_check_events(ug->memcg, ug->dummy_page); + memcg_check_events(ug->memcg, ug->nid); local_irq_restore(flags); /* drop reference from uncharge_page */ @@ -6866,7 +6867,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) uncharge_gather_clear(ug); } ug->memcg = memcg; - ug->dummy_page = page; + ug->nid = page_to_nid(page); /* pairs with css_put in uncharge_batch */ css_get(&memcg->css); @@ -6984,7 +6985,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) local_irq_save(flags); mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, newpage); + memcg_check_events(memcg, page_to_nid(newpage)); local_irq_restore(flags); } @@ 
-7214,7 +7215,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) */ VM_BUG_ON(!irqs_disabled()); mem_cgroup_charge_statistics(memcg, -nr_entries); - memcg_check_events(memcg, page); + memcg_check_events(memcg, page_to_nid(page)); css_put(&memcg->css); } From 61374127431a56bc938d505de1bb831f63177374 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 28 Jun 2021 14:59:26 -0400 Subject: [PATCH 500/851] mm/memcg: Add folio_memcg() and related functions memcg information is only stored in the head page, so the memcg subsystem needs to ensure that all accesses are to the head page. The first step is converting page_memcg() to folio_memcg(). The callers of page_memcg() and PageMemcgKmem() are not yet ready to be converted to use folios, so retain them as wrappers around folio_memcg() and folio_memcg_kmem(). They will be converted in a later patch set. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/memcontrol.h | 109 ++++++++++++++++++++++--------------- mm/memcontrol.c | 21 ++++--- 2 files changed, 77 insertions(+), 53 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bfe5c486f4add..eabae58741619 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -372,6 +372,7 @@ enum page_memcg_data_flags { #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) static inline bool PageMemcgKmem(struct page *page); +static inline bool folio_memcg_kmem(struct folio *folio); /* * After the initialization objcg->memcg is always pointing at @@ -386,73 +387,77 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) } /* - * __page_memcg - get the memory cgroup associated with a non-kmem page - * @page: a pointer to the page struct + * __folio_memcg - Get the memory cgroup associated with a non-kmem folio + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL.
This function assumes that the page is known to have a + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages or - * kmem pages. + * against some type of folios, e.g. slab folios or ex-slab folios or + * kmem folios. */ -static inline struct mem_cgroup *__page_memcg(struct page *page) +static inline struct mem_cgroup *__folio_memcg(struct folio *folio) { - unsigned long memcg_data = page->memcg_data; + unsigned long memcg_data = folio->memcg_data; - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio); return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* - * __page_objcg - get the object cgroup associated with a kmem page - * @page: a pointer to the page struct + * __folio_objcg - get the object cgroup associated with a kmem folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the object cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the object cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper object cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages or - * LRU pages. + * against some type of folios, e.g. slab folios or ex-slab folios or + * LRU folios. 
*/ -static inline struct obj_cgroup *__page_objcg(struct page *page) +static inline struct obj_cgroup *__folio_objcg(struct folio *folio) { - unsigned long memcg_data = page->memcg_data; + unsigned long memcg_data = folio->memcg_data; - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio); return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* - * page_memcg - get the memory cgroup associated with a page - * @page: a pointer to the page struct + * folio_memcg - Get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages. + * against some type of folios, e.g. slab folios or ex-slab folios. * - * For a non-kmem page any of the following ensures page and memcg binding + * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * - * - the page lock + * - the folio lock * - LRU isolation * - lock_page_memcg() * - exclusive reference * - * For a kmem page a caller should hold an rcu read lock to protect memcg - * associated with a kmem page from being released. + * For a kmem folio a caller should hold an rcu read lock to protect memcg + * associated with a kmem folio from being released. 
*/ +static inline struct mem_cgroup *folio_memcg(struct folio *folio) +{ + if (folio_memcg_kmem(folio)) + return obj_cgroup_memcg(__folio_objcg(folio)); + return __folio_memcg(folio); +} + static inline struct mem_cgroup *page_memcg(struct page *page) { - if (PageMemcgKmem(page)) - return obj_cgroup_memcg(__page_objcg(page)); - else - return __page_memcg(page); + return folio_memcg(page_folio(page)); } /* @@ -525,17 +530,18 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) #ifdef CONFIG_MEMCG_KMEM /* - * PageMemcgKmem - check if the page has MemcgKmem flag set - * @page: a pointer to the page struct + * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. + * @folio: Pointer to the folio. * - * Checks if the page has MemcgKmem flag set. The caller must ensure that - * the page has an associated memory cgroup. It's not safe to call this function - * against some types of pages, e.g. slab pages. + * Checks if the folio has MemcgKmem flag set. The caller must ensure + * that the folio has an associated memory cgroup. It's not safe to call + * this function against some types of folios, e.g. slab folios. 
*/ -static inline bool PageMemcgKmem(struct page *page) +static inline bool folio_memcg_kmem(struct folio *folio) { - VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); - return page->memcg_data & MEMCG_DATA_KMEM; + VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page); + VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio); + return folio->memcg_data & MEMCG_DATA_KMEM; } /* @@ -579,7 +585,7 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #else -static inline bool PageMemcgKmem(struct page *page) +static inline bool folio_memcg_kmem(struct folio *folio) { return false; } @@ -595,6 +601,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) } #endif +static inline bool PageMemcgKmem(struct page *page) +{ + return folio_memcg_kmem(page_folio(page)); +} + static __always_inline bool memcg_stat_item_in_bytes(int idx) { if (idx == MEMCG_PERCPU_B) @@ -1106,6 +1117,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, #define MEM_CGROUP_ID_SHIFT 0 #define MEM_CGROUP_ID_MAX 0 +static inline struct mem_cgroup *folio_memcg(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *page_memcg(struct page *page) { return NULL; @@ -1122,6 +1138,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return NULL; } +static inline bool folio_memcg_kmem(struct folio *folio) +{ + return false; +} + static inline bool PageMemcgKmem(struct page *page) { return false; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1a049bfa0e0a8..f0f781dde37a9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3050,15 +3050,16 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) */ void __memcg_kmem_uncharge_page(struct page *page, int order) { + struct folio *folio = page_folio(page); struct obj_cgroup *objcg; unsigned int nr_pages = 1 << order; - if (!PageMemcgKmem(page)) + if (!folio_memcg_kmem(folio)) return; - objcg = __page_objcg(page); 
+ objcg = __folio_objcg(folio); obj_cgroup_uncharge_pages(objcg, nr_pages); - page->memcg_data = 0; + folio->memcg_data = 0; obj_cgroup_put(objcg); } @@ -3290,17 +3291,18 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) */ void split_page_memcg(struct page *head, unsigned int nr) { - struct mem_cgroup *memcg = page_memcg(head); + struct folio *folio = page_folio(head); + struct mem_cgroup *memcg = folio_memcg(folio); int i; if (mem_cgroup_disabled() || !memcg) return; for (i = 1; i < nr; i++) - head[i].memcg_data = head->memcg_data; + folio_page(folio, i)->memcg_data = folio->memcg_data; - if (PageMemcgKmem(head)) - obj_cgroup_get_many(__page_objcg(head), nr - 1); + if (folio_memcg_kmem(folio)) + obj_cgroup_get_many(__folio_objcg(folio), nr - 1); else css_get_many(&memcg->css, nr - 1); } @@ -6835,6 +6837,7 @@ static void uncharge_batch(const struct uncharge_gather *ug) static void uncharge_page(struct page *page, struct uncharge_gather *ug) { + struct folio *folio = page_folio(page); unsigned long nr_pages; struct mem_cgroup *memcg; struct obj_cgroup *objcg; @@ -6848,14 +6851,14 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) * exclusive access to the page. */ if (use_objcg) { - objcg = __page_objcg(page); + objcg = __folio_objcg(folio); /* * This get matches the put at the end of the function and * kmem pages do not hold memcg references anymore. */ memcg = get_mem_cgroup_from_objcg(objcg); } else { - memcg = __page_memcg(page); + memcg = __folio_memcg(folio); } if (!memcg) From e02e05b0daa51607e69456f6afebb72a3ce637a5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 14:07:12 -0400 Subject: [PATCH 501/851] mm/memcg: Convert commit_charge() to take a folio The memcg_data is only set on the head page, so enforce that by typing it as a folio. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Michal Hocko --- mm/memcontrol.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f0f781dde37a9..c2ffad021e091 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2769,9 +2769,9 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) } #endif -static void commit_charge(struct page *page, struct mem_cgroup *memcg) +static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) { - VM_BUG_ON_PAGE(page_memcg(page), page); + VM_BUG_ON_FOLIO(folio_memcg(folio), folio); /* * Any of the following ensures page's memcg stability: * @@ -2780,7 +2780,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg) * - lock_page_memcg() * - exclusive reference */ - page->memcg_data = (unsigned long)memcg; + folio->memcg_data = (unsigned long)memcg; } static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) @@ -6684,7 +6684,8 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, gfp_t gfp) { - unsigned int nr_pages = thp_nr_pages(page); + struct folio *folio = page_folio(page); + unsigned int nr_pages = folio_nr_pages(folio); int ret; ret = try_charge(memcg, gfp, nr_pages); @@ -6692,7 +6693,7 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, goto out; css_get(&memcg->css); - commit_charge(page, memcg); + commit_charge(folio, memcg); local_irq_disable(); mem_cgroup_charge_statistics(memcg, nr_pages); @@ -6952,21 +6953,21 @@ void mem_cgroup_uncharge_list(struct list_head *page_list) */ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) { + struct folio *newfolio = page_folio(newpage); struct mem_cgroup *memcg; - unsigned int nr_pages; + unsigned int nr_pages = folio_nr_pages(newfolio); unsigned long flags; 
VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); - VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); - VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); - VM_BUG_ON_PAGE(PageTransHuge(oldpage) != PageTransHuge(newpage), - newpage); + VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); + VM_BUG_ON_FOLIO(PageAnon(oldpage) != folio_test_anon(newfolio), newfolio); + VM_BUG_ON_FOLIO(compound_nr(oldpage) != nr_pages, newfolio); if (mem_cgroup_disabled()) return; /* Page cache replacement: new page already charged? */ - if (page_memcg(newpage)) + if (folio_memcg(newfolio)) return; memcg = page_memcg(oldpage); @@ -6975,8 +6976,6 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) return; /* Force-charge the new page. The old one will be freed soon */ - nr_pages = thp_nr_pages(newpage); - if (!mem_cgroup_is_root(memcg)) { page_counter_charge(&memcg->memory, nr_pages); if (do_memsw_account()) @@ -6984,7 +6983,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) } css_get(&memcg->css); - commit_charge(newpage, memcg); + commit_charge(newfolio, memcg); local_irq_save(flags); mem_cgroup_charge_statistics(memcg, nr_pages); From 881060885e5c3903476a5e1b40a2a1ef50c26d01 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 25 Jun 2021 09:27:04 -0400 Subject: [PATCH 502/851] mm/memcg: Convert mem_cgroup_charge() to take a folio Convert all callers of mem_cgroup_charge() to call page_folio() on the page they're currently passing in. Many of them will be converted to use folios themselves soon. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/memcontrol.h | 6 +++--- kernel/events/uprobes.c | 3 ++- mm/filemap.c | 2 +- mm/huge_memory.c | 2 +- mm/khugepaged.c | 4 ++-- mm/ksm.c | 3 ++- mm/memcontrol.c | 30 +++++++++++++++--------------- mm/memory.c | 9 +++++---- mm/migrate.c | 2 +- mm/shmem.c | 2 +- mm/userfaultfd.c | 2 +- 11 files changed, 34 insertions(+), 31 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index eabae58741619..62e2ae5c11f2c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -704,7 +704,7 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) page_counter_read(&memcg->memory); } -int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); +int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); @@ -1190,8 +1190,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) return false; } -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +static inline int mem_cgroup_charge(struct folio *folio, + struct mm_struct *mm, gfp_t gfp) { return 0; } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index af24dc3febbef..6357c3580d07b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -167,7 +167,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, addr + PAGE_SIZE); if (new_page) { - err = mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL); + err = mem_cgroup_charge(page_folio(new_page), vma->vm_mm, + GFP_KERNEL); if (err) return err; } diff --git a/mm/filemap.c b/mm/filemap.c index 1ef737cf63fa0..7842711f15c06 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -872,7 +872,7 @@ noinline int __add_to_page_cache_locked(struct page 
*page, page->index = offset; if (!huge) { - error = mem_cgroup_charge(page, NULL, gfp); + error = mem_cgroup_charge(page_folio(page), NULL, gfp); if (error) goto error; charged = true; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index afff3ac870673..ecb1fb1f5f3e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -603,7 +603,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_charge(page, vma->vm_mm, gfp)) { + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, gfp)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK_CHARGE); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b0412be08fa2c..8f6d7fdea9f4b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1087,7 +1087,7 @@ static void collapse_huge_page(struct mm_struct *mm, goto out_nolock; } - if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) { + if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out_nolock; } @@ -1658,7 +1658,7 @@ static void collapse_file(struct mm_struct *mm, goto out; } - if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) { + if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out; } diff --git a/mm/ksm.c b/mm/ksm.c index 3fa9bc8a67cf6..23d36b59f9975 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2580,7 +2580,8 @@ struct page *ksm_might_need_to_copy(struct page *page, return page; /* let do_swap_page report the error */ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); - if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) { + if (new_page && + mem_cgroup_charge(page_folio(new_page), vma->vm_mm, GFP_KERNEL)) { put_page(new_page); new_page = NULL; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c2ffad021e091..ff8014fba1259 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6681,10 +6681,9 @@ void 
mem_cgroup_calculate_protection(struct mem_cgroup *root, atomic_long_read(&parent->memory.children_low_usage))); } -static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, - gfp_t gfp) +static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg, + gfp_t gfp) { - struct folio *folio = page_folio(page); unsigned int nr_pages = folio_nr_pages(folio); int ret; @@ -6697,27 +6696,27 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, local_irq_disable(); mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, page_to_nid(page)); + memcg_check_events(memcg, folio_nid(folio)); local_irq_enable(); out: return ret; } /** - * mem_cgroup_charge - charge a newly allocated page to a cgroup - * @page: page to charge - * @mm: mm context of the victim - * @gfp_mask: reclaim mode + * mem_cgroup_charge - Charge a newly allocated folio to a cgroup. + * @folio: Folio to charge. + * @mm: mm context of the allocating task. + * @gfp: Reclaim mode. * - * Try to charge @page to the memcg that @mm belongs to, reclaiming - * pages according to @gfp_mask if necessary. if @mm is NULL, try to + * Try to charge @folio to the memcg that @mm belongs to, reclaiming + * pages according to @gfp if necessary. If @mm is NULL, try to * charge to the active memcg. * - * Do not use this for pages allocated for swapin. + * Do not use this for folios allocated for swapin. * - * Returns 0 on success. Otherwise, an error code is returned. + * Return: 0 on success. Otherwise, an error code is returned. 
*/ -int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) +int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { struct mem_cgroup *memcg; int ret; @@ -6726,7 +6725,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) return 0; memcg = get_mem_cgroup_from_mm(mm); - ret = __mem_cgroup_charge(page, memcg, gfp_mask); + ret = charge_memcg(folio, memcg, gfp); css_put(&memcg->css); return ret; @@ -6747,6 +6746,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { + struct folio *folio = page_folio(page); struct mem_cgroup *memcg; unsigned short id; int ret; @@ -6761,7 +6761,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, memcg = get_mem_cgroup_from_mm(mm); rcu_read_unlock(); - ret = __mem_cgroup_charge(page, memcg, gfp); + ret = charge_memcg(folio, memcg, gfp); css_put(&memcg->css); return ret; diff --git a/mm/memory.c b/mm/memory.c index 2f111f9b3dbc6..614418e26e2c9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -990,7 +990,7 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma, if (!new_page) return NULL; - if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) { + if (mem_cgroup_charge(page_folio(new_page), src_mm, GFP_KERNEL)) { put_page(new_page); return NULL; } @@ -3019,7 +3019,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) } } - if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(new_page), mm, GFP_KERNEL)) goto oom_free_new; cgroup_throttle_swaprate(new_page, GFP_KERNEL); @@ -3768,7 +3768,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (!page) goto oom; - if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) goto oom_free_page; cgroup_throttle_swaprate(page, GFP_KERNEL); @@ 
-4183,7 +4183,8 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf) if (!vmf->cow_page) return VM_FAULT_OOM; - if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) { + if (mem_cgroup_charge(page_folio(vmf->cow_page), vma->vm_mm, + GFP_KERNEL)) { put_page(vmf->cow_page); return VM_FAULT_OOM; } diff --git a/mm/migrate.c b/mm/migrate.c index 34a9ad3e0a4f9..b5bdae748f82c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2763,7 +2763,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, if (unlikely(anon_vma_prepare(vma))) goto abort; - if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL)) goto abort; /* diff --git a/mm/shmem.c b/mm/shmem.c index 70d9ce294bb49..3931fed5c8d8c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -685,7 +685,7 @@ static int shmem_add_to_page_cache(struct page *page, page->index = index; if (!PageSwapCache(page)) { - error = mem_cgroup_charge(page, charge_mm, gfp); + error = mem_cgroup_charge(page_folio(page), charge_mm, gfp); if (error) { if (PageTransHuge(page)) { count_vm_event(THP_FILE_FALLBACK); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 0e2132834bc7d..5d0f55f3c0ede 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -164,7 +164,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, __SetPageUptodate(page); ret = -ENOMEM; - if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL)) + if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL)) goto out_release; ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, From 6ce4de45bae2fd86e497f8f8dac8f08ffafb333b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 29 Jun 2021 21:47:12 -0400 Subject: [PATCH 503/851] mm/memcg: Convert uncharge_page() to uncharge_folio() Use a folio rather than a page to ensure that we're only operating on base or head pages, and not tail pages. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/memcontrol.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ff8014fba1259..f88dd3bf38846 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6832,24 +6832,23 @@ static void uncharge_batch(const struct uncharge_gather *ug) memcg_check_events(ug->memcg, ug->nid); local_irq_restore(flags); - /* drop reference from uncharge_page */ + /* drop reference from uncharge_folio */ css_put(&ug->memcg->css); } -static void uncharge_page(struct page *page, struct uncharge_gather *ug) +static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) { - struct folio *folio = page_folio(page); unsigned long nr_pages; struct mem_cgroup *memcg; struct obj_cgroup *objcg; - bool use_objcg = PageMemcgKmem(page); + bool use_objcg = folio_memcg_kmem(folio); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* * Nobody should be changing or seriously looking at - * page memcg or objcg at this point, we have fully - * exclusive access to the page. + * folio memcg or objcg at this point, we have fully + * exclusive access to the folio. 
*/ if (use_objcg) { objcg = __folio_objcg(folio); @@ -6871,19 +6870,19 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) uncharge_gather_clear(ug); } ug->memcg = memcg; - ug->nid = page_to_nid(page); + ug->nid = folio_nid(folio); /* pairs with css_put in uncharge_batch */ css_get(&memcg->css); } - nr_pages = compound_nr(page); + nr_pages = folio_nr_pages(folio); if (use_objcg) { ug->nr_memory += nr_pages; ug->nr_kmem += nr_pages; - page->memcg_data = 0; + folio->memcg_data = 0; obj_cgroup_put(objcg); } else { /* LRU pages aren't accounted at the root level */ @@ -6891,7 +6890,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) ug->nr_memory += nr_pages; ug->pgpgout++; - page->memcg_data = 0; + folio->memcg_data = 0; } css_put(&memcg->css); @@ -6915,7 +6914,7 @@ void mem_cgroup_uncharge(struct page *page) return; uncharge_gather_clear(&ug); - uncharge_page(page, &ug); + uncharge_folio(page_folio(page), &ug); uncharge_batch(&ug); } @@ -6929,14 +6928,14 @@ void mem_cgroup_uncharge(struct page *page) void mem_cgroup_uncharge_list(struct list_head *page_list) { struct uncharge_gather ug; - struct page *page; + struct folio *folio; if (mem_cgroup_disabled()) return; uncharge_gather_clear(&ug); - list_for_each_entry(page, page_list, lru) - uncharge_page(page, &ug); + list_for_each_entry(folio, page_list, lru) + uncharge_folio(folio, &ug); if (ug.memcg) uncharge_batch(&ug); } From 6a95c78dfa4eb9bee3c2750f24c9920e0b056337 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 1 May 2021 20:42:23 -0400 Subject: [PATCH 504/851] mm/memcg: Convert mem_cgroup_uncharge() to take a folio Convert all the callers to call page_folio(). Most of them were already using a head page, but a few of them I can't prove were, so this may actually fix a bug. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport --- include/linux/memcontrol.h | 4 ++-- mm/filemap.c | 2 +- mm/khugepaged.c | 4 ++-- mm/memcontrol.c | 14 +++++++------- mm/memory-failure.c | 2 +- mm/memremap.c | 2 +- mm/page_alloc.c | 2 +- mm/swap.c | 2 +- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 62e2ae5c11f2c..165cb04034e20 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -709,7 +709,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); -void mem_cgroup_uncharge(struct page *page); +void mem_cgroup_uncharge(struct folio *folio); void mem_cgroup_uncharge_list(struct list_head *page_list); void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); @@ -1206,7 +1206,7 @@ static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) { } -static inline void mem_cgroup_uncharge(struct page *page) +static inline void mem_cgroup_uncharge(struct folio *folio) { } diff --git a/mm/filemap.c b/mm/filemap.c index 7842711f15c06..3c170bc2f8037 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -923,7 +923,7 @@ noinline int __add_to_page_cache_locked(struct page *page, if (xas_error(&xas)) { error = xas_error(&xas); if (charged) - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); goto error; } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 8f6d7fdea9f4b..6b9c98ddcd09f 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1211,7 +1211,7 @@ static void collapse_huge_page(struct mm_struct *mm, mmap_write_unlock(mm); out_nolock: if (!IS_ERR_OR_NULL(*hpage)) - mem_cgroup_uncharge(*hpage); + mem_cgroup_uncharge(page_folio(*hpage)); trace_mm_collapse_huge_page(mm, isolated, result); return; } @@ -1975,7 +1975,7 @@ static void collapse_file(struct mm_struct *mm, out: 
VM_BUG_ON(!list_empty(&pagelist)); if (!IS_ERR_OR_NULL(*hpage)) - mem_cgroup_uncharge(*hpage); + mem_cgroup_uncharge(page_folio(*hpage)); /* TODO: tracepoints */ } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f88dd3bf38846..313977a25f3c8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6897,24 +6897,24 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug) } /** - * mem_cgroup_uncharge - uncharge a page - * @page: page to uncharge + * mem_cgroup_uncharge - Uncharge a folio. + * @folio: Folio to uncharge. * - * Uncharge a page previously charged with mem_cgroup_charge(). + * Uncharge a folio previously charged with mem_cgroup_charge(). */ -void mem_cgroup_uncharge(struct page *page) +void mem_cgroup_uncharge(struct folio *folio) { struct uncharge_gather ug; if (mem_cgroup_disabled()) return; - /* Don't touch page->lru of any random page, pre-check: */ - if (!page_memcg(page)) + /* Don't touch folio->lru of any random page, pre-check: */ + if (!folio_memcg(folio)) return; uncharge_gather_clear(&ug); - uncharge_folio(page_folio(page), &ug); + uncharge_folio(folio, &ug); uncharge_batch(&ug); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index eefd823deb679..9ae7a57a4cc02 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -763,7 +763,7 @@ static int delete_from_lru_cache(struct page *p) * Poisoned page might never drop its ref count to 0 so we have * to uncharge it manually from its memcg. 
*/ - mem_cgroup_uncharge(p); + mem_cgroup_uncharge(page_folio(p)); /* * drop the page count elevated by isolate_lru_page() diff --git a/mm/memremap.c b/mm/memremap.c index 15a074ffb8d73..6eac40f9f62a7 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -508,7 +508,7 @@ void free_devmap_managed_page(struct page *page) __ClearPageWaiters(page); - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); /* * When a device_private page is freed, the page->mapping field diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e97e68aef7a8..dbf25555c9b8b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -726,7 +726,7 @@ static inline void free_the_page(struct page *page, unsigned int order) void free_compound_page(struct page *page) { - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); free_the_page(page, compound_order(page)); } diff --git a/mm/swap.c b/mm/swap.c index 095a5ec6f986c..11ff40104a2cd 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -94,7 +94,7 @@ static void __page_cache_release(struct page *page) static void __put_single_page(struct page *page) { __page_cache_release(page); - mem_cgroup_uncharge(page); + mem_cgroup_uncharge(page_folio(page)); free_unref_page(page, 0); } From 5574ade1f27b0d24765b3e97ad180577ee0f70bf Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 May 2021 18:14:59 -0400 Subject: [PATCH 505/851] mm/memcg: Convert mem_cgroup_migrate() to take folios Convert all callers of mem_cgroup_migrate() to call page_folio() first. They all look like they're using head pages already, but this proves it. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport --- include/linux/memcontrol.h | 4 ++-- mm/filemap.c | 4 +++- mm/memcontrol.c | 35 +++++++++++++++++------------------ mm/migrate.c | 4 +++- mm/shmem.c | 5 ++++- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 165cb04034e20..3be67e9ed8696 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -712,7 +712,7 @@ void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); void mem_cgroup_uncharge(struct folio *folio); void mem_cgroup_uncharge_list(struct list_head *page_list); -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); +void mem_cgroup_migrate(struct folio *old, struct folio *new); /** * mem_cgroup_lruvec - get the lru list vector for a memcg & node @@ -1214,7 +1214,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } -static inline void mem_cgroup_migrate(struct page *old, struct page *new) +static inline void mem_cgroup_migrate(struct folio *old, struct folio *new) { } diff --git a/mm/filemap.c b/mm/filemap.c index 3c170bc2f8037..2469f714520ab 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -817,6 +817,8 @@ EXPORT_SYMBOL(file_write_and_wait_range); */ void replace_page_cache_page(struct page *old, struct page *new) { + struct folio *fold = page_folio(old); + struct folio *fnew = page_folio(new); struct address_space *mapping = old->mapping; void (*freepage)(struct page *) = mapping->a_ops->freepage; pgoff_t offset = old->index; @@ -831,7 +833,7 @@ void replace_page_cache_page(struct page *old, struct page *new) new->mapping = mapping; new->index = offset; - mem_cgroup_migrate(old, new); + mem_cgroup_migrate(fold, fnew); xas_lock_irqsave(&xas, flags); xas_store(&xas, new); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 313977a25f3c8..4f7ef67a75516 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6941,36 +6941,35 @@ 
void mem_cgroup_uncharge_list(struct list_head *page_list) } /** - * mem_cgroup_migrate - charge a page's replacement - * @oldpage: currently circulating page - * @newpage: replacement page + * mem_cgroup_migrate - Charge a folio's replacement. + * @old: Currently circulating folio. + * @new: Replacement folio. * - * Charge @newpage as a replacement page for @oldpage. @oldpage will + * Charge @new as a replacement folio for @old. @old will * be uncharged upon free. * - * Both pages must be locked, @newpage->mapping must be set up. + * Both folios must be locked, @new->mapping must be set up. */ -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) +void mem_cgroup_migrate(struct folio *old, struct folio *new) { - struct folio *newfolio = page_folio(newpage); struct mem_cgroup *memcg; - unsigned int nr_pages = folio_nr_pages(newfolio); + unsigned int nr_pages = folio_nr_pages(new); unsigned long flags; - VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); - VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); - VM_BUG_ON_FOLIO(PageAnon(oldpage) != folio_test_anon(newfolio), newfolio); - VM_BUG_ON_FOLIO(compound_nr(oldpage) != nr_pages, newfolio); + VM_BUG_ON_FOLIO(!folio_test_locked(old), old); + VM_BUG_ON_FOLIO(!folio_test_locked(new), new); + VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new); + VM_BUG_ON_FOLIO(folio_nr_pages(old) != nr_pages, new); if (mem_cgroup_disabled()) return; - /* Page cache replacement: new page already charged? */ - if (folio_memcg(newfolio)) + /* Page cache replacement: new folio already charged? 
*/ + if (folio_memcg(new)) return; - memcg = page_memcg(oldpage); - VM_WARN_ON_ONCE_PAGE(!memcg, oldpage); + memcg = folio_memcg(old); + VM_WARN_ON_ONCE_FOLIO(!memcg, old); if (!memcg) return; @@ -6982,11 +6981,11 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) } css_get(&memcg->css); - commit_charge(newfolio, memcg); + commit_charge(new, memcg); local_irq_save(flags); mem_cgroup_charge_statistics(memcg, nr_pages); - memcg_check_events(memcg, page_to_nid(newpage)); + memcg_check_events(memcg, folio_nid(new)); local_irq_restore(flags); } diff --git a/mm/migrate.c b/mm/migrate.c index b5bdae748f82c..910552318df35 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -541,6 +541,8 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, */ void migrate_page_states(struct page *newpage, struct page *page) { + struct folio *folio = page_folio(page); + struct folio *newfolio = page_folio(newpage); int cpupid; if (PageError(page)) @@ -608,7 +610,7 @@ void migrate_page_states(struct page *newpage, struct page *page) copy_page_owner(page, newpage); if (!PageHuge(page)) - mem_cgroup_migrate(page, newpage); + mem_cgroup_migrate(folio, newfolio); } EXPORT_SYMBOL(migrate_page_states); diff --git a/mm/shmem.c b/mm/shmem.c index 3931fed5c8d8c..2fd75b4d49740 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1619,6 +1619,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct page *oldpage, *newpage; + struct folio *old, *new; struct address_space *swap_mapping; swp_entry_t entry; pgoff_t swap_index; @@ -1655,7 +1656,9 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, xa_lock_irq(&swap_mapping->i_pages); error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage); if (!error) { - mem_cgroup_migrate(oldpage, newpage); + old = page_folio(oldpage); + new = page_folio(newpage); + mem_cgroup_migrate(old, new); __inc_lruvec_page_state(newpage, NR_FILE_PAGES); 
__dec_lruvec_page_state(oldpage, NR_FILE_PAGES); } From 2832679327e108a6d58c8bf6e58664f088b8c412 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 11:43:01 -0400 Subject: [PATCH 506/851] mm/memcg: Convert mem_cgroup_track_foreign_dirty_slowpath() to folio The page was only being used for the memcg and to gather trace information, so this is a simple conversion. The only caller of mem_cgroup_track_foreign_dirty() will be converted to folios in a later patch, so doing this now makes that patch simpler. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/memcontrol.h | 7 ++++--- include/trace/events/writeback.h | 8 ++++---- mm/memcontrol.c | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3be67e9ed8696..f7d429d541177 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1560,17 +1560,18 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback); -void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, +void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb); static inline void mem_cgroup_track_foreign_dirty(struct page *page, struct bdi_writeback *wb) { + struct folio *folio = page_folio(page); if (mem_cgroup_disabled()) return; - if (unlikely(&page_memcg(page)->css != wb->memcg_css)) - mem_cgroup_track_foreign_dirty_slowpath(page, wb); + if (unlikely(&folio_memcg(folio)->css != wb->memcg_css)) + mem_cgroup_track_foreign_dirty_slowpath(folio, wb); } void mem_cgroup_flush_foreign(struct bdi_writeback *wb); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 840d1ba84cf5c..297871ca00047 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -236,9 +236,9 @@ TRACE_EVENT(inode_switch_wbs, 
TRACE_EVENT(track_foreign_dirty, - TP_PROTO(struct page *page, struct bdi_writeback *wb), + TP_PROTO(struct folio *folio, struct bdi_writeback *wb), - TP_ARGS(page, wb), + TP_ARGS(folio, wb), TP_STRUCT__entry( __array(char, name, 32) @@ -250,7 +250,7 @@ TRACE_EVENT(track_foreign_dirty, ), TP_fast_assign( - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); struct inode *inode = mapping ? mapping->host : NULL; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); @@ -258,7 +258,7 @@ TRACE_EVENT(track_foreign_dirty, __entry->ino = inode ? inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); - __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup); + __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4f7ef67a75516..d01978a373500 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4571,17 +4571,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, * As being wrong occasionally doesn't matter, updates and accesses to the * records are lockless and racy. */ -void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, +void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb) { - struct mem_cgroup *memcg = page_memcg(page); + struct mem_cgroup *memcg = folio_memcg(folio); struct memcg_cgwb_frn *frn; u64 now = get_jiffies_64(); u64 oldest_at = now; int oldest = -1; int i; - trace_track_foreign_dirty(page, wb); + trace_track_foreign_dirty(folio, wb); /* * Pick the slot to use. 
If there is already a slot for @wb, keep From 754fb2e00a777b98a26bb9f4077928ca05ec69d5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 28 Jun 2021 17:26:00 -0400 Subject: [PATCH 507/851] mm/memcg: Add folio_memcg_lock() and folio_memcg_unlock() These are the folio equivalents of lock_page_memcg() and unlock_page_memcg(). lock_page_memcg() and unlock_page_memcg() have too many callers to be easily replaced in a single patch, so reimplement them as wrappers for now to be cleaned up later when enough callers have been converted to use folios. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport --- include/linux/memcontrol.h | 10 +++++++++ mm/memcontrol.c | 45 ++++++++++++++++++++++++-------------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f7d429d541177..c1726350bce92 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -950,6 +950,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); extern bool cgroup_memory_noswap; #endif +void folio_memcg_lock(struct folio *folio); +void folio_memcg_unlock(struct folio *folio); void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); @@ -1367,6 +1369,14 @@ static inline void unlock_page_memcg(struct page *page) { } +static inline void folio_memcg_lock(struct folio *folio) +{ +} + +static inline void folio_memcg_unlock(struct folio *folio) +{ +} + static inline void mem_cgroup_handle_over_high(void) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d01978a373500..24a32e0c563ba 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1965,18 +1965,17 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } /** - * lock_page_memcg - lock a page and memcg binding - * @page: the page + * folio_memcg_lock - Bind a folio to its memcg. + * @folio: The folio. 
* - * This function protects unlocked LRU pages from being moved to + * This function prevents unlocked LRU folios from being moved to * another cgroup. * - * It ensures lifetime of the locked memcg. Caller is responsible - * for the lifetime of the page. + * It ensures lifetime of the bound memcg. The caller is responsible + * for the lifetime of the folio. */ -void lock_page_memcg(struct page *page) +void folio_memcg_lock(struct folio *folio) { - struct page *head = compound_head(page); /* rmap on tail pages */ struct mem_cgroup *memcg; unsigned long flags; @@ -1990,7 +1989,7 @@ void lock_page_memcg(struct page *page) if (mem_cgroup_disabled()) return; again: - memcg = page_memcg(head); + memcg = folio_memcg(folio); if (unlikely(!memcg)) return; @@ -2004,7 +2003,7 @@ void lock_page_memcg(struct page *page) return; spin_lock_irqsave(&memcg->move_lock, flags); - if (memcg != page_memcg(head)) { + if (memcg != folio_memcg(folio)) { spin_unlock_irqrestore(&memcg->move_lock, flags); goto again; } @@ -2018,9 +2017,15 @@ void lock_page_memcg(struct page *page) memcg->move_lock_task = current; memcg->move_lock_flags = flags; } +EXPORT_SYMBOL(folio_memcg_lock); + +void lock_page_memcg(struct page *page) +{ + folio_memcg_lock(page_folio(page)); +} EXPORT_SYMBOL(lock_page_memcg); -static void __unlock_page_memcg(struct mem_cgroup *memcg) +static void __folio_memcg_unlock(struct mem_cgroup *memcg) { if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -2035,14 +2040,22 @@ static void __unlock_page_memcg(struct mem_cgroup *memcg) } /** - * unlock_page_memcg - unlock a page and memcg binding - * @page: the page + * folio_memcg_unlock - Release the binding between a folio and its memcg. + * @folio: The folio. + * + * This releases the binding created by folio_memcg_lock(). This does + * not change the accounting of this folio to its memcg, but it does + * permit others to change it. 
*/ -void unlock_page_memcg(struct page *page) +void folio_memcg_unlock(struct folio *folio) { - struct page *head = compound_head(page); + __folio_memcg_unlock(folio_memcg(folio)); +} +EXPORT_SYMBOL(folio_memcg_unlock); - __unlock_page_memcg(page_memcg(head)); +void unlock_page_memcg(struct page *page) +{ + folio_memcg_unlock(page_folio(page)); } EXPORT_SYMBOL(unlock_page_memcg); @@ -5666,7 +5679,7 @@ static int mem_cgroup_move_account(struct page *page, page->memcg_data = (unsigned long)to; - __unlock_page_memcg(from); + __folio_memcg_unlock(from); ret = 0; nid = page_to_nid(page); From fbd484b263814820a41dd790d2ddc25b7dfc7a9f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 1 Mar 2021 16:34:06 -0500 Subject: [PATCH 508/851] mm/memcg: Convert mem_cgroup_move_account() to use a folio This saves dozens of bytes of text by eliminating a lot of calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/memcontrol.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 24a32e0c563ba..2b79d1936c3d2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5590,38 +5590,39 @@ static int mem_cgroup_move_account(struct page *page, struct mem_cgroup *from, struct mem_cgroup *to) { + struct folio *folio = page_folio(page); struct lruvec *from_vec, *to_vec; struct pglist_data *pgdat; - unsigned int nr_pages = compound ? thp_nr_pages(page) : 1; + unsigned int nr_pages = compound ? folio_nr_pages(folio) : 1; int nid, ret; VM_BUG_ON(from == to); - VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON(compound && !PageTransHuge(page)); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + VM_BUG_ON(compound && !folio_multi(folio)); /* * Prevent mem_cgroup_migrate() from looking at * page's memory cgroup of its source page while we change it. 
*/ ret = -EBUSY; - if (!trylock_page(page)) + if (!folio_trylock(folio)) goto out; ret = -EINVAL; - if (page_memcg(page) != from) + if (folio_memcg(folio) != from) goto out_unlock; - pgdat = page_pgdat(page); + pgdat = folio_pgdat(folio); from_vec = mem_cgroup_lruvec(from, pgdat); to_vec = mem_cgroup_lruvec(to, pgdat); - lock_page_memcg(page); + folio_memcg_lock(folio); - if (PageAnon(page)) { - if (page_mapped(page)) { + if (folio_test_anon(folio)) { + if (folio_mapped(folio)) { __mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages); - if (PageTransHuge(page)) { + if (folio_test_transhuge(folio)) { __mod_lruvec_state(from_vec, NR_ANON_THPS, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_THPS, @@ -5632,18 +5633,18 @@ static int mem_cgroup_move_account(struct page *page, __mod_lruvec_state(from_vec, NR_FILE_PAGES, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_PAGES, nr_pages); - if (PageSwapBacked(page)) { + if (folio_test_swapbacked(folio)) { __mod_lruvec_state(from_vec, NR_SHMEM, -nr_pages); __mod_lruvec_state(to_vec, NR_SHMEM, nr_pages); } - if (page_mapped(page)) { + if (folio_mapped(folio)) { __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages); } - if (PageDirty(page)) { - struct address_space *mapping = page_mapping(page); + if (folio_test_dirty(folio)) { + struct address_space *mapping = folio_mapping(folio); if (mapping_can_writeback(mapping)) { __mod_lruvec_state(from_vec, NR_FILE_DIRTY, @@ -5654,7 +5655,7 @@ static int mem_cgroup_move_account(struct page *page, } } - if (PageWriteback(page)) { + if (folio_test_writeback(folio)) { __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages); __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); } @@ -5677,12 +5678,12 @@ static int mem_cgroup_move_account(struct page *page, css_get(&to->css); css_put(&from->css); - page->memcg_data = (unsigned long)to; + folio->memcg_data = (unsigned long)to; 
__folio_memcg_unlock(from); ret = 0; - nid = page_to_nid(page); + nid = folio_nid(folio); local_irq_disable(); mem_cgroup_charge_statistics(to, nr_pages); @@ -5691,7 +5692,7 @@ static int mem_cgroup_move_account(struct page *page, memcg_check_events(from, nid); local_irq_enable(); out_unlock: - unlock_page(page); + folio_unlock(folio); out: return ret; } From 4c1fa0081d16cde620d8ce45cb6c808f817f4698 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 28 Jun 2021 20:00:28 -0400 Subject: [PATCH 509/851] mm/memcg: Add folio_lruvec() This replaces mem_cgroup_page_lruvec(). All callers converted. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Acked-by: Mike Rapoport --- include/linux/memcontrol.h | 20 +++++++++----------- mm/compaction.c | 2 +- mm/memcontrol.c | 9 ++++++--- mm/swap.c | 3 ++- mm/workingset.c | 3 ++- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c1726350bce92..0551c378c2431 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -751,18 +751,17 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, } /** - * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page - * @page: the page + * folio_lruvec - return lruvec for isolating/putting an LRU folio + * @folio: Pointer to the folio. * - * This function relies on page->mem_cgroup being stable. + * This function relies on folio->mem_cgroup being stable. 
*/ -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec(struct folio *folio) { - pg_data_t *pgdat = page_pgdat(page); - struct mem_cgroup *memcg = page_memcg(page); + struct mem_cgroup *memcg = folio_memcg(folio); - VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); - return mem_cgroup_lruvec(memcg, pgdat); + VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio); + return mem_cgroup_lruvec(memcg, folio_pgdat(folio)); } struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); @@ -1226,10 +1225,9 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, return &pgdat->__lruvec; } -static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec(struct folio *folio) { - pg_data_t *pgdat = page_pgdat(page); - + struct pglist_data *pgdat = folio_pgdat(folio); return &pgdat->__lruvec; } diff --git a/mm/compaction.c b/mm/compaction.c index 621508e0ecd5d..a88f7b893f80a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1028,7 +1028,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (!TestClearPageLRU(page)) goto isolate_fail_put; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(page_folio(page)); /* If we already hold the lock, we can skip some rechecking */ if (lruvec != locked) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2b79d1936c3d2..0cfd1890776ae 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1186,9 +1186,10 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) */ struct lruvec *lock_page_lruvec(struct page *page) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(folio); spin_lock(&lruvec->lru_lock); lruvec_memcg_debug(lruvec, page); @@ -1198,9 +1199,10 @@ struct lruvec *lock_page_lruvec(struct page *page) struct lruvec 
*lock_page_lruvec_irq(struct page *page) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(folio); spin_lock_irq(&lruvec->lru_lock); lruvec_memcg_debug(lruvec, page); @@ -1210,9 +1212,10 @@ struct lruvec *lock_page_lruvec_irq(struct page *page) struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(folio); spin_lock_irqsave(&lruvec->lru_lock, *flags); lruvec_memcg_debug(lruvec, page); diff --git a/mm/swap.c b/mm/swap.c index 11ff40104a2cd..4ba77fc8da4f8 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -315,7 +315,8 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) void lru_note_cost_page(struct page *page) { - lru_note_cost(mem_cgroup_page_lruvec(page), + struct folio *folio = page_folio(page); + lru_note_cost(folio_lruvec(folio), page_is_file_lru(page), thp_nr_pages(page)); } diff --git a/mm/workingset.c b/mm/workingset.c index 5ba3e42446fa6..e62c0f2084a26 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -396,6 +396,7 @@ void workingset_refault(struct page *page, void *shadow) */ void workingset_activation(struct page *page) { + struct folio *folio = page_folio(page); struct mem_cgroup *memcg; struct lruvec *lruvec; @@ -410,7 +411,7 @@ void workingset_activation(struct page *page) memcg = page_memcg_rcu(page); if (!mem_cgroup_disabled() && !memcg) goto out; - lruvec = mem_cgroup_page_lruvec(page); + lruvec = folio_lruvec(folio); workingset_age_nonresident(lruvec, thp_nr_pages(page)); out: rcu_read_unlock(); From acf5b5a4957cdfbbc35f5cf4718ec1bcf4a3a0e1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 28 Jun 2021 21:59:47 -0400 Subject: [PATCH 510/851] mm/memcg: Add folio_lruvec_lock() and similar functions These are the folio equivalents of lock_page_lruvec() and similar functions. 
Also convert lruvec_memcg_debug() to take a folio. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/memcontrol.h | 32 ++++++++++++++----------- mm/compaction.c | 2 +- mm/huge_memory.c | 5 ++-- mm/memcontrol.c | 48 ++++++++++++++++---------------------- mm/rmap.c | 2 +- mm/swap.c | 8 ++++--- mm/vmscan.c | 3 ++- 7 files changed, 50 insertions(+), 50 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0551c378c2431..0973c4a78bf1d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -768,15 +768,16 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); -struct lruvec *lock_page_lruvec(struct page *page); -struct lruvec *lock_page_lruvec_irq(struct page *page); -struct lruvec *lock_page_lruvec_irqsave(struct page *page, +struct lruvec *folio_lruvec_lock(struct folio *folio); +struct lruvec *folio_lruvec_lock_irq(struct folio *folio); +struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags); #ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page); +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio); #else -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } #endif @@ -1231,7 +1232,8 @@ static inline struct lruvec *folio_lruvec(struct folio *folio) return &pgdat->__lruvec; } -static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +static inline +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { } @@ -1261,26 +1263,26 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } -static inline struct lruvec *lock_page_lruvec(struct page *page) +static inline struct lruvec *folio_lruvec_lock(struct folio *folio) { - struct pglist_data *pgdat = page_pgdat(page); 
+ struct pglist_data *pgdat = folio_pgdat(folio); spin_lock(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } -static inline struct lruvec *lock_page_lruvec_irq(struct page *page) +static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irq(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } -static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page, +static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flagsp) { - struct pglist_data *pgdat = page_pgdat(page); + struct pglist_data *pgdat = folio_pgdat(folio); spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); return &pgdat->__lruvec; @@ -1537,6 +1539,7 @@ static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec) static inline struct lruvec *relock_page_lruvec_irq(struct page *page, struct lruvec *locked_lruvec) { + struct folio *folio = page_folio(page); if (locked_lruvec) { if (page_matches_lruvec(page, locked_lruvec)) return locked_lruvec; @@ -1544,13 +1547,14 @@ static inline struct lruvec *relock_page_lruvec_irq(struct page *page, unlock_page_lruvec_irq(locked_lruvec); } - return lock_page_lruvec_irq(page); + return folio_lruvec_lock_irq(folio); } /* Don't lock again iff page's lruvec locked */ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, struct lruvec *locked_lruvec, unsigned long *flags) { + struct folio *folio = page_folio(page); if (locked_lruvec) { if (page_matches_lruvec(page, locked_lruvec)) return locked_lruvec; @@ -1558,7 +1562,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, unlock_page_lruvec_irqrestore(locked_lruvec, *flags); } - return lock_page_lruvec_irqsave(page, flags); + return folio_lruvec_lock_irqsave(folio, flags); } #ifdef CONFIG_CGROUP_WRITEBACK diff --git a/mm/compaction.c b/mm/compaction.c index 
a88f7b893f80a..6f77577be2483 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1038,7 +1038,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, compact_lock_irqsave(&lruvec->lru_lock, &flags, cc); locked = lruvec; - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, page_folio(page)); /* Try get exclusive access under lock */ if (!skip_updated) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index ecb1fb1f5f3e1..763bf687ca92a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2431,7 +2431,8 @@ static void __split_huge_page_tail(struct page *head, int tail, static void __split_huge_page(struct page *page, struct list_head *list, pgoff_t end) { - struct page *head = compound_head(page); + struct folio *folio = page_folio(page); + struct page *head = &folio->page; struct lruvec *lruvec; struct address_space *swap_cache = NULL; unsigned long offset = 0; @@ -2450,7 +2451,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, } /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ - lruvec = lock_page_lruvec(head); + lruvec = folio_lruvec_lock(folio); for (i = nr - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0cfd1890776ae..1aeeb4437ffd5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1158,67 +1158,59 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, } #ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page) +void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) { struct mem_cgroup *memcg; if (mem_cgroup_disabled()) return; - memcg = page_memcg(page); + memcg = folio_memcg(folio); if (!memcg) - VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page); + VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != root_mem_cgroup, folio); else - VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page); + VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio); } #endif /** - * 
lock_page_lruvec - lock and return lruvec for a given page. - * @page: the page + * folio_lruvec_lock - lock and return lruvec for a given folio. + * @folio: Pointer to the folio. * * These functions are safe to use under any of the following conditions: - * - page locked - * - PageLRU cleared - * - lock_page_memcg() - * - page->_refcount is zero + * - folio locked + * - folio_test_lru false + * - folio_memcg_lock() + * - folio frozen (refcount of 0) */ -struct lruvec *lock_page_lruvec(struct page *page) +struct lruvec *folio_lruvec_lock(struct folio *folio) { - struct folio *folio = page_folio(page); - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = folio_lruvec(folio); spin_lock(&lruvec->lru_lock); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } -struct lruvec *lock_page_lruvec_irq(struct page *page) +struct lruvec *folio_lruvec_lock_irq(struct folio *folio) { - struct folio *folio = page_folio(page); - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = folio_lruvec(folio); spin_lock_irq(&lruvec->lru_lock); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } -struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags) +struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, + unsigned long *flags) { - struct folio *folio = page_folio(page); - struct lruvec *lruvec; + struct lruvec *lruvec = folio_lruvec(folio); - lruvec = folio_lruvec(folio); spin_lock_irqsave(&lruvec->lru_lock, *flags); - - lruvec_memcg_debug(lruvec, page); + lruvec_memcg_debug(lruvec, folio); return lruvec; } diff --git a/mm/rmap.c b/mm/rmap.c index b9eb5c12f3fe1..1df8683c4c4cc 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -33,7 +33,7 @@ * mapping->private_lock (in __set_page_dirty_buffers) * lock_page_memcg move_lock (in __set_page_dirty_buffers) * i_pages lock (widely used) - * lruvec->lru_lock (in lock_page_lruvec_irq) + * 
lruvec->lru_lock (in folio_lruvec_lock_irq) * inode->i_lock (in set_page_dirty's __mark_inode_dirty) * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty) * sb_lock (within inode_lock in fs/fs-writeback.c) diff --git a/mm/swap.c b/mm/swap.c index 4ba77fc8da4f8..6d0d2bfca48e9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -80,10 +80,11 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = { static void __page_cache_release(struct page *page) { if (PageLRU(page)) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; unsigned long flags; - lruvec = lock_page_lruvec_irqsave(page, &flags); + lruvec = folio_lruvec_lock_irqsave(folio, &flags); del_page_from_lru_list(page, lruvec); __clear_page_lru_flags(page); unlock_page_lruvec_irqrestore(lruvec, flags); @@ -372,11 +373,12 @@ static inline void activate_page_drain(int cpu) static void activate_page(struct page *page) { + struct folio *folio = page_folio(page); struct lruvec *lruvec; - page = compound_head(page); + page = &folio->page; if (TestClearPageLRU(page)) { - lruvec = lock_page_lruvec_irq(page); + lruvec = folio_lruvec_lock_irq(folio); __activate_page(page, lruvec); unlock_page_lruvec_irq(lruvec); SetPageLRU(page); diff --git a/mm/vmscan.c b/mm/vmscan.c index 4620df62f0ffa..0d48306d37dc8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1965,6 +1965,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, */ int isolate_lru_page(struct page *page) { + struct folio *folio = page_folio(page); int ret = -EBUSY; VM_BUG_ON_PAGE(!page_count(page), page); @@ -1974,7 +1975,7 @@ int isolate_lru_page(struct page *page) struct lruvec *lruvec; get_page(page); - lruvec = lock_page_lruvec_irq(page); + lruvec = folio_lruvec_lock_irq(folio); del_page_from_lru_list(page, lruvec); unlock_page_lruvec_irq(lruvec); ret = 0; From df506008767a8fc3a6aa95f9f80b7392fb9465fa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 29 Jun 2021 22:27:31 -0400 Subject: [PATCH 511/851] mm/memcg: Add 
folio_lruvec_relock_irq() and folio_lruvec_relock_irqsave() These are the folio equivalents of relock_page_lruvec_irq() and relock_page_lruvec_irqsave(). Also convert page_matches_lruvec() to folio_matches_lruvec(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/memcontrol.h | 17 ++++++++--------- mm/mlock.c | 3 ++- mm/swap.c | 11 +++++++---- mm/vmscan.c | 5 +++-- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0973c4a78bf1d..b6ad2a4357d34 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1529,19 +1529,19 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, } /* Test requires a stable page->memcg binding, see page_memcg() */ -static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec) +static inline bool folio_matches_lruvec(struct folio *folio, + struct lruvec *lruvec) { - return lruvec_pgdat(lruvec) == page_pgdat(page) && - lruvec_memcg(lruvec) == page_memcg(page); + return lruvec_pgdat(lruvec) == folio_pgdat(folio) && + lruvec_memcg(lruvec) == folio_memcg(folio); } /* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *relock_page_lruvec_irq(struct page *page, +static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio, struct lruvec *locked_lruvec) { - struct folio *folio = page_folio(page); if (locked_lruvec) { - if (page_matches_lruvec(page, locked_lruvec)) + if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irq(locked_lruvec); @@ -1551,12 +1551,11 @@ static inline struct lruvec *relock_page_lruvec_irq(struct page *page, } /* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page, +static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio, struct lruvec *locked_lruvec, unsigned long *flags) { - struct folio *folio = 
page_folio(page); if (locked_lruvec) { - if (page_matches_lruvec(page, locked_lruvec)) + if (folio_matches_lruvec(folio, locked_lruvec)) return locked_lruvec; unlock_page_lruvec_irqrestore(locked_lruvec, *flags); diff --git a/mm/mlock.c b/mm/mlock.c index 16d2ee160d43c..e263d62ae2d09 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -271,6 +271,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) /* Phase 1: page isolation */ for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); if (TestClearPageMlocked(page)) { /* @@ -278,7 +279,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) * so we can spare the get_page() here. */ if (TestClearPageLRU(page)) { - lruvec = relock_page_lruvec_irq(page, lruvec); + lruvec = folio_lruvec_relock_irq(folio, lruvec); del_page_from_lru_list(page, lruvec); continue; } else diff --git a/mm/swap.c b/mm/swap.c index 6d0d2bfca48e9..aa9c32b714c5b 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -211,12 +211,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); /* block memcg migration during page moving between lru */ if (!TestClearPageLRU(page)) continue; - lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); (*move_fn)(page, lruvec); SetPageLRU(page); @@ -907,6 +908,7 @@ void release_pages(struct page **pages, int nr) for (i = 0; i < nr; i++) { struct page *page = pages[i]; + struct folio *folio = page_folio(page); /* * Make sure the IRQ-safe lock-holding time does not get @@ -918,7 +920,7 @@ void release_pages(struct page **pages, int nr) lruvec = NULL; } - page = compound_head(page); + page = &folio->page; if (is_huge_zero_page(page)) continue; @@ -957,7 +959,7 @@ void release_pages(struct page **pages, int nr) if (PageLRU(page)) { struct lruvec *prev_lruvec 
= lruvec; - lruvec = relock_page_lruvec_irqsave(page, lruvec, + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); if (prev_lruvec != lruvec) lock_batch = 0; @@ -1061,8 +1063,9 @@ void __pagevec_lru_add(struct pagevec *pvec) for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); - lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); + lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); __pagevec_lru_add_fn(page, lruvec); } if (lruvec) diff --git a/mm/vmscan.c b/mm/vmscan.c index 0d48306d37dc8..7a2f25b904d9b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2075,7 +2075,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec, * All pages were isolated from the same lruvec (and isolation * inhibits memcg migration). */ - VM_BUG_ON_PAGE(!page_matches_lruvec(page, lruvec), page); + VM_BUG_ON_PAGE(!folio_matches_lruvec(page_folio(page), lruvec), page); add_page_to_lru_list(page, lruvec); nr_pages = thp_nr_pages(page); nr_moved += nr_pages; @@ -4514,6 +4514,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) for (i = 0; i < pvec->nr; i++) { struct page *page = pvec->pages[i]; + struct folio *folio = page_folio(page); int nr_pages; if (PageTransTail(page)) @@ -4526,7 +4527,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) if (!TestClearPageLRU(page)) continue; - lruvec = relock_page_lruvec_irq(page, lruvec); + lruvec = folio_lruvec_relock_irq(folio, lruvec); if (page_evictable(page) && PageUnevictable(page)) { del_page_from_lru_list(page, lruvec); ClearPageUnevictable(page); From ed8e27c08463c466bbe93a86f9b14f8680a589d3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 17:19:13 -0400 Subject: [PATCH 512/851] mm/workingset: Convert workingset_activation to take a folio This function already assumed it was being passed a head page. 
No real change here, except that thp_nr_pages() compiles away on kernels with THP compiled out while folio_nr_pages() is always present. Also convert page_memcg_rcu() to folio_memcg_rcu(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/memcontrol.h | 20 +++++++++++--------- include/linux/swap.h | 2 +- mm/swap.c | 2 +- mm/workingset.c | 11 ++++------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b6ad2a4357d34..7bc6d5a6b7391 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -461,19 +461,21 @@ static inline struct mem_cgroup *page_memcg(struct page *page) } /* - * page_memcg_rcu - locklessly get the memory cgroup associated with a page - * @page: a pointer to the page struct + * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. * - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function assumes that the page is known to have a + * This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function - * against some type of pages, e.g. slab pages or ex-slab pages. + * against some type of folios, e.g. slab folios or ex-slab folios. + * + * Return: A pointer to the memory cgroup associated with the folio, + * or NULL. 
*/ -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { - unsigned long memcg_data = READ_ONCE(page->memcg_data); + unsigned long memcg_data = READ_ONCE(folio->memcg_data); - VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); WARN_ON_ONCE(!rcu_read_lock_held()); if (memcg_data & MEMCG_DATA_KMEM) { @@ -1129,7 +1131,7 @@ static inline struct mem_cgroup *page_memcg(struct page *page) return NULL; } -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { WARN_ON_ONCE(!rcu_read_lock_held()); return NULL; diff --git a/include/linux/swap.h b/include/linux/swap.h index 8394716a002b5..989d8f78c256a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -330,7 +330,7 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void workingset_refault(struct page *page, void *shadow); -void workingset_activation(struct page *page); +void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ void workingset_update_node(struct xa_node *node); diff --git a/mm/swap.c b/mm/swap.c index aa9c32b714c5b..85969b36b6360 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -451,7 +451,7 @@ void mark_page_accessed(struct page *page) else __lru_cache_activate_page(page); ClearPageReferenced(page); - workingset_activation(page); + workingset_activation(page_folio(page)); } if (page_is_idle(page)) clear_page_idle(page); diff --git a/mm/workingset.c b/mm/workingset.c index e62c0f2084a26..39bb60d502173 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -392,13 +392,11 @@ void workingset_refault(struct page *page, void *shadow) /** * workingset_activation - note a page 
activation - * @page: page that is being activated + * @folio: Folio that is being activated. */ -void workingset_activation(struct page *page) +void workingset_activation(struct folio *folio) { - struct folio *folio = page_folio(page); struct mem_cgroup *memcg; - struct lruvec *lruvec; rcu_read_lock(); /* @@ -408,11 +406,10 @@ void workingset_activation(struct page *page) * XXX: See workingset_refault() - this should return * root_mem_cgroup even for !CONFIG_MEMCG. */ - memcg = page_memcg_rcu(page); + memcg = folio_memcg_rcu(folio); if (!mem_cgroup_disabled() && !memcg) goto out; - lruvec = folio_lruvec(folio); - workingset_age_nonresident(lruvec, thp_nr_pages(page)); + workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio)); out: rcu_read_unlock(); } From 1b2fb6101c7160f90d5b13612df03bf79803cb7e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 10:55:27 -0400 Subject: [PATCH 513/851] mm: Add folio_pfn() This is the folio equivalent of page_to_pfn(). Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Mike Rapoport --- include/linux/mm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 797dd44344429..a0db9c0ba18b0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1623,6 +1623,20 @@ static inline unsigned long page_to_section(const struct page *page) } #endif +/** + * folio_pfn - Return the Page Frame Number of a folio. + * @folio: The folio. + * + * A folio may contain multiple pages. The pages have consecutive + * Page Frame Numbers. + * + * Return: The Page Frame Number of the first page in the folio. 
+ */ +static inline unsigned long folio_pfn(struct folio *folio) +{ + return page_to_pfn(&folio->page); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_pinnable_page(struct page *page) From 3b7a2acaa70dd1d941c893b9e7c6e5f2eb227f03 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 11:17:34 -0400 Subject: [PATCH 514/851] mm: Add folio_raw_mapping() Convert __page_rmapping to folio_raw_mapping and move it to mm/internal.h. It's only a couple of instructions (load and mask), so it's definitely going to be cheaper to inline it than call it. Leave page_rmapping out of line. Signed-off-by: Matthew Wilcox (Oracle) --- mm/internal.h | 7 +++++++ mm/util.c | 20 ++++---------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 1a8851b730310..fa31a7f0ed798 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -34,6 +34,13 @@ void page_writeback_init(void); +static inline void *folio_raw_mapping(struct folio *folio) +{ + unsigned long mapping = (unsigned long)folio->mapping; + + return (void *)(mapping & ~PAGE_MAPPING_FLAGS); +} + vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); diff --git a/mm/util.c b/mm/util.c index e8c12350b3ebb..d0aa1d9c811e9 100644 --- a/mm/util.c +++ b/mm/util.c @@ -635,21 +635,10 @@ void kvfree_sensitive(const void *addr, size_t len) } EXPORT_SYMBOL(kvfree_sensitive); -static inline void *__page_rmapping(struct page *page) -{ - unsigned long mapping; - - mapping = (unsigned long)page->mapping; - mapping &= ~PAGE_MAPPING_FLAGS; - - return (void *)mapping; -} - /* Neutral page->mapping pointer to address_space or anon_vma or other */ void *page_rmapping(struct page *page) { - page = compound_head(page); - return __page_rmapping(page); + return folio_raw_mapping(page_folio(page)); } /** @@ -680,13 +669,12 @@ EXPORT_SYMBOL(folio_mapped); struct anon_vma 
*page_anon_vma(struct page *page) { - unsigned long mapping; + struct folio *folio = page_folio(page); + unsigned long mapping = (unsigned long)folio->mapping; - page = compound_head(page); - mapping = (unsigned long)page->mapping; if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) return NULL; - return __page_rmapping(page); + return (void *)(mapping - PAGE_MAPPING_ANON); } /** From 117a27c2743ee3fe7cef9e1a4f1162d9f279c55b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 16 Dec 2020 11:06:33 -0500 Subject: [PATCH 515/851] mm: Add flush_dcache_folio() This is a default implementation which calls flush_dcache_page() on each page in the folio. If architectures can do better, they should implement their own version of it. Signed-off-by: Matthew Wilcox (Oracle) --- Documentation/core-api/cachetlb.rst | 6 ++++++ arch/arm/include/asm/cacheflush.h | 1 + arch/nds32/include/asm/cacheflush.h | 1 + include/asm-generic/cacheflush.h | 6 ++++++ mm/util.c | 13 +++++++++++++ 5 files changed, 27 insertions(+) diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst index fe4290e267296..29682f69a9152 100644 --- a/Documentation/core-api/cachetlb.rst +++ b/Documentation/core-api/cachetlb.rst @@ -325,6 +325,12 @@ maps this page at its virtual address. dirty. Again, see sparc64 for examples of how to deal with this. + ``void flush_dcache_folio(struct folio *folio)`` + This function is called under the same circumstances as + flush_dcache_page(). It allows the architecture to + optimise for flushing the entire folio of pages instead + of flushing one page at a time. 
+ ``void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long user_vaddr, void *dst, void *src, int len)`` ``void copy_from_user_page(struct vm_area_struct *vma, struct page *page, diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 2e24e765e6d3a..23bf823376e19 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -290,6 +290,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +void flush_dcache_folio(struct folio *folio); static inline void flush_kernel_vmap_range(void *addr, int size) { diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 7d6824f7c0e8d..b5037981f023b 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -27,6 +27,7 @@ void flush_cache_vunmap(unsigned long start, unsigned long end); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 void flush_dcache_page(struct page *page); +void flush_dcache_folio(struct folio *folio); void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, void *src, int len); void copy_from_user_page(struct vm_area_struct *vma, struct page *page, diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h index 4a674db4e1fa5..fedc0dfa4877c 100644 --- a/include/asm-generic/cacheflush.h +++ b/include/asm-generic/cacheflush.h @@ -49,9 +49,15 @@ static inline void flush_cache_page(struct vm_area_struct *vma, static inline void flush_dcache_page(struct page *page) { } + +static inline void flush_dcache_folio(struct folio *folio) { } #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO #endif +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio); +#endif #ifndef flush_dcache_mmap_lock static inline void 
flush_dcache_mmap_lock(struct address_space *mapping) diff --git a/mm/util.c b/mm/util.c index d0aa1d9c811e9..149537120a918 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1057,3 +1057,16 @@ void page_offline_end(void) up_write(&page_offline_rwsem); } EXPORT_SYMBOL(page_offline_end); + +#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO +void flush_dcache_folio(struct folio *folio) +{ + unsigned int n = folio_nr_pages(folio); + + do { + n--; + flush_dcache_page(folio_page(folio, n)); + } while (n); +} +EXPORT_SYMBOL(flush_dcache_folio); +#endif From 3508e35dec5c85080e6052cfbe91003fbd4b2b79 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 30 Dec 2020 10:21:39 -0500 Subject: [PATCH 516/851] mm: Add kmap_local_folio() This allows us to map a portion of a folio. Callers can only expect to access up to the next page boundary. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/highmem-internal.h | 11 ++++++++++ include/linux/highmem.h | 37 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 7902c7d8b55f9..d5d6f930ae1dd 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -73,6 +73,12 @@ static inline void *kmap_local_page(struct page *page) return __kmap_local_page_prot(page, kmap_prot); } +static inline void *kmap_local_folio(struct folio *folio, size_t offset) +{ + struct page *page = folio_page(folio, offset / PAGE_SIZE); + return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE; +} + static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) { return __kmap_local_page_prot(page, prot); @@ -160,6 +166,11 @@ static inline void *kmap_local_page(struct page *page) return page_address(page); } +static inline void *kmap_local_folio(struct folio *folio, size_t offset) +{ + return page_address(&folio->page) + offset; +} + static inline void *kmap_local_page_prot(struct 
page *page, pgprot_t prot) { return kmap_local_page(page); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8c6e8e996c875..861ad00fb32a8 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -96,6 +96,43 @@ static inline void kmap_flush_unused(void); */ static inline void *kmap_local_page(struct page *page); +/** + * kmap_local_folio - Map a page in this folio for temporary usage + * @folio: The folio containing the page. + * @offset: The byte offset within the folio which identifies the page. + * + * Requires careful handling when nesting multiple mappings because the map + * management is stack based. The unmap has to be in the reverse order of + * the map operation:: + * + * addr1 = kmap_local_folio(folio1, offset1); + * addr2 = kmap_local_folio(folio2, offset2); + * ... + * kunmap_local(addr2); + * kunmap_local(addr1); + * + * Unmapping addr1 before addr2 is invalid and causes malfunction. + * + * Contrary to kmap() mappings the mapping is only valid in the context of + * the caller and cannot be handed to other contexts. + * + * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the + * virtual address of the direct mapping. Only real highmem pages are + * temporarily mapped. + * + * While it is significantly faster than kmap() for the highmem case it + * comes with restrictions about the pointer validity. Only use when really + * necessary. + * + * On HIGHMEM enabled systems mapping a highmem page has the side effect of + * disabling migration in order to keep the virtual address stable across + * preemption. No caller of kmap_local_folio() can rely on this side effect. + * + * Context: Can be invoked from any context. + * Return: The virtual address of @offset. + */ +static inline void *kmap_local_folio(struct folio *folio, size_t offset); + /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! 
* @page: Pointer to the page to be mapped From fb85f280bf402ca1fca95f7fcbf4968e0a3a63d2 Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Tue, 2 Mar 2021 19:25:56 +0800 Subject: [PATCH 517/851] drm/amd/display: Re-enable "Guard ASSR with internal display flag" [Why] ASSR enabling was only considering capability declared in DPCD. We also need to check whether the connector is internal and not just any display. [How] ASSR enabling needs to check both DPCD capability and internal display flag passed from BIOS. Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Stylon Wang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 335018f0f0c3f..a42d3dc2e8e91 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1752,6 +1752,38 @@ enum link_training_result dc_link_dp_perform_link_training( return status; } +static enum dp_panel_mode try_enable_assr(struct dc_stream_state *stream) +{ + struct dc_link *link = stream->link; + enum dp_panel_mode panel_mode = dp_get_panel_mode(link); +#ifdef CONFIG_DRM_AMD_DC_HDCP + struct cp_psp *cp_psp = &stream->ctx->cp_psp; +#endif + + /* ASSR must be supported on the panel */ + if (panel_mode == DP_PANEL_MODE_DEFAULT) + return panel_mode; + + /* eDP or internal DP only */ + if (link->connector_signal != SIGNAL_TYPE_EDP && + !(link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && + link->is_internal_display)) + return DP_PANEL_MODE_DEFAULT; + +#ifdef CONFIG_DRM_AMD_DC_HDCP + if (cp_psp && cp_psp->funcs.enable_assr) { + if (!cp_psp->funcs.enable_assr(cp_psp->handle, link)) { + /* since eDP implies ASSR on, change panel + * mode to disable ASSR + */ + panel_mode = DP_PANEL_MODE_DEFAULT; + } + } else + panel_mode = DP_PANEL_MODE_DEFAULT; +#endif +
return panel_mode; +} + bool perform_link_training_with_retries( const struct dc_link_settings *link_setting, bool skip_video_pattern, @@ -1764,7 +1796,7 @@ bool perform_link_training_with_retries( uint8_t delay_between_attempts = LINK_TRAINING_RETRY_DELAY; struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; - enum dp_panel_mode panel_mode = dp_get_panel_mode(link); + enum dp_panel_mode panel_mode; struct link_encoder *link_enc; enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0; struct dc_link_settings current_setting = *link_setting; @@ -1800,23 +1832,11 @@ bool perform_link_training_with_retries( msleep(delay_dp_power_up_in_ms); } -#ifdef CONFIG_DRM_AMD_DC_HDCP - if (panel_mode == DP_PANEL_MODE_EDP) { - struct cp_psp *cp_psp = &stream->ctx->cp_psp; - - if (cp_psp && cp_psp->funcs.enable_assr) { - if (!cp_psp->funcs.enable_assr(cp_psp->handle, link)) { - /* since eDP implies ASSR on, change panel - * mode to disable ASSR - */ - panel_mode = DP_PANEL_MODE_DEFAULT; - } - } else - panel_mode = DP_PANEL_MODE_DEFAULT; - } -#endif - + panel_mode = try_enable_assr(stream); dp_set_panel_mode(link, panel_mode); + DC_LOG_DETECTION_DP_CAPS("Link: %d ASSR enabled: %d\n", + link->link_index, + panel_mode != DP_PANEL_MODE_DEFAULT); if (link->aux_access_disabled) { dc_link_dp_perform_link_training_skip_aux(link, &current_setting); From abc392099e2497345da30a24aac265bb18162b3b Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Mon, 28 Jun 2021 21:41:08 -0400 Subject: [PATCH 518/851] drm/amd/display: add debug print for DCC validation failure [Why&How] Print a debug message when dcc validation fails in the display driver. Most DCC enablement related errors are from userspace. Adding a debug print in case of a failure from display driver will aid quicker triage.
Reviewed-by: Harry Wentland Acked-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 36d8d1d5a30f0..818a825539030 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4747,7 +4747,7 @@ fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, const bool force_disable_dcc) { const uint64_t modifier = afb->base.modifier; - int ret; + int ret = 0; fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier); @@ -4765,9 +4765,9 @@ fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); if (ret) - return ret; + drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret); - return 0; + return ret; } static int From b96e0753c1fb274a8890ce7cc7b1572ad92c994b Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Mon, 28 Jun 2021 21:20:18 -0400 Subject: [PATCH 519/851] drm/amd/display: Reduce delay when sink device not able to ACK 00340h write [Why] Theoretically, per DP 1.4a spec, sink device needs to AUX_ACK 00340h write. However, due to hardware limitation, some sink devices have no 00340h dpcd address at all. As a result, the sink side fails to reply with an ACK, which in turn causes the source side to keep retrying the DPCD write to 00340h. This results in significant delay when DPCD 00340h write is triggered (e.g. at S3 resume). [How] Check whether sink device could ACK on DPCD 00340h write on boot. If sink device fails to ACK, then remember that, so we won't write to DPCD 00340h later on.
There will be a drm.debug KMS level message to inform user once a 00340h DPCD write is skipped on purpose. Reviewed-by: Nikola Cornij Acked-by: Rodrigo Siqueira Signed-off-by: Zhan Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 14 +++++++++++--- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 655b48c5ef37e..f68a0d9543f4c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1665,6 +1665,12 @@ struct dc_link *link_create(const struct link_init_data *init_params) if (false == dc_link_construct(link, init_params)) goto construct_fail; + /* + * Must use preferred_link_setting, not reported_link_cap or verified_link_cap, + * since struct preferred_link_setting won't be reset after S3. 
+ */ + link->preferred_link_setting.dpcd_source_device_specific_field_support = true; + return link; construct_fail: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index a42d3dc2e8e91..edb5a8f4a6cb8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4804,10 +4804,18 @@ void dpcd_set_source_specific_data(struct dc_link *link) uint8_t hblank_size = (uint8_t)link->dc->caps.min_horizontal_blanking_period; - result_write_min_hblank = core_link_write_dpcd(link, - DP_SOURCE_MINIMUM_HBLANK_SUPPORTED, (uint8_t *)(&hblank_size), - sizeof(hblank_size)); + if (link->preferred_link_setting.dpcd_source_device_specific_field_support) { + result_write_min_hblank = core_link_write_dpcd(link, + DP_SOURCE_MINIMUM_HBLANK_SUPPORTED, (uint8_t *)(&hblank_size), + sizeof(hblank_size)); + + if (result_write_min_hblank == DC_ERROR_UNEXPECTED) + link->preferred_link_setting.dpcd_source_device_specific_field_support = false; + } else { + DC_LOG_DC("Sink device does not support 00340h DPCD write. 
Skipping on purpose.\n"); + } } + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, WPP_BIT_FLAG_DC_DETECTION_DP_CAPS, "result=%u link_index=%u enum dce_version=%d DPCD=0x%04X min_hblank=%u branch_dev_id=0x%x branch_dev_name='%c%c%c%c%c%c'", diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 1948cd9427d7e..4f54bde1bb1c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -109,6 +109,7 @@ struct dc_link_settings { enum dc_link_spread link_spread; bool use_link_rate_set; uint8_t link_rate_set; + bool dpcd_source_device_specific_field_support; }; struct dc_lane_settings { From 82d6f32516f6ebed1b624ca0c7616dbd829395df Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Wed, 30 Jun 2021 10:42:23 -0400 Subject: [PATCH 520/851] drm/amd/display: Add copyright notice to new files Reviewed-by: Shahin Khayyer Acked-by: Rodrigo Siqueira Signed-off-by: Wesley Chalmers Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/core/dc_link_dpcd.c | 25 +++++++++++++++++++ .../gpu/drm/amd/display/dc/inc/link_dpcd.h | 25 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c index fe234760a0f59..72970e49800a6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c @@ -1,3 +1,28 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + #include #include #include diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h b/drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h index d4d52ef1b1655..3f12b1600d2af 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h @@ -1,3 +1,28 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + #ifndef __LINK_DPCD_H__ #define __LINK_DPCD_H__ #include From 803ac8c05b1b79056033962439b9f60d04bb9554 Mon Sep 17 00:00:00 2001 From: Jake Wang Date: Wed, 30 Jun 2021 13:55:50 -0400 Subject: [PATCH 521/851] drm/amd/display: Fixed hardware power down bypass during headless boot [Why] During headless boot, DIG may be on which causes HW/SW discrepancies. To avoid this we power down hardware on boot if DIG is turned on. With introduction of multiple eDP, hardware power down is being bypassed under certain conditions. [How] Fixed hardware power down bypass, and ensured hardware will power down if DIG is on and seamless boot is not enabled. 
Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Jake Wang Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 27 +++++++++---------- .../drm/amd/display/dc/dcn30/dcn30_hwseq.c | 25 ++++++++--------- .../drm/amd/display/dc/dcn31/dcn31_hwseq.c | 5 +++- 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index f0f234ee58279..89e68372992fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1502,25 +1502,22 @@ void dcn10_init_hw(struct dc *dc) void dcn10_power_down_on_boot(struct dc *dc) { struct dc_link *edp_links[MAX_NUM_EDP]; - struct dc_link *edp_link; + struct dc_link *edp_link = NULL; int edp_num; int i = 0; get_edp_links(dc, edp_links, &edp_num); - - if (edp_num) { - for (i = 0; i < edp_num; i++) { - edp_link = edp_links[i]; - if (edp_link->link_enc->funcs->is_dig_enabled && - edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && - dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && - dc->hwss.edp_power_control) { - dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); - dc->hwss.edp_power_control(edp_link, false); - } - } + if (edp_num) + edp_link = edp_links[0]; + + if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && + dc->hwseq->funcs.edp_backlight_control && + dc->hwss.power_down && + dc->hwss.edp_power_control) { + dc->hwseq->funcs.edp_backlight_control(edp_link, false); + dc->hwss.power_down(dc); + dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 
c68e3a708a335..2e8ab9775fa33 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -580,22 +580,19 @@ void dcn30_init_hw(struct dc *dc) */ if (dc->config.power_down_display_on_boot) { struct dc_link *edp_links[MAX_NUM_EDP]; - struct dc_link *edp_link; + struct dc_link *edp_link = NULL; get_edp_links(dc, edp_links, &edp_num); - if (edp_num) { - for (i = 0; i < edp_num; i++) { - edp_link = edp_links[i]; - if (edp_link->link_enc->funcs->is_dig_enabled && - edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && - dc->hwss.edp_backlight_control && - dc->hwss.power_down && - dc->hwss.edp_power_control) { - dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); - dc->hwss.edp_power_control(edp_link, false); - } - } + if (edp_num) + edp_link = edp_links[0]; + if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && + dc->hwss.edp_backlight_control && + dc->hwss.power_down && + dc->hwss.edp_power_control) { + dc->hwss.edp_backlight_control(edp_link, false); + dc->hwss.power_down(dc); + dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 6ac6faf0c533b..83f7904630e6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -226,6 +226,7 @@ void dcn31_init_hw(struct dc *dc) if (dc->config.power_down_display_on_boot) { struct dc_link *edp_links[MAX_NUM_EDP]; struct dc_link *edp_link; + bool power_down = false; get_edp_links(dc, edp_links, &edp_num); if (edp_num) { @@ -239,9 +240,11 @@ void dcn31_init_hw(struct dc *dc) dc->hwss.edp_backlight_control(edp_link, false); dc->hwss.power_down(dc); dc->hwss.edp_power_control(edp_link, false); + 
power_down = true; } } - } else { + } + if (!power_down) { for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; From d994e633e69a7b066dae81c627395728e6efafc4 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 24 Jun 2021 11:05:42 -0400 Subject: [PATCH 522/851] drm/amd/display: Fix comparison error in dcn21 DML [why] A comparison error made it possible to not iterate through all the specified prefetch modes. [how] Correct "<" to "<=" Reviewed-by: Dmytro Laktyushkin Reviewed-by: Yongqiang Sun Acked-by: Rodrigo Siqueira Signed-off-by: Victor Lu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index 20a8cd4eb8b90..506797c721ed9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -4890,7 +4890,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0] - || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); + || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode)); if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0]; From f5a5d03daba62761b2808cd0cb46cf82f3ac91fc Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 5 Jul 2021 20:53:23 -0400 Subject: [PATCH 523/851] drm/amd/display: 3.2.144 Reviewed-by: Shahin Khayyer Acked-by: Rodrigo Siqueira Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 282bd950ac914..0ec07617cbdbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -45,7 +45,7 @@ /* forward declaration */ struct aux_payload; -#define DC_VER "3.2.143" +#define DC_VER "3.2.144" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 1cb6006c8b8ec6878c19beb8c07434ccd0b259a5 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Tue, 6 Jul 2021 14:52:12 -0400 Subject: [PATCH 524/851] drm/amd/display: Enable eDP ILR on DCN2.1 [WHY] Enable feature for 21.40 Reviewed-by: Sung Lee Acked-by: Rodrigo Siqueira Signed-off-by: Michael Strauss Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index f3d98e3ba6243..a5dd97a2c5a39 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -883,7 +883,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_48mhz_pwrdwn = false, .usbc_combo_phy_reset_wa = true, .dmub_command_table = true, - .use_max_lb = true + .use_max_lb = true, + .optimize_edp_link_rate = true }; static const struct dc_debug_options debug_defaults_diags = { From fc6f5cc75147deac6991145b2ada8dccea972c4e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 7 Jul 2021 13:19:14 -0400 Subject: [PATCH 525/851] drm/amd/display: Fix max vstartup calculation for modes with borders [Why] Vertical and horizontal borders in timings are treated as increasing the active area - vblank and hblank actually shrink. Our input into DML does not include these borders so it incorrectly assumes it has more time than available for vstartup and tmdl calculations for some modes with borders. 
An example of such a timing would be 640x480@72Hz: h_total: 832 h_border_left: 8 h_addressable: 640 h_border_right: 8 h_front_porch: 16 h_sync_width: 40 v_total: 520 v_border_top: 8 v_addressable: 480 v_border_bottom: 8 v_front_porch: 1 v_sync_width: 3 pix_clk_100hz: 315000 [How] Include borders as part of destination vactive/hactive. This changes DCN20+ so it has wide impact, but the destination vactive and hactive are only really used for vstartup calculation anyway. Most modes do not have vertical or horizontal borders. Reviewed-by: Dmytro Laktyushkin Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index ac981aa92d7de..8bf43597c616e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2093,8 +2093,10 @@ int dcn20_populate_dml_pipes_from_context( - timing->v_border_bottom; pipes[pipe_cnt].pipe.dest.htotal = timing->h_total; pipes[pipe_cnt].pipe.dest.vtotal = v_total; - pipes[pipe_cnt].pipe.dest.hactive = timing->h_addressable; - pipes[pipe_cnt].pipe.dest.vactive = timing->v_addressable; + pipes[pipe_cnt].pipe.dest.hactive = + timing->h_addressable + timing->h_border_left + timing->h_border_right; + pipes[pipe_cnt].pipe.dest.vactive = + timing->v_addressable + timing->v_border_top + timing->v_border_bottom; pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE; pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0; if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) From e09811e7efef85f642d19d88c7cf227a252e0f05 Mon Sep 17 00:00:00 2001 From: Bindu Ramamurthy Date: Thu, 27 May 2021 10:11:32 -0400 Subject: [PATCH 526/851] drm/amd/display: Populate socclk entries for
dcn3.02/3.03 [Why] Initialize socclk entries in bandwidth params for dcn302, dcn303. [How] Fetch the socclk values from smu for the DPM levels; for the DPM levels where smu returns 0, the previous level's value is reported. Reviewed-by: Roman Li Acked-by: Rodrigo Siqueira Signed-off-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 4 ++++ drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 7 +++++-- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 7 +++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index b98cc315305cb..1861a147a7fa1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -190,6 +190,10 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, &num_levels); + /* SOCCLK */ + dcn3_init_single_clock(clk_mgr, PPCLK_SOCCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz, + &num_levels); // DPREFCLK ???
/* DISPCLK */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 16a75ba0ca824..d65c097333a46 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1399,10 +1399,13 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + /* FCLK, PHYCLK_D18, DSCCLK */ dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[0].socclk_mhz; dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz; } /* re-init DML with updated bb */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 34b89464ae022..f8b84722a389c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1327,10 +1327,13 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_03_soc.clock_limits[i].socclk_mhz = 
dcn3_03_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + /* FCLK, PHYCLK_D18, DSCCLK */ dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[0].socclk_mhz; dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz; } /* re-init DML with updated bb */ From 5b47adb2440116e833a53b7ef3cb27fe058f3c73 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 7 Jul 2021 16:38:57 -0400 Subject: [PATCH 527/851] drm/amd/display: Query VCO frequency from register for DCN3.1 [Why] Hardcoding the VCO frequency isn't correct since we don't own or control the value. In the case where the hardcode is also missing we can't lightup display. [How] Query from the CLK register instead. Update the DFS frequency to be able to compute the VCO frequency. 
Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 43 ++++++++++++++- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h | 54 ------------------- 2 files changed, 42 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 7b7d884d58be0..d15c628a2ab0a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -48,6 +48,21 @@ #include "dc_dmub_srv.h" +#include "yellow_carp_offset.h" + +#define regCLK1_CLK_PLL_REQ 0x0237 +#define regCLK1_CLK_PLL_REQ_BASE_IDX 0 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define REG(reg_name) \ + (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) + #define TO_CLK_MGR_DCN31(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn31, base) @@ -229,7 +244,32 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) { - return 0; + /* get FbMult value */ + struct fixed31_32 pll_req; + unsigned int fbmult_frac_val = 0; + unsigned int fbmult_int_val = 0; + + /* + * Register value of fbmult is in 8.16 format, we are converting to 31.32 + * to leverage the fix point operations available in driver + */ + + REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/ + REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */ + + pll_req = dc_fixpt_from_int(fbmult_int_val); + + /* + * since fractional part is only 16 bit 
in register definition but is 32 bit + * in our fix point definiton, need to shift left by 16 to obtain correct value + */ + pll_req.value |= fbmult_frac_val << 16; + + /* multiply by REFCLK period */ + pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz); + + /* integer part is now VCO frequency in kHz */ + return dc_fixpt_floor(pll_req); } static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base) @@ -592,6 +632,7 @@ void dcn31_clk_mgr_construct( clk_mgr->base.dprefclk_ss_percentage = 0; clk_mgr->base.dprefclk_ss_divider = 1000; clk_mgr->base.ss_on_dprefclk = false; + clk_mgr->base.dfs_ref_freq_khz = 48000; clk_mgr->smu_wm_set.wm_set = (struct dcn31_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h index cc21cf75eafd4..f8f100535526d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h @@ -27,60 +27,6 @@ #define __DCN31_CLK_MGR_H__ #include "clk_mgr_internal.h" -//CLK1_CLK_PLL_REQ -#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L -//CLK1_CLK0_DFS_CNTL -#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT 0x0 -#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK 0x0000007FL -/*DPREF clock related*/ -#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 
0x0 -#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL - -//CLK3_0_CLK3_CLK_PLL_REQ -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L - -#define mmCLK0_CLK3_DFS_CNTL 0x16C60 -#define mmCLK00_CLK0_CLK3_DFS_CNTL 0x16C60 -#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E60 -#define mmCLK02_CLK0_CLK3_DFS_CNTL 0x17060 -#define mmCLK03_CLK0_CLK3_DFS_CNTL 0x17260 - -#define mmCLK0_CLK_PLL_REQ 0x16C10 -#define mmCLK00_CLK0_CLK_PLL_REQ 0x16C10 -#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E10 -#define mmCLK02_CLK0_CLK_PLL_REQ 0x17010 -#define mmCLK03_CLK0_CLK_PLL_REQ 0x17210 - -#define mmCLK1_CLK_PLL_REQ 0x1B00D -#define mmCLK10_CLK1_CLK_PLL_REQ 0x1B00D -#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D -#define mmCLK12_CLK1_CLK_PLL_REQ 0x1B40D -#define mmCLK13_CLK1_CLK_PLL_REQ 0x1B60D - -#define mmCLK2_CLK_PLL_REQ 0x17E0D - -/*AMCLK*/ -#define mmCLK11_CLK1_CLK0_DFS_CNTL 0x1B23F -#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D -#endif - struct dcn31_watermarks; struct dcn31_smu_watermark_set { From ade2d4987f53458afaa21c54be9cbe6bbf61fb04 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 8 Jul 2021 12:59:59 -0400 Subject: [PATCH 528/851] drm/amd/display: Update bounding box for DCN3.1 [Why & How] We're missing a default value for dram_channel_width_bytes in the DCN3.1 SOC bounding box and we don't currently have the interface in place to query the actual value from VBIOS. Put in a hardcoded default until we have the interface in place. 
Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 3fe0aac4aaa67..38c010afade15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -220,6 +220,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { .sr_exit_z8_time_us = 402.0, .sr_enter_plus_exit_z8_time_us = 520.0, .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, From cc56242c7c47e95301466f1426a8fe939caa1925 Mon Sep 17 00:00:00 2001 From: Camille Cho Date: Thu, 8 Jul 2021 18:28:37 +0800 Subject: [PATCH 529/851] drm/amd/display: Only set default brightness for OLED [Why] We used to unconditionally set backlight path as AUX for panels capable of backlight adjustment via DPCD in set default brightness. [How] This should be limited to OLED panel only since we control backlight via PWM path for SDR mode in LCD HDR panel. 
Reviewed-by: Krunoslav Kovac Acked-by: Rodrigo Siqueira Signed-off-by: Camille Cho Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index edb5a8f4a6cb8..8b35cd9d4c01d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4935,9 +4935,7 @@ bool dc_link_set_default_brightness_aux(struct dc_link *link) { uint32_t default_backlight; - if (link && - (link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 || - link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) { + if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!dc_link_read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; // if < 5 nits or > 5000, it might be wrong readback From 0636e3a45f642d872d9aa088988eb1d740d33746 Mon Sep 17 00:00:00 2001 From: sunglee Date: Fri, 9 Jul 2021 10:24:14 -0400 Subject: [PATCH 530/851] drm/amd/display: DCN2X Prefer ODM over bottom pipe to find second pipe [WHY] When finding a second pipe for pipe split, currently will look for bottom pipe in context first to decide the second pipe. This causes issues in 2 plane to 1 plane transitions like fullscreen video where bottom pipe no longer exists in the new configuration. [HOW] If previous context had an ODM pipe, use that to find the secondary pipe first before looking at bottom pipe. 
Reviewed-by: Dmytro Laktyushkin Acked-by: Rodrigo Siqueira Signed-off-by: sunglee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 8bf43597c616e..193df7625f5be 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2531,16 +2531,16 @@ struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, * pick that pipe as secondary * Same logic applies for ODM pipes */ - if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) { - preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx; + if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { + preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx; if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; secondary_pipe->pipe_idx = preferred_pipe_idx; } } if (secondary_pipe == NULL && - dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { - preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx; + dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) { + preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx; if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; secondary_pipe->pipe_idx = preferred_pipe_idx; From 2d5c99f7975d8a53d132a82e0552cc67147b4334 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Mon, 14 Jun 2021 20:21:42 -0400 Subject: [PATCH 531/851] drm/amd/display: Remove MALL function from DCN3.1 [why] DCN31 doesn't 
have MALL in DMUB so to avoid sending unknown commands to DMUB just remove the function pointer. [how] Remove apply_idle_power_optimizations from function pointers structure for DCN31 Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Mikita Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index de74f62f96cdb..aaf2dbd095fe1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, From 7318e8ed308e0612fb52abfc6c4b7b036d230beb Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Fri, 9 Jul 2021 13:05:11 -0400 Subject: [PATCH 532/851] drm/amd/display: Line Buffer changes DCN 3x increased Line buffer size for DCHUB latency hiding, from 4 lines of 4K resolution lines to 5 lines of 4K resolution lines. All Line Buffer can be used as extended memory for P State change latency hiding. The maximum number of lines is increased to 32 lines. Finally, LB_MEMORY_CONFIG_1 (LB memory piece 1) and LB_MEMORY _CONFIG_2 (LB memory piece 2) are not affected, no change in size, only 3 pieces is affected, i.e., when all 3 pieces are used in both LB_MEMORY_CONFIG_0 and LB_MEMORY_CONFIG_3 (for 4:2:0) modes. 
Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Nevenko Stupar Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c | 7 ++++++- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 16 ---------------- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h | 3 +-- .../gpu/drm/amd/display/dc/inc/hw/transform.h | 3 +++ 4 files changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 673b93f4fea51..cb9767ddf93d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -217,6 +217,8 @@ static void dpp1_dscl_set_lb( const struct line_buffer_params *lb_params, enum lb_memory_config mem_size_config) { + uint32_t max_partitions = 63; /* Currently hardcoded on all ASICs before DCN 3.2 */ + /* LB */ if (dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { /* DSCL caps: pixel data processed in fixed format */ @@ -239,9 +241,12 @@ static void dpp1_dscl_set_lb( LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */ } + if (dpp->base.caps->max_lb_partitions == 31) + max_partitions = 31; + REG_SET_2(LB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, - LB_MAX_PARTITIONS, 63); + LB_MAX_PARTITIONS, max_partitions); } static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 2140b75540cfe..23a52d47e61c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -383,13 +383,6 @@ bool dpp3_get_optimal_number_of_taps( int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - /* Some ASICs does not support FP16 scaling, so we reject modes require this*/ - if (scl_data->viewport.width != scl_data->h_active && - 
scl_data->viewport.height != scl_data->v_active && - dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT && - scl_data->format == PIXEL_FORMAT_FP16) - return false; - if (scl_data->viewport.width > scl_data->h_active && dpp->ctx->dc->debug.max_downscale_src_width != 0 && scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width) @@ -1440,15 +1433,6 @@ bool dpp3_construct( dpp->tf_shift = tf_shift; dpp->tf_mask = tf_mask; - dpp->lb_pixel_depth_supported = - LB_PIXEL_DEPTH_18BPP | - LB_PIXEL_DEPTH_24BPP | - LB_PIXEL_DEPTH_30BPP | - LB_PIXEL_DEPTH_36BPP; - - dpp->lb_bits_per_entry = LB_BITS_PER_ENTRY; - dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES; /*0x1404*/ - return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h index 3fa86cd090a08..ac644ae6b9f26 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h @@ -154,6 +154,7 @@ SRI(COLOR_KEYER_BLUE, CNVC_CFG, id), \ SRI(CURSOR_CONTROL, CURSOR0_, id),\ SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\ + SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \ SRI(DSCL_MEM_PWR_CTRL, DSCL, id) #define DPP_REG_LIST_DCN30(id)\ @@ -163,8 +164,6 @@ SRI(CM_SHAPER_LUT_DATA, CM, id),\ SRI(CM_MEM_PWR_CTRL2, CM, id), \ SRI(CM_MEM_PWR_STATUS2, CM, id), \ - SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \ - SRI(DSCL_MEM_PWR_CTRL, DSCL, id), \ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_B, CM, id),\ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_G, CM, id),\ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_R, CM, id),\ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 2a0db2b03047e..9ac9d5e8df8b9 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -289,6 +289,9 @@ struct dpp_caps { /* DSCL processing pixel data in fixed or float format */ enum dscl_data_processing_format dscl_data_proc_format; + /* max LB 
partitions */ + unsigned int max_lb_partitions; + /* Calculates the number of partitions in the line buffer. * The implementation of this function is overloaded for * different versions of DSCL LB. From 1ca62b69eefbd8201589acfedef8bca470cba641 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Fri, 9 Jul 2021 12:57:50 -0400 Subject: [PATCH 533/851] drm/amd/display: add workaround for riommu invalidation request hang [Why] When an riommu invalidation request come at the same time as a pipe is disabled there can be a case where DCN cannot ACK the request if only one VMID is setup in the inuse list. [How] Setup a second unused VMID will work around the issue. Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Eric Yang Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 6 ++++++ .../drm/amd/display/dc/dcn31/dcn31_hubbub.c | 20 +++++++++++++++++++ .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 3 +++ 3 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 5c2853654ccad..ef185b93b31d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -570,6 +570,12 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) struct hubp *hubp = pipe_ctx->plane_res.hubp; struct dpp *dpp = pipe_ctx->plane_res.dpp; + if (hws->wa.early_riommu_invalidation) { + struct hubbub *hubbub = dc->res_pool->hubbub; + + hubbub->funcs->apply_invalidation_req_wa(hubbub, &hubbub->vmid_cache); + } + dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx); /* In flip immediate with pipe splitting case GSL is used for diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c index 2043528d3490a..ef233cb49b317 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c @@ -880,6 +880,8 @@ static int hubbub31_init_dchub_sys_ctx(struct hubbub *hubbub, dcn21_dchvm_init(hubbub); + hubbub->vmid_cache = *pa_config; + return NUM_VMID; } @@ -920,6 +922,23 @@ static void hubbub31_get_dchub_ref_freq(struct hubbub *hubbub, } } +static void hubbub31_apply_invalidation_req_wa(struct hubbub *hubbub, + struct dcn_hubbub_phys_addr_config *pa_config) +{ + struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); + struct dcn_vmid_page_table_config phys_config; + + if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) { + phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12; + phys_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr >> 12; + phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr; + phys_config.depth = 0; + phys_config.block_size = 0; + // Program an arbitrary unused VMID + dcn20_vmid_setup(&hubbub1->vmid[15], &phys_config); + } +} + static const struct hubbub_funcs hubbub31_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub31_init_dchub_sys_ctx, @@ -936,6 +955,7 @@ static const struct hubbub_funcs hubbub31_funcs = { .program_compbuf_size = dcn31_program_compbuf_size, .init_crb = dcn31_init_crb, .hubbub_read_state = hubbub2_read_state, + .apply_invalidation_req_wa = hubbub31_apply_invalidation_req_wa }; void hubbub31_construct(struct dcn20_hubbub *hubbub31, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 713f5558f5e17..259283d8bde8f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -166,12 +166,15 @@ struct hubbub_funcs { void (*program_det_size)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_in_kbyte); void (*program_compbuf_size)(struct hubbub *hubbub, unsigned compbuf_size_kb, 
bool safe_to_increase); void (*init_crb)(struct hubbub *hubbub); + void (*apply_invalidation_req_wa)(struct hubbub *hubbub, + struct dcn_hubbub_phys_addr_config *pa_config); }; struct hubbub { const struct hubbub_funcs *funcs; struct dc_context *ctx; bool riommu_active; + struct dcn_hubbub_phys_addr_config vmid_cache; }; #endif From a276c0fddd9254133ed21b55b3f0f8732be8f74c Mon Sep 17 00:00:00 2001 From: Bindu Ramamurthy Date: Fri, 9 Jul 2021 10:35:33 -0400 Subject: [PATCH 534/851] drm/amd/display: Populate dtbclk entries for dcn3.02/3.03 [Why] Populate dtbclk values from bwparams for dcn302, dcn303. [How] dtbclk values are fetched from bandwidthparams for all DPM levels and for DPM levels where smu returns 0, previous level values are reported. Reviewed-by: Roman Li Acked-by: Rodrigo Siqueira Signed-off-by: Bindu Ramamurthy Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 6 +++++- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index d65c097333a46..7d3ff5d444023 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1398,7 +1398,11 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz; + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) + dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz; + else + dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; if 
(!bw_params->clk_table.entries[i].socclk_mhz && i > 0) dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index f8b84722a389c..833ab13fa8340 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1326,7 +1326,11 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz; + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) + dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz; + else + dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz; else From 5a1e085f892d5cb4320010e65e9400f3241d5079 Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Fri, 9 Jul 2021 10:25:44 -0400 Subject: [PATCH 535/851] drm/amd/display: Refine condition for cursor visibility [why] There's a special case where upper plane is not the main plane. If it owns the cursor, it will be invisible in the majority of the screen. [How] The condition for disabling cursor is changed: - check if upper viewport completely covers current. This was the previous change that doesn't handle all scenarios with pipe splitting. - if not, show the cursor only if it's not scaled or no upper pipe. 
Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Krunoslav Kovac Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 25 ++++++++++++++++--- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 1 + 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 89e68372992fe..35af0401f2567 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -3177,21 +3177,40 @@ void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data) static bool dcn10_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx) { struct pipe_ctx *test_pipe; + const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data; + const struct rect *r1 = &scl_data->recout, *r2; + int r1_r = r1->x + r1->width, r1_b = r1->y + r1->height, r2_r, r2_b; int cur_layer = pipe_ctx->plane_state->layer_index; + bool upper_pipe_exists = false; + struct fixed31_32 one = dc_fixpt_from_int(1); /** - * Disable the cursor if there's there's an upper layer active, - * assume it's the one owning the cursor + * Disable the cursor if there's another pipe above this with a + * plane that contains this pipe's viewport to prevent double cursor + * and incorrect scaling artifacts. */ for (test_pipe = pipe_ctx->top_pipe; test_pipe; test_pipe = test_pipe->top_pipe) { if (!test_pipe->plane_state->visible) continue; - if (test_pipe->plane_state->layer_index < cur_layer) + r2 = &test_pipe->plane_res.scl_data.recout; + r2_r = r2->x + r2->width; + r2_b = r2->y + r2->height; + + if (r1->x >= r2->x && r1->y >= r2->y && r1_r <= r2_r && r1_b <= r2_b) return true; + + if (test_pipe->plane_state->layer_index < cur_layer) + upper_pipe_exists = true; } + // if plane scaled, assume an upper plane can handle cursor if it exists. 
+ if (upper_pipe_exists && + (scl_data->ratios.horz.value != one.value || + scl_data->ratios.vert.value != one.value)) + return true; + return false; } diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index cb1a86246673a..df43082c03eb6 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -2636,6 +2636,7 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb) */ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) { + uint8_t buf[DMUB_RB_CMD_SIZE]; uint32_t rptr = rb->rptr; uint32_t wptr = rb->wrpt; From c2a4ad2f8b255b3b5a3b641e8c10ff331e2e990a Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 10 Jul 2021 22:03:07 -0400 Subject: [PATCH 536/851] drm/amd/display: [FW Promotion] Release 0.0.75 - Add reserved bits for future feature development - Fix issue with mismatch with type const - Replaced problematic code with old memcpy and casted problematic pointers to unsigned char pointers Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Anthony Koo Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 36 +++++++------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index df43082c03eb6..8b0b4d86986c6 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -23,8 +23,8 @@ * */ -#ifndef _DMUB_CMD_H_ -#define _DMUB_CMD_H_ +#ifndef DMUB_CMD_H +#define DMUB_CMD_H #if defined(_TEST_HARNESS) || defined(FPGA_USB4) #include "dmub_fw_types.h" @@ -47,10 +47,10 @@ /* Firmware versioning. 
*/ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0xc761b9efd +#define DMUB_FW_VERSION_GIT_HASH 0x2d2f6f51e #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 73 +#define DMUB_FW_VERSION_REVISION 75 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 @@ -1448,10 +1448,6 @@ struct dmub_cmd_psr_set_level_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; - /** - * Explicit padding to 4 byte boundary. - */ - uint8_t pad3[4]; }; /** @@ -2474,16 +2470,14 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool dmub_rb_push_front(struct dmub_rb *rb, const union dmub_rb_cmd *cmd) { - uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t); - const uint64_t *src = (const uint64_t *)cmd; - uint8_t i; + uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt; + const uint8_t *src = (const uint8_t *)cmd; if (dmub_rb_full(rb)) return false; // copying data - for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) - *dst++ = *src++; + dmub_memcpy(dst, src, DMUB_RB_CMD_SIZE); rb->wrpt += DMUB_RB_CMD_SIZE; @@ -2590,18 +2584,16 @@ static inline bool dmub_rb_peek_offset(struct dmub_rb *rb, * @return false otherwise */ static inline bool dmub_rb_out_front(struct dmub_rb *rb, - union dmub_rb_out_cmd *cmd) + union dmub_rb_out_cmd *cmd) { - const uint64_t volatile *src = (const uint64_t volatile *)(rb->base_address) + rb->rptr / sizeof(uint64_t); - uint64_t *dst = (uint64_t *)cmd; - uint8_t i; + const uint8_t *src = (const uint8_t *)(rb->base_address) + rb->rptr; + uint8_t *dst = (uint8_t *)cmd; if (dmub_rb_empty(rb)) return false; // copying data - for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) - *dst++ = *src++; + dmub_memcpy(dst, src, DMUB_RB_CMD_SIZE); return true; } @@ -2641,11 +2633,9 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint32_t wptr = rb->wrpt; while 
(rptr != wptr) { - uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t); - uint8_t i; + const uint8_t *data = (const uint8_t *)rb->base_address + rptr; - for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) - *data++; + dmub_memcpy(buf, data, DMUB_RB_CMD_SIZE); rptr += DMUB_RB_CMD_SIZE; if (rptr >= rb->capacity) From 923f1863c03928e67aae9a864b6c4b6e4954f47b Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sat, 10 Jul 2021 21:40:45 -0400 Subject: [PATCH 537/851] drm/amd/display: 3.2.145 DC version 3.2.145 brings improvements in multiple areas. In summary, we highlight: - Code improvements for passive - Cursor manipulation enhancements - Expand debug in some areas - Fix problems in DML - Other minor code refactors Reviewed-by: Anson Jacob Acked-by: Rodrigo Siqueira Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0ec07617cbdbf..4e4e7092f1947 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -45,7 +45,7 @@ /* forward declaration */ struct aux_payload; -#define DC_VER "3.2.144" +#define DC_VER "3.2.145" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 9b05ace159c6df452e7f8957e068fc95789aaf61 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Wed, 30 Jun 2021 18:22:51 -0400 Subject: [PATCH 538/851] drm/amd/display: change zstate allow msg condition [Why] PMFW message which previously thought to only control Z9 controls both Z9 and Z10. Also HW design team requested that Z9 must only be supported on eDP due to content protection interop. 
[How] Change zstate support condition to match updated policy Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Eric Yang Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 16 +++---- drivers/gpu/drm/amd/display/dc/dc.h | 10 ++--- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 44 ++++++++++++++----- 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index d15c628a2ab0a..4a4894e9d9c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -139,10 +139,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, * also if safe to lower is false, we just go in the higher state */ if (safe_to_lower) { - if (new_clocks->z9_support == DCN_Z9_SUPPORT_ALLOW && - new_clocks->z9_support != clk_mgr_base->clks.z9_support) { + if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_ALLOW && + new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_Z9_support(clk_mgr, true); - clk_mgr_base->clks.z9_support = new_clocks->z9_support; + clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { @@ -163,10 +163,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, } } } else { - if (new_clocks->z9_support == DCN_Z9_SUPPORT_DISALLOW && - new_clocks->z9_support != clk_mgr_base->clks.z9_support) { + if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW && + new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_Z9_support(clk_mgr, false); - clk_mgr_base->clks.z9_support = new_clocks->z9_support; + clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { @@ -286,7 +286,7 @@ static void 
dcn31_init_clocks(struct clk_mgr *clk_mgr) clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; - clk_mgr->clks.z9_support = DCN_Z9_SUPPORT_UNKNOWN; + clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; } static bool dcn31_are_clock_states_equal(struct dc_clocks *a, @@ -300,7 +300,7 @@ static bool dcn31_are_clock_states_equal(struct dc_clocks *a, return false; else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) return false; - else if (a->z9_support != b->z9_support) + else if (a->zstate_support != b->zstate_support) return false; else if (a->dtbclk_en != b->dtbclk_en) return false; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4e4e7092f1947..3f2a0f1807d24 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -354,10 +354,10 @@ enum dcn_pwr_state { }; #if defined(CONFIG_DRM_AMD_DC_DCN) -enum dcn_z9_support_state { - DCN_Z9_SUPPORT_UNKNOWN, - DCN_Z9_SUPPORT_ALLOW, - DCN_Z9_SUPPORT_DISALLOW, +enum dcn_zstate_support_state { + DCN_ZSTATE_SUPPORT_UNKNOWN, + DCN_ZSTATE_SUPPORT_ALLOW, + DCN_ZSTATE_SUPPORT_DISALLOW, }; #endif /* @@ -378,7 +378,7 @@ struct dc_clocks { int dramclk_khz; bool p_state_change_support; #if defined(CONFIG_DRM_AMD_DC_DCN) - enum dcn_z9_support_state z9_support; + enum dcn_zstate_support_state zstate_support; bool dtbclk_en; #endif enum dcn_pwr_state pwr_state; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 193df7625f5be..0b1cd1dbed8b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3081,6 +3081,37 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) return false; } +static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + 
int plane_count; + int i; + + plane_count = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + /* + * Zstate is allowed in following scenarios: + * 1. Single eDP with PSR enabled + * 2. 0 planes (No memory requests) + * 3. Single eDP without PSR but > 5ms stutter period + */ + if (plane_count == 0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + + if ((link->link_index == 0 && link->psr_settings.psr_feature_enabled) + || context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else + return DCN_ZSTATE_SUPPORT_DISALLOW; + } else + return DCN_ZSTATE_SUPPORT_DISALLOW; +} + void dcn20_calculate_dlg_params( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -3088,7 +3119,6 @@ void dcn20_calculate_dlg_params( int vlevel) { int i, pipe_idx; - int plane_count; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); @@ -3104,17 +3134,7 @@ void dcn20_calculate_dlg_params( != dm_dram_clock_change_unsupported; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? 
- DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; - - plane_count = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].plane_state) - plane_count++; - } - - if (plane_count == 0) - context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); From 179e6e726d7c65260c2cd46efe4098c1f82667eb Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 15 Jul 2021 14:54:49 +0800 Subject: [PATCH 539/851] drm/amd/pm: Support board calibration on aldebaran Add support for board power calibration on Aldebaran. Board calibration is done after DC offset calibration. Signed-off-by: Lijo Lazar Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h | 3 +- drivers/gpu/drm/amd/pm/inc/smu_types.h | 3 +- .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 46 +++++++++++++++---- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h index 610266088ff1c..35fa0d8e92dd3 100644 --- a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h @@ -101,7 +101,8 @@ #define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow 0x41 #define PPSMC_MSG_GfxDriverResetRecovery 0x42 -#define PPSMC_Message_Count 0x43 +#define PPSMC_MSG_BoardPowerCalibration 0x43 +#define PPSMC_Message_Count 0x44 //PPSMC Reset Types #define PPSMC_RESET_TYPE_WARM_RESET 0x00 diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 89a16dcd0fff9..1d3765b873df4 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -225,7 +225,8 @@ __SMU_DUMMY_MAP(DisableDeterminism), \ __SMU_DUMMY_MAP(SetUclkDpmMode), \ __SMU_DUMMY_MAP(LightSBR), \ - __SMU_DUMMY_MAP(GfxDriverResetRecovery), + 
__SMU_DUMMY_MAP(GfxDriverResetRecovery), \ + __SMU_DUMMY_MAP(BoardPowerCalibration), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index c16ca0c78e930..4b12c3b807e9d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -134,6 +134,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT MSG_MAP(DisableDeterminism, PPSMC_MSG_DisableDeterminism, 0), MSG_MAP(SetUclkDpmMode, PPSMC_MSG_SetUclkDpmMode, 0), MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), + MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0), }; static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = { @@ -440,6 +441,39 @@ static int aldebaran_setup_pptable(struct smu_context *smu) return ret; } +static bool aldebaran_is_primary(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (adev->smuio.funcs && adev->smuio.funcs->get_die_id) + return adev->smuio.funcs->get_die_id(adev) == 0; + + return true; +} + +static int aldebaran_run_board_btc(struct smu_context *smu) +{ + u32 smu_version; + int ret; + + if (!aldebaran_is_primary(smu)) + return 0; + + ret = smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (ret) { + dev_err(smu->adev->dev, "Failed to get smu version!\n"); + return ret; + } + if (smu_version <= 0x00441d00) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_BoardPowerCalibration, NULL); + if (ret) + dev_err(smu->adev->dev, "Board power calibration failed!\n"); + + return ret; +} + static int aldebaran_run_btc(struct smu_context *smu) { int ret; @@ -447,6 +481,8 @@ static int aldebaran_run_btc(struct smu_context *smu) ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); if (ret) dev_err(smu->adev->dev, "RunDcBtc failed!\n"); + else + ret = aldebaran_run_board_btc(smu); 
return ret; } @@ -524,16 +560,6 @@ static int aldebaran_freqs_in_same_level(int32_t frequency1, return (abs(frequency1 - frequency2) <= EPSILON); } -static bool aldebaran_is_primary(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - - if (adev->smuio.funcs && adev->smuio.funcs->get_die_id) - return adev->smuio.funcs->get_die_id(adev) == 0; - - return true; -} - static int aldebaran_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) From 1d44c00a2a99865141e26830b9679911f4b41d3f Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Mon, 19 Jul 2021 11:09:40 -0400 Subject: [PATCH 540/851] drm/amdgpu: Fix documentation for amdgpu_bo_add_to_shadow_list make htmldocs complains about parameter for amdgpu_bo_add_to_shadow_list ./drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:739: warning: Excess function parameter 'bo' description in 'amdgpu_bo_add_to_shadow_list' ./drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:739: warning: Function parameter or member 'vmbo' not described in 'amdgpu_bo_add_to_shadow_list' ./drivers/gpu/drm/amd/amdgpu/amdgpu_object.c:739: warning: Excess function parameter 'bo' description in 'amdgpu_bo_add_to_shadow_list' Signed-off-by: Anson Jacob Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 795fa7445abec..bc19cf32f62c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -731,7 +731,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) /** * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list * - * @bo: BO that will be inserted into the shadow list + * @vmbo: BO that will be inserted into the shadow list * * Insert a BO to the shadow list.
*/ From 4658b0c1a5162e20e9c2d3a4d6634e46332a794a Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 12 May 2021 12:26:20 -0400 Subject: [PATCH 541/851] drm/amdgpu: add psp command to get num xgmi links between direct peers The TA can now be invoked to provide the number of xgmi links connecting a direct source and destination peer. Non-direct peers will report zero links. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h | 14 +++++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 77e794e027a8e..a74455a3f8639 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1078,6 +1078,12 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) return 0; } +static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp) +{ + return psp->adev->asic_type == CHIP_ALDEBARAN && + psp->ta_xgmi_ucode_version >= 0x2000000b; +} + int psp_xgmi_get_topology_info(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology) @@ -1121,6 +1127,23 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine; } + /* Invoke xgmi ta again to get the link information */ + if (psp_xgmi_peer_link_info_supported(psp)) { + struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output; + + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS); + + if (ret) + return ret; + + link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; + for (i = 0; i < topology->num_nodes; i++) + topology->nodes[i].num_links = + link_info_output->nodes[i].num_links; + } + return 0; } diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 5fd8928d93636..f0e32a958171a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -116,6 +116,7 @@ struct psp_xgmi_node_info { uint8_t num_hops; uint8_t is_sharing_enabled; enum ta_xgmi_assigned_sdma_engine sdma_engine; + uint8_t num_links; }; struct psp_xgmi_topology_info { diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h index ac2c27b7630c9..cce7127afeaad 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h @@ -33,7 +33,8 @@ enum ta_command_xgmi { TA_COMMAND_XGMI__GET_NODE_ID = 0x01, TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, - TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04 + TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04, + TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B }; /* XGMI related enumerations */ @@ -75,6 +76,11 @@ struct ta_xgmi_node_info { enum ta_xgmi_assigned_sdma_engine sdma_engine; }; +struct ta_xgmi_peer_link_info { + uint64_t node_id; + uint8_t num_links; +}; + struct ta_xgmi_cmd_initialize_output { uint32_t status; }; @@ -97,6 +103,11 @@ struct ta_xgmi_cmd_get_topology_info_output { struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; +struct ta_xgmi_cmd_get_peer_link_info_output { + uint32_t num_nodes; + struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + struct ta_xgmi_cmd_set_topology_info_input { uint32_t num_nodes; struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; @@ -115,6 +126,7 @@ union ta_xgmi_cmd_output { struct ta_xgmi_cmd_get_node_id_output get_node_id; struct ta_xgmi_cmd_get_hive_id_output get_hive_id; struct ta_xgmi_cmd_get_topology_info_output get_topology_info; + struct ta_xgmi_cmd_get_peer_link_info_output get_link_info; }; /**********************************************************/ From af546ec86218b7000b126b5af88226c90081edaa 
Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 12 May 2021 12:30:41 -0400 Subject: [PATCH 542/851] drm/amdkfd: report xgmi bandwidth between direct peers to the kfd Report the min/max bandwidth in megabytes to the kfd for direct xgmi connections only. Indirect peers will report 0 since indirect route is unknown. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 24 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 12 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 12 +++++++++++ 5 files changed, 51 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f9c01bdc3d4c7..801403d530589 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -553,6 +553,30 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s return (uint8_t)ret; } +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev; + int num_links; + + if (adev->asic_type != CHIP_ALDEBARAN) + return 0; + + if (src) + peer_adev = (struct amdgpu_device *)src; + + /* num links returns 0 for indirect peers since indirect route is unknown. */ + num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); + if (num_links < 0) { + DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n", + adev->gmc.xgmi.physical_node_id, + peer_adev->gmc.xgmi.physical_node_id, num_links); + num_links = 0; + } + + /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. 
*/ + return (num_links * 16 * 25000)/BITS_PER_BYTE; +} + uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cf62f43a03da1..9b98ce2253cf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -226,6 +226,7 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 8567d5d773460..258cf86b32f6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -486,6 +486,18 @@ int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, return -EINVAL; } +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0 ; i < top->num_nodes; ++i) + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) + return top->nodes[i].num_links; + return -EINVAL; +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { struct psp_xgmi_topology_info *top_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 12969c0830d5c..d2189bf7d428e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -59,6 +59,8 @@ int amdgpu_xgmi_remove_device(struct 
amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index c6b02aee4993e..40ce6239c8137 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1989,6 +1989,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->num_hops_xgmi = 1; + if (adev->asic_type == CHIP_ALDEBARAN) { + sub_type_hdr->minimum_bandwidth_mbs = + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( + kdev->kgd, NULL, true); + sub_type_hdr->maximum_bandwidth_mbs = + sub_type_hdr->minimum_bandwidth_mbs; + } } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; } @@ -2033,6 +2040,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->proximity_domain_to = proximity_domain_to; sub_type_hdr->num_hops_xgmi = amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); + sub_type_hdr->maximum_bandwidth_mbs = + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false); + sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? 
+ amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0; + return 0; } From 5fa5ff51136378ba07c98b7995b463dbc93dbfe3 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 2 Jun 2021 09:46:16 -0400 Subject: [PATCH 543/851] drm/amdkfd: report pcie bandwidth to the kfd Similar to xGMI reporting the min/max bandwidth between direct peers, PCIe will report the min/max bandwidth to the KFD. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 59 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 4 ++ 3 files changed, 64 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 801403d530589..7b46ba551cb20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -21,6 +21,7 @@ */ #include "amdgpu_amdkfd.h" +#include "amd_pcie.h" #include "amd_shared.h" #include "amdgpu.h" @@ -577,6 +578,64 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev return (num_links * 16 * 25000)/BITS_PER_BYTE; } +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dev; + int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : + fls(adev->pm.pcie_mlw_mask)) - 1; + int gen_speed_shift = (is_min ? 
ffs(adev->pm.pcie_gen_mask & + CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) : + fls(adev->pm.pcie_gen_mask & + CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1; + uint32_t num_lanes_mask = 1 << num_lanes_shift; + uint32_t gen_speed_mask = 1 << gen_speed_shift; + int num_lanes_factor = 0, gen_speed_mbits_factor = 0; + + switch (num_lanes_mask) { + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1: + num_lanes_factor = 1; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2: + num_lanes_factor = 2; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4: + num_lanes_factor = 4; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8: + num_lanes_factor = 8; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12: + num_lanes_factor = 12; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16: + num_lanes_factor = 16; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32: + num_lanes_factor = 32; + break; + } + + switch (gen_speed_mask) { + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1: + gen_speed_mbits_factor = 2500; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2: + gen_speed_mbits_factor = 5000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3: + gen_speed_mbits_factor = 8000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4: + gen_speed_mbits_factor = 16000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5: + gen_speed_mbits_factor = 32000; + break; + } + + return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE; +} + uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 9b98ce2253cf0..a8b05c6ddce5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -227,6 +227,7 @@ uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); int 
amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min); +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 40ce6239c8137..eada22b9ea69b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1998,6 +1998,10 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, } } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; + sub_type_hdr->minimum_bandwidth_mbs = + amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true); + sub_type_hdr->maximum_bandwidth_mbs = + amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false); } sub_type_hdr->proximity_domain_from = proximity_domain; From 6d9328e8bde70e2f502a5183beabe30ab87b5638 Mon Sep 17 00:00:00 2001 From: Veerabadhran Gopalakrishnan Date: Mon, 19 Jul 2021 19:06:23 +0530 Subject: [PATCH 544/851] drm/amdgpu - Corrected the video codecs array name for yellow carp Signed-off-by: Veerabadhran Gopalakrishnan Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index cf73a6923203d..751c7b8b13e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -173,8 +173,8 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { }; static const struct amdgpu_video_codecs yc_video_codecs_decode = { - .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array), - .codec_array = bg_video_codecs_decode_array, + .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array), + .codec_array = yc_video_codecs_decode_array, }; static int nv_query_video_codecs(struct 
amdgpu_device *adev, bool encode, From 83ae57f09d52457f26b351b188a82c69c80f214f Mon Sep 17 00:00:00 2001 From: Roy Sun Date: Mon, 5 Jul 2021 17:47:57 +0800 Subject: [PATCH 545/851] drm/amdgpu: Change the imprecise function name The callback functions are used for SRIOV read/write instead of just for rlcg read/write Signed-off-by: Roy Sun Reviewed-by: Zhou pengju Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 8 ++++---- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 26ccccb9d94f4..f8bc670fe3f9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -563,7 +563,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->is_rlcg_access_range) { if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) - return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0, 0); + return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 7a4775ab68048..00afd0dcae86c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -127,8 +127,8 @@ struct amdgpu_rlc_funcs { void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); - void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 acc_flags, u32 hwip); - u32 (*rlcg_rreg)(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip); + void (*sriov_wreg)(struct amdgpu_device *adev, u32 
offset, u32 v, u32 acc_flags, u32 hwip); + u32 (*sriov_rreg)(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 5af5b166ebe17..27c4ce534de29 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1538,7 +1538,7 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 return ret; } -static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip) +static void gfx_v10_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip) { u32 rlcg_flag; @@ -1554,7 +1554,7 @@ static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, WREG32(offset, value); } -static u32 gfx_v10_rlcg_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) +static u32 gfx_v10_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) { u32 rlcg_flag; @@ -8268,8 +8268,8 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { .reset = gfx_v10_0_rlc_reset, .start = gfx_v10_0_rlc_start, .update_spm_vmid = gfx_v10_0_update_spm_vmid, - .rlcg_wreg = gfx_v10_rlcg_wreg, - .rlcg_rreg = gfx_v10_rlcg_rreg, + .sriov_wreg = gfx_v10_sriov_wreg, + .sriov_rreg = gfx_v10_sriov_rreg, .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 044076ec1d036..03acc777adf7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -787,7 +787,7 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 f } -static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, +static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 v, u32 acc_flags, 
u32 hwip) { if ((acc_flags & AMDGPU_REGS_RLC) && @@ -5131,7 +5131,7 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { .reset = gfx_v9_0_rlc_reset, .start = gfx_v9_0_rlc_start, .update_spm_vmid = gfx_v9_0_update_spm_vmid, - .rlcg_wreg = gfx_v9_0_rlcg_wreg, + .sriov_wreg = gfx_v9_0_sriov_wreg, .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 0eeb5e073be81..8a9ca87d86632 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -28,13 +28,13 @@ #define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) #define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \ - ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->rlcg_wreg) ? \ - adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, value, flag, hwip) : \ + ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_wreg) ? \ + adev->gfx.rlc.funcs->sriov_wreg(adev, reg, value, flag, hwip) : \ WREG32(reg, value)) #define __RREG32_SOC15_RLC__(reg, flag, hwip) \ - ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->rlcg_rreg) ? \ - adev->gfx.rlc.funcs->rlcg_rreg(adev, reg, flag, hwip) : \ + ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_rreg) ? 
\ + adev->gfx.rlc.funcs->sriov_rreg(adev, reg, flag, hwip) : \ RREG32(reg)) #define WREG32_FIELD15(ip, idx, reg, field, val) \ From 7b9ff4173c19848286d19dda3e8830bcc931d2cc Mon Sep 17 00:00:00 2001 From: Roy Sun Date: Thu, 8 Jul 2021 16:18:30 +0800 Subject: [PATCH 546/851] drm/amdgpu: Add error message when programming registers fails Squash in warning fix (Alex) Signed-off-by: Roy Sun Reviewed-by: Zhou pengju Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 27c4ce534de29..d102cfd36ba2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -56,6 +56,10 @@ #define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1 #define GFX10_MEC_HPD_SIZE 2048 +#define RLCG_INTERFACE_NOT_ENABLED 0x4000000 +#define RLCG_WRONG_OPERATION_TYPE 0x2000000 +#define RLCG_NOT_IN_RANGE 0x1000000 + #define F32_CE_PROGRAM_RAM_SIZE 65536 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -181,6 +185,9 @@ #define GFX_RLCG_GC_READ (0x1 << 28) #define GFX_RLCG_MMHUB_WRITE (0x2 << 28) +#define RLCG_ERROR_REPORT_ENABLED(adev) \ + (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -1486,6 +1493,7 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 uint32_t i = 0; uint32_t retries = 50000; u32 ret = 0; + u32 tmp; scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0) * 4; @@ -1519,9 +1527,8 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 writel(v, scratch_reg0); writel(offset | flag, scratch_reg1); writel(1, spare_int); - for (i = 0; i < retries; i++) { - u32 tmp; + for (i = 0; i < retries; i++) { tmp
= readl(scratch_reg1); if (!(tmp & flag)) break; @@ -1529,8 +1536,19 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 udelay(10); } - if (i >= retries) - pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); + if (i >= retries) { + if (RLCG_ERROR_REPORT_ENABLED(adev)) { + if (tmp & RLCG_INTERFACE_NOT_ENABLED) + pr_err("The interface is not enabled, program reg:0x%05x failed!\n", offset); + else if (tmp & RLCG_WRONG_OPERATION_TYPE) + pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset); + else if (tmp & RLCG_NOT_IN_RANGE) + pr_err("The register is not in range, program reg:0x%05x failed!\n", offset); + else + pr_err("Unknown error type, program reg:0x%05x failed!\n", offset); + } else + pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset); + } } ret = readl(scratch_reg0); From c94dd38d134b24a26a34ccc781824668e09ab5c9 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Wed, 14 Jul 2021 09:50:37 -0500 Subject: [PATCH 547/851] drm/amdgpu: Fix a printing message The printing message "PSP loading VCN firmware" is misleading because people might think the driver is loading VCN firmware. Actually when this message is printed, the driver is just preparing some VCN ucode, not loading VCN firmware yet. The actual VCN firmware loading will be in the PSP block hw_init.
Fix the printing message Signed-off-by: Oak Zeng Reviewed-by: Christian Konig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 284bb42d6c866..121ee9f2b8d16 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -119,7 +119,7 @@ static int vcn_v1_0_sw_init(void *handle) adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8af567c546dbc..f4686e918e0d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -122,7 +122,7 @@ static int vcn_v2_0_sw_init(void *handle) adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 888b17d84691c..e0c0c3734432e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -152,7 +152,7 @@ static int vcn_v2_5_sw_init(void *handle) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); diff --git 
a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 47d4f04cbd69e..2f017560948eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -160,7 +160,7 @@ static int vcn_v3_0_sw_init(void *handle) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); From 53a253f0056e84cbe8c77ec9f7c58d3e02df2469 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Wed, 14 Jul 2021 09:59:51 -0500 Subject: [PATCH 548/851] drm/amdgpu: Change a few function names Function name "psp_np_fw_load" is not proper as people don't know _np_fw_ means "non psp firmware". Change the function name to psp_load_non_psp_fw for better understanding. Same thing for function psp_execute_np_fw_load. Signed-off-by: Oak Zeng Reviewed-by: Alex Deucher Reviewed-by: Christian Konig Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a74455a3f8639..135f82dfbac6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2373,7 +2373,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, return ret; } -static int psp_execute_np_fw_load(struct psp_context *psp, +static int psp_execute_non_psp_fw_load(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { int ret = 0; @@ -2409,7 +2409,7 @@ static int psp_load_smu_fw(struct psp_context *psp) } } - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) DRM_ERROR("PSP load smu failed!\n"); @@ -2464,14 +2464,14 @@ int psp_load_fw_list(struct psp_context *psp, for (i = 0; i < ucode_count; ++i) { ucode = ucode_list[i]; 
psp_print_fw_hdr(psp, ucode); - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) return ret; } return ret; } -static int psp_np_fw_load(struct psp_context *psp) +static int psp_load_non_psp_fw(struct psp_context *psp) { int i, ret; struct amdgpu_firmware_info *ucode; @@ -2510,7 +2510,7 @@ static int psp_np_fw_load(struct psp_context *psp) psp_print_fw_hdr(psp, ucode); - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) return ret; @@ -2587,7 +2587,7 @@ static int psp_load_fw(struct amdgpu_device *adev) if (ret) goto failed; - ret = psp_np_fw_load(psp); + ret = psp_load_non_psp_fw(psp); if (ret) goto failed; @@ -2787,7 +2787,7 @@ static int psp_resume(void *handle) if (ret) goto failed; - ret = psp_np_fw_load(psp); + ret = psp_load_non_psp_fw(psp); if (ret) goto failed; @@ -2885,7 +2885,7 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, ucode.mc_addr = cmd_gpu_addr; ucode.ucode_size = cmd_size; - return psp_execute_np_fw_load(&adev->psp, &ucode); + return psp_execute_non_psp_fw_load(&adev->psp, &ucode); } int psp_ring_cmd_submit(struct psp_context *psp, From 295e4cccb32a3b52bbb8fe99c77c837bc5c4c8a7 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 15 Jul 2021 14:57:16 -0500 Subject: [PATCH 549/851] drm/amdkfd: Renaming dqm->packets to dqm->packet_mgr Renaming packets to packet_mgr to reflect the real meaning of this variable. 
Signed-off-by: Oak Zeng Acked-by: Christian Konig Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 26 +++++++++---------- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 6b57dfd2cd2ac..73132c90791be 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1389,7 +1389,7 @@ int kfd_debugfs_hang_hws(struct kfd_dev *dev) return -EINVAL; } - r = pm_debugfs_hang_hws(&dev->dqm->packets); + r = pm_debugfs_hang_hws(&dev->dqm->packet_mgr); if (!r) r = dqm_debugfs_execute_queues(dev->dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 16a1713808c2e..6b2f5940c1f6b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -260,7 +260,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, struct qcm_process_device *qpd) { - const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf; + const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; int ret; if (!qpd->ib_kaddr) @@ -1000,7 +1000,7 @@ static int start_nocpsch(struct device_queue_manager *dqm) init_interrupts(dqm); if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - return pm_init(&dqm->packets, dqm); + return pm_init(&dqm->packet_mgr, dqm); dqm->sched_running = true; return 0; @@ -1009,7 +1009,7 @@ static int start_nocpsch(struct device_queue_manager *dqm) static int stop_nocpsch(struct device_queue_manager *dqm) { if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - pm_uninit(&dqm->packets, false); + 
pm_uninit(&dqm->packet_mgr, false); dqm->sched_running = false; return 0; @@ -1124,7 +1124,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) "queue mask: 0x%8llX\n", res.vmid_mask, res.queue_mask); - return pm_send_set_resources(&dqm->packets, &res); + return pm_send_set_resources(&dqm->packet_mgr, &res); } static int initialize_cpsch(struct device_queue_manager *dqm) @@ -1164,7 +1164,7 @@ static int start_cpsch(struct device_queue_manager *dqm) retval = 0; - retval = pm_init(&dqm->packets, dqm); + retval = pm_init(&dqm->packet_mgr, dqm); if (retval) goto fail_packet_manager_init; @@ -1197,7 +1197,7 @@ static int start_cpsch(struct device_queue_manager *dqm) return 0; fail_allocate_vidmem: fail_set_sched_resources: - pm_uninit(&dqm->packets, false); + pm_uninit(&dqm->packet_mgr, false); fail_packet_manager_init: return retval; } @@ -1213,10 +1213,10 @@ static int stop_cpsch(struct device_queue_manager *dqm) dqm->sched_running = false; dqm_unlock(dqm); - pm_release_ib(&dqm->packets); + pm_release_ib(&dqm->packet_mgr); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); - pm_uninit(&dqm->packets, hanging); + pm_uninit(&dqm->packet_mgr, hanging); return 0; } @@ -1390,7 +1390,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) if (dqm->active_runlist) return 0; - retval = pm_send_runlist(&dqm->packets, &dqm->queues); + retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); pr_debug("%s sent runlist\n", __func__); if (retval) { pr_err("failed to execute runlist\n"); @@ -1416,13 +1416,13 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) return retval; - retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, + retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE, filter, filter_param, false, 0); if (retval) return retval; *dqm->fence_addr = KFD_FENCE_INIT; - pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, + pm_send_query_status(&dqm->packet_mgr, 
dqm->fence_gpu_addr, KFD_FENCE_COMPLETED); /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, @@ -1448,14 +1448,14 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, * check those fields */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; - if (mqd_mgr->read_doorbell_id(dqm->packets.priv_queue->queue->mqd)) { + if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); while (halt_if_hws_hang) schedule(); return -ETIME; } - pm_release_ib(&dqm->packets); + pm_release_ib(&dqm->packet_mgr); dqm->active_runlist = false; return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 71e2fde56b2b7..c8719682c4da0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -169,7 +169,7 @@ struct device_queue_manager { struct device_queue_manager_asic_ops asic_ops; struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX]; - struct packet_manager packets; + struct packet_manager packet_mgr; struct kfd_dev *dev; struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */ struct list_head queues; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index b1ce072aa20bb..72e3cd647cf36 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1630,7 +1630,7 @@ int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) } seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); - r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets); + r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr); if (r) break; } From 5806697fb136d67ebe404c044d0db212052e607a Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 15 Jul 2021 17:02:46 -0500 Subject: [PATCH 550/851] drm/amdkfd: Set priv_queue to NULL after it is freed 
This variable will be used to determine whether packet manager is initialized or not, in a future patch. Signed-off-by: Oak Zeng Acked-by: Christian Konig Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index d8e940f03102e..b130cc0a25b35 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -278,6 +278,7 @@ void pm_uninit(struct packet_manager *pm, bool hanging) { mutex_destroy(&pm->lock); kernel_queue_uninit(pm->priv_queue, hanging); + pm->priv_queue = NULL; } int pm_send_set_resources(struct packet_manager *pm, From 59dcc40a22fe507b92292be83eb1d472f26b668e Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 15 Jul 2021 18:34:25 -0500 Subject: [PATCH 551/851] drm/amdkfd: Fix a concurrency issue during kfd recovery start_cpsch and stop_cpsch can be called during kfd device initialization or during gpu reset/recovery. So they can run concurrently. Currently in start_cpsch and stop_cpsch, pm_init and pm_uninit are not protected by the dqm lock. Imagine a case where a user uses the packet manager's function to submit a pm4 packet to hang hws (i.e. through the command cat /sys/class/kfd/kfd/topology/nodes/1/gpu_id | sudo tee /sys/kernel/debug/kfd/hang_hws), while the kfd device is under device reset/recovery, so the packet manager may not be initialized. There will be an unpredictable protection fault in such a case. This patch moves pm_init/uninit inside the dqm lock and checks that the packet manager is initialized before using packet manager functions.
Signed-off-by: Oak Zeng Acked-by: Christian Konig Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 8 +------- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 12 +++++++++--- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 73132c90791be..7fc7fe9d2b45c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1382,18 +1382,12 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) */ int kfd_debugfs_hang_hws(struct kfd_dev *dev) { - int r = 0; - if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { pr_err("HWS is not enabled"); return -EINVAL; } - r = pm_debugfs_hang_hws(&dev->dqm->packet_mgr); - if (!r) - r = dqm_debugfs_execute_queues(dev->dqm); - - return r; + return dqm_debugfs_hang_hws(dev->dqm); } #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 6b2f5940c1f6b..6b89ca6ddc655 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1164,6 +1164,7 @@ static int start_cpsch(struct device_queue_manager *dqm) retval = 0; + dqm_lock(dqm); retval = pm_init(&dqm->packet_mgr, dqm); if (retval) goto fail_packet_manager_init; @@ -1186,7 +1187,6 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); - dqm_lock(dqm); /* clear hang status when driver try to start the hw scheduler */ dqm->is_hws_hang = false; dqm->is_resetting = false; @@ -1199,6 +1199,7 @@ static int start_cpsch(struct device_queue_manager *dqm) fail_set_sched_resources: pm_uninit(&dqm->packet_mgr, false); fail_packet_manager_init: + dqm_unlock(dqm); return retval; } @@ -1211,12 +1212,12 @@ static 
int stop_cpsch(struct device_queue_manager *dqm) unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); hanging = dqm->is_hws_hang || dqm->is_resetting; dqm->sched_running = false; - dqm_unlock(dqm); pm_release_ib(&dqm->packet_mgr); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packet_mgr, hanging); + dqm_unlock(dqm); return 0; } @@ -2099,11 +2100,16 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) return r; } -int dqm_debugfs_execute_queues(struct device_queue_manager *dqm) +int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) { int r = 0; dqm_lock(dqm); + r = pm_debugfs_hang_hws(&dqm->packet_mgr); + if (r) { + dqm_unlock(dqm); + return r; + } dqm->active_runlist = true; r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); dqm_unlock(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index b130cc0a25b35..b33ebe81cd95a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -448,6 +448,9 @@ int pm_debugfs_hang_hws(struct packet_manager *pm) uint32_t *buffer, size; int r = 0; + if (!pm->priv_queue) + return -EAGAIN; + size = pm->pmf->query_status_size; mutex_lock(&pm->lock); kq_acquire_packet_buffer(pm->priv_queue, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3426743ed228b..8a5dfda224bfa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1194,7 +1194,7 @@ int pm_debugfs_runlist(struct seq_file *m, void *data); int kfd_debugfs_hang_hws(struct kfd_dev *dev); int pm_debugfs_hang_hws(struct packet_manager *pm); -int dqm_debugfs_execute_queues(struct device_queue_manager *dqm); +int dqm_debugfs_hang_hws(struct device_queue_manager *dqm); #else From a871cceff87f3ddff501637f9003701856278819 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Tue, 20 Jul 2021 11:00:44 -0400 Subject: [PATCH 552/851] 
drm/amd/amdgpu: Update debugfs link_settings output link_rate field in hex link_rate is updated via debugfs using hex values, set it to output in hex as well. eg: Resolution: 1920x1080@144Hz cat /sys/kernel/debug/dri/0/DP-1/link_settings Current: 4 0x14 0 Verified: 4 0x1e 0 Reported: 4 0x1e 16 Preferred: 0 0x0 0 echo "4 0x1e" > /sys/kernel/debug/dri/0/DP-1/link_settings cat /sys/kernel/debug/dri/0/DP-1/link_settings Current: 4 0x1e 0 Verified: 4 0x1e 0 Reported: 4 0x1e 16 Preferred: 4 0x1e 0 Signed-off-by: Anson Jacob Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index f1145086a4688..1d15a9af99560 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -197,29 +197,29 @@ static ssize_t dp_link_settings_read(struct file *f, char __user *buf, rd_buf_ptr = rd_buf; - str_len = strlen("Current: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Current: %d %d %d ", + str_len = strlen("Current: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Current: %d 0x%x %d ", link->cur_link_settings.lane_count, link->cur_link_settings.link_rate, link->cur_link_settings.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Verified: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Verified: %d %d %d ", + str_len = strlen("Verified: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Verified: %d 0x%x %d ", link->verified_link_cap.lane_count, link->verified_link_cap.link_rate, link->verified_link_cap.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Reported: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Reported: %d %d %d ", + str_len = strlen("Reported: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Reported: %d 0x%x %d ", 
link->reported_link_cap.lane_count, link->reported_link_cap.link_rate, link->reported_link_cap.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Preferred: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Preferred: %d %d %d\n", + str_len = strlen("Preferred: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Preferred: %d 0x%x %d\n", link->preferred_link_setting.lane_count, link->preferred_link_setting.link_rate, link->preferred_link_setting.link_spread); From 23e58f1813832f17f668930806a9c0de089a4afd Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Tue, 20 Jul 2021 11:43:20 -0400 Subject: [PATCH 553/851] drm/amd/amdgpu: Add a new line to debugfs phy_settings output Add new line to phy_settings output Signed-off-by: Anson Jacob Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 1d15a9af99560..87daa78a32b8b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -377,7 +377,7 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf, if (!rd_buf) return -EINVAL; - snprintf(rd_buf, rd_buf_size, " %d %d %d ", + snprintf(rd_buf, rd_buf_size, " %d %d %d\n", link->cur_lane_setting.VOLTAGE_SWING, link->cur_lane_setting.PRE_EMPHASIS, link->cur_lane_setting.POST_CURSOR2); From 02ea35ccff7b9cd4e4d5ac5fe31761b696c77e95 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Mon, 19 Jul 2021 13:46:09 -0400 Subject: [PATCH 554/851] drm/amdgpu: Fix documentaion for dm_dmub_outbox1_low_irq Fix make htmldocs complaint: ./drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:628: warning: Excess function parameter 'interrupt_params' description in 'DMUB_TRACE_MAX_READ' v2: Moved DMUB_TRACE_MAX_READ macro above function documentation 
Signed-off-by: Anson Jacob CC: Harry Wentland Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 818a825539030..0a1dd25e567d9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -618,6 +618,7 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) } #endif +#define DMUB_TRACE_MAX_READ 64 /** * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt * @interrupt_params: used for determining the Outbox instance @@ -625,7 +626,6 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) * Handles the Outbox Interrupt * event handler. */ -#define DMUB_TRACE_MAX_READ 64 static void dm_dmub_outbox1_low_irq(void *interrupt_params) { struct dmub_notification notify; From 09ba3197b43a40815a5715d29c968675bff289b8 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Tue, 6 Jul 2021 17:46:37 -0400 Subject: [PATCH 555/851] drm/amdkfd: Update SMI throttle event bitmask Update Arcturus/Aldebaran thermal throttle SMI event path to use ASIC-independent throttler bits when logging. 
Signed-off-by: Graham Sider Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 10 +++++----- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 4 +++- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a8b05c6ddce5f..0462d4aceab7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -332,7 +332,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask); +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); #else static inline int kgd2kfd_init(void) { @@ -391,7 +391,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) } static inline -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { } #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7fc7fe9d2b45c..fd1fd20cd70c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1369,7 +1369,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd) WARN_ONCE(count < 0, "Compute profile ref. 
count error"); } -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { if (kfd && kfd->init_complete) kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 2465224235593..ed4bc5f844ce7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -205,23 +205,23 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) } void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, - uint32_t throttle_bitmask) + uint64_t throttle_bitmask) { struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; /* * ThermalThrottle msg = throttle_bitmask(8): * thermal_interrupt_count(16): - * 1 byte event + 1 byte space + 8 byte throttle_bitmask + + * 1 byte event + 1 byte space + 16 byte throttle_bitmask + * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n + - * 1 byte \0 = 29 + * 1 byte \0 = 37 */ - char fifo_in[29]; + char fifo_in[37]; int len; if (list_empty(&dev->smi_clients)) return; - len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n", + len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n", KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask, atomic64_read(&adev->smu.throttle_int_counter)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index b9b0438202e21..bffd0c32b0603 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -26,7 +26,7 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, - uint32_t throttle_bitmask); + uint64_t throttle_bitmask); void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, 
bool post_reset); #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 6b3e0ea10163a..6ec8492f71f59 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2178,7 +2178,9 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu) dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n", log_buf); - kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status); + kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, + smu_cmn_get_indep_throttler_status(throttler_status, + arcturus_throttler_map)); } static uint16_t arcturus_get_current_pcie_link_speed(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 4b12c3b807e9d..856eeaf293b89 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1653,7 +1653,9 @@ static void aldebaran_log_thermal_throttling_event(struct smu_context *smu) dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n", log_buf); - kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status); + kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, + smu_cmn_get_indep_throttler_status(throttler_status, + aldebaran_throttler_map)); } static int aldebaran_get_current_pcie_link_speed(struct smu_context *smu) From e176c81caff99fa40729535892e023f998a2c187 Mon Sep 17 00:00:00 2001 From: Jingwen Chen Date: Tue, 20 Jul 2021 18:35:35 +0800 Subject: [PATCH 556/851] drm/amd/amdgpu: consider kernel job always not guilty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Currently all timedout job will be considered to be guilty. 
In SRIOV multi-vf use case, the vf flr happens first and then job time out is found. There can be several jobs timeout during a very small time slice. And if the innocent sdma job time out is found before the real bad job, then the innocent sdma job will be set to guilty. This will lead to a page fault after resubmitting job. [How] If the job is a kernel job, we will always consider it not guilty Reviewed-by: Christian König Signed-off-by: Jingwen Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f8bc670fe3f9e..5d2453cc880c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4468,7 +4468,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, amdgpu_fence_driver_force_completion(ring); } - if(job) + if (job && job->vm) drm_sched_increase_karma(&job->base); r = amdgpu_reset_prepare_hwcontext(adev, reset_context); @@ -4932,7 +4932,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", job ? job->base.id : -1, hive->hive_id); amdgpu_put_xgmi_hive(hive); - if (job) + if (job && job->vm) drm_sched_increase_karma(&job->base); return 0; } @@ -4956,7 +4956,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, job ? 
job->base.id : -1); /* even we skipped this reset, still need to set the job to guilty */ - if (job) + if (job && job->vm) drm_sched_increase_karma(&job->base); goto skip_recovery; } From 45ba189fd1010c4924f297dc1bfd0089a24ec130 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Wed, 21 Jul 2021 13:55:59 -0400 Subject: [PATCH 557/851] drm/amd/amdgpu: add consistent PSP FW loading size checking Signed-off-by: Candice Li Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 108 ++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 31 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 4 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +- 11 files changed, 95 insertions(+), 86 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 0de9ccd47eb9d..20b049ad61c10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -374,8 +374,8 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = adev->sdma.instance[query_fw->index].feature_version; break; case AMDGPU_INFO_FW_SOS: - fw_info->ver = adev->psp.sos_fw_version; - fw_info->feature = adev->psp.sos_feature_version; + fw_info->ver = adev->psp.sos.fw_version; + fw_info->feature = adev->psp.sos.feature_version; break; case AMDGPU_INFO_FW_ASD: fw_info->ver = adev->psp.asd_fw_version; @@ -390,8 +390,8 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = 0; break; case AMDGPU_INFO_FW_TOC: - fw_info->ver = adev->psp.toc_fw_version; - fw_info->feature = 
adev->psp.toc_feature_version; + fw_info->ver = adev->psp.toc.fw_version; + fw_info->feature = adev->psp.toc.feature_version; break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 135f82dfbac6c..c514c052691d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -516,9 +516,9 @@ static int psp_load_toc(struct psp_context *psp, if (!cmd) return -ENOMEM; /* Copy toc to psp firmware private buffer */ - psp_copy_fw(psp, psp->toc_start_addr, psp->toc_bin_size); + psp_copy_fw(psp, psp->toc.start_addr, psp->toc.size_bytes); - psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc_bin_size); + psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc.size_bytes); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -548,8 +548,8 @@ static int psp_tmr_init(struct psp_context *psp) /* For ASICs support RLC autoload, psp will parse the toc * and calculate the total size of TMR needed */ if (!amdgpu_sriov_vf(psp->adev) && - psp->toc_start_addr && - psp->toc_bin_size && + psp->toc.start_addr && + psp->toc.size_bytes && psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); if (ret) { @@ -728,18 +728,18 @@ static int psp_rl_load(struct amdgpu_device *adev) struct psp_context *psp = &adev->psp; struct psp_gfx_cmd_resp *cmd = psp->cmd; - if (psp->rl_bin_size == 0) + if (!is_psp_fw_valid(psp->rl)) return 0; memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->rl_start_addr, psp->rl_bin_size); + memcpy(psp->fw_pri_buf, psp->rl.start_addr, psp->rl.size_bytes); memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(psp->fw_pri_mc_addr); cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(psp->fw_pri_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl_bin_size; + cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl.size_bytes; 
cmd->cmd.cmd_load_ip_fw.fw_type = GFX_FW_TYPE_REG_LIST; return psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -2129,7 +2129,7 @@ static int psp_hw_start(struct psp_context *psp) int ret; if (!amdgpu_sriov_vf(adev)) { - if (psp->kdb_bin_size && + if ((is_psp_fw_valid(psp->kdb)) && (psp->funcs->bootloader_load_kdb != NULL)) { ret = psp_bootloader_load_kdb(psp); if (ret) { @@ -2138,7 +2138,8 @@ static int psp_hw_start(struct psp_context *psp) } } - if (psp->spl_bin_size) { + if ((is_psp_fw_valid(psp->spl)) && + (psp->funcs->bootloader_load_spl != NULL)) { ret = psp_bootloader_load_spl(psp); if (ret) { DRM_ERROR("PSP load spl failed!\n"); @@ -2146,16 +2147,22 @@ static int psp_hw_start(struct psp_context *psp) } } - ret = psp_bootloader_load_sysdrv(psp); - if (ret) { - DRM_ERROR("PSP load sysdrv failed!\n"); - return ret; + if ((is_psp_fw_valid(psp->sys)) && + (psp->funcs->bootloader_load_sysdrv != NULL)) { + ret = psp_bootloader_load_sysdrv(psp); + if (ret) { + DRM_ERROR("PSP load sysdrv failed!\n"); + return ret; + } } - ret = psp_bootloader_load_sos(psp); - if (ret) { - DRM_ERROR("PSP load sos failed!\n"); - return ret; + if ((is_psp_fw_valid(psp->sos)) && + (psp->funcs->bootloader_load_sos != NULL)) { + ret = psp_bootloader_load_sos(psp); + if (ret) { + DRM_ERROR("PSP load sos failed!\n"); + return ret; + } } } @@ -2997,10 +3004,10 @@ int psp_init_toc_microcode(struct psp_context *psp, goto out; toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; - adev->psp.toc_fw_version = le32_to_cpu(toc_hdr->header.ucode_version); - adev->psp.toc_feature_version = le32_to_cpu(toc_hdr->sos.fw_version); - adev->psp.toc_bin_size = le32_to_cpu(toc_hdr->header.ucode_size_bytes); - adev->psp.toc_start_addr = (uint8_t *)toc_hdr + + adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); + adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); + adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr + le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); return 0; out: @@ -3021,32 +3028,32 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev) le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); if (adev->gmc.xgmi.connected_to_cpu || (adev->asic_type != CHIP_ALDEBARAN)) { - adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); - adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->sos.fw_version); + adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version); + adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version); - adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos.offset_bytes); - adev->psp.sys_start_addr = ucode_array_start_addr; + adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr->sos.offset_bytes); + adev->psp.sys.start_addr = ucode_array_start_addr; - adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos.size_bytes); - adev->psp.sos_start_addr = ucode_array_start_addr + + adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr->sos.size_bytes); + adev->psp.sos.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr->sos.offset_bytes); } else { /* Load alternate PSP SOS FW */ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data; - adev->psp.sos_fw_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); - adev->psp.sos_feature_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); + adev->psp.sos.fw_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); + adev->psp.sos.feature_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); - adev->psp.sys_bin_size = le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.size_bytes); - adev->psp.sys_start_addr = ucode_array_start_addr + + adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.size_bytes); + adev->psp.sys.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.offset_bytes); - adev->psp.sos_bin_size = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes); - 
adev->psp.sos_start_addr = ucode_array_start_addr + + adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes); + adev->psp.sos.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes); } - if ((adev->psp.sys_bin_size == 0) || (adev->psp.sos_bin_size == 0)) { + if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes == 0)) { dev_warn(adev->dev, "PSP SOS FW not available"); return -EINVAL; } @@ -3093,32 +3100,32 @@ int psp_init_sos_microcode(struct psp_context *psp, if (sos_hdr->header.header_version_minor == 1) { sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data; - adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc.size_bytes); - adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr + + adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_1->toc.size_bytes); + adev->psp.toc.start_addr = (uint8_t *)adev->psp.sys.start_addr + le32_to_cpu(sos_hdr_v1_1->toc.offset_bytes); - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_1->kdb.size_bytes); - adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_1->kdb.size_bytes); + adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr + le32_to_cpu(sos_hdr_v1_1->kdb.offset_bytes); } if (sos_hdr->header.header_version_minor == 2) { sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb.size_bytes); - adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_2->kdb.size_bytes); + adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr + le32_to_cpu(sos_hdr_v1_2->kdb.offset_bytes); } if (sos_hdr->header.header_version_minor == 3) { sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data; - adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.toc.size_bytes); - adev->psp.toc_start_addr = 
ucode_array_start_addr + + adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.toc.size_bytes); + adev->psp.toc.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->v1_1.toc.offset_bytes); - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.size_bytes); - adev->psp.kdb_start_addr = ucode_array_start_addr + + adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.size_bytes); + adev->psp.kdb.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.offset_bytes); - adev->psp.spl_bin_size = le32_to_cpu(sos_hdr_v1_3->spl.size_bytes); - adev->psp.spl_start_addr = ucode_array_start_addr + + adev->psp.spl.size_bytes = le32_to_cpu(sos_hdr_v1_3->spl.size_bytes); + adev->psp.spl.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->spl.offset_bytes); - adev->psp.rl_bin_size = le32_to_cpu(sos_hdr_v1_3->rl.size_bytes); - adev->psp.rl_start_addr = ucode_array_start_addr + + adev->psp.rl.size_bytes = le32_to_cpu(sos_hdr_v1_3->rl.size_bytes); + adev->psp.rl.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->rl.offset_bytes); } break; @@ -3361,7 +3368,10 @@ static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR, psp_usbc_pd_fw_sysfs_read, psp_usbc_pd_fw_sysfs_write); - +int is_psp_fw_valid(struct psp_bin_desc bin) +{ + return bin.size_bytes; +} const struct amd_ip_funcs psp_ip_funcs = { .name = "psp", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index f0e32a958171a..64afcd645ec4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -48,6 +48,7 @@ struct psp_context; struct psp_xgmi_node_info; struct psp_xgmi_topology_info; +struct psp_bin_desc; enum psp_bootloader_cmd { PSP_BL__LOAD_SYSDRV = 0x10000, @@ -283,6 +284,13 @@ struct psp_runtime_boot_cfg_entry { uint32_t reserved; }; +struct psp_bin_desc { + uint32_t fw_version; + uint32_t feature_version; + uint32_t size_bytes; + uint8_t *start_addr; +}; + struct 
psp_context { struct amdgpu_device *adev; @@ -298,20 +306,12 @@ struct psp_context /* sos firmware */ const struct firmware *sos_fw; - uint32_t sos_fw_version; - uint32_t sos_feature_version; - uint32_t sys_bin_size; - uint32_t sos_bin_size; - uint32_t toc_bin_size; - uint32_t kdb_bin_size; - uint32_t spl_bin_size; - uint32_t rl_bin_size; - uint8_t *sys_start_addr; - uint8_t *sos_start_addr; - uint8_t *toc_start_addr; - uint8_t *kdb_start_addr; - uint8_t *spl_start_addr; - uint8_t *rl_start_addr; + struct psp_bin_desc sys; + struct psp_bin_desc sos; + struct psp_bin_desc toc; + struct psp_bin_desc kdb; + struct psp_bin_desc spl; + struct psp_bin_desc rl; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -326,8 +326,6 @@ struct psp_context /* toc firmware */ const struct firmware *toc_fw; - uint32_t toc_fw_version; - uint32_t toc_feature_version; /* fence buffer */ struct amdgpu_bo *fence_buf_bo; @@ -484,4 +482,5 @@ int psp_load_fw_list(struct psp_context *psp, struct amdgpu_firmware_info **ucode_list, int ucode_count); void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size); +int is_psp_fw_valid(struct psp_bin_desc bin); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index fe601f67d3a73..8e9b1f9fa34f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -518,7 +518,7 @@ FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); -FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version); FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_ras_ucode_version); FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_xgmi_ucode_version); 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index b71dd1deeb2d3..12a7cc2f01cd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -531,7 +531,7 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev) POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ta_ras_ucode_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.ta_xgmi_ucode_version); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d102cfd36ba2d..773368a59487e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5397,7 +5397,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) int ret; RLC_TABLE_OF_CONTENT *rlc_toc; - ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE, + ret = amdgpu_bo_create_reserved(adev, adev->psp.toc.size_bytes, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->gfx.rlc.rlc_toc_bo, &adev->gfx.rlc.rlc_toc_gpu_addr, @@ -5408,7 +5408,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) } /* Copy toc from psp sos fw to rlc toc buffer */ - memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size); + memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc.start_addr, adev->psp.toc.size_bytes); rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf; while 
(rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) && diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 86a1837113882..8862684f8b43b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -287,7 +287,7 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) return ret; /* Copy PSP KDB binary to memory */ - psp_copy_fw(psp, psp->kdb_start_addr, psp->kdb_bin_size); + psp_copy_fw(psp, psp->kdb.start_addr, psp->kdb.size_bytes); /* Provide the PSP KDB to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -318,7 +318,7 @@ static int psp_v11_0_bootloader_load_spl(struct psp_context *psp) return ret; /* Copy PSP SPL binary to memory */ - psp_copy_fw(psp, psp->spl_start_addr, psp->spl_bin_size); + psp_copy_fw(psp, psp->spl.start_addr, psp->spl.size_bytes); /* Provide the PSP SPL to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -349,7 +349,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) return ret; /* Copy PSP System Driver binary to memory */ - psp_copy_fw(psp, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -383,7 +383,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) return ret; /* Copy Secure OS binary to PSP memory */ - psp_copy_fw(psp, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 536d41f327c1b..0c908d4566e8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -138,7 +138,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) return ret; /* Copy PSP 
System Driver binary to memory */ - psp_copy_fw(psp, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -177,7 +177,7 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) return ret; /* Copy Secure OS binary to PSP memory */ - psp_copy_fw(psp, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index e1046bb3bab00..d017da3ceadbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -136,7 +136,7 @@ static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy PSP KDB binary to memory */ - memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size); + memcpy(psp->fw_pri_buf, psp->kdb.start_addr, psp->kdb.size_bytes); /* Provide the PSP KDB to bootloader */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, @@ -169,7 +169,7 @@ static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy PSP System Driver binary to memory */ - memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + memcpy(psp->fw_pri_buf, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, @@ -205,7 +205,7 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy Secure OS binary to PSP memory */ - memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, diff --git 
a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index b86dcbabb6352..1ed357cb0f490 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -103,7 +103,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) return ret; /* Copy PSP System Driver binary to memory */ - psp_copy_fw(psp, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -142,7 +142,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) return ret; /* Copy Secure OS binary to PSP memory */ - psp_copy_fw(psp, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b7d350be80502..a5e085e570f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -575,7 +575,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) baco_reset = amdgpu_dpm_is_baco_supported(adev); break; case CHIP_VEGA20: - if (adev->psp.sos_fw_version >= 0x80067) + if (adev->psp.sos.fw_version >= 0x80067) baco_reset = amdgpu_dpm_is_baco_supported(adev); /* @@ -635,7 +635,7 @@ static bool soc15_supports_baco(struct amdgpu_device *adev) case CHIP_ARCTURUS: return amdgpu_dpm_is_baco_supported(adev); case CHIP_VEGA20: - if (adev->psp.sos_fw_version >= 0x80067) + if (adev->psp.sos.fw_version >= 0x80067) return amdgpu_dpm_is_baco_supported(adev); return false; default: From ec0519110c8b493324e39733b9946b587572e190 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 21 Jul 2021 01:22:15 +0800 Subject: [PATCH 558/851] drm/amdgpu/acp: Make PM domain really work Devices created by mfd_add_hotplug_devices() don't really increase the index of its 
name, so get_mfd_cell_dev() cannot find any device, hence a NULL dev is passed to pm_genpd_add_device(): [ 56.974926] (NULL device *): amdgpu: device acp_audio_dma.0.auto added to pm domain [ 56.974933] (NULL device *): amdgpu: Failed to add dev to genpd [ 56.974941] [drm:amdgpu_device_ip_init [amdgpu]] *ERROR* hw_init of IP block failed -22 [ 56.975810] amdgpu 0000:00:01.0: amdgpu: amdgpu_device_ip_init failed [ 56.975839] amdgpu 0000:00:01.0: amdgpu: Fatal error during GPU init [ 56.977136] ------------[ cut here ]------------ [ 56.977143] kernel BUG at mm/slub.c:4206! [ 56.977158] invalid opcode: 0000 [#1] SMP NOPTI [ 56.977167] CPU: 1 PID: 1648 Comm: modprobe Not tainted 5.12.0-051200rc8-generic #202104182230 [ 56.977175] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./FM2A68M-HD+, BIOS P5.20 02/13/2019 [ 56.977180] RIP: 0010:kfree+0x3bf/0x410 [ 56.977195] Code: 89 e7 48 d3 e2 f7 da e8 5f 0d 02 00 80 e7 02 75 3e 44 89 ee 4c 89 e7 e8 ef 5f fd ff e9 fa fe ff ff 49 8b 44 24 08 a8 01 75 b7 <0f> 0b 4c 8b 4d b0 48 8b 4d a8 48 89 da 4c 89 e6 41 b8 01 00 00 00 [ 56.977202] RSP: 0018:ffffa48640ff79f0 EFLAGS: 00010246 [ 56.977210] RAX: 0000000000000000 RBX: ffff9286127d5608 RCX: 0000000000000000 [ 56.977215] RDX: 0000000000000000 RSI: ffffffffc099d0fb RDI: ffff9286127d5608 [ 56.977220] RBP: ffffa48640ff7a48 R08: 0000000000000001 R09: 0000000000000001 [ 56.977224] R10: 0000000000000000 R11: ffff9286087d8458 R12: fffff3ae0449f540 [ 56.977229] R13: 0000000000000000 R14: dead000000000122 R15: dead000000000100 [ 56.977234] FS: 00007f9de5929540(0000) GS:ffff928612e80000(0000) knlGS:0000000000000000 [ 56.977240] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 56.977245] CR2: 00007f697dd97160 CR3: 00000001110f0000 CR4: 00000000001506e0 [ 56.977251] Call Trace: [ 56.977261] amdgpu_dm_encoder_destroy+0x1b/0x30 [amdgpu] [ 56.978056] drm_mode_config_cleanup+0x4f/0x2e0 [drm] [ 56.978147] ? kfree+0x3dd/0x410 [ 56.978157] ? 
drm_managed_release+0xc8/0x100 [drm] [ 56.978232] drm_mode_config_init_release+0xe/0x10 [drm] [ 56.978311] drm_managed_release+0x9d/0x100 [drm] [ 56.978388] devm_drm_dev_init_release+0x4d/0x70 [drm] [ 56.978450] devm_action_release+0x15/0x20 [ 56.978459] release_nodes+0x77/0xc0 [ 56.978469] devres_release_all+0x3f/0x50 [ 56.978477] really_probe+0x245/0x460 [ 56.978485] driver_probe_device+0xe9/0x160 [ 56.978492] device_driver_attach+0xab/0xb0 [ 56.978499] __driver_attach+0x8f/0x150 [ 56.978506] ? device_driver_attach+0xb0/0xb0 [ 56.978513] bus_for_each_dev+0x7e/0xc0 [ 56.978521] driver_attach+0x1e/0x20 [ 56.978528] bus_add_driver+0x135/0x1f0 [ 56.978534] driver_register+0x91/0xf0 [ 56.978540] __pci_register_driver+0x54/0x60 [ 56.978549] amdgpu_init+0x77/0x1000 [amdgpu] [ 56.979246] ? 0xffffffffc0dbc000 [ 56.979254] do_one_initcall+0x48/0x1d0 [ 56.979265] ? kmem_cache_alloc_trace+0x120/0x230 [ 56.979274] ? do_init_module+0x28/0x280 [ 56.979282] do_init_module+0x62/0x280 [ 56.979288] load_module+0x71c/0x7a0 [ 56.979296] __do_sys_finit_module+0xc2/0x120 [ 56.979305] __x64_sys_finit_module+0x1a/0x20 [ 56.979311] do_syscall_64+0x38/0x90 [ 56.979319] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 56.979328] RIP: 0033:0x7f9de54f989d [ 56.979335] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c3 f5 0c 00 f7 d8 64 89 01 48 [ 56.979342] RSP: 002b:00007ffe3c395a28 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 56.979350] RAX: ffffffffffffffda RBX: 0000560df3ef4330 RCX: 00007f9de54f989d [ 56.979355] RDX: 0000000000000000 RSI: 0000560df3a07358 RDI: 000000000000000f [ 56.979360] RBP: 0000000000040000 R08: 0000000000000000 R09: 0000000000000000 [ 56.979365] R10: 000000000000000f R11: 0000000000000246 R12: 0000560df3a07358 [ 56.979369] R13: 0000000000000000 R14: 0000560df3ef4460 R15: 0000560df3ef4330 [ 56.979377] Modules linked in: amdgpu(+) iommu_v2 gpu_sched 
drm_ttm_helper ttm drm_kms_helper cec rc_core i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt nft_counter xt_tcpudp ipt_REJECT nf_reject_ipv4 xt_conntrack iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_mangle iptable_raw iptable_security ip_set nf_tables libcrc32c nfnetlink ip6_tables iptable_filter bpfilter input_leds binfmt_misc edac_mce_amd kvm_amd ccp kvm snd_hda_codec_realtek snd_hda_codec_generic crct10dif_pclmul snd_hda_codec_hdmi ledtrig_audio ghash_clmulni_intel aesni_intel snd_hda_intel snd_intel_dspcfg snd_seq_midi crypto_simd snd_intel_sdw_acpi cryptd snd_hda_codec snd_seq_midi_event snd_rawmidi snd_hda_core snd_hwdep snd_seq fam15h_power k10temp snd_pcm snd_seq_device snd_timer snd mac_hid soundcore sch_fq_codel nct6775 hwmon_vid drm ip_tables x_tables autofs4 dm_mirror dm_region_hash dm_log hid_generic usbhid hid uas usb_storage r8169 crc32_pclmul realtek ahci xhci_pci i2c_piix4 [ 56.979521] xhci_pci_renesas libahci video [ 56.979541] ---[ end trace cb8f6a346f18da7b ]--- Instead of finding MFD hotplugged device by its name, simply iterate over the child devices to avoid the issue. 
Squash in unused variable removal (Alex) BugLink: https://bugs.launchpad.net/bugs/1920674 Fixes: 25030321ba28 ("drm/amd: add pm domain for ACP IP sub blocks") Signed-off-by: Kai-Heng Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 54 ++++++++++++------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index b8655ff73a658..cc9c9f8b23b2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -160,17 +160,28 @@ static int acp_poweron(struct generic_pm_domain *genpd) return 0; } -static struct device *get_mfd_cell_dev(const char *device_name, int r) +static int acp_genpd_add_device(struct device *dev, void *data) { - char auto_dev_name[25]; - struct device *dev; + struct generic_pm_domain *gpd = data; + int ret; - snprintf(auto_dev_name, sizeof(auto_dev_name), - "%s.%d.auto", device_name, r); - dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name); - dev_info(dev, "device %s added to pm domain\n", auto_dev_name); + ret = pm_genpd_add_device(gpd, dev); + if (ret) + dev_err(dev, "Failed to add dev to genpd %d\n", ret); - return dev; + return ret; +} + +static int acp_genpd_remove_device(struct device *dev, void *data) +{ + int ret; + + ret = pm_genpd_remove_device(dev); + if (ret) + dev_err(dev, "Failed to remove dev from genpd %d\n", ret); + + /* Continue to remove */ + return 0; } /** @@ -181,11 +192,10 @@ static struct device *get_mfd_cell_dev(const char *device_name, int r) */ static int acp_hw_init(void *handle) { - int r, i; + int r; uint64_t acp_base; u32 val = 0; u32 count = 0; - struct device *dev; struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -341,15 +351,10 @@ static int acp_hw_init(void *handle) if (r) goto failure; - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, 
i); - r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); - if (r) { - dev_err(dev, "Failed to add dev to genpd\n"); - goto failure; - } - } - + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -410,10 +415,8 @@ static int acp_hw_init(void *handle) */ static int acp_hw_fini(void *handle) { - int i, ret; u32 val = 0; u32 count = 0; - struct device *dev; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* return early if no ACP */ @@ -458,13 +461,8 @@ static int acp_hw_fini(void *handle) udelay(100); } - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(dev); - /* If removal fails, dont giveup and try rest */ - if (ret) - dev_err(dev, "remove dev from genpd failed\n"); - } + device_for_each_child(adev->acp.parent, NULL, + acp_genpd_remove_device); mfd_remove_devices(adev->acp.parent); kfree(adev->acp.acp_res); From 5b46393666224e9aeb25555d36696f7e598463f1 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 2 Jun 2021 10:32:41 +0800 Subject: [PATCH 559/851] drm/amdgpu: update yellow carp external rev_id handling 0x1681 has a different external revision id. 
Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 751c7b8b13e66..94d029dbf30da 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1236,7 +1236,10 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; - adev->external_rev_id = adev->rev_id + 0x01; + if (adev->pdev->device == 0x1681) + adev->external_rev_id = adev->rev_id + 0x19; + else + adev->external_rev_id = adev->rev_id + 0x01; break; default: /* FIXME: not supported yet */ From 057811cc70c48933ad8d69d6fed0a7de42d5639d Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 4 Nov 2020 13:04:06 +0800 Subject: [PATCH 560/851] drm/amdgpu: add yellow carp pci id (v2) Add Yellow Carp PCI id support. v2: add another DID Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 149fbdffe8ecc..fe29acff69aef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1189,6 +1189,10 @@ static const struct pci_device_id pciidlist[] = { /* Van Gogh */ {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, + /* Yellow Carp */ + {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + /* Navy_Flounder */ {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, From 90a5360110e8f93fd4d94cdc7b23ab7a9b43764d Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 19 May 2021 17:57:35 +0800 Subject: 
[PATCH 561/851] drm/amdgpu: increase size for sdma fw name string Longer firmware name needs more space. Signed-off-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 6117ba8a4c3fd..67f354475d6d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -200,7 +200,7 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; + char fw_name[40]; int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; From 14773ad860d01e6efb77bd38b40ac4f39c1ce15e Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Wed, 23 Jun 2021 14:48:34 +0800 Subject: [PATCH 562/851] drm/amdgpu: adjust fw_name string length for toc Adjust toc fw_name string length to PSP_FW_NAME_LEN. Signed-off-by: Lang Yu Reviewed-by: Tao Zhou Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c514c052691d0..caae01690815e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2985,7 +2985,7 @@ int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[30]; + char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; From b443f9c71e50fd5142722a3737e2e5a84bf01ff4 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:13:48 -0400 Subject: [PATCH 563/851] drm/amdgpu: add cyan_skillfish asic type Add cyan_skillfish asic family. 
Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ drivers/gpu/drm/amd/include/amd_shared.h | 1 + include/drm/amd_asic_type.h | 17 +++++++++-------- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5d2453cc880c1..ba7b193ef0e9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -116,6 +116,7 @@ const char *amdgpu_asic_name[] = { "RENOIR", "ALDEBARAN", "NAVI10", + "CYAN_SKILLFISH", "NAVI14", "NAVI12", "SIENNA_CICHLID", @@ -1439,6 +1440,10 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) break; case CHIP_YELLOW_CARP: break; + case CHIP_CYAN_SKILLFISH: + if (adev->pdev->device == 0x13FE) + adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index ff1d3d4a64889..257f280d3d53f 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -47,6 +47,7 @@ enum amd_apu_flags { AMD_APU_IS_RENOIR = 0x00000008UL, AMD_APU_IS_GREEN_SARDINE = 0x00000010UL, AMD_APU_IS_VANGOGH = 0x00000020UL, + AMD_APU_IS_CYAN_SKILLFISH2 = 0x00000040UL, }; /** diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index 0bf0ad869eb9f..0f66a0d9f06d0 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -53,14 +53,15 @@ enum amd_asic_type { CHIP_RENOIR, /* 24 */ CHIP_ALDEBARAN, /* 25 */ CHIP_NAVI10, /* 26 */ - CHIP_NAVI14, /* 27 */ - CHIP_NAVI12, /* 28 */ - CHIP_SIENNA_CICHLID, /* 29 */ - CHIP_NAVY_FLOUNDER, /* 30 */ - CHIP_VANGOGH, /* 31 */ - CHIP_DIMGREY_CAVEFISH, /* 32 */ - CHIP_BEIGE_GOBY, /* 33 */ - CHIP_YELLOW_CARP, /* 34 */ + CHIP_CYAN_SKILLFISH, /* 27 */ + CHIP_NAVI14, /* 28 */ + CHIP_NAVI12, /* 29 */ + 
CHIP_SIENNA_CICHLID, /* 30 */ + CHIP_NAVY_FLOUNDER, /* 31 */ + CHIP_VANGOGH, /* 32 */ + CHIP_DIMGREY_CAVEFISH, /* 33 */ + CHIP_BEIGE_GOBY, /* 34 */ + CHIP_YELLOW_CARP, /* 35 */ CHIP_LAST, }; From 1d29c29d0eb84118c331869ccb4edc6fe3123921 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:16:43 -0400 Subject: [PATCH 564/851] drm/amdgpu: dynamic initialize ip offset for cyan_skillfish Add ip offset definition for cyan_skillfish and initialize it. v2: squash in ip_offset updates (Alex) Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- .../drm/amd/amdgpu/cyan_skillfish_reg_init.c | 51 ++ drivers/gpu/drm/amd/amdgpu/nv.c | 3 + drivers/gpu/drm/amd/amdgpu/nv.h | 1 + .../amd/include/cyan_skillfish_ip_offset.h | 714 ++++++++++++++++++ 5 files changed, 770 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c create mode 100644 drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 7d292485ca7cf..aa5ca7e227e6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -76,7 +76,7 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \ nbio_v7_2.o dimgrey_cavefish_reg_init.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o \ - beige_goby_reg_init.o yellow_carp_reg_init.o + beige_goby_reg_init.o yellow_carp_reg_init.o cyan_skillfish_reg_init.o # add DF block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c new file mode 100644 index 0000000000000..58808814d8fb0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c @@ -0,0 +1,51 @@ +/* + * Copyright 2018 Advanced Micro 
Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "cyan_skillfish_ip_offset.h" + +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialize the blocks needed by the driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 94d029dbf30da..bc89d00ff7bc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -666,6 +666,9 @@ static int nv_reg_base_init(struct amdgpu_device *adev) case CHIP_YELLOW_CARP: yellow_carp_reg_base_init(adev); break; + case CHIP_CYAN_SKILLFISH: + cyan_skillfish_reg_base_init(adev); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index eb9aff1e7253d..1f40ba3b04606 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ 
b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -38,5 +38,6 @@ void vangogh_reg_base_init(struct amdgpu_device *adev); int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev); int beige_goby_reg_base_init(struct amdgpu_device *adev); int yellow_carp_reg_base_init(struct amdgpu_device *adev); +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h b/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h new file mode 100644 index 0000000000000..9cb5f3631c609 --- /dev/null +++ b/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h @@ -0,0 +1,714 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _cyan_skillfish_ip_offset_HEADER +#define _cyan_skillfish_ip_offset_HEADER + +#define MAX_INSTANCE 6 +#define MAX_SEGMENT 5 + + +struct IP_BASE_INSTANCE +{ + unsigned int segment[MAX_SEGMENT]; +}; + +struct IP_BASE +{ + struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; +}; + + +static const struct IP_BASE ATHUB_BASE ={ { { { 0x00000C00, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE CLK_BASE ={ { { { 0x00016C00, 0, 0, 0, 0 } }, + { { 0x00016E00, 0, 0, 0, 0 } }, + { { 0x00017000, 0, 0, 0, 0 } }, + { { 0x00017200, 0, 0, 0, 0 } }, + { { 0x00017E00, 0, 0, 0, 0 } }, + { { 0x0001B000, 0, 0, 0, 0 } } } }; +static const struct IP_BASE DF_BASE ={ { { { 0x00007000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE DMU_BASE ={ { { { 0x00000012, 0x000000C0, 0x000034C0, 0x00009000, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE FUSE_BASE ={ { { { 0x00017400, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE GC_BASE ={ { { { 0x00001260, 0x0000A000, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE HDP_BASE ={ { { { 0x00000F20, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE MMHUB_BASE ={ { { { 0x0001A000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } 
} } }; +static const struct IP_BASE MP0_BASE ={ { { { 0x00016000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE MP1_BASE ={ { { { 0x00016000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE NBIO_BASE ={ { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE OSSSYS_BASE ={ { { { 0x000010A0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE SMUIO_BASE ={ { { { 0x00016800, 0x00016A00, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE THM_BASE ={ { { { 0x00016600, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE UMC0_BASE ={ { { { 0x00014000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE UVD0_BASE ={ { { { 0x00007800, 0x00007E00, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; + + +#define ATHUB_BASE__INST0_SEG0 0x00000C00 +#define ATHUB_BASE__INST0_SEG1 0 +#define ATHUB_BASE__INST0_SEG2 0 +#define ATHUB_BASE__INST0_SEG3 0 +#define ATHUB_BASE__INST0_SEG4 0 + +#define ATHUB_BASE__INST1_SEG0 0 +#define ATHUB_BASE__INST1_SEG1 0 +#define ATHUB_BASE__INST1_SEG2 0 +#define 
ATHUB_BASE__INST1_SEG3 0 +#define ATHUB_BASE__INST1_SEG4 0 + +#define ATHUB_BASE__INST2_SEG0 0 +#define ATHUB_BASE__INST2_SEG1 0 +#define ATHUB_BASE__INST2_SEG2 0 +#define ATHUB_BASE__INST2_SEG3 0 +#define ATHUB_BASE__INST2_SEG4 0 + +#define ATHUB_BASE__INST3_SEG0 0 +#define ATHUB_BASE__INST3_SEG1 0 +#define ATHUB_BASE__INST3_SEG2 0 +#define ATHUB_BASE__INST3_SEG3 0 +#define ATHUB_BASE__INST3_SEG4 0 + +#define ATHUB_BASE__INST4_SEG0 0 +#define ATHUB_BASE__INST4_SEG1 0 +#define ATHUB_BASE__INST4_SEG2 0 +#define ATHUB_BASE__INST4_SEG3 0 +#define ATHUB_BASE__INST4_SEG4 0 + +#define ATHUB_BASE__INST5_SEG0 0 +#define ATHUB_BASE__INST5_SEG1 0 +#define ATHUB_BASE__INST5_SEG2 0 +#define ATHUB_BASE__INST5_SEG3 0 +#define ATHUB_BASE__INST5_SEG4 0 + +#define CLK_BASE__INST0_SEG0 0x00016C00 +#define CLK_BASE__INST0_SEG1 0 +#define CLK_BASE__INST0_SEG2 0 +#define CLK_BASE__INST0_SEG3 0 +#define CLK_BASE__INST0_SEG4 0 + +#define CLK_BASE__INST1_SEG0 0x00016E00 +#define CLK_BASE__INST1_SEG1 0 +#define CLK_BASE__INST1_SEG2 0 +#define CLK_BASE__INST1_SEG3 0 +#define CLK_BASE__INST1_SEG4 0 + +#define CLK_BASE__INST2_SEG0 0x00017000 +#define CLK_BASE__INST2_SEG1 0 +#define CLK_BASE__INST2_SEG2 0 +#define CLK_BASE__INST2_SEG3 0 +#define CLK_BASE__INST2_SEG4 0 + +#define CLK_BASE__INST3_SEG0 0x00017200 +#define CLK_BASE__INST3_SEG1 0 +#define CLK_BASE__INST3_SEG2 0 +#define CLK_BASE__INST3_SEG3 0 +#define CLK_BASE__INST3_SEG4 0 + +#define CLK_BASE__INST4_SEG0 0x00017E00 +#define CLK_BASE__INST4_SEG1 0 +#define CLK_BASE__INST4_SEG2 0 +#define CLK_BASE__INST4_SEG3 0 +#define CLK_BASE__INST4_SEG4 0 + +#define CLK_BASE__INST5_SEG0 0x0001B000 +#define CLK_BASE__INST5_SEG1 0 +#define CLK_BASE__INST5_SEG2 0 +#define CLK_BASE__INST5_SEG3 0 +#define CLK_BASE__INST5_SEG4 0 + +#define DF_BASE__INST0_SEG0 0x00007000 +#define DF_BASE__INST0_SEG1 0 +#define DF_BASE__INST0_SEG2 0 +#define DF_BASE__INST0_SEG3 0 +#define DF_BASE__INST0_SEG4 0 + +#define DF_BASE__INST1_SEG0 0 +#define 
DF_BASE__INST1_SEG1 0 +#define DF_BASE__INST1_SEG2 0 +#define DF_BASE__INST1_SEG3 0 +#define DF_BASE__INST1_SEG4 0 + +#define DF_BASE__INST2_SEG0 0 +#define DF_BASE__INST2_SEG1 0 +#define DF_BASE__INST2_SEG2 0 +#define DF_BASE__INST2_SEG3 0 +#define DF_BASE__INST2_SEG4 0 + +#define DF_BASE__INST3_SEG0 0 +#define DF_BASE__INST3_SEG1 0 +#define DF_BASE__INST3_SEG2 0 +#define DF_BASE__INST3_SEG3 0 +#define DF_BASE__INST3_SEG4 0 + +#define DF_BASE__INST4_SEG0 0 +#define DF_BASE__INST4_SEG1 0 +#define DF_BASE__INST4_SEG2 0 +#define DF_BASE__INST4_SEG3 0 +#define DF_BASE__INST4_SEG4 0 + +#define DF_BASE__INST5_SEG0 0 +#define DF_BASE__INST5_SEG1 0 +#define DF_BASE__INST5_SEG2 0 +#define DF_BASE__INST5_SEG3 0 +#define DF_BASE__INST5_SEG4 0 + +#define DMU_BASE__INST0_SEG0 0x00000012 +#define DMU_BASE__INST0_SEG1 0x000000C0 +#define DMU_BASE__INST0_SEG2 0x000034C0 +#define DMU_BASE__INST0_SEG3 0x00009000 +#define DMU_BASE__INST0_SEG4 0 + +#define DMU_BASE__INST1_SEG0 0 +#define DMU_BASE__INST1_SEG1 0 +#define DMU_BASE__INST1_SEG2 0 +#define DMU_BASE__INST1_SEG3 0 +#define DMU_BASE__INST1_SEG4 0 + +#define DMU_BASE__INST2_SEG0 0 +#define DMU_BASE__INST2_SEG1 0 +#define DMU_BASE__INST2_SEG2 0 +#define DMU_BASE__INST2_SEG3 0 +#define DMU_BASE__INST2_SEG4 0 + +#define DMU_BASE__INST3_SEG0 0 +#define DMU_BASE__INST3_SEG1 0 +#define DMU_BASE__INST3_SEG2 0 +#define DMU_BASE__INST3_SEG3 0 +#define DMU_BASE__INST3_SEG4 0 + +#define DMU_BASE__INST4_SEG0 0 +#define DMU_BASE__INST4_SEG1 0 +#define DMU_BASE__INST4_SEG2 0 +#define DMU_BASE__INST4_SEG3 0 +#define DMU_BASE__INST4_SEG4 0 + +#define DMU_BASE__INST5_SEG0 0 +#define DMU_BASE__INST5_SEG1 0 +#define DMU_BASE__INST5_SEG2 0 +#define DMU_BASE__INST5_SEG3 0 +#define DMU_BASE__INST5_SEG4 0 + +#define FUSE_BASE__INST0_SEG0 0x00017400 +#define FUSE_BASE__INST0_SEG1 0 +#define FUSE_BASE__INST0_SEG2 0 +#define FUSE_BASE__INST0_SEG3 0 +#define FUSE_BASE__INST0_SEG4 0 + +#define FUSE_BASE__INST1_SEG0 0 +#define FUSE_BASE__INST1_SEG1 0 
+#define FUSE_BASE__INST1_SEG2 0 +#define FUSE_BASE__INST1_SEG3 0 +#define FUSE_BASE__INST1_SEG4 0 + +#define FUSE_BASE__INST2_SEG0 0 +#define FUSE_BASE__INST2_SEG1 0 +#define FUSE_BASE__INST2_SEG2 0 +#define FUSE_BASE__INST2_SEG3 0 +#define FUSE_BASE__INST2_SEG4 0 + +#define FUSE_BASE__INST3_SEG0 0 +#define FUSE_BASE__INST3_SEG1 0 +#define FUSE_BASE__INST3_SEG2 0 +#define FUSE_BASE__INST3_SEG3 0 +#define FUSE_BASE__INST3_SEG4 0 + +#define FUSE_BASE__INST4_SEG0 0 +#define FUSE_BASE__INST4_SEG1 0 +#define FUSE_BASE__INST4_SEG2 0 +#define FUSE_BASE__INST4_SEG3 0 +#define FUSE_BASE__INST4_SEG4 0 + +#define FUSE_BASE__INST5_SEG0 0 +#define FUSE_BASE__INST5_SEG1 0 +#define FUSE_BASE__INST5_SEG2 0 +#define FUSE_BASE__INST5_SEG3 0 +#define FUSE_BASE__INST5_SEG4 0 + +#define GC_BASE__INST0_SEG0 0x00001260 +#define GC_BASE__INST0_SEG1 0x0000A000 +#define GC_BASE__INST0_SEG2 0 +#define GC_BASE__INST0_SEG3 0 +#define GC_BASE__INST0_SEG4 0 + +#define GC_BASE__INST1_SEG0 0 +#define GC_BASE__INST1_SEG1 0 +#define GC_BASE__INST1_SEG2 0 +#define GC_BASE__INST1_SEG3 0 +#define GC_BASE__INST1_SEG4 0 + +#define GC_BASE__INST2_SEG0 0 +#define GC_BASE__INST2_SEG1 0 +#define GC_BASE__INST2_SEG2 0 +#define GC_BASE__INST2_SEG3 0 +#define GC_BASE__INST2_SEG4 0 + +#define GC_BASE__INST3_SEG0 0 +#define GC_BASE__INST3_SEG1 0 +#define GC_BASE__INST3_SEG2 0 +#define GC_BASE__INST3_SEG3 0 +#define GC_BASE__INST3_SEG4 0 + +#define GC_BASE__INST4_SEG0 0 +#define GC_BASE__INST4_SEG1 0 +#define GC_BASE__INST4_SEG2 0 +#define GC_BASE__INST4_SEG3 0 +#define GC_BASE__INST4_SEG4 0 + +#define GC_BASE__INST5_SEG0 0 +#define GC_BASE__INST5_SEG1 0 +#define GC_BASE__INST5_SEG2 0 +#define GC_BASE__INST5_SEG3 0 +#define GC_BASE__INST5_SEG4 0 + +#define HDP_BASE__INST0_SEG0 0x00000F20 +#define HDP_BASE__INST0_SEG1 0 +#define HDP_BASE__INST0_SEG2 0 +#define HDP_BASE__INST0_SEG3 0 +#define HDP_BASE__INST0_SEG4 0 + +#define HDP_BASE__INST1_SEG0 0 +#define HDP_BASE__INST1_SEG1 0 +#define HDP_BASE__INST1_SEG2 0 
+#define HDP_BASE__INST1_SEG3 0 +#define HDP_BASE__INST1_SEG4 0 + +#define HDP_BASE__INST2_SEG0 0 +#define HDP_BASE__INST2_SEG1 0 +#define HDP_BASE__INST2_SEG2 0 +#define HDP_BASE__INST2_SEG3 0 +#define HDP_BASE__INST2_SEG4 0 + +#define HDP_BASE__INST3_SEG0 0 +#define HDP_BASE__INST3_SEG1 0 +#define HDP_BASE__INST3_SEG2 0 +#define HDP_BASE__INST3_SEG3 0 +#define HDP_BASE__INST3_SEG4 0 + +#define HDP_BASE__INST4_SEG0 0 +#define HDP_BASE__INST4_SEG1 0 +#define HDP_BASE__INST4_SEG2 0 +#define HDP_BASE__INST4_SEG3 0 +#define HDP_BASE__INST4_SEG4 0 + +#define HDP_BASE__INST5_SEG0 0 +#define HDP_BASE__INST5_SEG1 0 +#define HDP_BASE__INST5_SEG2 0 +#define HDP_BASE__INST5_SEG3 0 +#define HDP_BASE__INST5_SEG4 0 + +#define MMHUB_BASE__INST0_SEG0 0x0001A000 +#define MMHUB_BASE__INST0_SEG1 0 +#define MMHUB_BASE__INST0_SEG2 0 +#define MMHUB_BASE__INST0_SEG3 0 +#define MMHUB_BASE__INST0_SEG4 0 + +#define MMHUB_BASE__INST1_SEG0 0 +#define MMHUB_BASE__INST1_SEG1 0 +#define MMHUB_BASE__INST1_SEG2 0 +#define MMHUB_BASE__INST1_SEG3 0 +#define MMHUB_BASE__INST1_SEG4 0 + +#define MMHUB_BASE__INST2_SEG0 0 +#define MMHUB_BASE__INST2_SEG1 0 +#define MMHUB_BASE__INST2_SEG2 0 +#define MMHUB_BASE__INST2_SEG3 0 +#define MMHUB_BASE__INST2_SEG4 0 + +#define MMHUB_BASE__INST3_SEG0 0 +#define MMHUB_BASE__INST3_SEG1 0 +#define MMHUB_BASE__INST3_SEG2 0 +#define MMHUB_BASE__INST3_SEG3 0 +#define MMHUB_BASE__INST3_SEG4 0 + +#define MMHUB_BASE__INST4_SEG0 0 +#define MMHUB_BASE__INST4_SEG1 0 +#define MMHUB_BASE__INST4_SEG2 0 +#define MMHUB_BASE__INST4_SEG3 0 +#define MMHUB_BASE__INST4_SEG4 0 + +#define MMHUB_BASE__INST5_SEG0 0 +#define MMHUB_BASE__INST5_SEG1 0 +#define MMHUB_BASE__INST5_SEG2 0 +#define MMHUB_BASE__INST5_SEG3 0 +#define MMHUB_BASE__INST5_SEG4 0 + +#define MP0_BASE__INST0_SEG0 0x00016000 +#define MP0_BASE__INST0_SEG1 0 +#define MP0_BASE__INST0_SEG2 0 +#define MP0_BASE__INST0_SEG3 0 +#define MP0_BASE__INST0_SEG4 0 + +#define MP0_BASE__INST1_SEG0 0 +#define MP0_BASE__INST1_SEG1 0 +#define 
MP0_BASE__INST1_SEG2 0 +#define MP0_BASE__INST1_SEG3 0 +#define MP0_BASE__INST1_SEG4 0 + +#define MP0_BASE__INST2_SEG0 0 +#define MP0_BASE__INST2_SEG1 0 +#define MP0_BASE__INST2_SEG2 0 +#define MP0_BASE__INST2_SEG3 0 +#define MP0_BASE__INST2_SEG4 0 + +#define MP0_BASE__INST3_SEG0 0 +#define MP0_BASE__INST3_SEG1 0 +#define MP0_BASE__INST3_SEG2 0 +#define MP0_BASE__INST3_SEG3 0 +#define MP0_BASE__INST3_SEG4 0 + +#define MP0_BASE__INST4_SEG0 0 +#define MP0_BASE__INST4_SEG1 0 +#define MP0_BASE__INST4_SEG2 0 +#define MP0_BASE__INST4_SEG3 0 +#define MP0_BASE__INST4_SEG4 0 + +#define MP0_BASE__INST5_SEG0 0 +#define MP0_BASE__INST5_SEG1 0 +#define MP0_BASE__INST5_SEG2 0 +#define MP0_BASE__INST5_SEG3 0 +#define MP0_BASE__INST5_SEG4 0 + +#define MP1_BASE__INST0_SEG0 0x00016000 +#define MP1_BASE__INST0_SEG1 0 +#define MP1_BASE__INST0_SEG2 0 +#define MP1_BASE__INST0_SEG3 0 +#define MP1_BASE__INST0_SEG4 0 + +#define MP1_BASE__INST1_SEG0 0 +#define MP1_BASE__INST1_SEG1 0 +#define MP1_BASE__INST1_SEG2 0 +#define MP1_BASE__INST1_SEG3 0 +#define MP1_BASE__INST1_SEG4 0 + +#define MP1_BASE__INST2_SEG0 0 +#define MP1_BASE__INST2_SEG1 0 +#define MP1_BASE__INST2_SEG2 0 +#define MP1_BASE__INST2_SEG3 0 +#define MP1_BASE__INST2_SEG4 0 + +#define MP1_BASE__INST3_SEG0 0 +#define MP1_BASE__INST3_SEG1 0 +#define MP1_BASE__INST3_SEG2 0 +#define MP1_BASE__INST3_SEG3 0 +#define MP1_BASE__INST3_SEG4 0 + +#define MP1_BASE__INST4_SEG0 0 +#define MP1_BASE__INST4_SEG1 0 +#define MP1_BASE__INST4_SEG2 0 +#define MP1_BASE__INST4_SEG3 0 +#define MP1_BASE__INST4_SEG4 0 + +#define MP1_BASE__INST5_SEG0 0 +#define MP1_BASE__INST5_SEG1 0 +#define MP1_BASE__INST5_SEG2 0 +#define MP1_BASE__INST5_SEG3 0 +#define MP1_BASE__INST5_SEG4 0 + +#define NBIO_BASE__INST0_SEG0 0x00000000 +#define NBIO_BASE__INST0_SEG1 0x00000014 +#define NBIO_BASE__INST0_SEG2 0x00000D20 +#define NBIO_BASE__INST0_SEG3 0x00010400 +#define NBIO_BASE__INST0_SEG4 0 + +#define NBIO_BASE__INST1_SEG0 0 +#define NBIO_BASE__INST1_SEG1 0 +#define 
NBIO_BASE__INST1_SEG2 0 +#define NBIO_BASE__INST1_SEG3 0 +#define NBIO_BASE__INST1_SEG4 0 + +#define NBIO_BASE__INST2_SEG0 0 +#define NBIO_BASE__INST2_SEG1 0 +#define NBIO_BASE__INST2_SEG2 0 +#define NBIO_BASE__INST2_SEG3 0 +#define NBIO_BASE__INST2_SEG4 0 + +#define NBIO_BASE__INST3_SEG0 0 +#define NBIO_BASE__INST3_SEG1 0 +#define NBIO_BASE__INST3_SEG2 0 +#define NBIO_BASE__INST3_SEG3 0 +#define NBIO_BASE__INST3_SEG4 0 + +#define NBIO_BASE__INST4_SEG0 0 +#define NBIO_BASE__INST4_SEG1 0 +#define NBIO_BASE__INST4_SEG2 0 +#define NBIO_BASE__INST4_SEG3 0 +#define NBIO_BASE__INST4_SEG4 0 + +#define NBIO_BASE__INST5_SEG0 0 +#define NBIO_BASE__INST5_SEG1 0 +#define NBIO_BASE__INST5_SEG2 0 +#define NBIO_BASE__INST5_SEG3 0 +#define NBIO_BASE__INST5_SEG4 0 + +#define OSSSYS_BASE__INST0_SEG0 0x000010A0 +#define OSSSYS_BASE__INST0_SEG1 0 +#define OSSSYS_BASE__INST0_SEG2 0 +#define OSSSYS_BASE__INST0_SEG3 0 +#define OSSSYS_BASE__INST0_SEG4 0 + +#define OSSSYS_BASE__INST1_SEG0 0 +#define OSSSYS_BASE__INST1_SEG1 0 +#define OSSSYS_BASE__INST1_SEG2 0 +#define OSSSYS_BASE__INST1_SEG3 0 +#define OSSSYS_BASE__INST1_SEG4 0 + +#define OSSSYS_BASE__INST2_SEG0 0 +#define OSSSYS_BASE__INST2_SEG1 0 +#define OSSSYS_BASE__INST2_SEG2 0 +#define OSSSYS_BASE__INST2_SEG3 0 +#define OSSSYS_BASE__INST2_SEG4 0 + +#define OSSSYS_BASE__INST3_SEG0 0 +#define OSSSYS_BASE__INST3_SEG1 0 +#define OSSSYS_BASE__INST3_SEG2 0 +#define OSSSYS_BASE__INST3_SEG3 0 +#define OSSSYS_BASE__INST3_SEG4 0 + +#define OSSSYS_BASE__INST4_SEG0 0 +#define OSSSYS_BASE__INST4_SEG1 0 +#define OSSSYS_BASE__INST4_SEG2 0 +#define OSSSYS_BASE__INST4_SEG3 0 +#define OSSSYS_BASE__INST4_SEG4 0 + +#define OSSSYS_BASE__INST5_SEG0 0 +#define OSSSYS_BASE__INST5_SEG1 0 +#define OSSSYS_BASE__INST5_SEG2 0 +#define OSSSYS_BASE__INST5_SEG3 0 +#define OSSSYS_BASE__INST5_SEG4 0 + +#define SMUIO_BASE__INST0_SEG0 0x00016800 +#define SMUIO_BASE__INST0_SEG1 0x00016A00 +#define SMUIO_BASE__INST0_SEG2 0 +#define SMUIO_BASE__INST0_SEG3 0 +#define 
SMUIO_BASE__INST0_SEG4 0 + +#define SMUIO_BASE__INST1_SEG0 0 +#define SMUIO_BASE__INST1_SEG1 0 +#define SMUIO_BASE__INST1_SEG2 0 +#define SMUIO_BASE__INST1_SEG3 0 +#define SMUIO_BASE__INST1_SEG4 0 + +#define SMUIO_BASE__INST2_SEG0 0 +#define SMUIO_BASE__INST2_SEG1 0 +#define SMUIO_BASE__INST2_SEG2 0 +#define SMUIO_BASE__INST2_SEG3 0 +#define SMUIO_BASE__INST2_SEG4 0 + +#define SMUIO_BASE__INST3_SEG0 0 +#define SMUIO_BASE__INST3_SEG1 0 +#define SMUIO_BASE__INST3_SEG2 0 +#define SMUIO_BASE__INST3_SEG3 0 +#define SMUIO_BASE__INST3_SEG4 0 + +#define SMUIO_BASE__INST4_SEG0 0 +#define SMUIO_BASE__INST4_SEG1 0 +#define SMUIO_BASE__INST4_SEG2 0 +#define SMUIO_BASE__INST4_SEG3 0 +#define SMUIO_BASE__INST4_SEG4 0 + +#define SMUIO_BASE__INST5_SEG0 0 +#define SMUIO_BASE__INST5_SEG1 0 +#define SMUIO_BASE__INST5_SEG2 0 +#define SMUIO_BASE__INST5_SEG3 0 +#define SMUIO_BASE__INST5_SEG4 0 + +#define THM_BASE__INST0_SEG0 0x00016600 +#define THM_BASE__INST0_SEG1 0 +#define THM_BASE__INST0_SEG2 0 +#define THM_BASE__INST0_SEG3 0 +#define THM_BASE__INST0_SEG4 0 + +#define THM_BASE__INST1_SEG0 0 +#define THM_BASE__INST1_SEG1 0 +#define THM_BASE__INST1_SEG2 0 +#define THM_BASE__INST1_SEG3 0 +#define THM_BASE__INST1_SEG4 0 + +#define THM_BASE__INST2_SEG0 0 +#define THM_BASE__INST2_SEG1 0 +#define THM_BASE__INST2_SEG2 0 +#define THM_BASE__INST2_SEG3 0 +#define THM_BASE__INST2_SEG4 0 + +#define THM_BASE__INST3_SEG0 0 +#define THM_BASE__INST3_SEG1 0 +#define THM_BASE__INST3_SEG2 0 +#define THM_BASE__INST3_SEG3 0 +#define THM_BASE__INST3_SEG4 0 + +#define THM_BASE__INST4_SEG0 0 +#define THM_BASE__INST4_SEG1 0 +#define THM_BASE__INST4_SEG2 0 +#define THM_BASE__INST4_SEG3 0 +#define THM_BASE__INST4_SEG4 0 + +#define THM_BASE__INST5_SEG0 0 +#define THM_BASE__INST5_SEG1 0 +#define THM_BASE__INST5_SEG2 0 +#define THM_BASE__INST5_SEG3 0 +#define THM_BASE__INST5_SEG4 0 + +#define UMC0_BASE__INST0_SEG0 0x00014000 +#define UMC0_BASE__INST0_SEG1 0 +#define UMC0_BASE__INST0_SEG2 0 +#define 
UMC0_BASE__INST0_SEG3 0 +#define UMC0_BASE__INST0_SEG4 0 + +#define UMC0_BASE__INST1_SEG0 0 +#define UMC0_BASE__INST1_SEG1 0 +#define UMC0_BASE__INST1_SEG2 0 +#define UMC0_BASE__INST1_SEG3 0 +#define UMC0_BASE__INST1_SEG4 0 + +#define UMC0_BASE__INST2_SEG0 0 +#define UMC0_BASE__INST2_SEG1 0 +#define UMC0_BASE__INST2_SEG2 0 +#define UMC0_BASE__INST2_SEG3 0 +#define UMC0_BASE__INST2_SEG4 0 + +#define UMC0_BASE__INST3_SEG0 0 +#define UMC0_BASE__INST3_SEG1 0 +#define UMC0_BASE__INST3_SEG2 0 +#define UMC0_BASE__INST3_SEG3 0 +#define UMC0_BASE__INST3_SEG4 0 + +#define UMC0_BASE__INST4_SEG0 0 +#define UMC0_BASE__INST4_SEG1 0 +#define UMC0_BASE__INST4_SEG2 0 +#define UMC0_BASE__INST4_SEG3 0 +#define UMC0_BASE__INST4_SEG4 0 + +#define UMC0_BASE__INST5_SEG0 0 +#define UMC0_BASE__INST5_SEG1 0 +#define UMC0_BASE__INST5_SEG2 0 +#define UMC0_BASE__INST5_SEG3 0 +#define UMC0_BASE__INST5_SEG4 0 + +#define UVD0_BASE__INST0_SEG0 0x00007800 +#define UVD0_BASE__INST0_SEG1 0x00007E00 +#define UVD0_BASE__INST0_SEG2 0 +#define UVD0_BASE__INST0_SEG3 0 +#define UVD0_BASE__INST0_SEG4 0 + +#define UVD0_BASE__INST1_SEG0 0 +#define UVD0_BASE__INST1_SEG1 0 +#define UVD0_BASE__INST1_SEG2 0 +#define UVD0_BASE__INST1_SEG3 0 +#define UVD0_BASE__INST1_SEG4 0 + +#define UVD0_BASE__INST2_SEG0 0 +#define UVD0_BASE__INST2_SEG1 0 +#define UVD0_BASE__INST2_SEG2 0 +#define UVD0_BASE__INST2_SEG3 0 +#define UVD0_BASE__INST2_SEG4 0 + +#define UVD0_BASE__INST3_SEG0 0 +#define UVD0_BASE__INST3_SEG1 0 +#define UVD0_BASE__INST3_SEG2 0 +#define UVD0_BASE__INST3_SEG3 0 +#define UVD0_BASE__INST3_SEG4 0 + +#define UVD0_BASE__INST4_SEG0 0 +#define UVD0_BASE__INST4_SEG1 0 +#define UVD0_BASE__INST4_SEG2 0 +#define UVD0_BASE__INST4_SEG3 0 +#define UVD0_BASE__INST4_SEG4 0 + +#define UVD0_BASE__INST5_SEG0 0 +#define UVD0_BASE__INST5_SEG1 0 +#define UVD0_BASE__INST5_SEG2 0 +#define UVD0_BASE__INST5_SEG3 0 +#define UVD0_BASE__INST5_SEG4 0 + +#endif + From 3de3364550bfc2f869690e53c0aab2140816f4be Mon Sep 17 00:00:00 2001 
From: Tao Zhou Date: Tue, 13 Jul 2021 17:18:03 -0400 Subject: [PATCH 565/851] drm/amdgpu: init family name for cyan_skillfish Use FAMILY_NV for cyan_skillfish. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ba7b193ef0e9c..909e52fb76cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2150,6 +2150,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_BEIGE_GOBY: case CHIP_VANGOGH: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: if (adev->asic_type == CHIP_VANGOGH) adev->family = AMDGPU_FAMILY_VGH; else if (adev->asic_type == CHIP_YELLOW_CARP) From 9d9ee0f4ead4b9ac2b2b67576f4ede0de2a9b87f Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:19:25 -0400 Subject: [PATCH 566/851] drm/amdgpu: set ip blocks for cyan_skillfish Add ip blocks for cyan_skillfish. 
Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index bc89d00ff7bc4..de0c08ec8ed07 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -892,6 +892,15 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); break; + case CHIP_CYAN_SKILLFISH: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + break; default: return -EINVAL; } From 3e1cd6c37dcfe34d076f7fd49040a9fc17a39917 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:21:27 -0400 Subject: [PATCH 567/851] drm/amdgpu: add cp/rlc fw loading support for cyan_skillfish Add cp/rlc fw loading support and gfx golden setting. 
v2: squash in updates (Alex) Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 39 ++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 773368a59487e..c52c23b3a3c9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -256,6 +256,39 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_ce.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_pfp.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_me.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec2.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin"); + +static const struct soc15_reg_golden golden_settings_gc_10_0[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), + /* TA_GRAD_ADJ_UCONFIG -> TA_GRAD_ADJ */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382), + /* VGT_TF_RING_SIZE_UMD -> VGT_TF_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2262c24e), + /* VGT_HS_OFFCHIP_PARAM_UMD -> VGT_HS_OFFCHIP_PARAM */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x226cc24f), + /* VGT_TF_MEMORY_BASE_UMD -> VGT_TF_MEMORY_BASE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x226ec250), + /* VGT_TF_MEMORY_BASE_HI_UMD -> VGT_TF_MEMORY_BASE_HI */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2278c261), 
+ /* VGT_ESGS_RING_SIZE_UMD -> VGT_ESGS_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2232c240), + /* VGT_GSVS_RING_SIZE_UMD -> VGT_GSVS_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2233c241), +}; + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -4043,6 +4076,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_YELLOW_CARP: chip_name = "yellow_carp"; break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + chip_name = "cyan_skillfish2"; + else + chip_name = "cyan_skillfish"; + break; default: BUG(); } From 0f7f3ae5a5c9af5a1fa406b52196f5aacfa38e68 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 18 Dec 2018 19:08:16 +0800 Subject: [PATCH 568/851] drm/amdgpu: add sdma fw loading support for cyan_skillfish Same as Navi10. v2: squash in updates (Alex) Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 67f354475d6d4..4fe5e02a2a49d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -51,6 +51,12 @@ MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); + #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 #define SDMA0_HYP_DEC_REG_END 0x5893 @@ -221,6 +227,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVI12: chip_name = "navi12"; break; + case 
CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + chip_name = "cyan_skillfish2"; + else + chip_name = "cyan_skillfish"; + break; default: BUG(); } From e85d01d5a9013a64bc184a5e260cad5386142b0a Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 18 Dec 2018 19:10:03 +0800 Subject: [PATCH 569/851] drm/amdgpu: load fw directly for cyan_skillfish Use backdoor loading. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 8e9b1f9fa34f0..257f596fe9c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -409,6 +409,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_CYAN_SKILLFISH: + return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); } From 01e1175b959b020880a27cd15c051f792ab96ce2 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:25:33 -0400 Subject: [PATCH 570/851] drm/amdgpu: add cyan_skillfish support in gmc v10 Add gmc support for cyan_skillfish. 
Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 4523df2785d63..24b781e90befa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -810,6 +810,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -879,6 +880,7 @@ static int gmc_v10_0_sw_init(void *handle) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -996,6 +998,7 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: break; default: break; From bac2693a8397e499cd0c6699dbe6166700fa367b Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:27:35 -0400 Subject: [PATCH 571/851] drm/amdgpu: add cyan_skillfish support in gfx v10 Add gfx support for cyan_skillfish. 
Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c52c23b3a3c9e..2cd68c50f5496 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3545,6 +3545,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = { (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) +/* TODO: pending on golden setting value of gb address config */ +#define CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN 0x00100044 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev); @@ -3942,6 +3944,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI12: case CHIP_NAVI14: + case CHIP_CYAN_SKILLFISH: if ((adev->gfx.me_fw_version >= 0x00000046) && (adev->gfx.me_feature_version >= 27) && (adev->gfx.pfp_fw_version >= 0x00000068) && @@ -4661,6 +4664,14 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config_fields.num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); break; + case CHIP_CYAN_SKILLFISH: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN; + break; default: BUG(); break; @@ -4765,6 +4776,7 @@ static int gfx_v10_0_sw_init(void *handle) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_CYAN_SKILLFISH: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -7729,6 
+7741,7 @@ static int gfx_v10_0_early_init(void *handle) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_CYAN_SKILLFISH: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; break; case CHIP_SIENNA_CICHLID: @@ -9489,6 +9502,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: From 752efc6f29911cb36cc19a31cf166ec48502351c Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 19 Dec 2018 11:53:08 +0800 Subject: [PATCH 572/851] drm/amdgpu: add sdma v5 golden settings for cyan_skillfish Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 4fe5e02a2a49d..50bf3b71bc93c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -136,6 +136,37 @@ static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), }; +static const struct soc15_reg_golden golden_settings_sdma_cyan_skillfish[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x007fffff, 0x004c5c00), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x007fffff, 0x004c5c00) +}; + static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { u32 base; @@ -186,6 +217,11 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) 
golden_settings_sdma_nv12, (const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); break; + case CHIP_CYAN_SKILLFISH: + soc15_program_register_sequence(adev, + golden_settings_sdma_cyan_skillfish, + (const u32)ARRAY_SIZE(golden_settings_sdma_cyan_skillfish)); + break; default: break; } From 4ed89b56cd5122b39c5702055570c51093ece460 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:29:36 -0400 Subject: [PATCH 573/851] drm/amdgpu: add gc v10 golden settings for cyan_skillfish v2: squash in updates from Ray Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 47 ++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2cd68c50f5496..26334d5147aae 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3539,6 +3539,45 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) }; +static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_PSP_DEBUG, 0x0000003f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_PSP_DEBUG, 0x0000003f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_FAST_CLKS, 0x3fffffff, 0x0000493e), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x3c000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0xa0000000, 0xa0000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x00008000, 0x003c8014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_DRAM_BURST_CTRL, 0x00000010, 0x00000017), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xd8d8d8d8), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 
0x00000003), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00009d00, 0x00008500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xffffffff, 0x000fffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_DRAM_BURST_CTRL, 0x00000010, 0x00000017), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xfcfcfcfc, 0xd8d8d8d8), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77707770, 0x21302130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77707770, 0x21302130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xfc02002f, 0x9402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00002188, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x08000009, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xcc3fcc03, 0x842a4c02), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000000f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffff3109, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x00030008, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << 
SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3771,6 +3810,14 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_3_5, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_5)); break; + case CHIP_CYAN_SKILLFISH: + soc15_program_register_sequence(adev, + golden_settings_gc_10_0, + (const u32)ARRAY_SIZE(golden_settings_gc_10_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_0_cyan_skillfish, + (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish)); + break; default: break; } From be3c5aa1f16e2806823ee35c49985d831bb4d874 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:34:53 -0400 Subject: [PATCH 574/851] drm/amdkfd: enable cyan_skillfish KFD Add KFD support for cyan_skillfish. v2: whitespace fixes (Alex) Signed-off-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 20 +++++++++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 1 + .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 + 6 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index eada22b9ea69b..cfedfb1e8596c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1404,6 +1404,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, break; case CHIP_NAVI10: case CHIP_NAVI12: + case CHIP_CYAN_SKILLFISH: pcache_info = navi10_cache_info; num_of_cache_types = ARRAY_SIZE(navi10_cache_info); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index fd1fd20cd70c1..b551dd6750855 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -84,6 +84,7 @@ static const struct 
kfd2kgd_calls *kfd2kgd_funcs[] = { [CHIP_DIMGREY_CAVEFISH] = &gfx_v10_3_kfd2kgd, [CHIP_BEIGE_GOBY] = &gfx_v10_3_kfd2kgd, [CHIP_YELLOW_CARP] = &gfx_v10_3_kfd2kgd, + [CHIP_CYAN_SKILLFISH] = &gfx_v10_kfd2kgd, }; #ifdef KFD_SUPPORT_IOMMU_V2 @@ -596,6 +597,24 @@ static const struct kfd_device_info yellow_carp_device_info = { .num_sdma_queues_per_engine = 2, }; +static const struct kfd_device_info cyan_skillfish_device_info = { + .asic_family = CHIP_CYAN_SKILLFISH, + .asic_name = "cyan_skillfish", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = true, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + /* For each entry, [0] is regular and [1] is virtualisation device. */ static const struct kfd_device_info *kfd_supported_devices[][2] = { #ifdef KFD_SUPPORT_IOMMU_V2 @@ -625,6 +644,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = { [CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info}, [CHIP_BEIGE_GOBY] = {&beige_goby_device_info, &beige_goby_device_info}, [CHIP_YELLOW_CARP] = {&yellow_carp_device_info, NULL}, + [CHIP_CYAN_SKILLFISH] = {&cyan_skillfish_device_info, NULL}, }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 6b89ca6ddc655..a972ef5eae688 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1947,6 +1947,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case 
CHIP_CYAN_SKILLFISH: device_queue_manager_init_v10_navi10(&dqm->asic_ops); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index a9b329f0f8620..2e86692def19a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -422,6 +422,7 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: kfd_init_apertures_v9(pdd, id); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index b33ebe81cd95a..e547f1f8c49f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -251,6 +251,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: pm->pmf = &kfd_v9_pm_funcs; break; case CHIP_ALDEBARAN: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 72e3cd647cf36..ef992dd2da3af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1424,6 +1424,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); From 08e1994232da84793e50312e49e1cd22296aaae7 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:45:40 -0400 Subject: [PATCH 575/851] drm/amdgpu: add chip early init for cyan_skillfish Set cg/pg flags and rev id for cyan_skillfish. 
Signed-off-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index de0c08ec8ed07..045a26b1594e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1253,6 +1253,11 @@ static int nv_common_early_init(void *handle) else adev->external_rev_id = adev->rev_id + 0x01; break; + case CHIP_CYAN_SKILLFISH: + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x82; + break; default: /* FIXME: not supported yet */ return -EINVAL; From 02387473ce26b9a2eac2a86fc71ded2098a01549 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 1 Jun 2021 17:06:44 +0800 Subject: [PATCH 576/851] drm/amdgpu: add nbio support for cyan_skillfish nbio version is 2.3. v2: Make it more explicit (Alex) Signed-off-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 045a26b1594e6..5e4f763263157 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -685,7 +685,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; - if (adev->flags & AMD_IS_APU) { + if (adev->asic_type == CHIP_CYAN_SKILLFISH) { + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + } else if (adev->flags & AMD_IS_APU) { adev->nbio.funcs = &nbio_v7_2_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg; } else { From 873c5a91eee2b208acaf020facc7b659c449b993 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 17 Jun 2021 11:37:29 +0800 Subject: [PATCH 577/851] drm/amdgpu: add mp 11.0.8 header for cyan_skillfish The cyan_skillfish will use the mp 11.0.8. 
Signed-off-by: Lang Yu Signed-off-by: Alex Deucher --- .../include/asic_reg/mp/mp_11_0_8_offset.h | 352 ++++++++++++++++++ 1 file changed, 352 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h new file mode 100644 index 0000000000000..19293ccaec23b --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h @@ -0,0 +1,352 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _mp_11_0_8_OFFSET_HEADER +#define _mp_11_0_8_OFFSET_HEADER + + + +// addressBlock: mp_SmuMp0_SmnDec +// base address: 0x0 +#define mmMP0_SMN_C2PMSG_32 0x0060 +#define mmMP0_SMN_C2PMSG_32_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_33 0x0061 +#define mmMP0_SMN_C2PMSG_33_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_34 0x0062 +#define mmMP0_SMN_C2PMSG_34_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_35 0x0063 +#define mmMP0_SMN_C2PMSG_35_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_36 0x0064 +#define mmMP0_SMN_C2PMSG_36_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_37 0x0065 +#define mmMP0_SMN_C2PMSG_37_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_38 0x0066 +#define mmMP0_SMN_C2PMSG_38_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_39 0x0067 +#define mmMP0_SMN_C2PMSG_39_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_40 0x0068 +#define mmMP0_SMN_C2PMSG_40_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_41 0x0069 +#define mmMP0_SMN_C2PMSG_41_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_42 0x006a +#define mmMP0_SMN_C2PMSG_42_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_43 0x006b +#define mmMP0_SMN_C2PMSG_43_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_44 0x006c +#define mmMP0_SMN_C2PMSG_44_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_45 0x006d +#define mmMP0_SMN_C2PMSG_45_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_46 0x006e +#define mmMP0_SMN_C2PMSG_46_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_47 0x006f +#define mmMP0_SMN_C2PMSG_47_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_48 0x0070 +#define mmMP0_SMN_C2PMSG_48_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_49 0x0071 +#define mmMP0_SMN_C2PMSG_49_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_50 0x0072 +#define mmMP0_SMN_C2PMSG_50_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_51 0x0073 +#define mmMP0_SMN_C2PMSG_51_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_52 0x0074 +#define mmMP0_SMN_C2PMSG_52_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_53 0x0075 +#define mmMP0_SMN_C2PMSG_53_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_54 0x0076 +#define mmMP0_SMN_C2PMSG_54_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_55 0x0077 +#define mmMP0_SMN_C2PMSG_55_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_56 0x0078 
+#define mmMP0_SMN_C2PMSG_56_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_57 0x0079 +#define mmMP0_SMN_C2PMSG_57_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_58 0x007a +#define mmMP0_SMN_C2PMSG_58_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_59 0x007b +#define mmMP0_SMN_C2PMSG_59_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_60 0x007c +#define mmMP0_SMN_C2PMSG_60_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_61 0x007d +#define mmMP0_SMN_C2PMSG_61_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_62 0x007e +#define mmMP0_SMN_C2PMSG_62_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_63 0x007f +#define mmMP0_SMN_C2PMSG_63_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_64 0x0080 +#define mmMP0_SMN_C2PMSG_64_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_65 0x0081 +#define mmMP0_SMN_C2PMSG_65_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_66 0x0082 +#define mmMP0_SMN_C2PMSG_66_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_67 0x0083 +#define mmMP0_SMN_C2PMSG_67_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_68 0x0084 +#define mmMP0_SMN_C2PMSG_68_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_69 0x0085 +#define mmMP0_SMN_C2PMSG_69_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_70 0x0086 +#define mmMP0_SMN_C2PMSG_70_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_71 0x0087 +#define mmMP0_SMN_C2PMSG_71_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_72 0x0088 +#define mmMP0_SMN_C2PMSG_72_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_73 0x0089 +#define mmMP0_SMN_C2PMSG_73_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_74 0x008a +#define mmMP0_SMN_C2PMSG_74_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_75 0x008b +#define mmMP0_SMN_C2PMSG_75_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_76 0x008c +#define mmMP0_SMN_C2PMSG_76_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_77 0x008d +#define mmMP0_SMN_C2PMSG_77_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_78 0x008e +#define mmMP0_SMN_C2PMSG_78_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_79 0x008f +#define mmMP0_SMN_C2PMSG_79_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_80 0x0090 +#define mmMP0_SMN_C2PMSG_80_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_81 0x0091 +#define mmMP0_SMN_C2PMSG_81_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_82 0x0092 +#define 
mmMP0_SMN_C2PMSG_82_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_83 0x0093 +#define mmMP0_SMN_C2PMSG_83_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_84 0x0094 +#define mmMP0_SMN_C2PMSG_84_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_85 0x0095 +#define mmMP0_SMN_C2PMSG_85_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_86 0x0096 +#define mmMP0_SMN_C2PMSG_86_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_87 0x0097 +#define mmMP0_SMN_C2PMSG_87_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_88 0x0098 +#define mmMP0_SMN_C2PMSG_88_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_89 0x0099 +#define mmMP0_SMN_C2PMSG_89_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_90 0x009a +#define mmMP0_SMN_C2PMSG_90_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_91 0x009b +#define mmMP0_SMN_C2PMSG_91_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_92 0x009c +#define mmMP0_SMN_C2PMSG_92_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_93 0x009d +#define mmMP0_SMN_C2PMSG_93_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_94 0x009e +#define mmMP0_SMN_C2PMSG_94_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_95 0x009f +#define mmMP0_SMN_C2PMSG_95_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_96 0x00a0 +#define mmMP0_SMN_C2PMSG_96_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_97 0x00a1 +#define mmMP0_SMN_C2PMSG_97_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_98 0x00a2 +#define mmMP0_SMN_C2PMSG_98_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_99 0x00a3 +#define mmMP0_SMN_C2PMSG_99_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_100 0x00a4 +#define mmMP0_SMN_C2PMSG_100_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_101 0x00a5 +#define mmMP0_SMN_C2PMSG_101_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_102 0x00a6 +#define mmMP0_SMN_C2PMSG_102_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_103 0x00a7 +#define mmMP0_SMN_C2PMSG_103_BASE_IDX 0 +#define mmMP0_SMN_IH_CREDIT 0x00c1 +#define mmMP0_SMN_IH_CREDIT_BASE_IDX 0 +#define mmMP0_SMN_IH_SW_INT 0x00c2 +#define mmMP0_SMN_IH_SW_INT_BASE_IDX 0 +#define mmMP0_SMN_IH_SW_INT_CTRL 0x00c3 +#define mmMP0_SMN_IH_SW_INT_CTRL_BASE_IDX 0 + + +// addressBlock: mp_SmuMp1_SmnDec +// base address: 0x0 +#define mmMP1_SMN_C2PMSG_32 0x0260 +#define 
mmMP1_SMN_C2PMSG_32_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_33 0x0261 +#define mmMP1_SMN_C2PMSG_33_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_34 0x0262 +#define mmMP1_SMN_C2PMSG_34_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_35 0x0263 +#define mmMP1_SMN_C2PMSG_35_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_36 0x0264 +#define mmMP1_SMN_C2PMSG_36_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_37 0x0265 +#define mmMP1_SMN_C2PMSG_37_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_38 0x0266 +#define mmMP1_SMN_C2PMSG_38_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_39 0x0267 +#define mmMP1_SMN_C2PMSG_39_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_40 0x0268 +#define mmMP1_SMN_C2PMSG_40_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_41 0x0269 +#define mmMP1_SMN_C2PMSG_41_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_42 0x026a +#define mmMP1_SMN_C2PMSG_42_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_43 0x026b +#define mmMP1_SMN_C2PMSG_43_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_44 0x026c +#define mmMP1_SMN_C2PMSG_44_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_45 0x026d +#define mmMP1_SMN_C2PMSG_45_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_46 0x026e +#define mmMP1_SMN_C2PMSG_46_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_47 0x026f +#define mmMP1_SMN_C2PMSG_47_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_48 0x0270 +#define mmMP1_SMN_C2PMSG_48_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_49 0x0271 +#define mmMP1_SMN_C2PMSG_49_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_50 0x0272 +#define mmMP1_SMN_C2PMSG_50_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_51 0x0273 +#define mmMP1_SMN_C2PMSG_51_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_52 0x0274 +#define mmMP1_SMN_C2PMSG_52_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_53 0x0275 +#define mmMP1_SMN_C2PMSG_53_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_54 0x0276 +#define mmMP1_SMN_C2PMSG_54_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_55 0x0277 +#define mmMP1_SMN_C2PMSG_55_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_56 0x0278 +#define mmMP1_SMN_C2PMSG_56_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_57 0x0279 +#define mmMP1_SMN_C2PMSG_57_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_58 0x027a +#define 
mmMP1_SMN_C2PMSG_58_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_59 0x027b +#define mmMP1_SMN_C2PMSG_59_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_60 0x027c +#define mmMP1_SMN_C2PMSG_60_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_61 0x027d +#define mmMP1_SMN_C2PMSG_61_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_62 0x027e +#define mmMP1_SMN_C2PMSG_62_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_63 0x027f +#define mmMP1_SMN_C2PMSG_63_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_64 0x0280 +#define mmMP1_SMN_C2PMSG_64_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_65 0x0281 +#define mmMP1_SMN_C2PMSG_65_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_66 0x0282 +#define mmMP1_SMN_C2PMSG_66_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_67 0x0283 +#define mmMP1_SMN_C2PMSG_67_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_68 0x0284 +#define mmMP1_SMN_C2PMSG_68_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_69 0x0285 +#define mmMP1_SMN_C2PMSG_69_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_70 0x0286 +#define mmMP1_SMN_C2PMSG_70_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_71 0x0287 +#define mmMP1_SMN_C2PMSG_71_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_72 0x0288 +#define mmMP1_SMN_C2PMSG_72_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_73 0x0289 +#define mmMP1_SMN_C2PMSG_73_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_74 0x028a +#define mmMP1_SMN_C2PMSG_74_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_75 0x028b +#define mmMP1_SMN_C2PMSG_75_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_76 0x028c +#define mmMP1_SMN_C2PMSG_76_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_77 0x028d +#define mmMP1_SMN_C2PMSG_77_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_78 0x028e +#define mmMP1_SMN_C2PMSG_78_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_79 0x028f +#define mmMP1_SMN_C2PMSG_79_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_80 0x0290 +#define mmMP1_SMN_C2PMSG_80_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_81 0x0291 +#define mmMP1_SMN_C2PMSG_81_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_82 0x0292 +#define mmMP1_SMN_C2PMSG_82_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_83 0x0293 +#define mmMP1_SMN_C2PMSG_83_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_84 0x0294 +#define 
mmMP1_SMN_C2PMSG_84_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_85 0x0295 +#define mmMP1_SMN_C2PMSG_85_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_86 0x0296 +#define mmMP1_SMN_C2PMSG_86_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_87 0x0297 +#define mmMP1_SMN_C2PMSG_87_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_88 0x0298 +#define mmMP1_SMN_C2PMSG_88_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_89 0x0299 +#define mmMP1_SMN_C2PMSG_89_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_90 0x029a +#define mmMP1_SMN_C2PMSG_90_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_91 0x029b +#define mmMP1_SMN_C2PMSG_91_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_92 0x029c +#define mmMP1_SMN_C2PMSG_92_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_93 0x029d +#define mmMP1_SMN_C2PMSG_93_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_94 0x029e +#define mmMP1_SMN_C2PMSG_94_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_95 0x029f +#define mmMP1_SMN_C2PMSG_95_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_96 0x02a0 +#define mmMP1_SMN_C2PMSG_96_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_97 0x02a1 +#define mmMP1_SMN_C2PMSG_97_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_98 0x02a2 +#define mmMP1_SMN_C2PMSG_98_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_99 0x02a3 +#define mmMP1_SMN_C2PMSG_99_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_100 0x02a4 +#define mmMP1_SMN_C2PMSG_100_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_101 0x02a5 +#define mmMP1_SMN_C2PMSG_101_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_102 0x02a6 +#define mmMP1_SMN_C2PMSG_102_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_103 0x02a7 +#define mmMP1_SMN_C2PMSG_103_BASE_IDX 0 +#define mmMP1_SMN_IH_CREDIT 0x02c1 +#define mmMP1_SMN_IH_CREDIT_BASE_IDX 0 +#define mmMP1_SMN_IH_SW_INT 0x02c2 +#define mmMP1_SMN_IH_SW_INT_BASE_IDX 0 +#define mmMP1_SMN_IH_SW_INT_CTRL 0x02c3 +#define mmMP1_SMN_IH_SW_INT_CTRL_BASE_IDX 0 +#define mmMP1_SMN_FPS_CNT 0x02c4 +#define mmMP1_SMN_FPS_CNT_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH0 0x03c0 +#define mmMP1_SMN_EXT_SCRATCH0_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH1 0x03c1 +#define mmMP1_SMN_EXT_SCRATCH1_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH2 0x03c2 
+#define mmMP1_SMN_EXT_SCRATCH2_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH3 0x03c3 +#define mmMP1_SMN_EXT_SCRATCH3_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH4 0x03c4 +#define mmMP1_SMN_EXT_SCRATCH4_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH5 0x03c5 +#define mmMP1_SMN_EXT_SCRATCH5_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH6 0x03c6 +#define mmMP1_SMN_EXT_SCRATCH6_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH7 0x03c7 +#define mmMP1_SMN_EXT_SCRATCH7_BASE_IDX 0 + + +#endif From fcb5b92801a32904d805fcd8a2f144d604f4fd65 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 17 Jun 2021 12:05:41 +0800 Subject: [PATCH 578/851] drm/amdgpu: add psp v11.0.8 driver for cyan_skillfish Introduce the psp v11.0.8 driver for cyan_skillfish. Signed-off-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c | 208 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h | 30 ++++ 3 files changed, 239 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c create mode 100644 drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index aa5ca7e227e6e..f089794bbdd5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -112,6 +112,7 @@ amdgpu-y += \ psp_v3_1.o \ psp_v10_0.o \ psp_v11_0.o \ + psp_v11_0_8.o \ psp_v12_0.o \ psp_v13_0.o diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c new file mode 100644 index 0000000000000..ff13e1beb49b0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c @@ -0,0 +1,208 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v11_0_8.h" + +#include "mp/mp_11_0_8_offset.h" + +static int psp_v11_0_8_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +static int psp_v11_0_8_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v11_0_8_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v11_0_8_ring_stop(psp, ring_type); + if (ret) { + 
DRM_ERROR("psp_v11_0_8_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + 
+static int psp_v11_0_8_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v11_0_8_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v11_0_8_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + return data; +} + +static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + +static const struct psp_funcs psp_v11_0_8_funcs = { + .ring_init = psp_v11_0_8_ring_init, + .ring_create = psp_v11_0_8_ring_create, + .ring_stop = psp_v11_0_8_ring_stop, + .ring_destroy = psp_v11_0_8_ring_destroy, + .ring_get_wptr = psp_v11_0_8_ring_get_wptr, + .ring_set_wptr = psp_v11_0_8_ring_set_wptr, +}; + +void psp_v11_0_8_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v11_0_8_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h new file mode 100644 index 0000000000000..890377a5afe09 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h @@ -0,0 +1,30 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V11_0_8_H__ +#define __PSP_V11_0_8_H__ + +#include "amdgpu_psp.h" + +void psp_v11_0_8_set_psp_funcs(struct psp_context *psp); + +#endif From eb3eab5a281e528e973b6e1accd8b22fd7655f80 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 13 Jul 2021 17:50:23 -0400 Subject: [PATCH 579/851] drm/amdgpu: init psp v11.0.8 function for cyan_skillfish Add psp v11.0.8 function into psp driver. 
Signed-off-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index caae01690815e..d17ac0298b74a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -33,6 +33,7 @@ #include "psp_v3_1.h" #include "psp_v10_0.h" #include "psp_v11_0.h" +#include "psp_v11_0_8.h" #include "psp_v12_0.h" #include "psp_v13_0.h" @@ -121,6 +122,12 @@ static int psp_early_init(void *handle) psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { + psp_v11_0_8_set_psp_funcs(psp); + psp->autoload_supported = false; + } + break; default: return -EINVAL; } From 1a60b174abb62f32dd49e17d6558d43289efc79a Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 17 Jun 2021 12:48:45 +0800 Subject: [PATCH 580/851] drm/amdgpu: enable psp v11.0.8 for cyan_skillfish Add psp v11.0.8 to ip block initialization. 
v2: use APU flags (Alex) Signed-off-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + drivers/gpu/drm/amd/amdgpu/nv.c | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index d17ac0298b74a..ed731144ca7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3440,6 +3440,14 @@ const struct amdgpu_ip_block_version psp_v11_0_ip_block = .funcs = &psp_ip_funcs, }; +const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 11, + .minor = 0, + .rev = 8, + .funcs = &psp_ip_funcs, +}; + const struct amdgpu_ip_block_version psp_v12_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 64afcd645ec4a..36467f1fdb5e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -426,6 +426,7 @@ extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block; extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 5e4f763263157..2b463c078aa8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -899,6 +899,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &nv_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if 
(likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + amdgpu_device_ip_block_add(adev, &psp_v11_0_8_ip_block); + } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); From c510eedea3a6b73048b593d9ea45bffdb7a9325f Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 17 Jun 2021 12:55:10 +0800 Subject: [PATCH 581/851] drm/amdgpu: use direct loading by default for cyan_skillfish2 Will switch to front door loading by default after this function is stable. v2: use APU flags (Alex) Signed-off-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 257f596fe9c43..e001875ea1b75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -410,6 +410,9 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) else return AMDGPU_FW_LOAD_PSP; case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2 && + load_type > 1) + return AMDGPU_FW_LOAD_PSP; return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); From 1c681e20dac99524bce034e141b0c0f453a4735a Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 1 Jul 2021 15:18:34 +0800 Subject: [PATCH 582/851] drm/amdgpu: add smu_v11_8_pmfw header for cyan_skillfish Add smu_v11_8_pmfw.h for cyan_skillfish. 
Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h | 152 ++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h new file mode 100644 index 0000000000000..bd4fcb6b9610e --- /dev/null +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h @@ -0,0 +1,152 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __SMU_V11_8_0_PMFW_H__ +#define __SMU_V11_8_0_PMFW_H__ + +#pragma pack(push, 1) + +#define ENABLE_DEBUG_FEATURES + +// Feature Control Defines +#define FEATURE_CCLK_CONTROLLER_BIT 0 +#define FEATURE_GFXCLK_EFFT_FREQ_BIT 1 +#define FEATURE_DATA_CALCULATION_BIT 2 +#define FEATURE_THERMAL_BIT 3 +#define FEATURE_PLL_POWER_DOWN_BIT 4 +#define FEATURE_FCLK_DPM_BIT 5 +#define FEATURE_GFX_DPM_BIT 6 +#define FEATURE_DS_GFXCLK_BIT 7 +#define FEATURE_DS_SOCCLK_BIT 8 +#define FEATURE_DS_LCLK_BIT 9 +#define FEATURE_CORE_CSTATES_BIT 10 +#define FEATURE_G6_SSC_BIT 11 //G6 memory UCLK and UCLK_DIV SS +#define FEATURE_RM_BIT 12 +#define FEATURE_SOC_DPM_BIT 13 +#define FEATURE_DS_SMNCLK_BIT 14 +#define FEATURE_DS_MP1CLK_BIT 15 +#define FEATURE_DS_MP0CLK_BIT 16 +#define FEATURE_MGCG_BIT 17 +#define FEATURE_DS_FUSE_SRAM_BIT 18 +#define FEATURE_GFX_CKS_BIT 19 +#define FEATURE_FP_THROTTLING_BIT 20 +#define FEATURE_PROCHOT_BIT 21 +#define FEATURE_CPUOFF_BIT 22 +#define FEATURE_UMC_THROTTLE_BIT 23 +#define FEATURE_DF_THROTTLE_BIT 24 +#define FEATURE_DS_MP3CLK_BIT 25 +#define FEATURE_DS_SHUBCLK_BIT 26 +#define FEATURE_TDC_BIT 27 //Legacy APM_BIT +#define FEATURE_UMC_CAL_SHARING_BIT 28 +#define FEATURE_DFLL_BTC_CALIBRATION_BIT 29 +#define FEATURE_EDC_BIT 30 +#define FEATURE_DLDO_BIT 31 +#define FEATURE_MEAS_DRAM_BLACKOUT_BIT 32 +#define FEATURE_CC1_BIT 33 +#define FEATURE_PPT_BIT 34 +#define FEATURE_STAPM_BIT 35 +#define FEATURE_CSTATE_BOOST_BIT 36 +#define FEATURE_SPARE_37_BIT 37 +#define FEATURE_SPARE_38_BIT 38 +#define FEATURE_SPARE_39_BIT 39 +#define FEATURE_SPARE_40_BIT 40 +#define FEATURE_SPARE_41_BIT 41 +#define FEATURE_SPARE_42_BIT 42 +#define FEATURE_SPARE_43_BIT 43 +#define FEATURE_SPARE_44_BIT 44 +#define FEATURE_SPARE_45_BIT 45 +#define FEATURE_SPARE_46_BIT 46 +#define FEATURE_SPARE_47_BIT 47 +#define FEATURE_SPARE_48_BIT 48 +#define FEATURE_SPARE_49_BIT 49 +#define FEATURE_SPARE_50_BIT 50 +#define FEATURE_SPARE_51_BIT 51 +#define FEATURE_SPARE_52_BIT 52 
+#define FEATURE_SPARE_53_BIT 53 +#define FEATURE_SPARE_54_BIT 54 +#define FEATURE_SPARE_55_BIT 55 +#define FEATURE_SPARE_56_BIT 56 +#define FEATURE_SPARE_57_BIT 57 +#define FEATURE_SPARE_58_BIT 58 +#define FEATURE_SPARE_59_BIT 59 +#define FEATURE_SPARE_60_BIT 60 +#define FEATURE_SPARE_61_BIT 61 +#define FEATURE_SPARE_62_BIT 62 +#define FEATURE_SPARE_63_BIT 63 + +#define NUM_FEATURES 64 + +#define FEATURE_CCLK_CONTROLLER_MASK (1 << FEATURE_CCLK_CONTROLLER_BIT) +#define FEATURE_DATA_CALCULATION_MASK (1 << FEATURE_DATA_CALCULATION_BIT) +#define FEATURE_THERMAL_MASK (1 << FEATURE_THERMAL_BIT) +#define FEATURE_PLL_POWER_DOWN_MASK (1 << FEATURE_PLL_POWER_DOWN_BIT) +#define FEATURE_FCLK_DPM_MASK (1 << FEATURE_FCLK_DPM_BIT) +#define FEATURE_GFX_DPM_MASK (1 << FEATURE_GFX_DPM_BIT) +#define FEATURE_DS_GFXCLK_MASK (1 << FEATURE_DS_GFXCLK_BIT) +#define FEATURE_DS_SOCCLK_MASK (1 << FEATURE_DS_SOCCLK_BIT) +#define FEATURE_DS_LCLK_MASK (1 << FEATURE_DS_LCLK_BIT) +#define FEATURE_RM_MASK (1 << FEATURE_RM_BIT) +#define FEATURE_DS_SMNCLK_MASK (1 << FEATURE_DS_SMNCLK_BIT) +#define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT) +#define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT) +#define FEATURE_MGCG_MASK (1 << FEATURE_MGCG_BIT) +#define FEATURE_DS_FUSE_SRAM_MASK (1 << FEATURE_DS_FUSE_SRAM_BIT) +#define FEATURE_PROCHOT_MASK (1 << FEATURE_PROCHOT_BIT) +#define FEATURE_CPUOFF_MASK (1 << FEATURE_CPUOFF_BIT) +#define FEATURE_GFX_CKS_MASK (1 << FEATURE_GFX_CKS_BIT) +#define FEATURE_UMC_THROTTLE_MASK (1 << FEATURE_UMC_THROTTLE_BIT) +#define FEATURE_DF_THROTTLE_MASK (1 << FEATURE_DF_THROTTLE_BIT) +#define FEATURE_SOC_DPM_MASK (1 << FEATURE_SOC_DPM_BIT) + +typedef struct { + // MP1_EXT_SCRATCH0 + uint32_t SPARE1 : 4; + uint32_t SPARE2 : 4; + uint32_t SPARE3 : 4; + uint32_t CurrLevel_LCLK : 4; + uint32_t CurrLevel_MP0CLK : 4; + uint32_t CurrLevel_FCLK : 4; + uint32_t CurrLevel_SOCCLK : 4; + uint32_t CurrLevel_DCEFCLK : 4; + // MP1_EXT_SCRATCH1 + uint32_t SPARE4 : 4; + uint32_t 
SPARE5 : 4; + uint32_t SPARE6 : 4; + uint32_t TargLevel_LCLK : 4; + uint32_t TargLevel_MP0CLK : 4; + uint32_t TargLevel_FCLK : 4; + uint32_t TargLevel_SOCCLK : 4; + uint32_t TargLevel_DCEFCLK : 4; + // MP1_EXT_SCRATCH2 + uint32_t CurrLevel_SHUBCLK : 4; + uint32_t TargLevel_SHUBCLK : 4; + uint32_t Reserved : 24; + // MP1_EXT_SCRATCH3-4 + uint32_t Reserved2[2]; + // MP1_EXT_SCRATCH5 + uint32_t FeatureStatus[NUM_FEATURES / 32]; +} FwStatus_t; + +#pragma pack(pop) + +#endif From 975c8efc31b54507866be96ec21cbc1f04c7f591 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 1 Jul 2021 15:20:19 +0800 Subject: [PATCH 583/851] drm/amdgpu: add smu_v11_8_ppsmc header for cyan_skillfish Add smu_v11_8_ppsmc.h for cyan_skillfish. Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h | 70 ++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h new file mode 100644 index 0000000000000..6e6088760b180 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h @@ -0,0 +1,70 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU_11_8_0_PPSMC_H +#define SMU_11_8_0_PPSMC_H + +// SMU Response Codes: +#define PPSMC_Result_OK 0x1 +#define PPSMC_Result_Failed 0xFF +#define PPSMC_Result_UnknownCmd 0xFE +#define PPSMC_Result_CmdRejectedPrereq 0xFD +#define PPSMC_Result_CmdRejectedBusy 0xFC + +// Message Definitions: +#define PPSMC_MSG_TestMessage 0x1 +#define PPSMC_MSG_GetSmuVersion 0x2 +#define PPSMC_MSG_GetDriverIfVersion 0x3 +#define PPSMC_MSG_SetDriverTableDramAddrHigh 0x4 +#define PPSMC_MSG_SetDriverTableDramAddrLow 0x5 +#define PPSMC_MSG_TransferTableSmu2Dram 0x6 +#define PPSMC_MSG_TransferTableDram2Smu 0x7 +#define PPSMC_MSG_Rsvd1 0xA +#define PPSMC_MSG_RequestCorePstate 0xB +#define PPSMC_MSG_QueryCorePstate 0xC +#define PPSMC_MSG_Rsvd2 0xD +#define PPSMC_MSG_RequestGfxclk 0xE +#define PPSMC_MSG_QueryGfxclk 0xF +#define PPSMC_MSG_QueryVddcrSocClock 0x11 +#define PPSMC_MSG_QueryDfPstate 0x13 +#define PPSMC_MSG_Rsvd3 0x14 +#define PPSMC_MSG_ConfigureS3PwrOffRegisterAddressHigh 0x16 +#define PPSMC_MSG_ConfigureS3PwrOffRegisterAddressLow 0x17 +#define PPSMC_MSG_RequestActiveWgp 0x18 +#define PPSMC_MSG_SetMinDeepSleepGfxclkFreq 0x19 +#define PPSMC_MSG_SetMaxDeepSleepDfllGfxDiv 0x1A +#define PPSMC_MSG_StartTelemetryReporting 0x1B +#define PPSMC_MSG_StopTelemetryReporting 0x1C +#define PPSMC_MSG_ClearTelemetryMax 0x1D +#define PPSMC_MSG_QueryActiveWgp 0x1E +#define PPSMC_MSG_SetCoreEnableMask 0x2C +#define PPSMC_MSG_InitiateGcRsmuSoftReset 0x2E +#define PPSMC_MSG_GfxCacWeightOperation 
0x2F +#define PPSMC_MSG_L3CacWeightOperation 0x30 +#define PPSMC_MSG_PackCoreCacWeight 0x31 +#define PPSMC_MSG_SetDriverTableVMID 0x34 +#define PPSMC_MSG_SetSoftMinCclk 0x35 +#define PPSMC_MSG_SetSoftMaxCclk 0x36 +#define PPSMC_Message_Count 0x37 + +#endif From ca23c498429694b0812f9a6cc825752d6cea5686 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 1 Jul 2021 15:21:59 +0800 Subject: [PATCH 584/851] drm/amdgpu: add smu interface header for cyan_skillfish Add smu11_driver_if_cyan_skillfish.h for cyan_skillfish. Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- .../pm/inc/smu11_driver_if_cyan_skillfish.h | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h new file mode 100644 index 0000000000000..8a08ecc34c699 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h @@ -0,0 +1,95 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMU11_DRIVER_IF_CYAN_SKILLFISH_H__ +#define __SMU11_DRIVER_IF_CYAN_SKILLFISH_H__ + +// *** IMPORTANT *** +// Always increment the interface version if +// any structure is changed in this file +#define MP1_DRIVER_IF_VERSION 0x8 + +#define TABLE_BIOS_IF 0 // Called by BIOS +#define TABLE_WATERMARKS 1 // Called by Driver; defined here, but not used, for backward compatible +#define TABLE_PMSTATUSLOG 3 // Called by Tools for Agm logging +#define TABLE_DPMCLOCKS 4 // Called by Driver; defined here, but not used, for backward compatible +#define TABLE_MOMENTARY_PM 5 // Called by Tools; defined here, but not used, for backward compatible +#define TABLE_COUNT 6 + +#define NUM_DSPCLK_LEVELS 8 +#define NUM_SOCCLK_DPM_LEVELS 8 +#define NUM_DCEFCLK_DPM_LEVELS 4 +#define NUM_FCLK_DPM_LEVELS 4 +#define NUM_MEMCLK_DPM_LEVELS 4 + +#define NUMBER_OF_PSTATES 8 +#define NUMBER_OF_CORES 8 + +typedef enum { + S3_TYPE_ENTRY, + S5_TYPE_ENTRY, +} Sleep_Type_e; + +typedef enum { + GFX_OFF = 0, + GFX_ON = 1, +} GFX_Mode_e; + +typedef enum { + CPU_P0 = 0, + CPU_P1, + CPU_P2, + CPU_P3, + CPU_P4, + CPU_P5, + CPU_P6, + CPU_P7 +} CPU_PState_e; + +typedef enum { + CPU_CORE0 = 0, + CPU_CORE1, + CPU_CORE2, + CPU_CORE3, + CPU_CORE4, + CPU_CORE5, + CPU_CORE6, + CPU_CORE7 +} CORE_ID_e; + +typedef enum { + DF_DPM0 = 0, + DF_DPM1, + DF_DPM2, + DF_DPM3, + DF_PState_Count +} DF_PState_e; + +typedef enum { + GFX_DPM0 = 0, + GFX_DPM1, + GFX_DPM2, + GFX_DPM3, + GFX_PState_Count +} GFX_PState_e; + +#endif From 587b05b2aa0f29b0de650ab842b6f5215e41fa3c Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 1 Jul 2021 15:39:55 +0800 Subject: [PATCH 585/851] drm/amdgpu: add basic ppt functions for cyan_skillfish Add basic 
ppt funcs support for cyan_skillfish. Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile | 1 + .../amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 75 +++++++++++++++++++ .../amd/pm/swsmu/smu11/cyan_skillfish_ppt.h | 29 +++++++ 3 files changed, 105 insertions(+) create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile b/drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile index 0138c982dfd31..f9b2e16f64311 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile @@ -27,6 +27,7 @@ SMU11_MGR = arcturus_ppt.o \ navi10_ppt.o \ sienna_cichlid_ppt.o \ vangogh_ppt.o \ + cyan_skillfish_ppt.o \ smu_v11_0.o AMD_SWSMU_SMU11MGR = $(addprefix $(AMD_SWSMU_PATH)/smu11/,$(SMU11_MGR)) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c new file mode 100644 index 0000000000000..4fbffc14021c2 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c @@ -0,0 +1,75 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#define SWSMU_CODE_LAYER_L2 + +#include "amdgpu.h" +#include "amdgpu_smu.h" +#include "smu_v11_0.h" +#include "smu11_driver_if_cyan_skillfish.h" +#include "cyan_skillfish_ppt.h" +#include "smu_v11_8_ppsmc.h" +#include "smu_v11_8_pmfw.h" +#include "smu_cmn.h" +#include "soc15_common.h" + +/* + * DO NOT use these for err/warn/info/debug messages. + * Use dev_err, dev_warn, dev_info and dev_dbg instead. + * They are more MGPU friendly. + */ + +#undef pr_err +#undef pr_warn +#undef pr_info +#undef pr_debug + +static struct cmn2asic_msg_mapping cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] = { + MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), + MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 0), + MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 0), + MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverTableDramAddrHigh, 0), + MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverTableDramAddrLow, 0), + MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram, 0), + MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu, 0), +}; + +static const struct pptable_funcs cyan_skillfish_ppt_funcs = { + + .check_fw_status = smu_v11_0_check_fw_status, + .init_power = smu_v11_0_init_power, + .fini_power = smu_v11_0_fini_power, + .register_irq_handler = smu_v11_0_register_irq_handler, + .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, + .send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param, + .send_smc_msg = smu_cmn_send_smc_msg, + 
.set_driver_table_location = smu_v11_0_set_driver_table_location, + .interrupt_work = smu_v11_0_interrupt_work, +}; + +void cyan_skillfish_set_ppt_funcs(struct smu_context *smu) +{ + smu->ppt_funcs = &cyan_skillfish_ppt_funcs; + smu->message_map = cyan_skillfish_message_map; + smu->is_apu = true; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h new file mode 100644 index 0000000000000..76cd7229e3834 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h @@ -0,0 +1,29 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __CYAN_SKILLFISH_PPT_H__ +#define __CYAN_SKILLFISH_PPT_H__ + +extern void cyan_skillfish_set_ppt_funcs(struct smu_context *smu); + +#endif From 10a41510db9f9bebc0a6e3f06132f91072c929b7 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Wed, 21 Jul 2021 11:05:07 -0400 Subject: [PATCH 586/851] drm/amdgpu: add check_fw_version support for cyan_skillfish Add check_fw_version function support for cyan_skillfish. Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 1 + drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 385b2ea5379ca..d8048c3e3ed8f 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -35,6 +35,7 @@ #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF #define SMU11_DRIVER_IF_VERSION_Beige_Goby 0xD +#define SMU11_DRIVER_IF_VERSION_Cyan_Skillfish 0x8 /* MP Apertures */ #define MP0_Public 0x03800000 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c index 4fbffc14021c2..b05f9541accc3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c @@ -57,6 +57,7 @@ static struct cmn2asic_msg_mapping cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] static const struct pptable_funcs cyan_skillfish_ppt_funcs = { .check_fw_status = smu_v11_0_check_fw_status, + .check_fw_version = smu_v11_0_check_fw_version, .init_power = smu_v11_0_init_power, .fini_power = smu_v11_0_fini_power, .register_irq_handler = smu_v11_0_register_irq_handler, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 
0a5d46ac9ccd8..1ba57e5afefb9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -266,6 +266,9 @@ int smu_v11_0_check_fw_version(struct smu_context *smu) case CHIP_BEIGE_GOBY: smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_Beige_Goby; break; + case CHIP_CYAN_SKILLFISH: + smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_Cyan_Skillfish; + break; default: dev_err(smu->adev->dev, "smu unsupported asic type:%d.\n", smu->adev->asic_type); smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_INV; From c7ef60ebefd176ab56f8cdbd1e3b9dae693dd8cd Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 13 Jul 2021 17:55:52 -0400 Subject: [PATCH 587/851] drm/amdgpu: enable SMU for cyan_skillfish Enable SMU support for cyan_skillfish. v2: Squash in fix (Alex) Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 17 +++++++++-------- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 4 ++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 2b463c078aa8b..436fb13e32f0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -897,16 +897,17 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) break; case CHIP_CYAN_SKILLFISH: amdgpu_device_ip_block_add(adev, &nv_common_ip_block); - amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { - if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v11_0_8_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); } - if 
(adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index ebe6721428085..d68c78be9e98d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -36,6 +36,7 @@ #include "vangogh_ppt.h" #include "aldebaran_ppt.h" #include "yellow_carp_ppt.h" +#include "cyan_skillfish_ppt.h" #include "amd_pcie.h" /* @@ -588,6 +589,9 @@ static int smu_set_funcs(struct amdgpu_device *adev) case CHIP_YELLOW_CARP: yellow_carp_set_ppt_funcs(smu); break; + case CHIP_CYAN_SKILLFISH: + cyan_skillfish_set_ppt_funcs(smu); + break; default: return -EINVAL; } From 50376802db0621a9d77b6bbb48a9cc328f7fb955 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Mon, 12 Jul 2021 10:51:15 +0800 Subject: [PATCH 588/851] drm/amdgpu: add autoload_supported check for RLC autoload ASIC cyan_skillfish2 won't support RLC autoload when using front door loading. We just use PSP to load firmware like gfx9 here. So add autoload_supported flag check instead of just checking firmware load type for RLC autoload. 
Signed-off-by: Lang Yu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 26334d5147aae..9f3d82dfb79c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5435,7 +5435,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) { int r; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + adev->psp.autoload_supported) { r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) From eb6ffb59959c1538cfb6c9e31718e71dd15ff30f Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 13 Jul 2021 17:57:06 -0400 Subject: [PATCH 589/851] drm/amdgpu: add pci device id for cyan_skillfish Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index fe29acff69aef..c2a8fef8ddb04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1212,6 +1212,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + /* CYAN_SKILLFISH */ + {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0, 0, 0} }; From 7c57130c789734aee081a2a29abf2e07e6e50814 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 16 Jul 2021 22:46:21 -0400 Subject: [PATCH 590/851] drm/amdkfd: Allow querying SVM attributes that are clear Currently the SVM get_attr call allows querying, which flags are set in the entire address range. 
Add the opposite query, which flags are clear in the entire address range. Both queries can be combined in a single get_attr call, which allows answering questions such as, "is this address range coherent, non-coherent, or a mix of both"? Signed-off-by: Felix Kuehling Reviewed-by: Philip Yand Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 19 +++++++++++++------ include/uapi/linux/kfd_ioctl.h | 16 +++++++++------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c7b364e4a287f..7df69b77bc5cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3019,7 +3019,8 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, struct svm_range *prange; uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; - uint32_t flags = 0xffffffff; + uint32_t flags_and = 0xffffffff; + uint32_t flags_or = 0; int gpuidx; uint32_t i; @@ -3046,12 +3047,12 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, get_accessible = true; break; case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: get_flags = true; break; case KFD_IOCTL_SVM_ATTR_GRANULARITY: get_granularity = true; break; - case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: case KFD_IOCTL_SVM_ATTR_NO_ACCESS: fallthrough; @@ -3069,7 +3070,8 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, if (!node) { pr_debug("range attrs not found return default values\n"); svm_range_set_default_attributes(&location, &prefetch_loc, - &granularity, &flags); + &granularity, &flags_and); + flags_or = flags_and; if (p->xnack_enabled) bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE); @@ -3115,8 +3117,10 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, bitmap_and(bitmap_aip, 
bitmap_aip, prange->bitmap_aip, MAX_GPU_INSTANCE); } - if (get_flags) - flags &= prange->flags; + if (get_flags) { + flags_and &= prange->flags; + flags_or |= prange->flags; + } if (get_granularity && prange->granularity < granularity) granularity = prange->granularity; @@ -3150,7 +3154,10 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS; break; case KFD_IOCTL_SVM_ATTR_SET_FLAGS: - attrs[i].value = flags; + attrs[i].value = flags_and; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + attrs[i].value = ~flags_or; break; case KFD_IOCTL_SVM_ATTR_GRANULARITY: attrs[i].value = (uint32_t)granularity; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 3cb5b5dd9f77e..af96af174dc47 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -31,9 +31,10 @@ * - 1.3 - Add SMI events support * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API + * - 1.6 - Query clear flags in SVM get_attr API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 5 +#define KFD_IOCTL_MINOR_VERSION 6 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -575,18 +576,19 @@ struct kfd_ioctl_svm_attribute { * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC resepctively. For * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be - * aggregated by bitwise AND. The minimum migration granularity - * throughout the range will be returned for - * @KFD_IOCTL_SVM_ATTR_GRANULARITY. + * aggregated by bitwise AND. That means, a flag will be set in the + * output, if that flag is set for all pages in the range. For + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS, flags of all pages will be + * aggregated by bitwise NOR. That means, a flag will be set in the + * output, if that flag is clear for all pages in the range. 
+ * The minimum migration granularity throughout the range will be + * returned for @KFD_IOCTL_SVM_ATTR_GRANULARITY. * * Querying of accessibility attributes works by initializing the * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the * GPUID being queried. Multiple attributes can be given to allow * querying multiple GPUIDs. The ioctl function overwrites the * attribute type to indicate the access for the specified GPU. - * - * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS is invalid for - * @KFD_IOCTL_SVM_OP_GET_ATTR. */ struct kfd_ioctl_svm_args { __u64 start_addr; From 701b5ff31d78cdc56f8cb06aa4d8a4b262245b3f Mon Sep 17 00:00:00 2001 From: "Liang Liang (Leo)" Date: Sat, 20 Feb 2021 08:58:08 +0800 Subject: [PATCH 591/851] usb/host: enable auto power control for xhci-pci [why] USB control should enter D3 state with no device connected [How] Enable runtime power management for XHCI-PCI USB host controller Jira-ID: MR-1739/AER-588 Signed-off-by: Liang Liang (Leo) Signed-off-by: Ziyang Wu Signed-off-by: Alex Deucher --- drivers/usb/host/xhci-pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 18c2bbddf080b..3aba60c4e10f1 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -164,6 +164,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x43bb)) xhci->quirks |= XHCI_SUSPEND_DELAY; + if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x163b) + xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; + if (pdev->vendor == PCI_VENDOR_ID_AMD && (pdev->device == 0x15e0 || pdev->device == 0x15e1)) xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND; From a47a21624b60056859def3cdebaf6f18f25b799c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 May 2020 13:58:26 -0400 Subject: [PATCH 592/851] drm/amdgpu: add another raven1 gfxoff quirk HP Elite Disk 705 G4 Micro seems to have issues with gfxoff. 
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=207899 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 03acc777adf7e..d5899d1a3936f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1267,6 +1267,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=207899 */ + { 0x1002, 0x15dd, 0x103c, 0x83e9, 0xd6 }, { 0, 0, 0, 0, 0 }, }; From 06bd6b581a8c75dc8d21db30235fae658a6f7cb3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Mar 2021 10:40:53 -0500 Subject: [PATCH 593/851] drm/amdgpu: only check for _PR3 on dGPUs We don't support runtime pm on APUs. They support more dynamic power savings using clock and powergating. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 20b049ad61c10..8568ef20a80cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -164,8 +164,10 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) flags |= AMD_IS_PX; - parent = pci_upstream_bridge(adev->pdev); - adev->has_pr3 = parent ? pci_pr3_present(parent) : false; + if (!(flags & AMD_IS_APU)) { + parent = pci_upstream_bridge(adev->pdev); + adev->has_pr3 = parent ? 
pci_pr3_present(parent) : false; + } /* amdgpu_device_init should report only fatal error * like memory allocation failure or iomapping failure, From 091f05c13c91c5dd282d9b46137dcbbbad90aa87 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Mar 2021 22:05:09 -0500 Subject: [PATCH 594/851] drm/amdgpu/swsmu/vgh: rename MSG_RlcPowerNotify To match the new definition: MSG_RlcPowerDownNotify. We only need to tell the SMU when we turn off the RLC; when it powers up it already handshakes with SMU. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_types.h | 2 +- drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 1d3765b873df4..c04c6c17a9ea1 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -183,7 +183,7 @@ __SMU_DUMMY_MAP(SET_DRIVER_DUMMY_TABLE_DRAM_ADDR_LOW), \ __SMU_DUMMY_MAP(GET_UMC_FW_WA), \ __SMU_DUMMY_MAP(Mode1Reset), \ - __SMU_DUMMY_MAP(RlcPowerNotify), \ + __SMU_DUMMY_MAP(RlcPowerDownNotify), \ __SMU_DUMMY_MAP(SetHardMinIspiclkByFreq), \ __SMU_DUMMY_MAP(SetHardMinIspxclkByFreq), \ __SMU_DUMMY_MAP(SetSoftMinSocclkByFreq), \ diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h index fe130a497d6c3..27a8fd5bc14b6 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h @@ -41,7 +41,7 @@ #define PPSMC_MSG_PowerUpIspByTile 0x7 #define PPSMC_MSG_PowerDownVcn 0x8 // VCN is power gated by default #define PPSMC_MSG_PowerUpVcn 0x9 -#define PPSMC_MSG_RlcPowerNotify 0xA +#define PPSMC_MSG_RlcPowerDownNotify 0xA #define PPSMC_MSG_SetHardMinVcn 0xB // For wireless display #define PPSMC_MSG_SetSoftMinGfxclk 0xC //Sets SoftMin for GFXCLK. 
Arg is in MHz #define PPSMC_MSG_ActiveProcessNotify 0xD diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 18681dc458da2..335b3c70e1a7c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -69,7 +69,7 @@ static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(PowerUpIspByTile, PPSMC_MSG_PowerUpIspByTile, 0), MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 0), MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 0), - MSG_MAP(RlcPowerNotify, PPSMC_MSG_RlcPowerNotify, 0), + MSG_MAP(RlcPowerDownNotify, PPSMC_MSG_RlcPowerDownNotify, 0), MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn, 0), MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxclk, 0), MSG_MAP(ActiveProcessNotify, PPSMC_MSG_ActiveProcessNotify, 0), @@ -1979,7 +1979,7 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en) int ret = 0; if (adev->pm.fw_version >= 0x43f1700 && !en) - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify, + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerDownNotify, RLC_STATUS_OFF, NULL); bitmap_zero(feature->enabled, feature->feature_num); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h index c56d4583dc723..6c8093eefef77 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h @@ -54,6 +54,5 @@ extern void vangogh_set_ppt_funcs(struct smu_context *smu); /* RLC Power Status */ #define RLC_STATUS_OFF 0 -#define RLC_STATUS_NORMAL 1 #endif From 2590178f8f7b617bb95699af707420ff3901d22c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Mar 2021 09:15:26 -0500 Subject: [PATCH 595/851] Revert "amd/amdgpu: Disable VCN DPG mode for Picasso" This reverts commit c6d2b0fbb893d5c7dda405aa0e7bcbecf1c75f98. 
This patch is a workaround for a hardware bug, but I don't know that we've actually seen the hw bug triggered in practice, meanwhile a number of people have reported that this causes suspend and resume issues. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277 Cc: Veerabadhran Gopalakrishnan Cc: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a5e085e570f72..9ee1fcbb23b31 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1275,7 +1275,8 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_MMHUB | - AMD_PG_SUPPORT_VCN; + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } else { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | From f66ae338f788855db426efa78b10b4fb0f28fa81 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 23 Mar 2021 21:19:55 -0400 Subject: [PATCH 596/851] Revert "drm/amd/display: To modify the condition in indicating branch device" This breaks HDMI audio. This reverts commit 9413b23fadad3861f5afd626ac44ef83ad8068ab. 
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1536 Signed-off-by: Alex Deucher Cc: Martin Tsai Cc: Bindu Ramamurthy --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 8b35cd9d4c01d..036a87af6c5b9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3457,7 +3457,13 @@ static void get_active_converter_info( } /* DPCD 0x5 bit 0 = 1, it indicate it's branch device */ - link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT; + if (ds_port.fields.PORT_TYPE == DOWNSTREAM_DP) { + link->dpcd_caps.is_branch_dev = false; + } + + else { + link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT; + } switch (ds_port.fields.PORT_TYPE) { case DOWNSTREAM_VGA: From e4538360f53a05868001af9e1767c4fd9ee8610a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 29 Mar 2021 13:37:57 -0400 Subject: [PATCH 597/851] drm/amdgpu: check whether s2idle is enabled to determine s0ix For legacy S3, we need to use different handling in the driver. Suggested by Heiko Przybyl. 
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1553 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 84a1b4bc9bb4d..59373c697b00b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1042,7 +1043,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) #if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) - return true; + return pm_suspend_default_s2idle(); } #endif return false; From a977e4867088eaf87ea606f3f99c5d2af9a2e832 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 22 Jul 2019 16:38:14 +0200 Subject: [PATCH 598/851] drm/radeon: Add HD-audio component notifier support (v2) This patch adds the support for the notification of HD-audio hotplug via the already existing drm_audio_component framework to radeon driver. This allows us more reliable hotplug notification and ELD transfer without accessing HD-audio bus; it's more efficient, and more importantly, it works without waking up the runtime PM. The implementation is rather simplistic: radeon driver provides the get_eld ops for HD-audio, and it notifies the audio hotplug via pin_eld_notify callback upon each radeon_audio_enable() call. The pin->id is referred as the port number passed to the notifier callback, and the corresponding connector is looked through the encoder list in the get_eld callback in turn. The bind and unbind callbacks handle the device-link so that it assures the PM call order. 
v2: fix the logic in radeon_audio_component_get_eld to walk the connector list since that is where the EDID lives and we can derive the encoder from the connector because the encoder has not been assigned at this point (i.e., during monitor probe). Acked-by: Jim Qu Acked-by: Alex Deucher Signed-off-by: Takashi Iwai Signed-off-by: Alex Deucher --- drivers/gpu/drm/Kconfig | 1 + drivers/gpu/drm/radeon/radeon.h | 3 + drivers/gpu/drm/radeon/radeon_audio.c | 95 +++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 7ff89690a976a..9d624a23e5468 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -230,6 +230,7 @@ config DRM_RADEON select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE + select SND_HDA_COMPONENT if SND_HDA_CORE help Choose this option if you have an ATI Radeon graphics card. There are both PCI and AGP versions. You don't need to choose this to diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 895776c421d4d..67d0274e8fce0 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -79,6 +79,7 @@ #include #include +#include #include "radeon_family.h" #include "radeon_mode.h" @@ -1797,6 +1798,8 @@ struct r600_audio { struct radeon_audio_funcs *hdmi_funcs; struct radeon_audio_funcs *dp_funcs; struct radeon_audio_basic_funcs *funcs; + struct drm_audio_component *component; + bool component_registered; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 7c5e80d03fc90..8c6e7979b9880 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -23,6 +23,7 @@ */ #include +#include #include #include "dce6_afmt.h" @@ -180,6 +181,14 @@ static struct radeon_audio_funcs dce6_dp_funcs = { .dpms = evergreen_dp_enable, }; +static void radeon_audio_component_notify(struct drm_audio_component *acomp, + int port) +{ + if (acomp 
&& acomp->audio_ops && acomp->audio_ops->pin_eld_notify) + acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr, + port, -1); +} + static void radeon_audio_enable(struct radeon_device *rdev, struct r600_audio_pin *pin, u8 enable_mask) { @@ -207,6 +216,8 @@ static void radeon_audio_enable(struct radeon_device *rdev, if (rdev->audio.funcs->enable) rdev->audio.funcs->enable(rdev, pin, enable_mask); + + radeon_audio_component_notify(rdev->audio.component, pin->id); } static void radeon_audio_interface_init(struct radeon_device *rdev) @@ -230,6 +241,82 @@ static void radeon_audio_interface_init(struct radeon_device *rdev) } } +static int radeon_audio_component_get_eld(struct device *kdev, int port, + int pipe, bool *enabled, + unsigned char *buf, int max_bytes) +{ + struct drm_device *dev = dev_get_drvdata(kdev); + struct drm_encoder *encoder; + struct radeon_encoder *radeon_encoder; + struct radeon_encoder_atom_dig *dig; + struct drm_connector *connector; + int ret = 0; + + *enabled = false; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + const struct drm_connector_helper_funcs *connector_funcs = + connector->helper_private; + encoder = connector_funcs->best_encoder(connector); + + if (!encoder) + continue; + if (!radeon_encoder_is_digital(encoder)) + continue; + radeon_encoder = to_radeon_encoder(encoder); + dig = radeon_encoder->enc_priv; + if (!dig->pin || dig->pin->id != port) + continue; + *enabled = true; + ret = drm_eld_size(connector->eld); + memcpy(buf, connector->eld, min(max_bytes, ret)); + break; + } + + return ret; +} + +static const struct drm_audio_component_ops radeon_audio_component_ops = { + .get_eld = radeon_audio_component_get_eld, +}; + +static int radeon_audio_component_bind(struct device *kdev, + struct device *hda_kdev, void *data) +{ + struct drm_device *dev = dev_get_drvdata(kdev); + struct radeon_device *rdev = dev->dev_private; + struct drm_audio_component *acomp = data; + + if 
(WARN_ON(!device_link_add(hda_kdev, kdev, DL_FLAG_STATELESS))) + return -ENOMEM; + + drm_modeset_lock_all(dev); + acomp->ops = &radeon_audio_component_ops; + acomp->dev = kdev; + rdev->audio.component = acomp; + drm_modeset_unlock_all(dev); + + return 0; +} + +static void radeon_audio_component_unbind(struct device *kdev, + struct device *hda_kdev, void *data) +{ + struct drm_device *dev = dev_get_drvdata(kdev); + struct radeon_device *rdev = dev->dev_private; + struct drm_audio_component *acomp = data; + + drm_modeset_lock_all(dev); + rdev->audio.component = NULL; + acomp->ops = NULL; + acomp->dev = NULL; + drm_modeset_unlock_all(dev); +} + +static const struct component_ops radeon_audio_component_bind_ops = { + .bind = radeon_audio_component_bind, + .unbind = radeon_audio_component_unbind, +}; + static int radeon_audio_chipset_supported(struct radeon_device *rdev) { return ASIC_IS_DCE2(rdev) && !ASIC_IS_NODCE(rdev); @@ -276,6 +363,9 @@ int radeon_audio_init(struct radeon_device *rdev) for (i = 0; i < rdev->audio.num_pins; i++) radeon_audio_enable(rdev, &rdev->audio.pin[i], 0); + if (!component_add(rdev->dev, &radeon_audio_component_bind_ops)) + rdev->audio.component_registered = true; + return 0; } @@ -428,6 +518,11 @@ void radeon_audio_fini(struct radeon_device *rdev) radeon_audio_enable(rdev, &rdev->audio.pin[i], 0); rdev->audio.enabled = false; + + if (rdev->audio.component_registered) { + component_del(rdev->dev, &radeon_audio_component_bind_ops); + rdev->audio.component_registered = false; + } } static void radeon_audio_set_dto(struct drm_encoder *encoder, unsigned int clock) From c0f17b43af624df1b9a96e4a7da4df94d5e85466 Mon Sep 17 00:00:00 2001 From: Marcin Bachry Date: Sat, 27 Feb 2021 17:06:49 +0100 Subject: [PATCH 599/851] PCI: quirks: Quirk PCI d3hot delay for AMD xhci Renoir needs a similar delay. [Alex: I talked to the AMD USB hardware team and the AMD windows team and they are not aware of any HW errata or specific issues. 
The HW works fine in windows. That said, we don't have the source to relevant general windows code so I don't know exactly what windows does there. Apparently windows uses a rather generous default delay for PCI state transitions, but I have not been able to confirm what it is yet.] Signed-off-by: Marcin Bachry Signed-off-by: Alex Deucher Cc: mario.limonciello@amd.com Cc: prike.liang@amd.com Cc: shyam-sundar.s-k@amd.com --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 22b2bb1109c9e..dea10d62d5b90 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1899,6 +1899,7 @@ static void quirk_ryzen_xhci_d3hot(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e0, quirk_ryzen_xhci_d3hot); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e1, quirk_ryzen_xhci_d3hot); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1639, quirk_ryzen_xhci_d3hot); #ifdef CONFIG_X86_IO_APIC static int dmi_disable_ioapicreroute(const struct dmi_system_id *d) From d91a713ed367868ad8ddb50f7927545073bf38b9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 8 Jul 2021 16:31:10 -0400 Subject: [PATCH 600/851] drm/amdgpu/display: add support for multiple backlights On platforms that support multiple backlights, register each one separately. This lets us manage them independently rather than registering a single backlight and applying the same settings to both. 
v2: fix typo: Reported-by: kernel test robot Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 +- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 147 ++++++++++-------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 4 +- 3 files changed, 84 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 59373c697b00b..9122dcf42c469 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -854,8 +854,8 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) struct amdgpu_display_manager *dm = &adev->dm; - if (dm->backlight_dev) - atif->bd = dm->backlight_dev; + if (dm->backlight_dev[0]) + atif->bd = dm->backlight_dev[0]; #endif } else { struct drm_encoder *tmp; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0a1dd25e567d9..12db13d2bce96 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2412,6 +2412,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) static const u8 pre_computed_values[] = { 50, 51, 52, 53, 55, 56, 57, 58, 59, 61, 62, 63, 65, 66, 68, 69, 71, 72, 74, 75, 77, 79, 81, 82, 84, 86, 88, 90, 92, 94, 96, 98}; + int i; if (!aconnector || !aconnector->dc_link) return; @@ -2423,7 +2424,13 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) conn_base = &aconnector->base; adev = drm_to_adev(conn_base->dev); dm = &adev->dm; - caps = &dm->backlight_caps; + for (i = 0; i < dm->num_of_edps; i++) { + if (link == dm->backlight_link[i]) + break; + } + if (i >= dm->num_of_edps) + return; + caps = &dm->backlight_caps[i]; caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps; caps->aux_support = false; max_cll = 
conn_base->hdr_sink_metadata.hdmi_type1.max_cll; @@ -3423,35 +3430,36 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) -static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm) +static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, + int bl_idx) { #if defined(CONFIG_ACPI) struct amdgpu_dm_backlight_caps caps; memset(&caps, 0, sizeof(caps)); - if (dm->backlight_caps.caps_valid) + if (dm->backlight_caps[bl_idx].caps_valid) return; amdgpu_acpi_get_backlight_caps(&caps); if (caps.caps_valid) { - dm->backlight_caps.caps_valid = true; + dm->backlight_caps[bl_idx].caps_valid = true; if (caps.aux_support) return; - dm->backlight_caps.min_input_signal = caps.min_input_signal; - dm->backlight_caps.max_input_signal = caps.max_input_signal; + dm->backlight_caps[bl_idx].min_input_signal = caps.min_input_signal; + dm->backlight_caps[bl_idx].max_input_signal = caps.max_input_signal; } else { - dm->backlight_caps.min_input_signal = + dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; - dm->backlight_caps.max_input_signal = + dm->backlight_caps[bl_idx].max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; } #else - if (dm->backlight_caps.aux_support) + if (dm->backlight_caps[bl_idx].aux_support) return; - dm->backlight_caps.min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; - dm->backlight_caps.max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; + dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; + dm->backlight_caps[bl_idx].max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; #endif } @@ -3502,41 +3510,31 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap } static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, + int bl_idx, u32 user_brightness) { struct amdgpu_dm_backlight_caps caps; - struct dc_link 
*link[AMDGPU_DM_MAX_NUM_EDP]; - u32 brightness[AMDGPU_DM_MAX_NUM_EDP]; + struct dc_link *link; + u32 brightness; bool rc; - int i; - amdgpu_dm_update_backlight_caps(dm); - caps = dm->backlight_caps; + amdgpu_dm_update_backlight_caps(dm, bl_idx); + caps = dm->backlight_caps[bl_idx]; - for (i = 0; i < dm->num_of_edps; i++) { - dm->brightness[i] = user_brightness; - brightness[i] = convert_brightness_from_user(&caps, dm->brightness[i]); - link[i] = (struct dc_link *)dm->backlight_link[i]; - } + dm->brightness[bl_idx] = user_brightness; + brightness = convert_brightness_from_user(&caps, dm->brightness[bl_idx]); + link = (struct dc_link *)dm->backlight_link[bl_idx]; /* Change brightness based on AUX property */ if (caps.aux_support) { - for (i = 0; i < dm->num_of_edps; i++) { - rc = dc_link_set_backlight_level_nits(link[i], true, brightness[i], - AUX_BL_DEFAULT_TRANSITION_TIME_MS); - if (!rc) { - DRM_DEBUG("DM: Failed to update backlight via AUX on eDP[%d]\n", i); - break; - } - } + rc = dc_link_set_backlight_level_nits(link, true, brightness, + AUX_BL_DEFAULT_TRANSITION_TIME_MS); + if (!rc) + DRM_DEBUG("DM: Failed to update backlight via AUX on eDP[%d]\n", bl_idx); } else { - for (i = 0; i < dm->num_of_edps; i++) { - rc = dc_link_set_backlight_level(dm->backlight_link[i], brightness[i], 0); - if (!rc) { - DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", i); - break; - } - } + rc = dc_link_set_backlight_level(link, brightness, 0); + if (!rc) + DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } return rc ? 
0 : 1; @@ -3545,33 +3543,41 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); + int i; - amdgpu_dm_backlight_set_level(dm, bd->props.brightness); + for (i = 0; i < dm->num_of_edps; i++) { + if (bd == dm->backlight_dev[i]) + break; + } + if (i >= AMDGPU_DM_MAX_NUM_EDP) + i = 0; + amdgpu_dm_backlight_set_level(dm, i, bd->props.brightness); return 0; } -static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm) +static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm, + int bl_idx) { struct amdgpu_dm_backlight_caps caps; + struct dc_link *link = (struct dc_link *)dm->backlight_link[bl_idx]; - amdgpu_dm_update_backlight_caps(dm); - caps = dm->backlight_caps; + amdgpu_dm_update_backlight_caps(dm, bl_idx); + caps = dm->backlight_caps[bl_idx]; if (caps.aux_support) { - struct dc_link *link = (struct dc_link *)dm->backlight_link[0]; u32 avg, peak; bool rc; rc = dc_link_get_backlight_level_nits(link, &avg, &peak); if (!rc) - return dm->brightness[0]; + return dm->brightness[bl_idx]; return convert_brightness_to_user(&caps, avg); } else { - int ret = dc_link_get_backlight_level(dm->backlight_link[0]); + int ret = dc_link_get_backlight_level(link); if (ret == DC_ERROR_UNEXPECTED) - return dm->brightness[0]; + return dm->brightness[bl_idx]; return convert_brightness_to_user(&caps, ret); } } @@ -3579,8 +3585,15 @@ static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm) static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); + int i; - return amdgpu_dm_backlight_get_level(dm); + for (i = 0; i < dm->num_of_edps; i++) { + if (bd == dm->backlight_dev[i]) + break; + } + if (i >= AMDGPU_DM_MAX_NUM_EDP) + i = 0; + return amdgpu_dm_backlight_get_level(dm, i); } static const struct backlight_ops 
amdgpu_dm_backlight_ops = { @@ -3594,31 +3607,28 @@ amdgpu_dm_register_backlight_device(struct amdgpu_display_manager *dm) { char bl_name[16]; struct backlight_properties props = { 0 }; - int i; - amdgpu_dm_update_backlight_caps(dm); - for (i = 0; i < dm->num_of_edps; i++) - dm->brightness[i] = AMDGPU_MAX_BL_LEVEL; + amdgpu_dm_update_backlight_caps(dm, dm->num_of_edps); + dm->brightness[dm->num_of_edps] = AMDGPU_MAX_BL_LEVEL; props.max_brightness = AMDGPU_MAX_BL_LEVEL; props.brightness = AMDGPU_MAX_BL_LEVEL; props.type = BACKLIGHT_RAW; snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d", - adev_to_drm(dm->adev)->primary->index); + adev_to_drm(dm->adev)->primary->index + dm->num_of_edps); - dm->backlight_dev = backlight_device_register(bl_name, - adev_to_drm(dm->adev)->dev, - dm, - &amdgpu_dm_backlight_ops, - &props); + dm->backlight_dev[dm->num_of_edps] = backlight_device_register(bl_name, + adev_to_drm(dm->adev)->dev, + dm, + &amdgpu_dm_backlight_ops, + &props); - if (IS_ERR(dm->backlight_dev)) + if (IS_ERR(dm->backlight_dev[dm->num_of_edps])) DRM_ERROR("DM: Backlight registration failed!\n"); else DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name); } - #endif static int initialize_plane(struct amdgpu_display_manager *dm, @@ -3675,10 +3685,10 @@ static void register_backlight_device(struct amdgpu_display_manager *dm, * DM initialization because not having a backlight control * is better then a black screen. 
*/ - if (!dm->backlight_dev) + if (!dm->backlight_dev[dm->num_of_edps]) amdgpu_dm_register_backlight_device(dm); - if (dm->backlight_dev) { + if (dm->backlight_dev[dm->num_of_edps]) { dm->backlight_link[dm->num_of_edps] = link; dm->num_of_edps++; } @@ -6198,6 +6208,7 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) const struct dc_link *link = aconnector->dc_link; struct amdgpu_device *adev = drm_to_adev(connector->dev); struct amdgpu_display_manager *dm = &adev->dm; + int i; /* * Call only if mst_mgr was iniitalized before since it's not done @@ -6208,12 +6219,11 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - - if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) && - link->type != dc_connection_none && - dm->backlight_dev) { - backlight_device_unregister(dm->backlight_dev); - dm->backlight_dev = NULL; + for (i = 0; i < dm->num_of_edps; i++) { + if ((link == dm->backlight_link[i]) && dm->backlight_dev[i]) { + backlight_device_unregister(dm->backlight_dev[i]); + dm->backlight_dev[i] = NULL; + } } #endif @@ -9193,8 +9203,11 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || \ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) /* restore the backlight level */ - if (dm->backlight_dev && (amdgpu_dm_backlight_get_level(dm) != dm->brightness[0])) - amdgpu_dm_backlight_set_level(dm, dm->brightness[0]); + for (i = 0; i < dm->num_of_edps; i++) { + if (dm->backlight_dev[i] && + (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i])) + amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); + } #endif /* * send vblank event on all events not handled in flip and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 9522d4ca299ef..ab1670b16b02f 100644 --- 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -365,13 +365,13 @@ struct amdgpu_display_manager { spinlock_t irq_handler_list_table_lock; - struct backlight_device *backlight_dev; + struct backlight_device *backlight_dev[AMDGPU_DM_MAX_NUM_EDP]; const struct dc_link *backlight_link[AMDGPU_DM_MAX_NUM_EDP]; uint8_t num_of_edps; - struct amdgpu_dm_backlight_caps backlight_caps; + struct amdgpu_dm_backlight_caps backlight_caps[AMDGPU_DM_MAX_NUM_EDP]; struct mod_freesync *freesync_module; #ifdef CONFIG_DRM_AMD_DC_HDCP From a63d61927657461ad1e3a4f1598a3d1650c16b67 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 24 Apr 2021 14:50:36 -0400 Subject: [PATCH 601/851] mm: Add arch_make_folio_accessible() As a default implementation, call arch_make_page_accessible n times. If an architecture can do better, it can override this. Also move the default implementation of arch_make_page_accessible() from gfp.h to mm.h. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/gfp.h | 6 ------ include/linux/mm.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 55b2ec1f965a5..dc5ff40608ce9 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -520,12 +520,6 @@ static inline void arch_free_page(struct page *page, int order) { } #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif -#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -static inline int arch_make_page_accessible(struct page *page) -{ - return 0; -} -#endif struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); diff --git a/include/linux/mm.h b/include/linux/mm.h index a0db9c0ba18b0..776a2e0787358 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1733,6 +1733,27 @@ static inline size_t folio_size(struct folio *folio) return PAGE_SIZE << 
folio_order(folio); } +#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE +static inline int arch_make_page_accessible(struct page *page) +{ + return 0; +} +#endif + +#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE +static inline int arch_make_folio_accessible(struct folio *folio) +{ + int ret, i; + for (i = 0; i < folio_nr_pages(folio); i++) { + ret = arch_make_page_accessible(folio_page(folio, i)); + if (ret) + break; + } + + return ret; +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ From 76dc430ff513e28cd20c58fa8a6804bdabb70822 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 09:00:12 -0400 Subject: [PATCH 602/851] mm: Add folio_young and folio_idle Idle page tracking is handled through page_ext on 32-bit architectures. Add folio equivalents for 32-bit and move all the page compatibility parts to common code. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig --- include/linux/page_idle.h | 99 +++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 1e894d34bdceb..1bcb1365b1d06 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -8,46 +8,16 @@ #ifdef CONFIG_IDLE_PAGE_TRACKING -#ifdef CONFIG_64BIT -static inline bool page_is_young(struct page *page) -{ - return PageYoung(page); -} - -static inline void set_page_young(struct page *page) -{ - SetPageYoung(page); -} - -static inline bool test_and_clear_page_young(struct page *page) -{ - return TestClearPageYoung(page); -} - -static inline bool page_is_idle(struct page *page) -{ - return PageIdle(page); -} - -static inline void set_page_idle(struct page *page) -{ - SetPageIdle(page); -} - -static inline void clear_page_idle(struct page *page) -{ - ClearPageIdle(page); -} -#else /* !CONFIG_64BIT */ +#ifndef CONFIG_64BIT /* * If there is not enough space to store Idle and Young 
bits in page flags, use * page ext flags instead. */ extern struct page_ext_operations page_idle_ops; -static inline bool page_is_young(struct page *page) +static inline bool folio_test_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -55,9 +25,9 @@ static inline bool page_is_young(struct page *page) return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline void set_page_young(struct page *page) +static inline void folio_set_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; @@ -65,9 +35,9 @@ static inline void set_page_young(struct page *page) set_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline bool test_and_clear_page_young(struct page *page) +static inline bool folio_test_clear_young(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -75,9 +45,9 @@ static inline bool test_and_clear_page_young(struct page *page) return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); } -static inline bool page_is_idle(struct page *page) +static inline bool folio_test_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return false; @@ -85,9 +55,9 @@ static inline bool page_is_idle(struct page *page) return test_bit(PAGE_EXT_IDLE, &page_ext->flags); } -static inline void set_page_idle(struct page *page) +static inline void folio_set_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; @@ -95,46 +65,75 @@ static inline void set_page_idle(struct 
page *page) set_bit(PAGE_EXT_IDLE, &page_ext->flags); } -static inline void clear_page_idle(struct page *page) +static inline void folio_clear_idle(struct folio *folio) { - struct page_ext *page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(&folio->page); if (unlikely(!page_ext)) return; clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } -#endif /* CONFIG_64BIT */ +#endif /* !CONFIG_64BIT */ #else /* !CONFIG_IDLE_PAGE_TRACKING */ -static inline bool page_is_young(struct page *page) +static inline bool folio_test_young(struct folio *folio) { return false; } -static inline void set_page_young(struct page *page) +static inline void folio_set_young(struct folio *folio) { } -static inline bool test_and_clear_page_young(struct page *page) +static inline bool folio_test_clear_young(struct folio *folio) { return false; } -static inline bool page_is_idle(struct page *page) +static inline bool folio_test_idle(struct folio *folio) { return false; } -static inline void set_page_idle(struct page *page) +static inline void folio_set_idle(struct folio *folio) { } -static inline void clear_page_idle(struct page *page) +static inline void folio_clear_idle(struct folio *folio) { } #endif /* CONFIG_IDLE_PAGE_TRACKING */ +static inline bool page_is_young(struct page *page) +{ + return folio_test_young(page_folio(page)); +} + +static inline void set_page_young(struct page *page) +{ + folio_set_young(page_folio(page)); +} + +static inline bool test_and_clear_page_young(struct page *page) +{ + return folio_test_clear_young(page_folio(page)); +} + +static inline bool page_is_idle(struct page *page) +{ + return folio_test_idle(page_folio(page)); +} + +static inline void set_page_idle(struct page *page) +{ + folio_set_idle(page_folio(page)); +} + +static inline void clear_page_idle(struct page *page) +{ + folio_clear_idle(page_folio(page)); +} #endif /* _LINUX_MM_PAGE_IDLE_H */ From 1d70efc2f5a50ce57feb6c91aa947fc9416ca7c8 Mon Sep 17 00:00:00 2001 From: "Matthew 
Wilcox (Oracle)" Date: Tue, 27 Apr 2021 10:37:50 -0400 Subject: [PATCH 603/851] mm/swap: Add folio_activate() This replaces activate_page() and eliminates lots of calls to compound_head(). Saves net 118 bytes of kernel text. There are still some redundant calls to page_folio() here which will be removed when pagevec_lru_move_fn() is converted to use folios. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/trace/events/pagemap.h | 14 +++++------- mm/swap.c | 41 ++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 92ad176210ff5..1fd0185d66e80 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -60,23 +60,21 @@ TRACE_EVENT(mm_lru_insertion, TRACE_EVENT(mm_lru_activate, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( - __field(struct page *, page ) + __field(struct folio *, folio ) __field(unsigned long, pfn ) ), TP_fast_assign( - __entry->page = page; - __entry->pfn = page_to_pfn(page); + __entry->folio = folio; + __entry->pfn = folio_pfn(folio); ), - /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn) - + TP_printk("folio=%p pfn=0x%lx", __entry->folio, __entry->pfn) ); #endif /* _TRACE_PAGEMAP_H */ diff --git a/mm/swap.c b/mm/swap.c index 85969b36b6360..c3137e4e1cd80 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -322,15 +322,15 @@ void lru_note_cost_page(struct page *page) page_is_file_lru(page), thp_nr_pages(page)); } -static void __activate_page(struct page *page, struct lruvec *lruvec) +static void __folio_activate(struct folio *folio, struct lruvec *lruvec) { - if (!PageActive(page) && !PageUnevictable(page)) { - int nr_pages = thp_nr_pages(page); + if (!folio_test_active(folio) && !folio_test_unevictable(folio)) { + int nr_pages = 
folio_nr_pages(folio); - del_page_from_lru_list(page, lruvec); - SetPageActive(page); - add_page_to_lru_list(page, lruvec); - trace_mm_lru_activate(page); + lruvec_del_folio(lruvec, folio); + folio_set_active(folio); + lruvec_add_folio(lruvec, folio); + trace_mm_lru_activate(folio); __count_vm_events(PGACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, @@ -339,6 +339,11 @@ static void __activate_page(struct page *page, struct lruvec *lruvec) } #ifdef CONFIG_SMP +static void __activate_page(struct page *page, struct lruvec *lruvec) +{ + return __folio_activate(page_folio(page), lruvec); +} + static void activate_page_drain(int cpu) { struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu); @@ -352,16 +357,16 @@ static bool need_activate_page_drain(int cpu) return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; } -static void activate_page(struct page *page) +static void folio_activate(struct folio *folio) { - page = compound_head(page); - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { + if (folio_test_lru(folio) && !folio_test_active(folio) && + !folio_test_unevictable(folio)) { struct pagevec *pvec; + folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.activate_page); - get_page(page); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) pagevec_lru_move_fn(pvec, __activate_page); local_unlock(&lru_pvecs.lock); } @@ -372,17 +377,15 @@ static inline void activate_page_drain(int cpu) { } -static void activate_page(struct page *page) +static void folio_activate(struct folio *folio) { - struct folio *folio = page_folio(page); struct lruvec *lruvec; - page = &folio->page; - if (TestClearPageLRU(page)) { + if (folio_test_clear_lru(folio)) { lruvec = folio_lruvec_lock_irq(folio); - __activate_page(page, lruvec); + __folio_activate(folio, lruvec); unlock_page_lruvec_irq(lruvec); - SetPageLRU(page); + folio_set_lru(folio); } } #endif @@ 
-447,7 +450,7 @@ void mark_page_accessed(struct page *page) * LRU on the next drain. */ if (PageLRU(page)) - activate_page(page); + folio_activate(page_folio(page)); else __lru_cache_activate_page(page); ClearPageReferenced(page); From a70078dc4e93fa277947b7e4f4cdc69a9a955fab Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 10:47:39 -0400 Subject: [PATCH 604/851] mm/swap: Add folio_mark_accessed() Convert mark_page_accessed() to folio_mark_accessed(). It already operated on the entire compound page, but now we can avoid calling compound_head quite so many times. Shrinks the function from 424 bytes to 295 bytes (shrinking by 129 bytes). The compatibility wrapper is 30 bytes, plus the 8 bytes for the exported symbol means the kernel shrinks by 91 bytes. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/swap.h | 3 ++- mm/folio-compat.c | 7 +++++++ mm/swap.c | 34 ++++++++++++++++------------------ 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 989d8f78c256a..c7a4c0a5863d1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -352,7 +352,8 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); -extern void mark_page_accessed(struct page *); +void mark_page_accessed(struct page *); +void folio_mark_accessed(struct folio *); extern atomic_t lru_disable_count; diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 7044fcc8a8aa8..a374747ae1c6d 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -5,6 +5,7 @@ */ #include +#include struct address_space *page_mapping(struct page *page) { @@ -41,3 +42,9 @@ bool page_mapped(struct page *page) return folio_mapped(page_folio(page)); } EXPORT_SYMBOL(page_mapped); + +void mark_page_accessed(struct page *page) +{ + folio_mark_accessed(page_folio(page)); +} +EXPORT_SYMBOL(mark_page_accessed); 
diff --git a/mm/swap.c b/mm/swap.c index c3137e4e1cd80..d32007fe23b34 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -390,7 +390,7 @@ static void folio_activate(struct folio *folio) } #endif -static void __lru_cache_activate_page(struct page *page) +static void __lru_cache_activate_folio(struct folio *folio) { struct pagevec *pvec; int i; @@ -411,8 +411,8 @@ static void __lru_cache_activate_page(struct page *page) for (i = pagevec_count(pvec) - 1; i >= 0; i--) { struct page *pagevec_page = pvec->pages[i]; - if (pagevec_page == page) { - SetPageActive(page); + if (pagevec_page == &folio->page) { + folio_set_active(folio); break; } } @@ -430,36 +430,34 @@ static void __lru_cache_activate_page(struct page *page) * When a newly allocated page is not yet visible, so safe for non-atomic ops, * __SetPageReferenced(page) may be substituted for mark_page_accessed(page). */ -void mark_page_accessed(struct page *page) +void folio_mark_accessed(struct folio *folio) { - page = compound_head(page); - - if (!PageReferenced(page)) { - SetPageReferenced(page); - } else if (PageUnevictable(page)) { + if (!folio_test_referenced(folio)) { + folio_set_referenced(folio); + } else if (folio_test_unevictable(folio)) { /* * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, * this list is never rotated or maintained, so marking an * evictable page accessed has no effect. */ - } else if (!PageActive(page)) { + } else if (!folio_test_active(folio)) { /* * If the page is on the LRU, queue it for activation via * lru_pvecs.activate_page. Otherwise, assume the page is on a * pagevec, mark it active and it'll be moved to the active * LRU on the next drain. 
*/ - if (PageLRU(page)) - folio_activate(page_folio(page)); + if (folio_test_lru(folio)) + folio_activate(folio); else - __lru_cache_activate_page(page); - ClearPageReferenced(page); - workingset_activation(page_folio(page)); + __lru_cache_activate_folio(folio); + folio_clear_referenced(folio); + workingset_activation(folio); } - if (page_is_idle(page)) - clear_page_idle(page); + if (folio_test_idle(folio)) + folio_clear_idle(folio); } -EXPORT_SYMBOL(mark_page_accessed); +EXPORT_SYMBOL(folio_mark_accessed); /** * lru_cache_add - add a page to a page list From c6054bad5e4457522bacd1140bd18075bea0a2f4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Feb 2021 16:02:57 -0500 Subject: [PATCH 605/851] mm/rmap: Add folio_mkclean() Transform page_mkclean() into folio_mkclean() and add a page_mkclean() wrapper around folio_mkclean(). folio_mkclean is 15 bytes smaller than page_mkclean, but the kernel is enlarged by 33 bytes due to inlining page_folio() into each caller. This will go away once the callers are converted to use folio_mkclean(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/rmap.h | 10 ++++++---- mm/rmap.c | 12 ++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c976cc6de2574..e704b1a4c06c0 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -235,7 +235,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); * * returns the number of cleaned PTEs. 
*/ -int page_mkclean(struct page *); +int folio_mkclean(struct folio *); /* * called in munlock()/munmap() path to check for other vmas holding @@ -295,12 +295,14 @@ static inline void try_to_unmap(struct page *page, enum ttu_flags flags) { } -static inline int page_mkclean(struct page *page) +static inline int folio_mkclean(struct folio *folio) { return 0; } - - #endif /* CONFIG_MMU */ +static inline int page_mkclean(struct page *page) +{ + return folio_mkclean(page_folio(page)); +} #endif /* _LINUX_RMAP_H */ diff --git a/mm/rmap.c b/mm/rmap.c index 1df8683c4c4cc..b3aae8eeaeaf4 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -980,7 +980,7 @@ static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) return true; } -int page_mkclean(struct page *page) +int folio_mkclean(struct folio *folio) { int cleaned = 0; struct address_space *mapping; @@ -990,20 +990,20 @@ int page_mkclean(struct page *page) .invalid_vma = invalid_mkclean_vma, }; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); - if (!page_mapped(page)) + if (!folio_mapped(folio)) return 0; - mapping = page_mapping(page); + mapping = folio_mapping(folio); if (!mapping) return 0; - rmap_walk(page, &rwc); + rmap_walk(&folio->page, &rwc); return cleaned; } -EXPORT_SYMBOL_GPL(page_mkclean); +EXPORT_SYMBOL_GPL(folio_mkclean); /** * page_move_anon_rmap - move a page to our anon_vma From 4c5e81b3f533c3c85b3bff6ff878178964a4f6e9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 07:28:40 -0400 Subject: [PATCH 606/851] mm/migrate: Add folio_migrate_mapping() Reimplement migrate_page_move_mapping() as a wrapper around folio_migrate_mapping(). Saves 193 bytes of kernel text. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/migrate.h | 2 + mm/folio-compat.c | 11 ++++++ mm/migrate.c | 85 +++++++++++++++++++++-------------------- 3 files changed, 57 insertions(+), 41 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 23dadf7aeba89..eb14495a1f468 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -51,6 +51,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count); #else static inline void putback_movable_pages(struct list_head *l) {} diff --git a/mm/folio-compat.c b/mm/folio-compat.c index a374747ae1c6d..d883d964fd520 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -4,6 +4,7 @@ * eventually. 
*/ +#include #include #include @@ -48,3 +49,13 @@ void mark_page_accessed(struct page *page) folio_mark_accessed(page_folio(page)); } EXPORT_SYMBOL(mark_page_accessed); + +#ifdef CONFIG_MIGRATION +int migrate_page_move_mapping(struct address_space *mapping, + struct page *newpage, struct page *page, int extra_count) +{ + return folio_migrate_mapping(mapping, page_folio(newpage), + page_folio(page), extra_count); +} +EXPORT_SYMBOL(migrate_page_move_mapping); +#endif diff --git a/mm/migrate.c b/mm/migrate.c index 910552318df35..aa4f2310c5bba 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -363,7 +363,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) */ expected_count += is_device_private_page(page); if (mapping) - expected_count += thp_nr_pages(page) + page_has_private(page); + expected_count += compound_nr(page) + page_has_private(page); return expected_count; } @@ -376,74 +376,75 @@ static int expected_page_refs(struct address_space *mapping, struct page *page) * 2 for pages with a mapping * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. 
*/ -int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, int extra_count) +int folio_migrate_mapping(struct address_space *mapping, + struct folio *newfolio, struct folio *folio, int extra_count) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct zone *oldzone, *newzone; int dirty; - int expected_count = expected_page_refs(mapping, page) + extra_count; - int nr = thp_nr_pages(page); + int expected_count = expected_page_refs(mapping, &folio->page) + extra_count; + int nr = folio_nr_pages(folio); if (!mapping) { /* Anonymous page without mapping */ - if (page_count(page) != expected_count) + if (folio_ref_count(folio) != expected_count) return -EAGAIN; /* No turning back from here */ - newpage->index = page->index; - newpage->mapping = page->mapping; - if (PageSwapBacked(page)) - __SetPageSwapBacked(newpage); + newfolio->index = folio->index; + newfolio->mapping = folio->mapping; + if (folio_test_swapbacked(folio)) + __folio_set_swapbacked(newfolio); return MIGRATEPAGE_SUCCESS; } - oldzone = page_zone(page); - newzone = page_zone(newpage); + oldzone = folio_zone(folio); + newzone = folio_zone(newfolio); xas_lock_irq(&xas); - if (page_count(page) != expected_count || xas_load(&xas) != page) { + if (folio_ref_count(folio) != expected_count || + xas_load(&xas) != folio) { xas_unlock_irq(&xas); return -EAGAIN; } - if (!page_ref_freeze(page, expected_count)) { + if (!folio_ref_freeze(folio, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; } /* - * Now we know that no one else is looking at the page: + * Now we know that no one else is looking at the folio: * no turning back from here. 
*/ - newpage->index = page->index; - newpage->mapping = page->mapping; - page_ref_add(newpage, nr); /* add cache reference */ - if (PageSwapBacked(page)) { - __SetPageSwapBacked(newpage); - if (PageSwapCache(page)) { - SetPageSwapCache(newpage); - set_page_private(newpage, page_private(page)); + newfolio->index = folio->index; + newfolio->mapping = folio->mapping; + folio_ref_add(newfolio, nr); /* add cache reference */ + if (folio_test_swapbacked(folio)) { + __folio_set_swapbacked(newfolio); + if (folio_test_swapcache(folio)) { + folio_set_swapcache(newfolio); + newfolio->private = folio_get_private(folio); } } else { - VM_BUG_ON_PAGE(PageSwapCache(page), page); + VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); } /* Move dirty while page refs frozen and newpage not yet exposed */ - dirty = PageDirty(page); + dirty = folio_test_dirty(folio); if (dirty) { - ClearPageDirty(page); - SetPageDirty(newpage); + folio_clear_dirty(folio); + folio_set_dirty(newfolio); } - xas_store(&xas, newpage); - if (PageTransHuge(page)) { + xas_store(&xas, newfolio); + if (nr > 1) { int i; for (i = 1; i < nr; i++) { xas_next(&xas); - xas_store(&xas, newpage); + xas_store(&xas, newfolio); } } @@ -452,7 +453,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. 
*/ - page_ref_unfreeze(page, expected_count - nr); + folio_ref_unfreeze(folio, expected_count - nr); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -471,18 +472,18 @@ int migrate_page_move_mapping(struct address_space *mapping, struct lruvec *old_lruvec, *new_lruvec; struct mem_cgroup *memcg; - memcg = page_memcg(page); + memcg = folio_memcg(folio); old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat); new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat); __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr); __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr); - if (PageSwapBacked(page) && !PageSwapCache(page)) { + if (folio_test_swapbacked(folio) && !folio_test_swapcache(folio)) { __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); __mod_lruvec_state(new_lruvec, NR_SHMEM, nr); } #ifdef CONFIG_SWAP - if (PageSwapCache(page)) { + if (folio_test_swapcache(folio)) { __mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr); __mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr); } @@ -498,11 +499,11 @@ int migrate_page_move_mapping(struct address_space *mapping, return MIGRATEPAGE_SUCCESS; } -EXPORT_SYMBOL(migrate_page_move_mapping); +EXPORT_SYMBOL(folio_migrate_mapping); /* * The expected number of remaining references is the same as that - * of migrate_page_move_mapping(). + * of folio_migrate_mapping(). 
*/ int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page) @@ -563,7 +564,7 @@ void migrate_page_states(struct page *newpage, struct page *page) if (PageMappedToDisk(page)) SetPageMappedToDisk(newpage); - /* Move dirty on pages not done by migrate_page_move_mapping() */ + /* Move dirty on pages not done by folio_migrate_mapping() */ if (PageDirty(page)) SetPageDirty(newpage); @@ -639,11 +640,13 @@ int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) { + struct folio *newfolio = page_folio(newpage); + struct folio *folio = page_folio(page); int rc; - BUG_ON(PageWriteback(page)); /* Writeback must be complete */ + BUG_ON(folio_test_writeback(folio)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, 0); + rc = folio_migrate_mapping(mapping, newfolio, folio, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -2387,7 +2390,7 @@ static void migrate_vma_collect(struct migrate_vma *migrate) * @page: struct page to check * * Pinned pages cannot be migrated. This is the same test as in - * migrate_page_move_mapping(), except that here we allow migration of a + * folio_migrate_mapping(), except that here we allow migration of a * ZONE_DEVICE page. */ static bool migrate_vma_check_page(struct page *page) From 785a947f79460f664474dd8c1bb298ac9dc5a4a6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 15:26:29 -0400 Subject: [PATCH 607/851] mm/migrate: Add folio_migrate_flags() Turn migrate_page_states() into a wrapper around folio_migrate_flags(). Also convert two functions only called from folio_migrate_flags() to be folio-based. ksm_migrate_page() becomes folio_migrate_ksm() and copy_page_owner() becomes folio_copy_owner(). folio_migrate_flags() alone shrinks by two thirds -- 1967 bytes down to 642 bytes. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan --- include/linux/ksm.h | 4 +- include/linux/migrate.h | 1 + include/linux/page_owner.h | 8 ++-- mm/folio-compat.c | 6 +++ mm/ksm.c | 31 ++++++++------ mm/migrate.c | 84 +++++++++++++++++++------------------- mm/page_owner.c | 10 ++--- 7 files changed, 77 insertions(+), 67 deletions(-) diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 161e8164abcf5..a38a5bca1ba58 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -52,7 +52,7 @@ struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); -void ksm_migrate_page(struct page *newpage, struct page *oldpage); +void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); #else /* !CONFIG_KSM */ @@ -83,7 +83,7 @@ static inline void rmap_walk_ksm(struct page *page, { } -static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) +static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old) { } #endif /* CONFIG_MMU */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index eb14495a1f468..ba0a554b3eae4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -51,6 +51,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); #else diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 719bfe5108c56..43c638c51c1f5 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -12,7 +12,7 @@ extern void __reset_page_owner(struct page *page, unsigned int 
order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); extern void __split_page_owner(struct page *page, unsigned int nr); -extern void __copy_page_owner(struct page *oldpage, struct page *newpage); +extern void __folio_copy_owner(struct folio *newfolio, struct folio *old); extern void __set_page_owner_migrate_reason(struct page *page, int reason); extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, @@ -36,10 +36,10 @@ static inline void split_page_owner(struct page *page, unsigned int nr) if (static_branch_unlikely(&page_owner_inited)) __split_page_owner(page, nr); } -static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +static inline void folio_copy_owner(struct folio *newfolio, struct folio *old) { if (static_branch_unlikely(&page_owner_inited)) - __copy_page_owner(oldpage, newpage); + __folio_copy_owner(newfolio, old); } static inline void set_page_owner_migrate_reason(struct page *page, int reason) { @@ -63,7 +63,7 @@ static inline void split_page_owner(struct page *page, unsigned int order) { } -static inline void copy_page_owner(struct page *oldpage, struct page *newpage) +static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio) { } static inline void set_page_owner_migrate_reason(struct page *page, int reason) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index d883d964fd520..3f00ad92d1ff5 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -58,4 +58,10 @@ int migrate_page_move_mapping(struct address_space *mapping, page_folio(page), extra_count); } EXPORT_SYMBOL(migrate_page_move_mapping); + +void migrate_page_states(struct page *newpage, struct page *page) +{ + folio_migrate_flags(page_folio(newpage), page_folio(page)); +} +EXPORT_SYMBOL(migrate_page_states); #endif diff --git a/mm/ksm.c b/mm/ksm.c index 23d36b59f9975..3a70786906eb9 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -753,7 +753,7 @@ 
static struct page *get_ksm_page(struct stable_node *stable_node, /* * We come here from above when page->mapping or !PageSwapCache * suggests that the node is stale; but it might be under migration. - * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(), + * We need smp_rmb(), matching the smp_wmb() in folio_migrate_ksm(), * before checking whether node->kpfn has been changed. */ smp_rmb(); @@ -854,9 +854,14 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma, return err; } +static inline struct stable_node *folio_stable_node(struct folio *folio) +{ + return folio_test_ksm(folio) ? folio_raw_mapping(folio) : NULL; +} + static inline struct stable_node *page_stable_node(struct page *page) { - return PageKsm(page) ? page_rmapping(page) : NULL; + return folio_stable_node(page_folio(page)); } static inline void set_page_stable_node(struct page *page, @@ -2661,26 +2666,26 @@ void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) } #ifdef CONFIG_MIGRATION -void ksm_migrate_page(struct page *newpage, struct page *oldpage) +void folio_migrate_ksm(struct folio *newfolio, struct folio *folio) { struct stable_node *stable_node; - VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); - VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); - VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); + VM_BUG_ON_FOLIO(newfolio->mapping != folio->mapping, newfolio); - stable_node = page_stable_node(newpage); + stable_node = folio_stable_node(folio); if (stable_node) { - VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage); - stable_node->kpfn = page_to_pfn(newpage); + VM_BUG_ON_FOLIO(stable_node->kpfn != folio_pfn(folio), folio); + stable_node->kpfn = folio_pfn(newfolio); /* - * newpage->mapping was set in advance; now we need smp_wmb() + * newfolio->mapping was set in advance; now we need smp_wmb() * to make sure that the new 
stable_node->kpfn is visible - * to get_ksm_page() before it can see that oldpage->mapping - * has gone stale (or that PageSwapCache has been cleared). + * to get_ksm_page() before it can see that folio->mapping + * has gone stale (or that folio_test_swapcache has been cleared). */ smp_wmb(); - set_page_stable_node(oldpage, NULL); + set_page_stable_node(&folio->page, NULL); } } #endif /* CONFIG_MIGRATION */ diff --git a/mm/migrate.c b/mm/migrate.c index aa4f2310c5bba..a86be2bfc9a15 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -538,82 +538,80 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, } /* - * Copy the page to its new location + * Copy the flags and some other ancillary information */ -void migrate_page_states(struct page *newpage, struct page *page) +void folio_migrate_flags(struct folio *newfolio, struct folio *folio) { - struct folio *folio = page_folio(page); - struct folio *newfolio = page_folio(newpage); int cpupid; - if (PageError(page)) - SetPageError(newpage); - if (PageReferenced(page)) - SetPageReferenced(newpage); - if (PageUptodate(page)) - SetPageUptodate(newpage); - if (TestClearPageActive(page)) { - VM_BUG_ON_PAGE(PageUnevictable(page), page); - SetPageActive(newpage); - } else if (TestClearPageUnevictable(page)) - SetPageUnevictable(newpage); - if (PageWorkingset(page)) - SetPageWorkingset(newpage); - if (PageChecked(page)) - SetPageChecked(newpage); - if (PageMappedToDisk(page)) - SetPageMappedToDisk(newpage); + if (folio_test_error(folio)) + folio_set_error(newfolio); + if (folio_test_referenced(folio)) + folio_set_referenced(newfolio); + if (folio_test_uptodate(folio)) + folio_mark_uptodate(newfolio); + if (folio_test_clear_active(folio)) { + VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio); + folio_set_active(newfolio); + } else if (folio_test_clear_unevictable(folio)) + folio_set_unevictable(newfolio); + if (folio_test_workingset(folio)) + folio_set_workingset(newfolio); + if (folio_test_checked(folio)) + 
folio_set_checked(newfolio); + if (folio_test_mappedtodisk(folio)) + folio_set_mappedtodisk(newfolio); /* Move dirty on pages not done by folio_migrate_mapping() */ - if (PageDirty(page)) - SetPageDirty(newpage); + if (folio_test_dirty(folio)) + folio_set_dirty(newfolio); - if (page_is_young(page)) - set_page_young(newpage); - if (page_is_idle(page)) - set_page_idle(newpage); + if (folio_test_young(folio)) + folio_set_young(newfolio); + if (folio_test_idle(folio)) + folio_set_idle(newfolio); /* * Copy NUMA information to the new page, to prevent over-eager * future migrations of this same page. */ - cpupid = page_cpupid_xchg_last(page, -1); - page_cpupid_xchg_last(newpage, cpupid); + cpupid = page_cpupid_xchg_last(&folio->page, -1); + page_cpupid_xchg_last(&newfolio->page, cpupid); - ksm_migrate_page(newpage, page); + folio_migrate_ksm(newfolio, folio); /* * Please do not reorder this without considering how mm/ksm.c's * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache(). */ - if (PageSwapCache(page)) - ClearPageSwapCache(page); - ClearPagePrivate(page); + if (folio_test_swapcache(folio)) + folio_clear_swapcache(folio); + folio_clear_private(folio); /* page->private contains hugetlb specific flags */ - if (!PageHuge(page)) - set_page_private(page, 0); + if (!folio_test_hugetlb(folio)) + folio->private = NULL; /* * If any waiters have accumulated on the new page then * wake them up. */ - if (PageWriteback(newpage)) - end_page_writeback(newpage); + if (folio_test_writeback(newfolio)) + folio_end_writeback(newfolio); /* * PG_readahead shares the same bit with PG_reclaim. The above * end_page_writeback() may clear PG_readahead mistakenly, so set the * bit after that. 
*/ - if (PageReadahead(page)) - SetPageReadahead(newpage); + if (folio_test_readahead(folio)) + folio_set_readahead(newfolio); - copy_page_owner(page, newpage); + folio_copy_owner(folio, newfolio); - if (!PageHuge(page)) + if (!folio_test_hugetlb(folio)) mem_cgroup_migrate(folio, newfolio); } -EXPORT_SYMBOL(migrate_page_states); +EXPORT_SYMBOL(folio_migrate_flags); void migrate_page_copy(struct page *newpage, struct page *page) { @@ -654,7 +652,7 @@ int migrate_page(struct address_space *mapping, if (mode != MIGRATE_SYNC_NO_COPY) migrate_page_copy(newpage, page); else - migrate_page_states(newpage, page); + folio_migrate_flags(newfolio, folio); return MIGRATEPAGE_SUCCESS; } EXPORT_SYMBOL(migrate_page); diff --git a/mm/page_owner.c b/mm/page_owner.c index f51a57e92aa38..23bfb074ca3fc 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -210,10 +210,10 @@ void __split_page_owner(struct page *page, unsigned int nr) } } -void __copy_page_owner(struct page *oldpage, struct page *newpage) +void __folio_copy_owner(struct folio *newfolio, struct folio *old) { - struct page_ext *old_ext = lookup_page_ext(oldpage); - struct page_ext *new_ext = lookup_page_ext(newpage); + struct page_ext *old_ext = lookup_page_ext(&old->page); + struct page_ext *new_ext = lookup_page_ext(&newfolio->page); struct page_owner *old_page_owner, *new_page_owner; if (unlikely(!old_ext || !new_ext)) @@ -231,11 +231,11 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) new_page_owner->free_ts_nsec = old_page_owner->ts_nsec; /* - * We don't clear the bit on the oldpage as it's going to be freed + * We don't clear the bit on the old folio as it's going to be freed * after migration. Until then, the info can be useful in case of * a bug, and the overall stats will be off a bit only temporarily. * Also, migrate_misplaced_transhuge_page() can still fail the - * migration and then we want the oldpage to retain the info. But + * migration and then we want the old folio to retain the info. 
But * in that case we also don't need to explicitly clear the info from * the new page, which will be freed. */ From 4811332763ce908beec94226be1701fa051fcaf7 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 22 Jul 2021 09:39:55 +0200 Subject: [PATCH 608/851] arm64: dts: rockchip: add csi-dphy to px30 Add the CSI dphy node to the core px30 devicetree for later use with the rkisp. Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20210722073955.1192168-1-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30.dtsi | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 248ebb61aa790..6e53a4cc75e6f 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -864,6 +864,19 @@ status = "disabled"; }; + csi_dphy: phy@ff2f0000 { + compatible = "rockchip,px30-csi-dphy"; + reg = <0x0 0xff2f0000 0x0 0x4000>; + clocks = <&cru PCLK_MIPICSIPHY>; + clock-names = "pclk"; + #phy-cells = <0>; + power-domains = <&power PX30_PD_VI>; + resets = <&cru SRST_MIPICSIPHY_P>; + reset-names = "apb"; + rockchip,grf = <&grf>; + status = "disabled"; + }; + usb20_otg: usb@ff300000 { compatible = "rockchip,px30-usb", "rockchip,rk3066-usb", "snps,dwc2"; From d7a81d3348477e51dfbc29d691644b9f36267089 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:31 -0700 Subject: [PATCH 609/851] interconnect: qcom: icc-rpmh: Ensure floor BW is enforced for all nodes We currently only enforce BW floors for a subset of nodes in a path. All BCMs that need updating are queued in the pre_aggregate/aggregate phase. The first set() commits all queued BCMs and subsequent set() calls short-circuit without committing anything. Since the floor BW isn't set in sum_avg/max_peak until set(), then some BCMs are committed before their associated nodes reflect the floor. Set the floor as each node is being aggregated. 
This ensures that all relevant floors are set before the BCMs are committed. Fixes: 266cd33b5913 ("interconnect: qcom: Ensure that the floor bandwidth value is enforced") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-4-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index bf01d09dba6c4..f118f57eae370 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -57,6 +57,11 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, qn->sum_avg[i] += avg_bw; qn->max_peak[i] = max_t(u32, qn->max_peak[i], peak_bw); } + + if (node->init_avg || node->init_peak) { + qn->sum_avg[i] = max_t(u64, qn->sum_avg[i], node->init_avg); + qn->max_peak[i] = max_t(u64, qn->max_peak[i], node->init_peak); + } } *agg_avg += avg_bw; @@ -90,11 +95,6 @@ int qcom_icc_set(struct icc_node *src, struct icc_node *dst) qp = to_qcom_provider(node->provider); qn = node->data; - qn->sum_avg[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->sum_avg[QCOM_ICC_BUCKET_AMC], - node->avg_bw); - qn->max_peak[QCOM_ICC_BUCKET_AMC] = max_t(u64, qn->max_peak[QCOM_ICC_BUCKET_AMC], - node->peak_bw); - qcom_icc_bcm_voter_commit(qp->voter); return 0; From 7ec2438bdf21c591d3406caa31668e0dfcc2e2de Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 21 Jul 2021 10:54:32 -0700 Subject: [PATCH 610/851] interconnect: qcom: icc-rpmh: Add BCMs to commit list in pre_aggregate We're only adding BCMs to the commit list in aggregate(), but there are cases where pre_aggregate() is called without subsequently calling aggregate(). In particular, in icc_sync_state() when a node with initial BW has zero requests. Since BCMs aren't added to the commit list in these cases, we don't actually send the zero BW request to HW. So the resources remain on unnecessarily.
Add BCMs to the commit list in pre_aggregate() instead, which is always called even when there are no requests. Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20210721175432.2119-5-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index f118f57eae370..b26fda0588e0c 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -20,13 +20,18 @@ void qcom_icc_pre_aggregate(struct icc_node *node) { size_t i; struct qcom_icc_node *qn; + struct qcom_icc_provider *qp; qn = node->data; + qp = to_qcom_provider(node->provider); for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) { qn->sum_avg[i] = 0; qn->max_peak[i] = 0; } + + for (i = 0; i < qn->num_bcms; i++) + qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); } EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate); @@ -44,10 +49,8 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, { size_t i; struct qcom_icc_node *qn; - struct qcom_icc_provider *qp; qn = node->data; - qp = to_qcom_provider(node->provider); if (!tag) tag = QCOM_ICC_TAG_ALWAYS; @@ -67,9 +70,6 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, *agg_avg += avg_bw; *agg_peak = max_t(u32, *agg_peak, peak_bw); - for (i = 0; i < qn->num_bcms; i++) - qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); - return 0; } EXPORT_SYMBOL_GPL(qcom_icc_aggregate); From 762f3d4403745793db2be3b0f20d038e644c1d91 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jun 2021 17:16:15 +0200 Subject: [PATCH 611/851] mmc: host: add kdoc for mmc_retune_{en|dis}able I wanted to use it in a wrong way, so document the intended way. 
Signed-off-by: Wolfram Sang Acked-by: Adrian Hunter Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20210624151616.38770-3-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/host.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index eda4a1892c33c..0f084c9b26843 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -95,6 +95,10 @@ void mmc_unregister_host_class(void) class_unregister(&mmc_host_class); } +/** + * mmc_retune_enable() - enter a transfer mode that requires retuning + * @host: host which should retune now + */ void mmc_retune_enable(struct mmc_host *host) { host->can_retune = 1; @@ -126,6 +130,12 @@ void mmc_retune_unpause(struct mmc_host *host) } EXPORT_SYMBOL(mmc_retune_unpause); +/** + * mmc_retune_disable() - exit a transfer mode that requires retuning + * @host: host which should not retune anymore + * + * It is not meant for temporarily preventing retuning! + */ void mmc_retune_disable(struct mmc_host *host) { mmc_retune_unpause(host); From 6616f06fd977aa1ea4e43e10252c52b203fa8566 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Jun 2021 17:16:16 +0200 Subject: [PATCH 612/851] mmc: host: factor out clearing the retune state We have this in two places, so let's have a dedicated function. It is also more readable. 
Signed-off-by: Wolfram Sang Acked-by: Adrian Hunter Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20210624151616.38770-4-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 3 +-- drivers/mmc/core/host.c | 3 +-- drivers/mmc/core/host.h | 6 ++++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 95fedcf56e4a0..84f39a59a28e6 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -941,8 +941,7 @@ int mmc_execute_tuning(struct mmc_card *card) pr_err("%s: tuning execution failed: %d\n", mmc_hostname(host), err); } else { - host->retune_now = 0; - host->need_retune = 0; + mmc_retune_clear(host); mmc_retune_enable(host); } diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 0f084c9b26843..52d37587cf45a 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -141,8 +141,7 @@ void mmc_retune_disable(struct mmc_host *host) mmc_retune_unpause(host); host->can_retune = 0; del_timer_sync(&host->retune_timer); - host->retune_now = 0; - host->need_retune = 0; + mmc_retune_clear(host); } void mmc_retune_timer_stop(struct mmc_host *host) diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index ba407617ed23a..48c4952512a58 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -21,6 +21,12 @@ int mmc_retune(struct mmc_host *host); void mmc_retune_pause(struct mmc_host *host); void mmc_retune_unpause(struct mmc_host *host); +static inline void mmc_retune_clear(struct mmc_host *host) +{ + host->retune_now = 0; + host->need_retune = 0; +} + static inline void mmc_retune_hold_now(struct mmc_host *host) { host->retune_now = 0; From bcb0d814995efffbe5cac9059ccbc582561191f7 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 15 Jun 2021 16:13:51 +0530 Subject: [PATCH 613/851] mmc: sdhci-of-arasan: Modified SD default speed to 19MHz for ZynqMP SD standard speed 
timing was met only at 19MHz and not 25 MHz, that's why changing driver to 19MHz. The reason for this is when a level shifter is used on the board, timing was met for standard speed only at 19MHz. Since this level shifter is commonly required for high speed modes, the driver is modified to use standard speed of 19MHz. Signed-off-by: Manish Narani Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1623753837-21035-2-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 0e7c07ed96904..03c80c9bb7e3f 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -159,6 +159,12 @@ struct sdhci_arasan_data { /* Controller immediately reports SDHCI_CLOCK_INT_STABLE after enabling the * internal clock even when the clock isn't stable */ #define SDHCI_ARASAN_QUIRK_CLOCK_UNSTABLE BIT(1) +/* + * Some of the Arasan variations might not have timing requirements + * met at 25MHz for Default Speed mode, those controllers work at + * 19MHz instead + */ +#define SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN BIT(2) }; struct sdhci_arasan_of_data { @@ -290,6 +296,16 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_arasan->is_phy_on = false; } + if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN) { + /* + * Some of the Arasan variations might not have timing + * requirements met at 25MHz for Default Speed mode, + * those controllers work at 19MHz instead.
+ */ + if (clock == DEFAULT_SPEED_MAX_DTR) + clock = (DEFAULT_SPEED_MAX_DTR * 19) / 25; + } + /* Set the Input and Output Clock Phase Delays */ if (clk_data->set_clk_delays) clk_data->set_clk_delays(host); @@ -1608,6 +1624,8 @@ static int sdhci_arasan_probe(struct platform_device *pdev) if (of_device_is_compatible(np, "xlnx,zynqmp-8.9a")) { host->mmc_host_ops.execute_tuning = arasan_zynqmp_execute_tuning; + + sdhci_arasan->quirks |= SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN; } arasan_dt_parse_clk_phases(dev, &sdhci_arasan->clk_data); From 1e69cf1b41375ae52f45d8256136e15b1e40e93f Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 15 Jun 2021 16:13:52 +0530 Subject: [PATCH 614/851] mmc: sdhci-of-arasan: Add "SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12" quirk. Arasan controller supports AUTO CMD12, this patch adds "SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12" quirk to enable auto cmd12 feature. By using auto cmd12 we can also avoid following error message "Got data interrupt even though no data operation in progress" Signed-off-by: Manish Narani Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1623753837-21035-3-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 03c80c9bb7e3f..e61dbe0cf9fe6 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -1626,6 +1626,7 @@ static int sdhci_arasan_probe(struct platform_device *pdev) arasan_zynqmp_execute_tuning; sdhci_arasan->quirks |= SDHCI_ARASAN_QUIRK_CLOCK_25_BROKEN; + host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12; } arasan_dt_parse_clk_phases(dev, &sdhci_arasan->clk_data); From 5136c6871ef960d5488c041ce00ef64cf312b3a3 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 15 Jun 2021 16:13:53 +0530 Subject: [PATCH 615/851] mmc: sdhci-of-arasan: Skip Auto tuning for DDR50 mode in ZynqMP platform ZynqMP platform does 
not perform auto tuning in DDR50 mode. Skip the same while the card is operating in DDR50 mode. Signed-off-by: Manish Narani Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1623753837-21035-4-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index e61dbe0cf9fe6..0f6ef4f458250 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -894,6 +894,10 @@ static int arasan_zynqmp_execute_tuning(struct mmc_host *mmc, u32 opcode) NODE_SD_1; int err; + /* ZynqMP SD controller does not perform auto tuning in DDR50 mode */ + if (mmc->ios.timing == MMC_TIMING_UHS_DDR50) + return 0; + arasan_zynqmp_dll_reset(host, device_id); err = sdhci_execute_tuning(mmc, opcode); From 2b43f521c53ddb7068299aa46370a84c6daf7bca Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 15 Jun 2021 16:13:54 +0530 Subject: [PATCH 616/851] mmc: sdhci-of-arasan: Check return value of non-void functions At a couple of places, the return values of the non-void functions were not getting checked. This was reported by the coverity tool. Modify the code to check the return values of the same. Addresses-Coverity: ("check_return") Signed-off-by: Manish Narani Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1623753837-21035-5-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 0f6ef4f458250..ec49207ad768e 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -273,7 +273,12 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock) * through low speeds without power cycling.
*/ sdhci_set_clock(host, host->max_clk); - phy_power_on(sdhci_arasan->phy); + if (phy_power_on(sdhci_arasan->phy)) { + pr_err("%s: Cannot power on phy.\n", + mmc_hostname(host->mmc)); + return; + } + sdhci_arasan->is_phy_on = true; /* @@ -323,7 +328,12 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock) msleep(20); if (ctrl_phy) { - phy_power_on(sdhci_arasan->phy); + if (phy_power_on(sdhci_arasan->phy)) { + pr_err("%s: Cannot power on phy.\n", + mmc_hostname(host->mmc)); + return; + } + sdhci_arasan->is_phy_on = true; } } @@ -479,7 +489,9 @@ static int sdhci_arasan_suspend(struct device *dev) ret = phy_power_off(sdhci_arasan->phy); if (ret) { dev_err(dev, "Cannot power off phy.\n"); - sdhci_resume_host(host); + if (sdhci_resume_host(host)) + dev_err(dev, "Cannot resume host.\n"); + return ret; } sdhci_arasan->is_phy_on = false; From 070e83b0cfee8be5468c1fbb61d64e6953539c11 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 15 Jun 2021 16:13:55 +0530 Subject: [PATCH 617/851] mmc: sdhci-of-arasan: Use appropriate type of division macro The division macro DIV_ROUND_CLOSEST takes int values as the argument. However the code here uses unsigned int values for this, which is causing the values comparison with 0 as always true. We can use DIV_ROUND_CLOSEST_ULL instead for the same. 
Addresses-coverity: ("result_independent_of_operands") Signed-off-by: Manish Narani Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1623753837-21035-6-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index ec49207ad768e..fbcb9b0703511 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -984,7 +984,7 @@ static void sdhci_arasan_update_baseclkfreq(struct sdhci_host *host) struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host); const struct sdhci_arasan_soc_ctl_map *soc_ctl_map = sdhci_arasan->soc_ctl_map; - u32 mhz = DIV_ROUND_CLOSEST(clk_get_rate(pltfm_host->clk), 1000000); + u32 mhz = DIV_ROUND_CLOSEST_ULL(clk_get_rate(pltfm_host->clk), 1000000); /* Having a map is optional */ if (!soc_ctl_map) From 8cebd26e404d81a1bb8e2cc8bfae9db0aaba5650 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 15 Jun 2021 16:13:56 +0530 Subject: [PATCH 618/851] mmc: sdhci-of-arasan: Modify data type of the clk_phase array Modify the data type of the clk_phase array to u32 to make it compatible with the argument requirement of "of_property_read_variable_u32_array". 
Addresses-coverity: ("incompatible_param") Signed-off-by: Manish Narani Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1623753837-21035-7-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index fbcb9b0703511..ba425b9563bfd 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -1018,7 +1018,7 @@ static void arasan_dt_read_clk_phase(struct device *dev, { struct device_node *np = dev->of_node; - int clk_phase[2] = {0}; + u32 clk_phase[2] = {0}; /* * Read Tap Delay values from DT, if the DT does not contain the From fe8b30d632cb1fdf1c75c3126e7f5e1775b0d0ee Mon Sep 17 00:00:00 2001 From: Sai Krishna Potthuri Date: Tue, 15 Jun 2021 16:13:57 +0530 Subject: [PATCH 619/851] mmc: arasan: Fix the issue in reading tap values from DT The 'of_property_read_variable_u32_array' function returns the number of elements read on success. This patch updates the condition check in the driver to overwrite the tap values with those from DT if they exist. Signed-off-by: Sai Krishna Potthuri Signed-off-by: Manish Narani Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1623753837-21035-8-git-send-email-manish.narani@xilinx.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index ba425b9563bfd..737e2bfdedc28 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -1019,13 +1019,15 @@ static void arasan_dt_read_clk_phase(struct device *dev, struct device_node *np = dev->of_node; u32 clk_phase[2] = {0}; + int ret; /* * Read Tap Delay values from DT, if the DT does not contain the * Tap Values then use the pre-defined values.
*/ - if (of_property_read_variable_u32_array(np, prop, &clk_phase[0], - 2, 0)) { + ret = of_property_read_variable_u32_array(np, prop, &clk_phase[0], + 2, 0); + if (ret < 0) { dev_dbg(dev, "Using predefined clock phase for %s = %d %d\n", prop, clk_data->clk_phase_in[timing], clk_data->clk_phase_out[timing]); From 0fda3787caf943a662855bd9827ec4335ac5f604 Mon Sep 17 00:00:00 2001 From: Shaik Sajida Bhanu Date: Wed, 16 Jun 2021 14:50:07 +0530 Subject: [PATCH 620/851] dt-bindings: mmc: sdhci-msm: Add compatible string for sc7280 Add sc7280 SoC specific compatible strings for qcom-sdhci controller. Signed-off-by: Shaik Sajida Bhanu Acked-by: Rob Herring Link: https://lore.kernel.org/r/1623835207-29462-1-git-send-email-sbhanu@codeaurora.org Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/sdhci-msm.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt index 4c7fa6a4ed15c..365c3fc122ea9 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt +++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt @@ -19,6 +19,7 @@ Required properties: "qcom,msm8996-sdhci", "qcom,sdhci-msm-v4" "qcom,qcs404-sdhci", "qcom,sdhci-msm-v5" "qcom,sc7180-sdhci", "qcom,sdhci-msm-v5"; + "qcom,sc7280-sdhci", "qcom,sdhci-msm-v5"; "qcom,sdm845-sdhci", "qcom,sdhci-msm-v5" "qcom,sdx55-sdhci", "qcom,sdhci-msm-v5"; "qcom,sm8250-sdhci", "qcom,sdhci-msm-v5" From b19f9e69a7c2e3bc2ada89f81e6f1061051d6399 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 23 Jun 2021 11:57:31 +0200 Subject: [PATCH 621/851] mmc: renesas_sdhi_sys_dmac: use proper DMAENGINE API for termination dmaengine_terminate_all() is deprecated in favor of explicitly saying if it should be sync or async. Here, we want dmaengine_terminate_sync() because there is no other synchronization code in the driver to handle an async case. 
Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20210623095734.3046-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_sys_dmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index ffa64211f4deb..6956b83469c8f 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -108,9 +108,9 @@ static void renesas_sdhi_sys_dmac_abort_dma(struct tmio_mmc_host *host) renesas_sdhi_sys_dmac_enable_dma(host, false); if (host->chan_rx) - dmaengine_terminate_all(host->chan_rx); + dmaengine_terminate_sync(host->chan_rx); if (host->chan_tx) - dmaengine_terminate_all(host->chan_tx); + dmaengine_terminate_sync(host->chan_tx); renesas_sdhi_sys_dmac_enable_dma(host, true); } From 23cc9853eeab0a73d5a5b7475e806033f2ac47c3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 23 Jun 2021 11:57:32 +0200 Subject: [PATCH 622/851] mmc: sh_mmcif: use proper DMAENGINE API for termination dmaengine_terminate_all() is deprecated in favor of explicitly saying if it should be sync or async. Here, we want dmaengine_terminate_sync() because there is no other synchronization code in the driver to handle an async case. 
Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210623095734.3046-3-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mmcif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index e5e457037235a..bcc595c70a9fb 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -1164,9 +1164,9 @@ static bool sh_mmcif_end_cmd(struct sh_mmcif_host *host) data->bytes_xfered = 0; /* Abort DMA */ if (data->flags & MMC_DATA_READ) - dmaengine_terminate_all(host->chan_rx); + dmaengine_terminate_sync(host->chan_rx); else - dmaengine_terminate_all(host->chan_tx); + dmaengine_terminate_sync(host->chan_tx); } return false; From f94d8d3030260f4a9b50dcd947b8f78e2a0644ce Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 23 Jun 2021 11:57:33 +0200 Subject: [PATCH 623/851] mmc: usdhi6rol0: use proper DMAENGINE API for termination dmaengine_terminate_all() is deprecated in favor of explicitly saying if it should be sync or async. Here, we want dmaengine_terminate_sync() because there is no other synchronization code in the driver to handle an async case. 
Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210623095734.3046-4-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/usdhi6rol0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c index b9b79b1089a00..e34a5e6d69a24 100644 --- a/drivers/mmc/host/usdhi6rol0.c +++ b/drivers/mmc/host/usdhi6rol0.c @@ -631,9 +631,9 @@ static void usdhi6_dma_kill(struct usdhi6_host *host) __func__, data->sg_len, data->blocks, data->blksz); /* Abort DMA */ if (data->flags & MMC_DATA_READ) - dmaengine_terminate_all(host->chan_rx); + dmaengine_terminate_sync(host->chan_rx); else - dmaengine_terminate_all(host->chan_tx); + dmaengine_terminate_sync(host->chan_tx); } static void usdhi6_dma_check_error(struct usdhi6_host *host) From 4801760af0e51eef8dca757f2bd4dbe112fdf0f3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 30 Jun 2021 12:24:08 +0200 Subject: [PATCH 624/851] mmc: mmci: De-assert reset on probe If we find a reset handle when probing the MMCI block, make sure the reset is de-asserted. It could happen that a hardware has reset asserted at boot. 
Cc: Russell King Cc: Yann Gautier Cc: Ludovic Barre Signed-off-by: Linus Walleij Tested-by: Yann Gautier Link: https://lore.kernel.org/r/20210630102408.3543024-1-linus.walleij@linaro.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 984d350551567..3765e2f4ad98a 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -2126,6 +2126,9 @@ static int mmci_probe(struct amba_device *dev, ret = PTR_ERR(host->rst); goto clk_disable; } + ret = reset_control_deassert(host->rst); + if (ret) + dev_err(mmc_dev(mmc), "failed to de-assert reset\n"); /* Get regulators and the supported OCR mask */ ret = mmc_regulator_get_supply(mmc); From e00d7d22aa831589dc866d249f47c3e4a66dafd4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 23 Jun 2021 13:17:31 +0300 Subject: [PATCH 625/851] mmc: mmc_spi: Simplify busy loop in mmc_spi_skip() Infinite loops are hard to read and understand because of hidden main loop condition. Simplify such one in mmc_spi_skip(). Using schedule() to schedule (and be friendly to others) is discouraged and cond_resched() should be used instead. Hence, replace schedule() with cond_resched() at the same time. 
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210623101731.87885-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmc_spi.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 65c65bb5737fc..a1bcde3395a6e 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -180,7 +180,7 @@ static int mmc_spi_skip(struct mmc_spi_host *host, unsigned long timeout, u8 *cp = host->data->status; unsigned long start = jiffies; - while (1) { + do { int status; unsigned i; @@ -193,16 +193,9 @@ static int mmc_spi_skip(struct mmc_spi_host *host, unsigned long timeout, return cp[i]; } - if (time_is_before_jiffies(start + timeout)) - break; - - /* If we need long timeouts, we may release the CPU. - * We use jiffies here because we want to have a relation - * between elapsed time and the blocking of the scheduler. - */ - if (time_is_before_jiffies(start + 1)) - schedule(); - } + /* If we need long timeouts, we may release the CPU */ + cond_resched(); + } while (time_is_after_jiffies(start + timeout)); return -ETIMEDOUT; } From cd94017fb9fa721bc71791024af2983698b88af3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 22 Jul 2021 14:18:14 +0200 Subject: [PATCH 626/851] ovl: use kvalloc in xattr copy-up Extended attributes are usually small, but could be up to 64k in size, so use the most efficient method for doing the allocation. 
Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 9d8ebf0e72375..4e7d5bfa2949f 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -63,7 +63,7 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, return list_size; } - buf = kzalloc(list_size, GFP_KERNEL); + buf = kvzalloc(list_size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -106,11 +106,12 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, if (size > value_size) { void *new; - new = krealloc(value, size, GFP_KERNEL); + new = kvmalloc(size, GFP_KERNEL); if (!new) { error = -ENOMEM; break; } + kvfree(value); value = new; value_size = size; goto retry; @@ -125,9 +126,9 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, error = 0; } } - kfree(value); + kvfree(value); out: - kfree(buf); + kvfree(buf); return error; } From 496211c7d6b3928c4098efac61d2b6b203df3d01 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 15:05:06 -0400 Subject: [PATCH 627/851] mm/migrate: Add folio_migrate_copy() This is the folio equivalent of migrate_page_copy(), which is retained as a wrapper for filesystems which are not yet converted to folios. Also convert copy_huge_page() to folio_copy(). 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan --- include/linux/migrate.h | 1 + include/linux/mm.h | 2 +- mm/folio-compat.c | 6 ++++++ mm/hugetlb.c | 2 +- mm/migrate.c | 14 +++++--------- mm/util.c | 11 +++++++---- 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ba0a554b3eae4..6a01de9faff58 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -52,6 +52,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); +void folio_migrate_copy(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 776a2e0787358..3d8b4fcac8305 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -911,7 +911,7 @@ void __put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); -void copy_huge_page(struct page *dst, struct page *src); +void folio_copy(struct folio *dst, struct folio *src); /* * Compound pages have a destructor function. 
Provide a diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 3f00ad92d1ff5..2ccd8f213fc4d 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -64,4 +64,10 @@ void migrate_page_states(struct page *newpage, struct page *page) folio_migrate_flags(page_folio(newpage), page_folio(page)); } EXPORT_SYMBOL(migrate_page_states); + +void migrate_page_copy(struct page *newpage, struct page *page) +{ + folio_migrate_copy(page_folio(newpage), page_folio(page)); +} +EXPORT_SYMBOL(migrate_page_copy); #endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dfc940d5221dc..68eead0259ccb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5200,7 +5200,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, *pagep = NULL; goto out; } - copy_huge_page(page, *pagep); + folio_copy(page_folio(page), page_folio(*pagep)); put_page(*pagep); *pagep = NULL; } diff --git a/mm/migrate.c b/mm/migrate.c index a86be2bfc9a15..36cdae0a12352 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -613,16 +613,12 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio) } EXPORT_SYMBOL(folio_migrate_flags); -void migrate_page_copy(struct page *newpage, struct page *page) +void folio_migrate_copy(struct folio *newfolio, struct folio *folio) { - if (PageHuge(page) || PageTransHuge(page)) - copy_huge_page(newpage, page); - else - copy_highpage(newpage, page); - - migrate_page_states(newpage, page); + folio_copy(newfolio, folio); + folio_migrate_flags(newfolio, folio); } -EXPORT_SYMBOL(migrate_page_copy); +EXPORT_SYMBOL(folio_migrate_copy); /************************************************************ * Migration functions @@ -650,7 +646,7 @@ int migrate_page(struct address_space *mapping, return rc; if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(newfolio, folio); else folio_migrate_flags(newfolio, folio); return MIGRATEPAGE_SUCCESS; diff --git a/mm/util.c b/mm/util.c index 149537120a918..a0e859def6a8d 100644 --- a/mm/util.c +++ b/mm/util.c @@ 
-728,13 +728,16 @@ int __page_mapcount(struct page *page) } EXPORT_SYMBOL_GPL(__page_mapcount); -void copy_huge_page(struct page *dst, struct page *src) +void folio_copy(struct folio *dst, struct folio *src) { - unsigned i, nr = compound_nr(src); + unsigned i = 0; + unsigned nr = folio_nr_pages(src); - for (i = 0; i < nr; i++) { + for (;;) { + copy_highpage(folio_page(dst, i), folio_page(src, i)); + if (++i == nr) + break; cond_resched(); - copy_highpage(nth_page(dst, i), nth_page(src, i)); } } From 9dadb237652f392f284c17b364bda21b95086ef2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 19 Mar 2021 08:39:50 -0400 Subject: [PATCH 628/851] mm/writeback: Rename __add_wb_stat() to wb_stat_mod() Make this look like the newly renamed vmstat functions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/backing-dev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 44df4fcef65c1..a852876bb6e23 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -64,7 +64,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) return atomic_long_read(&bdi->tot_write_bandwidth); } -static inline void __add_wb_stat(struct bdi_writeback *wb, +static inline void wb_stat_mod(struct bdi_writeback *wb, enum wb_stat_item item, s64 amount) { percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); @@ -72,12 +72,12 @@ static inline void __add_wb_stat(struct bdi_writeback *wb, static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - __add_wb_stat(wb, item, 1); + wb_stat_mod(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - __add_wb_stat(wb, item, -1); + wb_stat_mod(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) From 0a767bf67355a4a07964fbe6b0b97de748710a43 Mon Sep 17 00:00:00 2001 From: 
"Matthew Wilcox (Oracle)" Date: Sat, 20 Mar 2021 16:34:54 -0400 Subject: [PATCH 629/851] flex_proportions: Allow N events instead of 1 When batching events (such as writing back N pages in a single I/O), it is better to do one flex_proportion operation instead of N. There is only one caller of __fprop_inc_percpu_max(), and it's the one we're going to change in the next patch, so rename it instead of adding a compatibility wrapper. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- include/linux/flex_proportions.h | 9 +++++---- lib/flex_proportions.c | 28 +++++++++++++++++++--------- mm/page-writeback.c | 4 ++-- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index c12df59d3f5fc..3e378b1fb0bc8 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -83,9 +83,10 @@ struct fprop_local_percpu { int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); -void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); -void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, - int max_frac); +void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, + long nr); +void __fprop_add_percpu_max(struct fprop_global *p, + struct fprop_local_percpu *pl, int max_frac, long nr); void fprop_fraction_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, unsigned long *numerator, unsigned long *denominator); @@ -96,7 +97,7 @@ void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) unsigned long flags; local_irq_save(flags); - __fprop_inc_percpu(p, pl); + __fprop_add_percpu(p, pl, 1); local_irq_restore(flags); } diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 4515439375243..53e7eb1dd76c9 100644 --- a/lib/flex_proportions.c +++ 
b/lib/flex_proportions.c @@ -217,11 +217,12 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, } /* Event of type pl happened */ -void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) +void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl, + long nr) { fprop_reflect_period_percpu(p, pl); - percpu_counter_add_batch(&pl->events, 1, PROP_BATCH); - percpu_counter_add(&p->events, 1); + percpu_counter_add_batch(&pl->events, nr, PROP_BATCH); + percpu_counter_add(&p->events, nr); } void fprop_fraction_percpu(struct fprop_global *p, @@ -253,20 +254,29 @@ void fprop_fraction_percpu(struct fprop_global *p, } /* - * Like __fprop_inc_percpu() except that event is counted only if the given + * Like __fprop_add_percpu() except that event is counted only if the given * type has fraction smaller than @max_frac/FPROP_FRAC_BASE */ -void __fprop_inc_percpu_max(struct fprop_global *p, - struct fprop_local_percpu *pl, int max_frac) +void __fprop_add_percpu_max(struct fprop_global *p, + struct fprop_local_percpu *pl, int max_frac, long nr) { if (unlikely(max_frac < FPROP_FRAC_BASE)) { unsigned long numerator, denominator; + s64 tmp; fprop_fraction_percpu(p, pl, &numerator, &denominator); - if (numerator > - (((u64)denominator) * max_frac) >> FPROP_FRAC_SHIFT) + /* Adding 'nr' to fraction exceeds max_frac/FPROP_FRAC_BASE? */ + tmp = (u64)denominator * max_frac - + ((u64)numerator << FPROP_FRAC_SHIFT); + if (tmp < 0) { + /* Maximum fraction already exceeded? 
*/ return; + } else if (tmp < nr * (FPROP_FRAC_BASE - max_frac)) { + /* Add just enough for the fraction to saturate */ + nr = div_u64(tmp + FPROP_FRAC_BASE - max_frac - 1, + FPROP_FRAC_BASE - max_frac); + } } - __fprop_inc_percpu(p, pl); + __fprop_add_percpu(p, pl, nr); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b34278d05395f..f55f2ebdd9a9f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -566,8 +566,8 @@ static void wb_domain_writeout_inc(struct wb_domain *dom, struct fprop_local_percpu *completions, unsigned int max_prop_frac) { - __fprop_inc_percpu_max(&dom->completions, completions, - max_prop_frac); + __fprop_add_percpu_max(&dom->completions, completions, + max_prop_frac, 1); /* First event after period switching was turned off? */ if (unlikely(!dom->period_time)) { /* From d73b19c64e018412912c17728ca9828e9410a58c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 20 Mar 2021 16:45:15 -0400 Subject: [PATCH 630/851] mm/writeback: Change __wb_writeout_inc() to __wb_writeout_add() Allow for accounting N pages at once instead of one page at a time. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- mm/page-writeback.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index f55f2ebdd9a9f..e542ea37d605e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -562,12 +562,12 @@ static unsigned long wp_next_time(unsigned long cur_time) return cur_time; } -static void wb_domain_writeout_inc(struct wb_domain *dom, +static void wb_domain_writeout_add(struct wb_domain *dom, struct fprop_local_percpu *completions, - unsigned int max_prop_frac) + unsigned int max_prop_frac, long nr) { __fprop_add_percpu_max(&dom->completions, completions, - max_prop_frac, 1); + max_prop_frac, nr); /* First event after period switching was turned off? 
*/ if (unlikely(!dom->period_time)) { /* @@ -585,18 +585,18 @@ static void wb_domain_writeout_inc(struct wb_domain *dom, * Increment @wb's writeout completion count and the global writeout * completion count. Called from test_clear_page_writeback(). */ -static inline void __wb_writeout_inc(struct bdi_writeback *wb) +static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr) { struct wb_domain *cgdom; - inc_wb_stat(wb, WB_WRITTEN); - wb_domain_writeout_inc(&global_wb_domain, &wb->completions, - wb->bdi->max_prop_frac); + wb_stat_mod(wb, WB_WRITTEN, nr); + wb_domain_writeout_add(&global_wb_domain, &wb->completions, + wb->bdi->max_prop_frac, nr); cgdom = mem_cgroup_wb_domain(wb); if (cgdom) - wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb), - wb->bdi->max_prop_frac); + wb_domain_writeout_add(cgdom, wb_memcg_completions(wb), + wb->bdi->max_prop_frac, nr); } void wb_writeout_inc(struct bdi_writeback *wb) @@ -604,7 +604,7 @@ void wb_writeout_inc(struct bdi_writeback *wb) unsigned long flags; local_irq_save(flags); - __wb_writeout_inc(wb); + __wb_writeout_add(wb, 1); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(wb_writeout_inc); @@ -2751,7 +2751,7 @@ int test_clear_page_writeback(struct page *page) struct bdi_writeback *wb = inode_to_wb(inode); dec_wb_stat(wb, WB_WRITEBACK); - __wb_writeout_inc(wb); + __wb_writeout_add(wb, 1); } } From cdc8d3ce5bad2cdb3693dcd2e50ac2995229f04f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Jan 2021 23:34:16 -0500 Subject: [PATCH 631/851] mm/writeback: Add __folio_end_writeback() test_clear_page_writeback() is actually an mm-internal function, although it's named as if it's a pagecache function. Move it to mm/internal.h, rename it to __folio_end_writeback() and change the return type to bool. The conversion from page to folio is mostly about accounting the number of pages being written back, although it does eliminate a couple of calls to compound_head(). 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/page-flags.h | 1 - mm/filemap.c | 2 +- mm/internal.h | 1 + mm/page-writeback.c | 29 +++++++++++++++-------------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ddb6606880860..6f9d1f26b1ef0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -655,7 +655,6 @@ static __always_inline void SetPageUptodate(struct page *page) CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -int test_clear_page_writeback(struct page *page); int __test_set_page_writeback(struct page *page, bool keep_write); #define test_set_page_writeback(page) \ diff --git a/mm/filemap.c b/mm/filemap.c index 2469f714520ab..7adef76691c99 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1535,7 +1535,7 @@ void folio_end_writeback(struct folio *folio) * reused before the folio_wake(). */ folio_get(folio); - if (!test_clear_page_writeback(&folio->page)) + if (!__folio_end_writeback(folio)) BUG(); smp_mb__after_atomic(); diff --git a/mm/internal.h b/mm/internal.h index fa31a7f0ed798..08e8a28994d14 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -43,6 +43,7 @@ static inline void *folio_raw_mapping(struct folio *folio) vm_fault_t do_swap_page(struct vm_fault *vmf); void folio_rotate_reclaimable(struct folio *folio); +bool __folio_end_writeback(struct folio *folio); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e542ea37d605e..8d5d7921b1577 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -583,7 +583,7 @@ static void wb_domain_writeout_add(struct wb_domain *dom, /* * Increment @wb's writeout completion count and the global writeout - * completion count. Called from test_clear_page_writeback(). + * completion count. Called from __folio_end_writeback(). 
*/ static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr) { @@ -2731,27 +2731,28 @@ int clear_page_dirty_for_io(struct page *page) } EXPORT_SYMBOL(clear_page_dirty_for_io); -int test_clear_page_writeback(struct page *page) +bool __folio_end_writeback(struct folio *folio) { - struct address_space *mapping = page_mapping(page); - int ret; + long nr = folio_nr_pages(folio); + struct address_space *mapping = folio_mapping(folio); + bool ret; - lock_page_memcg(page); + folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - ret = TestClearPageWriteback(page); + ret = folio_test_clear_writeback(folio); if (ret) { - __xa_clear_mark(&mapping->i_pages, page_index(page), + __xa_clear_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_WRITEBACK); if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); - dec_wb_stat(wb, WB_WRITEBACK); - __wb_writeout_add(wb, 1); + wb_stat_mod(wb, WB_WRITEBACK, -nr); + __wb_writeout_add(wb, nr); } } @@ -2761,14 +2762,14 @@ int test_clear_page_writeback(struct page *page) xa_unlock_irqrestore(&mapping->i_pages, flags); } else { - ret = TestClearPageWriteback(page); + ret = folio_test_clear_writeback(folio); } if (ret) { - dec_lruvec_page_state(page, NR_WRITEBACK); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - inc_node_page_state(page, NR_WRITTEN); + lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + node_stat_mod_folio(folio, NR_WRITTEN, nr); } - unlock_page_memcg(page); + folio_memcg_unlock(folio); return ret; } From 518480280a7630aa0b1171bbcd3169b4699d2f5d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 24 Apr 2021 12:00:48 -0400 Subject: [PATCH 632/851] mm/writeback: Add folio_start_writeback() Rename 
set_page_writeback() to folio_start_writeback() to match folio_end_writeback(). Do not bother with wrappers that return void; callers are perfectly capable of ignoring return values. Add wrappers for set_page_writeback(), set_page_writeback_keepwrite() and test_set_page_writeback() for compatibility with existing filesystems. The main advantage of this patch is getting the statistics right, although it does eliminate a couple of calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/page-flags.h | 19 +++++++++--------- mm/folio-compat.c | 6 ++++++ mm/page-writeback.c | 40 ++++++++++++++++++++------------------ 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6f9d1f26b1ef0..54c4af35c6283 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -655,21 +655,22 @@ static __always_inline void SetPageUptodate(struct page *page) CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -int __test_set_page_writeback(struct page *page, bool keep_write); +bool __folio_start_writeback(struct folio *folio, bool keep_write); +bool set_page_writeback(struct page *page); -#define test_set_page_writeback(page) \ - __test_set_page_writeback(page, false) -#define test_set_page_writeback_keepwrite(page) \ - __test_set_page_writeback(page, true) +#define folio_start_writeback(folio) \ + __folio_start_writeback(folio, false) +#define folio_start_writeback_keepwrite(folio) \ + __folio_start_writeback(folio, true) -static inline void set_page_writeback(struct page *page) +static inline void set_page_writeback_keepwrite(struct page *page) { - test_set_page_writeback(page); + folio_start_writeback_keepwrite(page_folio(page)); } -static inline void set_page_writeback_keepwrite(struct page *page) +static inline bool test_set_page_writeback(struct page *page) { - test_set_page_writeback_keepwrite(page); + return set_page_writeback(page); }
__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 2ccd8f213fc4d..10ce5582d8698 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -71,3 +71,9 @@ void migrate_page_copy(struct page *newpage, struct page *page) } EXPORT_SYMBOL(migrate_page_copy); #endif + +bool set_page_writeback(struct page *page) +{ + return folio_start_writeback(page_folio(page)); +} +EXPORT_SYMBOL(set_page_writeback); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 8d5d7921b1577..0336273154fb9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2773,21 +2773,23 @@ bool __folio_end_writeback(struct folio *folio) return ret; } -int __test_set_page_writeback(struct page *page, bool keep_write) +bool __folio_start_writeback(struct folio *folio, bool keep_write) { - struct address_space *mapping = page_mapping(page); - int ret, access_ret; + long nr = folio_nr_pages(folio); + struct address_space *mapping = folio_mapping(folio); + bool ret; + int access_ret; - lock_page_memcg(page); + folio_memcg_lock(folio); if (mapping && mapping_use_writeback_tags(mapping)) { - XA_STATE(xas, &mapping->i_pages, page_index(page)); + XA_STATE(xas, &mapping->i_pages, folio_index(folio)); struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; xas_lock_irqsave(&xas, flags); xas_load(&xas); - ret = TestSetPageWriteback(page); + ret = folio_test_set_writeback(folio); if (!ret) { bool on_wblist; @@ -2796,40 +2798,40 @@ int __test_set_page_writeback(struct page *page, bool keep_write) xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) - inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK); + wb_stat_mod(inode_to_wb(inode), WB_WRITEBACK, + nr); /* - * We can come through here when swapping anonymous - * pages, so we don't necessarily have an inode to track - * for sync. 
+ * We can come through here when swapping + * anonymous folios, so we don't necessarily + * have an inode to track for sync. */ if (mapping->host && !on_wblist) sb_mark_inode_writeback(mapping->host); } - if (!PageDirty(page)) + if (!folio_test_dirty(folio)) xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); if (!keep_write) xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irqrestore(&xas, flags); } else { - ret = TestSetPageWriteback(page); + ret = folio_test_set_writeback(folio); } if (!ret) { - inc_lruvec_page_state(page, NR_WRITEBACK); - inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); + lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); } - unlock_page_memcg(page); - access_ret = arch_make_page_accessible(page); + folio_memcg_unlock(folio); + access_ret = arch_make_folio_accessible(folio); /* * If writeback has been triggered on a page that cannot be made * accessible, it is too late to recover here. */ - VM_BUG_ON_PAGE(access_ret != 0, page); + VM_BUG_ON_FOLIO(access_ret != 0, folio); return ret; - } -EXPORT_SYMBOL(__test_set_page_writeback); +EXPORT_SYMBOL(__folio_start_writeback); /** * folio_wait_writeback - Wait for a folio to finish writeback. From d6a10efac1969044d037738117b113eb7faf30d1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 26 Apr 2021 23:53:10 -0400 Subject: [PATCH 633/851] mm/writeback: Add folio_mark_dirty() Reimplement set_page_dirty() as a wrapper around folio_mark_dirty(). There is no change to filesystems as they were already being called with the compound_head of the page being marked dirty. We avoid several calls to compound_head(), both statically (through using folio_test_dirty() instead of PageDirty() and dynamically by calling folio_mapping() instead of page_mapping(). Also return bool instead of int to show the range of values actually returned, and add kernel-doc. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 3 ++- mm/folio-compat.c | 6 ++++++ mm/page-writeback.c | 35 +++++++++++++++++++---------------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d8b4fcac8305..4f82a0092bdef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2006,7 +2006,8 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); void account_page_cleaned(struct page *page, struct address_space *mapping, struct bdi_writeback *wb); -int set_page_dirty(struct page *page); +bool folio_mark_dirty(struct folio *folio); +bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); void __cancel_dirty_page(struct page *page); static inline void cancel_dirty_page(struct page *page) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 10ce5582d8698..2c2b3917b5dc9 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -77,3 +77,9 @@ bool set_page_writeback(struct page *page) return folio_start_writeback(page_folio(page)); } EXPORT_SYMBOL(set_page_writeback); + +bool set_page_dirty(struct page *page) +{ + return folio_mark_dirty(page_folio(page)); +} +EXPORT_SYMBOL(set_page_dirty); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0336273154fb9..d7c0cad6a57f6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2564,18 +2564,21 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) } EXPORT_SYMBOL(redirty_page_for_writepage); -/* - * Dirty a page. +/** + * folio_mark_dirty - Mark a folio as being modified. + * @folio: The folio. + * + * For folios with a mapping this should be done under the page lock + * for the benefit of asynchronous memory errors who prefer a consistent + * dirty state. This rule can be broken in some special cases, + * but should be better not to. 
* - * For pages with a mapping this should be done under the page lock for the - * benefit of asynchronous memory errors who prefer a consistent dirty state. - * This rule can be broken in some special cases, but should be better not to. + * Return: True if the folio was newly dirtied, false if it was already dirty. */ -int set_page_dirty(struct page *page) +bool folio_mark_dirty(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); - page = compound_head(page); if (likely(mapping)) { /* * readahead/lru_deactivate_page could remain @@ -2587,17 +2590,17 @@ int set_page_dirty(struct page *page) * it will confuse readahead and make it restart the size rampup * process. But it's a trivial problem. */ - if (PageReclaim(page)) - ClearPageReclaim(page); - return mapping->a_ops->set_page_dirty(page); + if (folio_test_reclaim(folio)) + folio_clear_reclaim(folio); + return mapping->a_ops->set_page_dirty(&folio->page); } - if (!PageDirty(page)) { - if (!TestSetPageDirty(page)) - return 1; + if (!folio_test_dirty(folio)) { + if (!folio_test_set_dirty(folio)) + return true; } - return 0; + return false; } -EXPORT_SYMBOL(set_page_dirty); +EXPORT_SYMBOL(folio_mark_dirty); /* * set_page_dirty() is racy if the caller has no reference against From 4df5aefb8ab73fae8ed1a8686a74fe6a89ca4fd7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 11:01:10 -0400 Subject: [PATCH 634/851] mm/writeback: Add __folio_mark_dirty() Turn __set_page_dirty() into a wrapper around __folio_mark_dirty(). Convert account_page_dirtied() into folio_account_dirtied() and account the number of pages in the folio to support multi-page folios. 
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/memcontrol.h | 5 ++--- include/linux/pagemap.h | 7 ++++++- mm/page-writeback.c | 41 +++++++++++++++++++------------------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7bc6d5a6b7391..12e9a662f1ef6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1576,10 +1576,9 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio, struct bdi_writeback *wb); -static inline void mem_cgroup_track_foreign_dirty(struct page *page, +static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { - struct folio *folio = page_folio(page); if (mem_cgroup_disabled()) return; @@ -1604,7 +1603,7 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, { } -static inline void mem_cgroup_track_foreign_dirty(struct page *page, +static inline void mem_cgroup_track_foreign_dirty(struct folio *folio, struct bdi_writeback *wb) { } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 08f40e004d97e..3d88c17fedc92 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -773,8 +773,13 @@ void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); void folio_wait_stable(struct folio *folio); +void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); +static inline void __set_page_dirty(struct page *page, + struct address_space *mapping, int warn) +{ + __folio_mark_dirty(page_folio(page), mapping, warn); +} -void __set_page_dirty(struct page *, struct address_space *, int warn); int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d7c0cad6a57f6..3e02c86eb4453 100644 --- 
a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2421,29 +2421,30 @@ EXPORT_SYMBOL(__set_page_dirty_no_writeback); * * NOTE: This relies on being atomic wrt interrupts. */ -static void account_page_dirtied(struct page *page, +static void folio_account_dirtied(struct folio *folio, struct address_space *mapping) { struct inode *inode = mapping->host; - trace_writeback_dirty_page(page, mapping); + trace_writeback_dirty_page(&folio->page, mapping); if (mapping_can_writeback(mapping)) { struct bdi_writeback *wb; + long nr = folio_nr_pages(folio); - inode_attach_wb(inode, page); + inode_attach_wb(inode, &folio->page); wb = inode_to_wb(inode); - __inc_lruvec_page_state(page, NR_FILE_DIRTY); - __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); - __inc_node_page_state(page, NR_DIRTIED); - inc_wb_stat(wb, WB_RECLAIMABLE); - inc_wb_stat(wb, WB_DIRTIED); - task_io_account_write(PAGE_SIZE); - current->nr_dirtied++; - __this_cpu_inc(bdp_ratelimits); + __lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr); + __zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr); + __node_stat_mod_folio(folio, NR_DIRTIED, nr); + wb_stat_mod(wb, WB_RECLAIMABLE, nr); + wb_stat_mod(wb, WB_DIRTIED, nr); + task_io_account_write(nr * PAGE_SIZE); + current->nr_dirtied += nr; + __this_cpu_add(bdp_ratelimits, nr); - mem_cgroup_track_foreign_dirty(page, wb); + mem_cgroup_track_foreign_dirty(folio, wb); } } @@ -2464,24 +2465,24 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, } /* - * Mark the page dirty, and set it dirty in the page cache, and mark the inode - * dirty. + * Mark the folio dirty, and set it dirty in the page cache, and mark + * the inode dirty. * - * If warn is true, then emit a warning if the page is not uptodate and has + * If warn is true, then emit a warning if the folio is not uptodate and has * not been truncated. * * The caller must hold lock_page_memcg(). 
*/ -void __set_page_dirty(struct page *page, struct address_space *mapping, +void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, int warn) { unsigned long flags; xa_lock_irqsave(&mapping->i_pages, flags); - if (page->mapping) { /* Race with truncate? */ - WARN_ON_ONCE(warn && !PageUptodate(page)); - account_page_dirtied(page, mapping); - __xa_set_mark(&mapping->i_pages, page_index(page), + if (folio->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(warn && !folio_test_uptodate(folio)); + folio_account_dirtied(folio, mapping); + __xa_set_mark(&mapping->i_pages, folio_index(folio), PAGECACHE_TAG_DIRTY); } xa_unlock_irqrestore(&mapping->i_pages, flags); From aed549d63450fb3737e4533892ac688ac833ed6c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Jul 2021 22:45:50 -0400 Subject: [PATCH 635/851] mm/writeback: Convert tracing writeback_page_template to folios Rename writeback_dirty_page() to writeback_dirty_folio() and wait_on_page_writeback() to folio_wait_writeback(). Signed-off-by: Matthew Wilcox (Oracle) --- include/trace/events/writeback.h | 20 ++++++++++---------- mm/page-writeback.c | 6 +++--- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 297871ca00047..7dccb66474f73 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -52,11 +52,11 @@ WB_WORK_REASON struct wb_writeback_work; -DECLARE_EVENT_CLASS(writeback_page_template, +DECLARE_EVENT_CLASS(writeback_folio_template, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping), + TP_ARGS(folio, mapping), TP_STRUCT__entry ( __array(char, name, 32) @@ -69,7 +69,7 @@ DECLARE_EVENT_CLASS(writeback_page_template, bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : NULL), 32); __entry->ino = mapping ? 
mapping->host->i_ino : 0; - __entry->index = page->index; + __entry->index = folio->index; ), TP_printk("bdi %s: ino=%lu index=%lu", @@ -79,18 +79,18 @@ DECLARE_EVENT_CLASS(writeback_page_template, ) ); -DEFINE_EVENT(writeback_page_template, writeback_dirty_page, +DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping) + TP_ARGS(folio, mapping) ); -DEFINE_EVENT(writeback_page_template, wait_on_page_writeback, +DEFINE_EVENT(writeback_folio_template, folio_wait_writeback, - TP_PROTO(struct page *page, struct address_space *mapping), + TP_PROTO(struct folio *folio, struct address_space *mapping), - TP_ARGS(page, mapping) + TP_ARGS(folio, mapping) ); DECLARE_EVENT_CLASS(writeback_dirty_inode_template, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3e02c86eb4453..2dc410b110ff8 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2426,7 +2426,7 @@ static void folio_account_dirtied(struct folio *folio, { struct inode *inode = mapping->host; - trace_writeback_dirty_page(&folio->page, mapping); + trace_writeback_dirty_folio(folio, mapping); if (mapping_can_writeback(mapping)) { struct bdi_writeback *wb; @@ -2852,7 +2852,7 @@ EXPORT_SYMBOL(__folio_start_writeback); void folio_wait_writeback(struct folio *folio) { while (folio_test_writeback(folio)) { - trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); + trace_folio_wait_writeback(folio, folio_mapping(folio)); folio_wait_bit(folio, PG_writeback); } } @@ -2874,7 +2874,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback); int folio_wait_writeback_killable(struct folio *folio) { while (folio_test_writeback(folio)) { - trace_wait_on_page_writeback(&folio->page, folio_mapping(folio)); + trace_folio_wait_writeback(folio, folio_mapping(folio)); if (folio_wait_bit_killable(folio, PG_writeback)) return -EINTR; } From 
2505826efe5519be94e268120ab844e2570d1e8a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 May 2021 23:30:44 -0400 Subject: [PATCH 636/851] mm/writeback: Add filemap_dirty_folio() Reimplement __set_page_dirty_nobuffers() as a wrapper around filemap_dirty_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/writeback.h | 1 + mm/folio-compat.c | 6 ++++ mm/page-writeback.c | 60 ++++++++++++++++++++------------------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 667e86cfbdcfe..eda9cc778ef6d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -398,6 +398,7 @@ void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); void account_page_redirty(struct page *page); void sb_mark_inode_writeback(struct inode *inode); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 2c2b3917b5dc9..dad962b920e5e 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -83,3 +83,9 @@ bool set_page_dirty(struct page *page) return folio_mark_dirty(page_folio(page)); } EXPORT_SYMBOL(set_page_dirty); + +int __set_page_dirty_nobuffers(struct page *page) +{ + return filemap_dirty_folio(page_mapping(page), page_folio(page)); +} +EXPORT_SYMBOL(__set_page_dirty_nobuffers); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 2dc410b110ff8..bd97c461d4998 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2488,41 +2488,43 @@ void __folio_mark_dirty(struct folio *folio, struct address_space *mapping, xa_unlock_irqrestore(&mapping->i_pages, flags); } -/* - * For address_spaces which do not use buffers. Just tag the page as dirty in - * the xarray. 
- * - * This is also used when a single buffer is being dirtied: we want to set the - * page dirty in that case, but not all the buffers. This is a "bottom-up" - * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. - * - * The caller must ensure this doesn't race with truncation. Most will simply - * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and - * the pte lock held, which also locks out truncation. +/** + * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads. + * @mapping: Address space this folio belongs to. + * @folio: Folio to be marked as dirty. + * + * Filesystems which do not use buffer heads should call this function + * from their set_page_dirty address space operation. It ignores the + * contents of folio_get_private(), so if the filesystem marks individual + * blocks as dirty, the filesystem should handle that itself. + * + * This is also sometimes used by filesystems which use buffer_heads when + * a single buffer is being dirtied: we want to set the folio dirty in + * that case, but not all the buffers. This is a "bottom-up" dirtying, + * whereas __set_page_dirty_buffers() is a "top-down" dirtying. + * + * The caller must ensure this doesn't race with truncation. Most will + * simply hold the folio lock, but e.g. zap_pte_range() calls with the + * folio mapped and the pte lock held, which also locks out truncation. 
*/ -int __set_page_dirty_nobuffers(struct page *page) +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) { - lock_page_memcg(page); - if (!TestSetPageDirty(page)) { - struct address_space *mapping = page_mapping(page); + folio_memcg_lock(folio); + if (folio_test_set_dirty(folio)) { + folio_memcg_unlock(folio); + return false; + } - if (!mapping) { - unlock_page_memcg(page); - return 1; - } - __set_page_dirty(page, mapping, !PagePrivate(page)); - unlock_page_memcg(page); + __folio_mark_dirty(folio, mapping, !folio_test_private(folio)); + folio_memcg_unlock(folio); - if (mapping->host) { - /* !PageAnon && !swapper_space */ - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - } - return 1; + if (mapping->host) { + /* !PageAnon && !swapper_space */ + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } - unlock_page_memcg(page); - return 0; + return true; } -EXPORT_SYMBOL(__set_page_dirty_nobuffers); +EXPORT_SYMBOL(filemap_dirty_folio); /* * Call this whenever redirtying a page, to de-account the dirty counters From aff8d48a6fceca48ada355c41d91071dd87dfc2a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 4 May 2021 16:12:09 -0400 Subject: [PATCH 637/851] mm/writeback: Add folio_account_cleaned() Get the statistics right; compound pages were being accounted as a single page. This didn't matter before now as no filesystem which supported compound pages did writeback. Also move the declaration to filemap.h since this is part of the page cache. Add a wrapper for account_page_cleaned(). 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 3 --- include/linux/pagemap.h | 7 +++++++ mm/page-writeback.c | 11 ++++++----- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 4f82a0092bdef..fd5754b0655ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -39,7 +39,6 @@ struct anon_vma_chain; struct file_ra_state; struct user_struct; struct writeback_control; -struct bdi_writeback; struct pt_regs; extern int sysctl_page_lock_unfairness; @@ -2004,8 +2003,6 @@ extern void do_invalidatepage(struct page *page, unsigned int offset, int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -void account_page_cleaned(struct page *page, struct address_space *mapping, - struct bdi_writeback *wb); bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 3d88c17fedc92..665ba6a673853 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -779,6 +779,13 @@ static inline void __set_page_dirty(struct page *page, { __folio_mark_dirty(page_folio(page), mapping, warn); } +void folio_account_cleaned(struct folio *folio, struct address_space *mapping, + struct bdi_writeback *wb); +static inline void account_page_cleaned(struct page *page, + struct address_space *mapping, struct bdi_writeback *wb) +{ + return folio_account_cleaned(page_folio(page), mapping, wb); +} int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index bd97c461d4998..792a83bd39178 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2453,14 +2453,15 @@ static void folio_account_dirtied(struct folio *folio, * * Caller must hold lock_page_memcg(). 
*/ -void account_page_cleaned(struct page *page, struct address_space *mapping, +void folio_account_cleaned(struct folio *folio, struct address_space *mapping, struct bdi_writeback *wb) { if (mapping_can_writeback(mapping)) { - dec_lruvec_page_state(page, NR_FILE_DIRTY); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - dec_wb_stat(wb, WB_RECLAIMABLE); - task_io_account_cancelled_write(PAGE_SIZE); + long nr = folio_nr_pages(folio); + lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + wb_stat_mod(wb, WB_RECLAIMABLE, -nr); + task_io_account_cancelled_write(folio_size(folio)); } } From ba9fe82694a75bfe320a7295be5789f70d7ef19b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Mar 2021 16:43:04 -0500 Subject: [PATCH 638/851] mm/writeback: Add folio_cancel_dirty() Turn __cancel_dirty_page() into __folio_cancel_dirty() and add wrappers. Move the prototypes into pagemap.h since this is page cache functionality. Saves 44 bytes of kernel text in total; 33 bytes from __folio_cancel_dirty and 11 from two callers of cancel_dirty_page(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 7 ------- include/linux/pagemap.h | 11 +++++++++++ mm/page-writeback.c | 16 ++++++++-------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index fd5754b0655ca..451adcc058769 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2006,13 +2006,6 @@ int redirty_page_for_writepage(struct writeback_control *wbc, bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); -void __cancel_dirty_page(struct page *page); -static inline void cancel_dirty_page(struct page *page) -{ - /* Avoid atomic ops, locking, etc. when not actually needed. 
*/ - if (PageDirty(page)) - __cancel_dirty_page(page); -} int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 665ba6a673853..a4d0aeaf884d8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -786,6 +786,17 @@ static inline void account_page_cleaned(struct page *page, { return folio_account_cleaned(page_folio(page), mapping, wb); } +void __folio_cancel_dirty(struct folio *folio); +static inline void folio_cancel_dirty(struct folio *folio) +{ + /* Avoid atomic ops, locking, etc. when not actually needed. */ + if (folio_test_dirty(folio)) + __folio_cancel_dirty(folio); +} +static inline void cancel_dirty_page(struct page *page) +{ + folio_cancel_dirty(page_folio(page)); +} int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 792a83bd39178..0854ef768d06c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2640,28 +2640,28 @@ EXPORT_SYMBOL(set_page_dirty_lock); * page without actually doing it through the VM. Can you say "ext3 is * horribly ugly"? Thought you could. 
*/ -void __cancel_dirty_page(struct page *page) +void __folio_cancel_dirty(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio_mapping(folio); if (mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; - lock_page_memcg(page); + folio_memcg_lock(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); - if (TestClearPageDirty(page)) - account_page_cleaned(page, mapping, wb); + if (folio_test_clear_dirty(folio)) + folio_account_cleaned(folio, mapping, wb); unlocked_inode_to_wb_end(inode, &cookie); - unlock_page_memcg(page); + folio_memcg_unlock(folio); } else { - ClearPageDirty(page); + folio_clear_dirty(folio); } } -EXPORT_SYMBOL(__cancel_dirty_page); +EXPORT_SYMBOL(__folio_cancel_dirty); /* * Clear a page's dirty flag, while caring for dirty memory accounting. From 22cabba4404a0dc0f7902f6bf94678b1d1cc0449 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Feb 2021 16:21:20 -0500 Subject: [PATCH 639/851] mm/writeback: Add folio_clear_dirty_for_io() Transform clear_page_dirty_for_io() into folio_clear_dirty_for_io() and add a compatibility wrapper. Also move the declaration to pagemap.h as this is page cache functionality that doesn't need to be used by the rest of the kernel. Increases the size of the kernel by 79 bytes. While we remove a few calls to compound_head(), we add a call to folio_nr_pages() to get the stats correct for the eventual support of multi-page folios. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 1 - include/linux/pagemap.h | 2 ++ mm/folio-compat.c | 6 ++++ mm/page-writeback.c | 63 +++++++++++++++++++++-------------------- 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 451adcc058769..1cb08a35cf456 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2006,7 +2006,6 @@ int redirty_page_for_writepage(struct writeback_control *wbc, bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); -int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a4d0aeaf884d8..006de2d84d065 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -797,6 +797,8 @@ static inline void cancel_dirty_page(struct page *page) { folio_cancel_dirty(page_folio(page)); } +bool folio_clear_dirty_for_io(struct folio *folio); +bool clear_page_dirty_for_io(struct page *page); int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index dad962b920e5e..39f5a8d963b18 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -89,3 +89,9 @@ int __set_page_dirty_nobuffers(struct page *page) return filemap_dirty_folio(page_mapping(page), page_folio(page)); } EXPORT_SYMBOL(__set_page_dirty_nobuffers); + +bool clear_page_dirty_for_io(struct page *page) +{ + return folio_clear_dirty_for_io(page_folio(page)); +} +EXPORT_SYMBOL(clear_page_dirty_for_io); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0854ef768d06c..66060bbf6aadb 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2664,25 +2664,25 @@ void __folio_cancel_dirty(struct folio *folio) EXPORT_SYMBOL(__folio_cancel_dirty); /* - * Clear a page's dirty 
flag, while caring for dirty memory accounting. - * Returns true if the page was previously dirty. - * - * This is for preparing to put the page under writeout. We leave the page - * tagged as dirty in the xarray so that a concurrent write-for-sync - * can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage - * implementation will run either set_page_writeback() or set_page_dirty(), - * at which stage we bring the page's dirty flag and xarray dirty tag - * back into sync. - * - * This incoherency between the page's dirty flag and xarray tag is - * unfortunate, but it only exists while the page is locked. + * Clear a folio's dirty flag, while caring for dirty memory accounting. + * Returns true if the folio was previously dirty. + * + * This is for preparing to put the folio under writeout. We leave + * the folio tagged as dirty in the xarray so that a concurrent + * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk. + * The ->writepage implementation will run either folio_start_writeback() + * or folio_mark_dirty(), at which stage we bring the folio's dirty flag + * and xarray dirty tag back into sync. + * + * This incoherency between the folio's dirty flag and xarray tag is + * unfortunate, but it only exists while the folio is locked. 
*/ -int clear_page_dirty_for_io(struct page *page) +bool folio_clear_dirty_for_io(struct folio *folio) { - struct address_space *mapping = page_mapping(page); - int ret = 0; + struct address_space *mapping = folio_mapping(folio); + bool ret = false; - VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; @@ -2695,48 +2695,49 @@ int clear_page_dirty_for_io(struct page *page) * We use this sequence to make sure that * (a) we account for dirty stats properly * (b) we tell the low-level filesystem to - * mark the whole page dirty if it was + * mark the whole folio dirty if it was * dirty in a pagetable. Only to then - * (c) clean the page again and return 1 to + * (c) clean the folio again and return 1 to * cause the writeback. * * This way we avoid all nasty races with the * dirty bit in multiple places and clearing * them concurrently from different threads. * - * Note! Normally the "set_page_dirty(page)" + * Note! Normally the "folio_mark_dirty(folio)" * has no effect on the actual dirty bit - since * that will already usually be set. But we * need the side effects, and it can help us * avoid races. * - * We basically use the page "master dirty bit" + * We basically use the folio "master dirty bit" * as a serialization point for all the different * threads doing their things. */ - if (page_mkclean(page)) - set_page_dirty(page); + if (folio_mkclean(folio)) + folio_mark_dirty(folio); /* * We carefully synchronise fault handlers against - * installing a dirty pte and marking the page dirty + * installing a dirty pte and marking the folio dirty * at this point. We do this by having them hold the - * page lock while dirtying the page, and pages are + * page lock while dirtying the folio, and folios are * always locked coming in here, so we get the desired * exclusion. 
*/ wb = unlocked_inode_to_wb_begin(inode, &cookie); - if (TestClearPageDirty(page)) { - dec_lruvec_page_state(page, NR_FILE_DIRTY); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); - dec_wb_stat(wb, WB_RECLAIMABLE); - ret = 1; + if (folio_test_clear_dirty(folio)) { + long nr = folio_nr_pages(folio); + lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr); + zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr); + wb_stat_mod(wb, WB_RECLAIMABLE, -nr); + ret = true; } unlocked_inode_to_wb_end(inode, &cookie); return ret; } - return TestClearPageDirty(page); + return folio_test_clear_dirty(folio); } -EXPORT_SYMBOL(clear_page_dirty_for_io); +EXPORT_SYMBOL(folio_clear_dirty_for_io); bool __folio_end_writeback(struct folio *folio) { From 441a468d36f002584fb0d404e9e93787ea5651e5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 May 2021 10:06:55 -0400 Subject: [PATCH 640/851] mm/writeback: Add folio_account_redirty() Account the number of pages in the folio that we're redirtying. Turn account_page_redirty() into a wrapper around it. Also turn the comment on folio_account_redirty() into kernel-doc and edit it slightly so it makes sense to its potential callers.
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/writeback.h | 6 +++++- mm/page-writeback.c | 32 +++++++++++++++++++------------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index eda9cc778ef6d..50cb6e25ab9ee 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -399,7 +399,11 @@ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); -void account_page_redirty(struct page *page); +void folio_account_redirty(struct folio *folio); +static inline void account_page_redirty(struct page *page) +{ + folio_account_redirty(page_folio(page)); +} void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 66060bbf6aadb..d7bd5580c91e5 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1084,7 +1084,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, * write_bandwidth = --------------------------------------------------- * period * - * @written may have decreased due to account_page_redirty(). + * @written may have decreased due to folio_account_redirty(). * Avoid underflowing @bw calculation. */ bw = written - min(written, wb->written_stamp); @@ -2527,30 +2527,36 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) } EXPORT_SYMBOL(filemap_dirty_folio); -/* - * Call this whenever redirtying a page, to de-account the dirty counters - * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written - * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to - * systematic errors in balanced_dirty_ratelimit and the dirty pages position - * control. +/** + * folio_account_redirty - Manually account for redirtying a page. 
+ * @folio: The folio which is being redirtied. + * + * Most filesystems should call folio_redirty_for_writepage() instead + * of this function. If your filesystem is doing writeback outside the + * context of a writeback_control(), it can call this when redirtying + * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED, + * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN, + * WB_WRITTEN) in long term. The mismatches will lead to systematic errors + * in balanced_dirty_ratelimit and the dirty pages position control. */ -void account_page_redirty(struct page *page) +void folio_account_redirty(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; + long nr = folio_nr_pages(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); - current->nr_dirtied--; - dec_node_page_state(page, NR_DIRTIED); - dec_wb_stat(wb, WB_DIRTIED); + current->nr_dirtied -= nr; + node_stat_mod_folio(folio, NR_DIRTIED, -nr); + wb_stat_mod(wb, WB_DIRTIED, -nr); unlocked_inode_to_wb_end(inode, &cookie); } } -EXPORT_SYMBOL(account_page_redirty); +EXPORT_SYMBOL(folio_account_redirty); /* * When a writepage implementation decides that it doesn't want to write this From d3b5830e50e1c243a88743935e56107359fc6bd8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 2 May 2021 23:22:52 -0400 Subject: [PATCH 641/851] mm/writeback: Add folio_redirty_for_writepage() Reimplement redirty_page_for_writepage() as a wrapper around folio_redirty_for_writepage(). Account the number of pages in the folio, add kernel-doc and move the prototype to writeback.h. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- fs/jfs/jfs_metapage.c | 1 + include/linux/mm.h | 4 ---- include/linux/writeback.h | 2 ++ mm/folio-compat.c | 7 +++++++ mm/page-writeback.c | 30 ++++++++++++++++++++---------- 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 176580f54af96..104ae698443ed 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" diff --git a/include/linux/mm.h b/include/linux/mm.h index 1cb08a35cf456..5a2e6a41bfb23 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -36,9 +36,7 @@ struct mempolicy; struct anon_vma; struct anon_vma_chain; -struct file_ra_state; struct user_struct; -struct writeback_control; struct pt_regs; extern int sysctl_page_lock_unfairness; @@ -2001,8 +1999,6 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); -int redirty_page_for_writepage(struct writeback_control *wbc, - struct page *page); bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 50cb6e25ab9ee..5383f7e39816a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -404,6 +404,8 @@ static inline void account_page_redirty(struct page *page) { folio_account_redirty(page_folio(page)); } +bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); +bool redirty_page_for_writepage(struct writeback_control *, struct page *); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 39f5a8d963b18..c1e01bc36d322 100644 --- 
a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -95,3 +95,10 @@ bool clear_page_dirty_for_io(struct page *page) return folio_clear_dirty_for_io(page_folio(page)); } EXPORT_SYMBOL(clear_page_dirty_for_io); + +bool redirty_page_for_writepage(struct writeback_control *wbc, + struct page *page) +{ + return folio_redirty_for_writepage(wbc, page_folio(page)); +} +EXPORT_SYMBOL(redirty_page_for_writepage); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d7bd5580c91e5..c2987f05c9442 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2558,21 +2558,31 @@ void folio_account_redirty(struct folio *folio) } EXPORT_SYMBOL(folio_account_redirty); -/* - * When a writepage implementation decides that it doesn't want to write this - * page for some reason, it should redirty the locked page via - * redirty_page_for_writepage() and it should then unlock the page and return 0 +/** + * folio_redirty_for_writepage - Decline to write a dirty folio. + * @wbc: The writeback control. + * @folio: The folio. + * + * When a writepage implementation decides that it doesn't want to write + * @folio for some reason, it should call this function, unlock @folio and + * return 0. + * + * Return: True if we redirtied the folio. False if someone else dirtied + * it first. */ -int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) +bool folio_redirty_for_writepage(struct writeback_control *wbc, + struct folio *folio) { - int ret; + bool ret; + unsigned nr = folio_nr_pages(folio); + + wbc->pages_skipped += nr; + ret = filemap_dirty_folio(folio->mapping, folio); + folio_account_redirty(folio); - wbc->pages_skipped++; - ret = __set_page_dirty_nobuffers(page); - account_page_redirty(page); return ret; } -EXPORT_SYMBOL(redirty_page_for_writepage); +EXPORT_SYMBOL(folio_redirty_for_writepage); /** * folio_mark_dirty - Mark a folio as being modified. 
From 36be77420e03b0eb3d342ed1ce36ee160355a65e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 23:11:28 -0400 Subject: [PATCH 642/851] mm/filemap: Add i_blocks_per_folio() Reimplement i_blocks_per_page() as a wrapper around i_blocks_per_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/pagemap.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 006de2d84d065..412db88b8d0c4 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1150,19 +1150,25 @@ static inline int page_mkwrite_check_truncate(struct page *page, } /** - * i_blocks_per_page - How many blocks fit in this page. + * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks. - * @page: The page (head page if the page is a THP). + * @folio: The folio. * - * If the block size is larger than the size of this page, return zero. + * If the block size is larger than the size of this folio, return zero. * - * Context: The caller should hold a refcount on the page to prevent it + * Context: The caller should hold a refcount on the folio to prevent it * from being split. - * Return: The number of filesystem blocks covered by this page. + * Return: The number of filesystem blocks covered by this folio. 
*/ +static inline +unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) +{ + return folio_size(folio) >> inode->i_blkbits; +} + static inline unsigned int i_blocks_per_page(struct inode *inode, struct page *page) { - return thp_size(page) >> inode->i_blkbits; + return i_blocks_per_folio(inode, page_folio(page)); } #endif /* _LINUX_PAGEMAP_H */ From 694d597dc82320dfc434f65481f5362043b8b666 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 22:30:06 -0400 Subject: [PATCH 643/851] mm/filemap: Add folio_mkwrite_check_truncate() This is the folio equivalent of page_mkwrite_check_truncate(). Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagemap.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 412db88b8d0c4..18c06c3e42c3f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1121,6 +1121,34 @@ static inline unsigned long dir_pages(struct inode *inode) PAGE_SHIFT; } +/** + * folio_mkwrite_check_truncate - check if folio was truncated + * @folio: the folio to check + * @inode: the inode to check the folio against + * + * Return: the number of bytes in the folio up to EOF, + * or -EFAULT if the folio was truncated. 
+ */ +static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, + struct inode *inode) +{ + loff_t size = i_size_read(inode); + pgoff_t index = size >> PAGE_SHIFT; + size_t offset = offset_in_folio(folio, size); + + if (!folio->mapping) + return -EFAULT; + + /* folio is wholly inside EOF */ + if (folio_next_index(folio) - 1 < index) + return folio_size(folio); + /* folio is wholly past EOF */ + if (folio->index > index || !offset) + return -EFAULT; + /* folio is partially inside EOF */ + return offset; +} + /** * page_mkwrite_check_truncate - check if page was truncated * @page: the page to check From aaadfdb56d17f1e3ba9199c0d3f2cdadbca1cce3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 16:37:09 -0400 Subject: [PATCH 644/851] mm/filemap: Add readahead_folio() The pointers stored in the page cache are folios, by definition. This change comes with a behaviour change -- callers of readahead_folio() are no longer required to put the page reference themselves. This matches how readpage works, rather than matching how readpages used to work. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/pagemap.h | 54 +++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 18c06c3e42c3f..bd4daebaf70ee 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -988,33 +988,57 @@ void page_cache_async_readahead(struct address_space *mapping, page_cache_async_ra(&ractl, page, req_count); } +static inline struct folio *__readahead_folio(struct readahead_control *ractl) +{ + struct folio *folio; + + BUG_ON(ractl->_batch_count > ractl->_nr_pages); + ractl->_nr_pages -= ractl->_batch_count; + ractl->_index += ractl->_batch_count; + + if (!ractl->_nr_pages) { + ractl->_batch_count = 0; + return NULL; + } + + folio = xa_load(&ractl->mapping->i_pages, ractl->_index); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + ractl->_batch_count = folio_nr_pages(folio); + + return folio; +} + /** * readahead_page - Get the next page to read. - * @rac: The current readahead request. + * @ractl: The current readahead request. * * Context: The page is locked and has an elevated refcount. The caller * should decreases the refcount once the page has been submitted for I/O * and unlock the page once all I/O to that page has completed. * Return: A pointer to the next page, or %NULL if we are done. 
*/ -static inline struct page *readahead_page(struct readahead_control *rac) +static inline struct page *readahead_page(struct readahead_control *ractl) { - struct page *page; + struct folio *folio = __readahead_folio(ractl); - BUG_ON(rac->_batch_count > rac->_nr_pages); - rac->_nr_pages -= rac->_batch_count; - rac->_index += rac->_batch_count; - - if (!rac->_nr_pages) { - rac->_batch_count = 0; - return NULL; - } + return &folio->page; +} - page = xa_load(&rac->mapping->i_pages, rac->_index); - VM_BUG_ON_PAGE(!PageLocked(page), page); - rac->_batch_count = thp_nr_pages(page); +/** + * readahead_folio - Get the next folio to read. + * @ractl: The current readahead request. + * + * Context: The folio is locked. The caller should unlock the folio once + * all I/O to that folio has completed. + * Return: A pointer to the next folio, or %NULL if we are done. + */ +static inline struct folio *readahead_folio(struct readahead_control *ractl) +{ + struct folio *folio = __readahead_folio(ractl); - return page; + if (folio) + folio_put(folio); + return folio; } static inline unsigned int __readahead_batch(struct readahead_control *rac, From 45b9a6db4c8d59c1c327235e84d9dc4ee9389ee8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 10:27:16 -0400 Subject: [PATCH 645/851] mm/workingset: Convert workingset_refault() to take a folio This nets us 178 bytes of savings from removing calls to compound_head. The three callers all grow a little, but each of them will be converted to use folios soon, so that's fine. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/swap.h | 4 ++-- mm/filemap.c | 2 +- mm/memory.c | 3 ++- mm/swap.c | 7 +++---- mm/swap_state.c | 2 +- mm/workingset.c | 34 +++++++++++++++++----------------- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index c7a4c0a5863d1..5e01675af7ab9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -329,7 +329,7 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio) /* linux/mm/workingset.c */ void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); -void workingset_refault(struct page *page, void *shadow); +void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); /* Only track the nodes of mappings with shadow entries */ @@ -350,7 +350,7 @@ extern unsigned long nr_free_buffer_pages(void); /* linux/mm/swap.c */ extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); -extern void lru_note_cost_page(struct page *); +extern void lru_note_cost_folio(struct folio *); extern void lru_cache_add(struct page *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); diff --git a/mm/filemap.c b/mm/filemap.c index 7adef76691c99..675c1429d6865 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -981,7 +981,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, */ WARN_ON_ONCE(PageActive(page)); if (!(gfp_mask & __GFP_WRITE) && shadow) - workingset_refault(page, shadow); + workingset_refault(page_folio(page), shadow); lru_cache_add(page); } return ret; diff --git a/mm/memory.c b/mm/memory.c index 614418e26e2c9..627e7836ade64 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3538,7 +3538,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) shadow = get_shadow_from_swap_cache(entry); if (shadow) - 
workingset_refault(page, shadow); + workingset_refault(page_folio(page), + shadow); lru_cache_add(page); diff --git a/mm/swap.c b/mm/swap.c index d32007fe23b34..6e80f30d2e5ed 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -315,11 +315,10 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) } while ((lruvec = parent_lruvec(lruvec))); } -void lru_note_cost_page(struct page *page) +void lru_note_cost_folio(struct folio *folio) { - struct folio *folio = page_folio(page); - lru_note_cost(folio_lruvec(folio), - page_is_file_lru(page), thp_nr_pages(page)); + lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio), + folio_nr_pages(folio)); } static void __folio_activate(struct folio *folio, struct lruvec *lruvec) diff --git a/mm/swap_state.c b/mm/swap_state.c index c56aa9ac050dd..1a29b4f982087 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -498,7 +498,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, mem_cgroup_swapin_uncharge_swap(entry); if (shadow) - workingset_refault(page, shadow); + workingset_refault(page_folio(page), shadow); /* Caller will initiate read into locked page */ lru_cache_add(page); diff --git a/mm/workingset.c b/mm/workingset.c index 39bb60d502173..10830211a187f 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -273,17 +273,17 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) } /** - * workingset_refault - evaluate the refault of a previously evicted page - * @page: the freshly allocated replacement page - * @shadow: shadow entry of the evicted page + * workingset_refault - evaluate the refault of a previously evicted folio + * @folio: the freshly allocated replacement folio + * @shadow: shadow entry of the evicted folio * * Calculates and evaluates the refault distance of the previously - * evicted page in the context of the node and the memcg whose memory + * evicted folio in the context of the node and the memcg whose memory * pressure caused the eviction. 
*/ -void workingset_refault(struct page *page, void *shadow) +void workingset_refault(struct folio *folio, void *shadow) { - bool file = page_is_file_lru(page); + bool file = folio_is_file_lru(folio); struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; @@ -301,10 +301,10 @@ void workingset_refault(struct page *page, void *shadow) rcu_read_lock(); /* * Look up the memcg associated with the stored ID. It might - * have been deleted since the page's eviction. + * have been deleted since the folio's eviction. * * Note that in rare events the ID could have been recycled - * for a new cgroup that refaults a shared page. This is + * for a new cgroup that refaults a shared folio. This is * impossible to tell from the available data. However, this * should be a rare and limited disturbance, and activations * are always speculative anyway. Ultimately, it's the aging @@ -340,14 +340,14 @@ void workingset_refault(struct page *page, void *shadow) refault_distance = (refault - eviction) & EVICTION_MASK; /* - * The activation decision for this page is made at the level + * The activation decision for this folio is made at the level * where the eviction occurred, as that is where the LRU order - * during page reclaim is being determined. + * during folio reclaim is being determined. * - * However, the cgroup that will own the page is the one that + * However, the cgroup that will own the folio is the one that * is actually experiencing the refault event. 
*/ - memcg = page_memcg(page); + memcg = folio_memcg(folio); lruvec = mem_cgroup_lruvec(memcg, pgdat); inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); @@ -375,15 +375,15 @@ void workingset_refault(struct page *page, void *shadow) if (refault_distance > workingset_size) goto out; - SetPageActive(page); - workingset_age_nonresident(lruvec, thp_nr_pages(page)); + folio_set_active(folio); + workingset_age_nonresident(lruvec, folio_nr_pages(folio)); inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); - /* Page was active prior to eviction */ + /* Folio was active prior to eviction */ if (workingset) { - SetPageWorkingset(page); + folio_set_workingset(folio); /* XXX: Move to lru_cache_add() when it supports new vs putback */ - lru_note_cost_page(page); + lru_note_cost_folio(folio); inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); } out: From 6b59d53b7a80ba3efa1121dcb0414639924154f5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 May 2021 15:04:28 -0400 Subject: [PATCH 646/851] mm: Add folio_evictable() This is the folio equivalent of page_evictable(). Unfortunately, it's different from !folio_test_unevictable(), but I think it's used in places where you have to be a VM expert and can reasonably be expected to know the difference. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/internal.h | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 08e8a28994d14..0910efec5821a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -72,17 +72,28 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, pgoff_t end, struct pagevec *pvec, pgoff_t *indices); /** - * page_evictable - test whether a page is evictable - * @page: the page to test + * folio_evictable - Test whether a folio is evictable. + * @folio: The folio to test. 
* - * Test whether page is evictable--i.e., should be placed on active/inactive - * lists vs unevictable list. - * - * Reasons page might not be evictable: - * (1) page's mapping marked unevictable - * (2) page is part of an mlocked VMA + * Test whether @folio is evictable -- i.e., should be placed on + * active/inactive lists vs unevictable list. * + * Reasons folio might not be evictable: + * 1. folio's mapping marked unevictable + * 2. One of the pages in the folio is part of an mlocked VMA */ +static inline bool folio_evictable(struct folio *folio) +{ + bool ret; + + /* Prevent address_space of inode and swap cache from being freed */ + rcu_read_lock(); + ret = !mapping_unevictable(folio_mapping(folio)) && + !folio_test_mlocked(folio); + rcu_read_unlock(); + return ret; +} + static inline bool page_evictable(struct page *page) { bool ret; From 36e54906776dbe6527b31a5b2a67a5b48756dc2e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 14 May 2021 15:08:29 -0400 Subject: [PATCH 647/851] mm/lru: Convert __pagevec_lru_add_fn to take a folio This saves five calls to compound_head(), totalling 60 bytes of text. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/trace/events/pagemap.h | 32 ++++++++++++++++---------------- mm/swap.c | 34 +++++++++++++++++----------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 1fd0185d66e80..171524d3526db 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -16,38 +16,38 @@ #define PAGEMAP_MAPPEDDISK 0x0020u #define PAGEMAP_BUFFERS 0x0040u -#define trace_pagemap_flags(page) ( \ - (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ - (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \ - (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \ - (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \ - (PageMappedToDisk(page) ? 
PAGEMAP_MAPPEDDISK : 0) | \ - (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \ +#define trace_pagemap_flags(folio) ( \ + (folio_test_anon(folio) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \ + (folio_mapped(folio) ? PAGEMAP_MAPPED : 0) | \ + (folio_test_swapcache(folio) ? PAGEMAP_SWAPCACHE : 0) | \ + (folio_test_swapbacked(folio) ? PAGEMAP_SWAPBACKED : 0) | \ + (folio_test_mappedtodisk(folio) ? PAGEMAP_MAPPEDDISK : 0) | \ + (folio_test_private(folio) ? PAGEMAP_BUFFERS : 0) \ ) TRACE_EVENT(mm_lru_insertion, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( - __field(struct page *, page ) + __field(struct folio *, folio ) __field(unsigned long, pfn ) __field(enum lru_list, lru ) __field(unsigned long, flags ) ), TP_fast_assign( - __entry->page = page; - __entry->pfn = page_to_pfn(page); - __entry->lru = folio_lru_list(page_folio(page)); - __entry->flags = trace_pagemap_flags(page); + __entry->folio = folio; + __entry->pfn = folio_pfn(folio); + __entry->lru = folio_lru_list(folio); + __entry->flags = trace_pagemap_flags(folio); ), /* Flag format is based on page-types.c formatting for pagemap */ - TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", - __entry->page, + TP_printk("folio=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s", + __entry->folio, __entry->pfn, __entry->lru, __entry->flags & PAGEMAP_MAPPED ? 
"M" : " ", diff --git a/mm/swap.c b/mm/swap.c index 6e80f30d2e5ed..89d4471ceb804 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1001,17 +1001,18 @@ void __pagevec_release(struct pagevec *pvec) } EXPORT_SYMBOL(__pagevec_release); -static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) +static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec) { - int was_unevictable = TestClearPageUnevictable(page); - int nr_pages = thp_nr_pages(page); + int was_unevictable = folio_test_clear_unevictable(folio); + int nr_pages = folio_nr_pages(folio); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* - * Page becomes evictable in two ways: + * Folio becomes evictable in two ways: * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()]. - * 2) Before acquiring LRU lock to put the page to correct LRU and then + * 2) Before acquiring LRU lock to put the folio on the correct LRU + * and then * a) do PageLRU check with lock [check_move_unevictable_pages] * b) do PageLRU check before lock [clear_page_mlock] * @@ -1020,10 +1021,10 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) * * #0: __pagevec_lru_add_fn #1: clear_page_mlock * - * SetPageLRU() TestClearPageMlocked() + * folio_set_lru() folio_test_clear_mlocked() * smp_mb() // explicit ordering // above provides strict * // ordering - * PageMlocked() PageLRU() + * folio_test_mlocked() folio_test_lru() * * * if '#1' does not observe setting of PG_lru by '#0' and fails @@ -1034,21 +1035,21 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) * looking at the same page) and the evictable page will be stranded * in an unevictable LRU. 
*/ - SetPageLRU(page); + folio_set_lru(folio); smp_mb__after_atomic(); - if (page_evictable(page)) { + if (folio_evictable(folio)) { if (was_unevictable) __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { - ClearPageActive(page); - SetPageUnevictable(page); + folio_clear_active(folio); + folio_set_unevictable(folio); if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); } - add_page_to_lru_list(page, lruvec); - trace_mm_lru_insertion(page); + lruvec_add_folio(lruvec, folio); + trace_mm_lru_insertion(folio); } /* @@ -1062,11 +1063,10 @@ void __pagevec_lru_add(struct pagevec *pvec) unsigned long flags = 0; for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - struct folio *folio = page_folio(page); + struct folio *folio = page_folio(pvec->pages[i]); lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags); - __pagevec_lru_add_fn(page, lruvec); + __pagevec_lru_add_fn(folio, lruvec); } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); From 06cbd93e0ca9d5b2451d8eb0849d720be804bb40 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 29 Apr 2021 11:09:31 -0400 Subject: [PATCH 648/851] mm/lru: Add folio_add_lru() Reimplement lru_cache_add() as a wrapper around folio_add_lru(). Saves 159 bytes of kernel text due to removing calls to compound_head(). 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/swap.h | 1 + mm/folio-compat.c | 6 ++++++ mm/swap.c | 22 +++++++++++----------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 5e01675af7ab9..81801ba78b1e8 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -351,6 +351,7 @@ extern unsigned long nr_free_buffer_pages(void); extern void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); extern void lru_note_cost_folio(struct folio *); +extern void folio_add_lru(struct folio *); extern void lru_cache_add(struct page *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index c1e01bc36d322..6de3cd78a4aed 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -102,3 +102,9 @@ bool redirty_page_for_writepage(struct writeback_control *wbc, return folio_redirty_for_writepage(wbc, page_folio(page)); } EXPORT_SYMBOL(redirty_page_for_writepage); + +void lru_cache_add(struct page *page) +{ + folio_add_lru(page_folio(page)); +} +EXPORT_SYMBOL(lru_cache_add); diff --git a/mm/swap.c b/mm/swap.c index 89d4471ceb804..6f382abeccf9d 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -459,29 +459,29 @@ void folio_mark_accessed(struct folio *folio) EXPORT_SYMBOL(folio_mark_accessed); /** - * lru_cache_add - add a page to a page list - * @page: the page to be added to the LRU. + * folio_add_lru - Add a folio to an LRU list. + * @folio: The folio to be added to the LRU. * - * Queue the page for addition to the LRU via pagevec. The decision on whether + * Queue the folio for addition to the LRU. The decision on whether * to add the page to the [in]active [file|anon] list is deferred until the - * pagevec is drained. This gives a chance for the caller of lru_cache_add() - * have the page added to the active list using mark_page_accessed(). + * pagevec is drained. 
This gives a chance for the caller of folio_add_lru() + * to have the folio added to the active list using folio_mark_accessed(). */ -void lru_cache_add(struct page *page) +void folio_add_lru(struct folio *folio) { struct pagevec *pvec; - VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); - VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - get_page(page); + folio_get(folio); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.lru_add); - if (pagevec_add_and_need_flush(pvec, page)) + if (pagevec_add_and_need_flush(pvec, &folio->page)) __pagevec_lru_add(pvec); local_unlock(&lru_pvecs.lock); } -EXPORT_SYMBOL(lru_cache_add); +EXPORT_SYMBOL(folio_add_lru); /** * lru_cache_add_inactive_or_unevictable From 8f13d55bbc1404e6178b400c7b1aea7a9483c26e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 15 Dec 2020 22:55:54 -0500 Subject: [PATCH 649/851] mm/page_alloc: Add folio allocation functions The __folio_alloc(), __folio_alloc_node() and folio_alloc() functions are mostly for type safety, but they also ensure that the page allocator allocates a compound page and initialises the deferred list if the page is large enough to have one. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/gfp.h | 16 ++++++++++++++++ mm/mempolicy.c | 10 ++++++++++ mm/page_alloc.c | 12 ++++++++++++ 3 files changed, 38 insertions(+) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index dc5ff40608ce9..3745efd21cf6d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -523,6 +523,8 @@ static inline void arch_alloc_page(struct page *page, int order) { } struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); +struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask); unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, @@ -564,6 +566,15 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) return __alloc_pages(gfp_mask, order, nid, NULL); } +static inline +struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) +{ + VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); + VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + + return __folio_alloc(gfp, order, nid, NULL); +} + /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. 
Otherwise node must be valid and @@ -580,6 +591,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages(gfp_t gfp, unsigned int order); +struct folio *folio_alloc(gfp_t gfp, unsigned order); extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, int node, bool hugepage); @@ -590,6 +602,10 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node(numa_node_id(), gfp_mask, order); } +static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) +{ + return __folio_alloc_node(gfp, order, numa_node_id()); +} #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e32360e902744..95d0cf05f7ca4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2249,6 +2249,16 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) } EXPORT_SYMBOL(alloc_pages); +struct folio *folio_alloc(gfp_t gfp, unsigned order) +{ + struct page *page = alloc_pages(gfp | __GFP_COMP, order); + + if (page && order > 1) + prep_transhuge_page(page); + return (struct folio *)page; +} +EXPORT_SYMBOL(folio_alloc); + int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(vma_policy(src)); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dbf25555c9b8b..77d2a3811bdd1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5413,6 +5413,18 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, } EXPORT_SYMBOL(__alloc_pages); +struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, + nodemask_t *nodemask) +{ + struct page *page = __alloc_pages(gfp | __GFP_COMP, order, + preferred_nid, nodemask); + + if (page && order > 1) + prep_transhuge_page(page); + return (struct folio *)page; +} 
+EXPORT_SYMBOL(__folio_alloc); + /* * Common helper functions. Never use with __GFP_HIGHMEM because the returned * address cannot represent highmem pages. Use alloc_pages and then kmap if From 786f71c43caedd9958fed1a4ac3810435791a429 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 15 Dec 2020 23:11:07 -0500 Subject: [PATCH 650/851] mm/filemap: Add filemap_alloc_folio Reimplement __page_cache_alloc as a wrapper around filemap_alloc_folio to allow filesystems to be converted at our leisure. Increases kernel text size by 133 bytes, mostly in cachefiles_read_backing_file(). pagecache_get_page() shrinks by 32 bytes, though. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/pagemap.h | 11 ++++++++--- mm/filemap.c | 14 +++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bd4daebaf70ee..848acb44ac80a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -262,14 +262,19 @@ static inline void *detach_page_private(struct page *page) } #ifdef CONFIG_NUMA -extern struct page *__page_cache_alloc(gfp_t gfp); +struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); #else -static inline struct page *__page_cache_alloc(gfp_t gfp) +static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { - return alloc_pages(gfp, 0); + return folio_alloc(gfp, order); } #endif +static inline struct page *__page_cache_alloc(gfp_t gfp) +{ + return &filemap_alloc_folio(gfp, 0)->page; +} + static inline struct page *page_cache_alloc(struct address_space *x) { return __page_cache_alloc(mapping_gfp_mask(x)); diff --git a/mm/filemap.c b/mm/filemap.c index 675c1429d6865..989030671b646 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -989,24 +989,24 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, EXPORT_SYMBOL_GPL(add_to_page_cache_lru); #ifdef CONFIG_NUMA -struct page *__page_cache_alloc(gfp_t 
gfp) +struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) { int n; - struct page *page; + struct folio *folio; if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); - page = __alloc_pages_node(n, gfp, 0); - } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); + folio = __folio_alloc_node(gfp, order, n); + } while (!folio && read_mems_allowed_retry(cpuset_mems_cookie)); - return page; + return folio; } - return alloc_pages(gfp, 0); + return folio_alloc(gfp, order); } -EXPORT_SYMBOL(__page_cache_alloc); +EXPORT_SYMBOL(filemap_alloc_folio); #endif /* From 4cbf12a89a987b8e0240847bf3575b27770022f6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 8 Dec 2020 08:56:28 -0500 Subject: [PATCH 651/851] mm/filemap: Add filemap_add_folio() Convert __add_to_page_cache_locked() into __filemap_add_folio(). Add an assertion to it that (for !hugetlbfs), the folio is naturally aligned within the file. Move the prototype from mm.h to pagemap.h. Convert add_to_page_cache_lru() into filemap_add_folio(). Add a compatibility wrapper for unconverted callers. 
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- include/linux/mm.h | 7 ----- include/linux/pagemap.h | 10 ++++-- kernel/bpf/verifier.c | 2 +- mm/filemap.c | 70 ++++++++++++++++++++--------------------- mm/folio-compat.c | 7 +++++ 5 files changed, 50 insertions(+), 46 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a2e6a41bfb23..52796adf7a2fc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -213,13 +213,6 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, loff_t *); -/* - * Any attempt to mark this function as static leads to build failure - * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() - * is referred to by BPF code. This must be visible for error injection. - */ -int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp, void **shadowp); #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 848acb44ac80a..19b2e3bea14ca 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -877,9 +877,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) } int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); + pgoff_t index, gfp_t gfp); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); + pgoff_t index, gfp_t gfp); +int filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp); extern void delete_from_page_cache(struct page *page); extern void __delete_from_page_cache(struct page *page, void *shadow); void replace_page_cache_page(struct page *old, struct page *new); @@ -904,6 +906,10 @@ 
static inline int add_to_page_cache(struct page *page, return error; } +/* Must be non-static for BPF error injection */ +int __filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp, void **shadowp); + /** * struct readahead_control - Describes a readahead request. * diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 42a4063de7cd2..f0a4f8b818e42 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13015,7 +13015,7 @@ BTF_SET_START(btf_non_sleepable_error_inject) /* Three functions below can be called from sleepable and non-sleepable context. * Assume non-sleepable from bpf safety point of view. */ -BTF_ID(func, __add_to_page_cache_locked) +BTF_ID(func, __filemap_add_folio) BTF_ID(func, should_fail_alloc_page) BTF_ID(func, should_failslab) BTF_SET_END(btf_non_sleepable_error_inject) diff --git a/mm/filemap.c b/mm/filemap.c index 989030671b646..79290853aac54 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -855,26 +855,25 @@ void replace_page_cache_page(struct page *old, struct page *new) } EXPORT_SYMBOL_GPL(replace_page_cache_page); -noinline int __add_to_page_cache_locked(struct page *page, - struct address_space *mapping, - pgoff_t offset, gfp_t gfp, - void **shadowp) +noinline int __filemap_add_folio(struct address_space *mapping, + struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp) { - XA_STATE(xas, &mapping->i_pages, offset); - int huge = PageHuge(page); + XA_STATE(xas, &mapping->i_pages, index); + int huge = folio_test_hugetlb(folio); int error; bool charged = false; - VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(PageSwapBacked(page), page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio); mapping_set_update(&xas, mapping); - get_page(page); - page->mapping = mapping; - page->index = offset; + folio_get(folio); + folio->mapping = mapping; + folio->index = index; if (!huge) { - error = 
mem_cgroup_charge(page_folio(page), NULL, gfp); + error = mem_cgroup_charge(folio, NULL, gfp); + VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio); if (error) goto error; charged = true; @@ -886,7 +885,7 @@ noinline int __add_to_page_cache_locked(struct page *page, unsigned int order = xa_get_order(xas.xa, xas.xa_index); void *entry, *old = NULL; - if (order > thp_order(page)) + if (order > folio_order(folio)) xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index), order, gfp); xas_lock_irq(&xas); @@ -903,13 +902,13 @@ noinline int __add_to_page_cache_locked(struct page *page, *shadowp = old; /* entry may have been split before we acquired lock */ order = xa_get_order(xas.xa, xas.xa_index); - if (order > thp_order(page)) { + if (order > folio_order(folio)) { xas_split(&xas, old, order); xas_reset(&xas); } } - xas_store(&xas, page); + xas_store(&xas, folio); if (xas_error(&xas)) goto unlock; @@ -917,7 +916,7 @@ noinline int __add_to_page_cache_locked(struct page *page, /* hugetlb pages do not participate in page cache accounting */ if (!huge) - __inc_lruvec_page_state(page, NR_FILE_PAGES); + __lruvec_stat_add_folio(folio, NR_FILE_PAGES); unlock: xas_unlock_irq(&xas); } while (xas_nomem(&xas, gfp)); @@ -925,19 +924,19 @@ noinline int __add_to_page_cache_locked(struct page *page, if (xas_error(&xas)) { error = xas_error(&xas); if (charged) - mem_cgroup_uncharge(page_folio(page)); + mem_cgroup_uncharge(folio); goto error; } - trace_mm_filemap_add_to_page_cache(page); + trace_mm_filemap_add_to_page_cache(&folio->page); return 0; error: - page->mapping = NULL; + folio->mapping = NULL; /* Leave page->index set: truncation relies upon it */ - put_page(page); + folio_put(folio); return error; } -ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); +ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO); /** * add_to_page_cache_locked - add a locked page to the pagecache @@ -954,39 +953,38 @@ ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO); int 
add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask) { - return __add_to_page_cache_locked(page, mapping, offset, + return __filemap_add_folio(mapping, page_folio(page), offset, gfp_mask, NULL); } EXPORT_SYMBOL(add_to_page_cache_locked); -int add_to_page_cache_lru(struct page *page, struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask) +int filemap_add_folio(struct address_space *mapping, struct folio *folio, + pgoff_t index, gfp_t gfp) { void *shadow = NULL; int ret; - __SetPageLocked(page); - ret = __add_to_page_cache_locked(page, mapping, offset, - gfp_mask, &shadow); + __folio_set_locked(folio); + ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow); if (unlikely(ret)) - __ClearPageLocked(page); + __folio_clear_locked(folio); else { /* - * The page might have been evicted from cache only + * The folio might have been evicted from cache only * recently, in which case it should be activated like - * any other repeatedly accessed page. - * The exception is pages getting rewritten; evicting other + * any other repeatedly accessed folio. + * The exception is folios getting rewritten; evicting other * data from the working set, only to cache data that will * get overwritten with something else, is a waste of memory. 
*/ - WARN_ON_ONCE(PageActive(page)); - if (!(gfp_mask & __GFP_WRITE) && shadow) - workingset_refault(page_folio(page), shadow); - lru_cache_add(page); + WARN_ON_ONCE(folio_test_active(folio)); + if (!(gfp & __GFP_WRITE) && shadow) + workingset_refault(folio, shadow); + folio_add_lru(folio); } return ret; } -EXPORT_SYMBOL_GPL(add_to_page_cache_lru); +EXPORT_SYMBOL_GPL(filemap_add_folio); #ifdef CONFIG_NUMA struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 6de3cd78a4aed..6b19bc4ed6b05 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -108,3 +108,10 @@ void lru_cache_add(struct page *page) folio_add_lru(page_folio(page)); } EXPORT_SYMBOL(lru_cache_add); + +int add_to_page_cache_lru(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp) +{ + return filemap_add_folio(mapping, page_folio(page), index, gfp); +} +EXPORT_SYMBOL(add_to_page_cache_lru); From 848977816e200bbc0a8fd06be82e786817d8c7e5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 15 Dec 2020 23:22:38 -0500 Subject: [PATCH 652/851] mm/filemap: Convert mapping_get_entry to return a folio The pagecache only contains folios, so indicate that this is definitely not a tail page. Shrinks mapping_get_entry() by 56 bytes, but grows pagecache_get_page() by 21 bytes as gcc makes slightly different hot/cold code decisions. A net reduction of 35 bytes of text. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- mm/filemap.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 79290853aac54..25959c17716bc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1755,49 +1755,42 @@ EXPORT_SYMBOL(page_cache_prev_miss); * @mapping: the address_space to search * @index: The page cache index. * - * Looks up the page cache slot at @mapping & @index. 
If there is a - * page cache page, the head page is returned with an increased refcount. + * Looks up the page cache entry at @mapping & @index. If it is a folio, + * it is returned with an increased refcount. If it is a shadow entry + * of a previously evicted folio, or a swap entry from shmem/tmpfs, + * it is returned without further action. * - * If the slot holds a shadow entry of a previously evicted page, or a - * swap entry from shmem/tmpfs, it is returned. - * - * Return: The head page or shadow entry, %NULL if nothing is found. + * Return: The folio, swap or shadow entry, %NULL if nothing is found. */ -static struct page *mapping_get_entry(struct address_space *mapping, - pgoff_t index) +static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) { XA_STATE(xas, &mapping->i_pages, index); - struct page *page; + struct folio *folio; rcu_read_lock(); repeat: xas_reset(&xas); - page = xas_load(&xas); - if (xas_retry(&xas, page)) + folio = xas_load(&xas); + if (xas_retry(&xas, folio)) goto repeat; /* * A shadow entry of a recently evicted page, or a swap entry from * shmem/tmpfs. Return it without attempting to raise page count. */ - if (!page || xa_is_value(page)) + if (!folio || xa_is_value(folio)) goto out; - if (!page_cache_get_speculative(page)) + if (!folio_try_get_rcu(folio)) goto repeat; - /* - * Has the page moved or been split? - * This is part of the lockless pagecache protocol. See - * include/linux/pagemap.h for details. - */ - if (unlikely(page != xas_reload(&xas))) { - put_page(page); + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); goto repeat; } out: rcu_read_unlock(); - return page; + return folio; } /** From bd3ee02a91ac8c52ac928a86033be634f3d245ef Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 8 Mar 2021 11:45:35 -0500 Subject: [PATCH 653/851] mm/filemap: Add filemap_get_folio filemap_get_folio() is a replacement for find_get_page(). 
Turn pagecache_get_page() into a wrapper around __filemap_get_folio(). Remove find_lock_head() as this use case is now covered by filemap_get_folio(). Reduces overall kernel size by 209 bytes. __filemap_get_folio() is 316 bytes shorter than pagecache_get_page() was, but the new pagecache_get_page() is 99 bytes. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagemap.h | 41 +++++++++--------- mm/filemap.c | 92 ++++++++++++++++++++--------------------- mm/folio-compat.c | 12 ++++++ 3 files changed, 76 insertions(+), 69 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 19b2e3bea14ca..b24933eced181 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -302,8 +302,26 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_HEAD 0x00000080 #define FGP_ENTRY 0x00000100 -struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, - int fgp_flags, gfp_t cache_gfp_mask); +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp); +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp); + +/** + * filemap_get_folio - Find and get a folio. + * @mapping: The address_space to search. + * @index: The page index. + * + * Looks up the page cache entry at @mapping & @index. If a folio is + * present, it is returned with an increased refcount. + * + * Otherwise, %NULL is returned. + */ +static inline struct folio *filemap_get_folio(struct address_space *mapping, + pgoff_t index) +{ + return __filemap_get_folio(mapping, index, 0, 0); +} /** * find_get_page - find and get a page reference @@ -346,25 +364,6 @@ static inline struct page *find_lock_page(struct address_space *mapping, return pagecache_get_page(mapping, index, FGP_LOCK, 0); } -/** - * find_lock_head - Locate, pin and lock a pagecache page. - * @mapping: The address_space to search. - * @index: The page index. 
- * - * Looks up the page cache entry at @mapping & @index. If there is a - * page cache page, its head page is returned locked and with an increased - * refcount. - * - * Context: May sleep. - * Return: A struct page which is !PageTail, or %NULL if there is no page - * in the cache for this index. - */ -static inline struct page *find_lock_head(struct address_space *mapping, - pgoff_t index) -{ - return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0); -} - /** * find_or_create_page - locate or add a pagecache page * @mapping: the page's address_space diff --git a/mm/filemap.c b/mm/filemap.c index 25959c17716bc..b0a241036267d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1794,93 +1794,89 @@ static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) } /** - * pagecache_get_page - Find and get a reference to a page. + * __filemap_get_folio - Find and get a reference to a folio. * @mapping: The address_space to search. * @index: The page index. - * @fgp_flags: %FGP flags modify how the page is returned. - * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified. + * @fgp_flags: %FGP flags modify how the folio is returned. + * @gfp: Memory allocation flags to use if %FGP_CREAT is specified. * * Looks up the page cache entry at @mapping & @index. * * @fgp_flags can be zero or more of these flags: * - * * %FGP_ACCESSED - The page will be marked accessed. - * * %FGP_LOCK - The page is returned locked. - * * %FGP_HEAD - If the page is present and a THP, return the head page - * rather than the exact page specified by the index. + * * %FGP_ACCESSED - The folio will be marked accessed. + * * %FGP_LOCK - The folio is returned locked. * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it - * instead of allocating a new page to replace it. + * instead of allocating a new folio to replace it. 
* * %FGP_CREAT - If no page is present then a new page is allocated using - * @gfp_mask and added to the page cache and the VM's LRU list. + * @gfp and added to the page cache and the VM's LRU list. * The page is returned locked and with an increased refcount. * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the * page is already in cache. If the page was allocated, unlock it before * returning so the caller can do the same dance. - * * %FGP_WRITE - The page will be written - * * %FGP_NOFS - __GFP_FS will get cleared in gfp mask - * * %FGP_NOWAIT - Don't get blocked by page lock + * * %FGP_WRITE - The page will be written to by the caller. + * * %FGP_NOFS - __GFP_FS will get cleared in gfp. + * * %FGP_NOWAIT - Don't get blocked by page lock. * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. * * If there is a page cache page, it is returned with an increased refcount. * - * Return: The found page or %NULL otherwise. + * Return: The found folio or %NULL otherwise. */ -struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp_mask) +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp) { - struct page *page; + struct folio *folio; repeat: - page = mapping_get_entry(mapping, index); - if (xa_is_value(page)) { + folio = mapping_get_entry(mapping, index); + if (xa_is_value(folio)) { if (fgp_flags & FGP_ENTRY) - return page; - page = NULL; + return folio; + folio = NULL; } - if (!page) + if (!folio) goto no_page; if (fgp_flags & FGP_LOCK) { if (fgp_flags & FGP_NOWAIT) { - if (!trylock_page(page)) { - put_page(page); + if (!folio_trylock(folio)) { + folio_put(folio); return NULL; } } else { - lock_page(page); + folio_lock(folio); } /* Has the page been truncated? 
*/ - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - put_page(page); + if (unlikely(folio->mapping != mapping)) { + folio_unlock(folio); + folio_put(folio); goto repeat; } - VM_BUG_ON_PAGE(!thp_contains(page, index), page); + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); } if (fgp_flags & FGP_ACCESSED) - mark_page_accessed(page); + folio_mark_accessed(folio); else if (fgp_flags & FGP_WRITE) { /* Clear idle flag for buffer write */ - if (page_is_idle(page)) - clear_page_idle(page); + if (folio_test_idle(folio)) + folio_clear_idle(folio); } - if (!(fgp_flags & FGP_HEAD)) - page = find_subpage(page, index); no_page: - if (!page && (fgp_flags & FGP_CREAT)) { + if (!folio && (fgp_flags & FGP_CREAT)) { int err; if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) - gfp_mask |= __GFP_WRITE; + gfp |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) - gfp_mask &= ~__GFP_FS; + gfp &= ~__GFP_FS; - page = __page_cache_alloc(gfp_mask); - if (!page) + folio = filemap_alloc_folio(gfp, 0); + if (!folio) return NULL; if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP)))) @@ -1888,27 +1884,27 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, /* Init accessed so avoid atomic mark_page_accessed later */ if (fgp_flags & FGP_ACCESSED) - __SetPageReferenced(page); + __folio_set_referenced(folio); - err = add_to_page_cache_lru(page, mapping, index, gfp_mask); + err = filemap_add_folio(mapping, folio, index, gfp); if (unlikely(err)) { - put_page(page); - page = NULL; + folio_put(folio); + folio = NULL; if (err == -EEXIST) goto repeat; } /* - * add_to_page_cache_lru locks the page, and for mmap we expect - * an unlocked page. + * filemap_add_folio locks the page, and for mmap + * we expect an unlocked page. 
*/ - if (page && (fgp_flags & FGP_FOR_MMAP)) - unlock_page(page); + if (folio && (fgp_flags & FGP_FOR_MMAP)) + folio_unlock(folio); } - return page; + return folio; } -EXPORT_SYMBOL(pagecache_get_page); +EXPORT_SYMBOL(__filemap_get_folio); static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max, xa_mark_t mark) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 6b19bc4ed6b05..e833e680e944a 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -115,3 +115,15 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, return filemap_add_folio(mapping, page_folio(page), index, gfp); } EXPORT_SYMBOL(add_to_page_cache_lru); + +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp) +{ + struct folio *folio; + + folio = __filemap_get_folio(mapping, index, fgp_flags, gfp); + if ((fgp_flags & FGP_HEAD) || !folio || xa_is_value(folio)) + return &folio->page; + return folio_file_page(folio, index); +} +EXPORT_SYMBOL(pagecache_get_page); From a4757d06df0838a9bf9ed54cfa2522a610499d1c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 24 Dec 2020 12:55:56 -0500 Subject: [PATCH 654/851] mm/filemap: Add FGP_STABLE Allow filemap_get_folio() to wait for writeback to complete (if the filesystem wants that behaviour). This is the folio equivalent of grab_cache_page_write_begin(), which is moved into the folio-compat file as a reminder to migrate all the code using it. This paves the way for getting rid of AOP_FLAG_NOFS once grab_cache_page_write_begin() is removed. Kernel grows by 11 bytes. filemap_get_folio() grows by 33 bytes but grab_cache_page_write_begin() shrinks by 22 bytes to make up for it. 
Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/pagemap.h | 1 + mm/filemap.c | 25 +++---------------------- mm/folio-compat.c | 13 +++++++++++++ 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b24933eced181..83c1a798265f8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -301,6 +301,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_FOR_MMAP 0x00000040 #define FGP_HEAD 0x00000080 #define FGP_ENTRY 0x00000100 +#define FGP_STABLE 0x00000200 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp); diff --git a/mm/filemap.c b/mm/filemap.c index b0a241036267d..29a1e53683125 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1817,6 +1817,7 @@ static void *mapping_get_entry(struct address_space *mapping, pgoff_t index) * * %FGP_WRITE - The page will be written to by the caller. * * %FGP_NOFS - __GFP_FS will get cleared in gfp. * * %FGP_NOWAIT - Don't get blocked by page lock. + * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. @@ -1867,6 +1868,8 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, folio_clear_idle(folio); } + if (fgp_flags & FGP_STABLE) + folio_wait_stable(folio); no_page: if (!folio && (fgp_flags & FGP_CREAT)) { int err; @@ -3590,28 +3593,6 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) } EXPORT_SYMBOL(generic_file_direct_write); -/* - * Find or create a page at the given pagecache position. Return the locked - * page. This function is specifically for buffered writes. 
- */ -struct page *grab_cache_page_write_begin(struct address_space *mapping, - pgoff_t index, unsigned flags) -{ - struct page *page; - int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT; - - if (flags & AOP_FLAG_NOFS) - fgp_flags |= FGP_NOFS; - - page = pagecache_get_page(mapping, index, fgp_flags, - mapping_gfp_mask(mapping)); - if (page) - wait_for_stable_page(page); - - return page; -} -EXPORT_SYMBOL(grab_cache_page_write_begin); - ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { diff --git a/mm/folio-compat.c b/mm/folio-compat.c index e833e680e944a..5b6ae1da314ee 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -116,6 +116,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, } EXPORT_SYMBOL(add_to_page_cache_lru); +noinline struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp) { @@ -127,3 +128,15 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, return folio_file_page(folio, index); } EXPORT_SYMBOL(pagecache_get_page); + +struct page *grab_cache_page_write_begin(struct address_space *mapping, + pgoff_t index, unsigned flags) +{ + unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; + + if (flags & AOP_FLAG_NOFS) + fgp_flags |= FGP_NOFS; + return pagecache_get_page(mapping, index, fgp_flags, + mapping_gfp_mask(mapping)); +} +EXPORT_SYMBOL(grab_cache_page_write_begin); From 5499f2b80b562c07b5aee157dee51d083083ab97 Mon Sep 17 00:00:00 2001 From: Andreas Rammhold Date: Fri, 16 Jul 2021 22:00:34 +0200 Subject: [PATCH 655/851] tools: cpupower: fix typo in cpupower-idle-set(1) manpage The tools name was wrong in the SYNTAX section of the manpage it should read "idle-set" instead of "idle-info". 
Signed-off-by: Andreas Rammhold Signed-off-by: Shuah Khan --- tools/power/cpupower/man/cpupower-idle-set.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 21916cff7516a..8cef3c71e19e7 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -4,7 +4,7 @@ cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options .SH "SYNTAX" .LP -cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP] +cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP] .SH "DESCRIPTION" .LP The cpupower idle\-set subcommand allows to set cpu idle, also called cpu From 9f9d11cfad914a01c2b26815ac78cffeef09f1cc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 22 Jul 2021 11:59:59 +0200 Subject: [PATCH 656/851] dt-bindings: firmware: update arm,scpi.yaml reference Changeset 1496be719468 ("dt-bindings: firmware: arm,scpi: Convert to json schema") renamed: Documentation/devicetree/bindings/arm/arm,scpi.txt to: Documentation/devicetree/bindings/firmware/arm,scpi.yaml. Update its cross-reference accordingly. Fixes: 1496be719468 ("dt-bindings: firmware: arm,scpi: Convert to json schema") Signed-off-by: Mauro Carvalho Chehab Acked-by: Sudeep Holla Link: https://lore.kernel.org/r/b5a2b0cb83e7f8193b4be4cef9250dd4c42877ab.1626947923.git.mchehab+huawei@kernel.org Signed-off-by: Guenter Roeck --- Documentation/hwmon/scpi-hwmon.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/scpi-hwmon.rst b/Documentation/hwmon/scpi-hwmon.rst index eee7022b44db6..1e3f83ec06580 100644 --- a/Documentation/hwmon/scpi-hwmon.rst +++ b/Documentation/hwmon/scpi-hwmon.rst @@ -32,5 +32,5 @@ Usage Notes The driver relies on device tree node to indicate the presence of SCPI support in the kernel. 
See -Documentation/devicetree/bindings/arm/arm,scpi.txt for details of the +Documentation/devicetree/bindings/firmware/arm,scpi.yaml for details of the devicetree node. From e2c744af9cd3f1949a01a11ca97bd76b455ce3a9 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 21 Jul 2021 21:25:19 +0200 Subject: [PATCH 657/851] hwmon: (w83627ehf) Switch to SIMPLE_DEV_PM_OPS Use SIMPLE_DEV_PM_OPS() to also assign poweroff and thaw callbacks. Remove the now obsolete checking of CONFIG_PM too. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20210721192519.28784-1-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/w83627ehf.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index 19af845743241..3cea66c58c257 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -372,12 +372,10 @@ struct w83627ehf_data { u8 temp3_val_only:1; u8 have_vid:1; -#ifdef CONFIG_PM /* Remember extra register values over suspend/resume */ u8 vbat; u8 fandiv1; u8 fandiv2; -#endif }; struct w83627ehf_sio_data { @@ -1946,8 +1944,7 @@ static int __init w83627ehf_probe(struct platform_device *pdev) return PTR_ERR_OR_ZERO(hwmon_dev); } -#ifdef CONFIG_PM -static int w83627ehf_suspend(struct device *dev) +static int __maybe_unused w83627ehf_suspend(struct device *dev) { struct w83627ehf_data *data = w83627ehf_update_device(dev); @@ -1958,7 +1955,7 @@ static int w83627ehf_suspend(struct device *dev) return 0; } -static int w83627ehf_resume(struct device *dev) +static int __maybe_unused w83627ehf_resume(struct device *dev) { struct w83627ehf_data *data = dev_get_drvdata(dev); int i; @@ -2013,22 +2010,12 @@ static int w83627ehf_resume(struct device *dev) return 0; } -static const struct dev_pm_ops w83627ehf_dev_pm_ops = { - .suspend = w83627ehf_suspend, - .resume = w83627ehf_resume, - .freeze = w83627ehf_suspend, - .restore = w83627ehf_resume, -}; - -#define W83627EHF_DEV_PM_OPS 
(&w83627ehf_dev_pm_ops) -#else -#define W83627EHF_DEV_PM_OPS NULL -#endif /* CONFIG_PM */ +static SIMPLE_DEV_PM_OPS(w83627ehf_dev_pm_ops, w83627ehf_suspend, w83627ehf_resume); static struct platform_driver w83627ehf_driver = { .driver = { .name = DRVNAME, - .pm = W83627EHF_DEV_PM_OPS, + .pm = &w83627ehf_dev_pm_ops, }, }; From ceddc02b76131d69101967515cf50391fb22f931 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Jul 2021 09:39:11 -0500 Subject: [PATCH 658/851] f2fs: make f2fs_write_failed() take struct inode Make f2fs_write_failed() take a 'struct inode' directly rather than a 'struct address_space', as this simplifies it slightly. Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ba120d55e9b1c..1803c68fa2697 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3178,9 +3178,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, FS_CP_DATA_IO : FS_DATA_IO); } -static void f2fs_write_failed(struct address_space *mapping, loff_t to) +static void f2fs_write_failed(struct inode *inode, loff_t to) { - struct inode *inode = mapping->host; loff_t i_size = i_size_read(inode); if (IS_NOQUOTA(inode)) @@ -3412,7 +3411,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, fail: f2fs_put_page(page, 1); - f2fs_write_failed(mapping, pos + len); + f2fs_write_failed(inode, pos + len); if (drop_atomic) f2fs_drop_inmem_pages_all(sbi, false); return err; @@ -3602,7 +3601,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, count - iov_iter_count(iter)); } else if (err < 0) { - f2fs_write_failed(mapping, offset + count); + f2fs_write_failed(inode, offset + count); } } else { if (err > 0) From 23aa64d999dc0eb724271e64541654ce45dda182 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Jul 2021 09:39:12 -0500 Subject: 
[PATCH 659/851] f2fs: remove allow_outplace_dio() We can just check f2fs_lfs_mode() directly. The block_unaligned_IO() check is redundant because in LFS mode, f2fs doesn't do direct I/O writes that aren't block-aligned (due to f2fs_force_buffered_io() returning true in this case, triggering the fallback to buffered I/O). Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 10 ---------- fs/f2fs/file.c | 2 +- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1803c68fa2697..28ad1f533c2a3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3553,7 +3553,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (f2fs_force_buffered_io(inode, iocb, iter)) return 0; - do_opu = allow_outplace_dio(inode, iocb, iter); + do_opu = rw == WRITE && f2fs_lfs_mode(sbi); trace_f2fs_direct_IO_enter(inode, offset, count, rw); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 867f2c5d95595..8459b6d5a2f8f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4311,16 +4311,6 @@ static inline int block_unaligned_IO(struct inode *inode, return align & blocksize_mask; } -static inline int allow_outplace_dio(struct inode *inode, - struct kiocb *iocb, struct iov_iter *iter) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int rw = iov_iter_rw(iter); - - return (f2fs_lfs_mode(sbi) && (rw == WRITE) && - !block_unaligned_IO(inode, iocb, iter)); -} - static inline bool f2fs_force_buffered_io(struct inode *inode, struct kiocb *iocb, struct iov_iter *iter) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6afd4562335fc..b1cb5b50faac2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4292,7 +4292,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) * back to buffered IO. 
*/ if (!f2fs_force_buffered_io(inode, iocb, from) && - allow_outplace_dio(inode, iocb, from)) + f2fs_lfs_mode(F2FS_I_SB(inode))) goto write; } preallocated = true; From 54dbc19d84f1e8fc63cddc1fe6d9b40dcf6af276 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Jul 2021 09:39:13 -0500 Subject: [PATCH 660/851] f2fs: rework write preallocations f2fs_write_begin() assumes that all blocks were preallocated by default unless FI_NO_PREALLOC is explicitly set. This invites data corruption, as there are cases in which not all blocks are preallocated. Commit 47501f87c61a ("f2fs: preallocate DIO blocks when forcing buffered_io") fixed one case, but there are others remaining. Fix up this logic by replacing this flag with FI_PREALLOCATED_ALL, which only gets set if all blocks for the current write were preallocated. Also clean up f2fs_preallocate_blocks(), move it to file.c, and make it handle some of the logic that was previously in write_iter() directly. Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 55 ++-------------------- fs/f2fs/f2fs.h | 3 +- fs/f2fs/file.c | 123 ++++++++++++++++++++++++++++++++----------------- 3 files changed, 84 insertions(+), 97 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 28ad1f533c2a3..1967b59a031ff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1370,53 +1370,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) return 0; } -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) -{ - struct inode *inode = file_inode(iocb->ki_filp); - struct f2fs_map_blocks map; - int flag; - int err = 0; - bool direct_io = iocb->ki_flags & IOCB_DIRECT; - - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); - map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); - if (map.m_len > map.m_lblk) - map.m_len -= map.m_lblk; - else - map.m_len = 0; - - map.m_next_pgofs = NULL; - map.m_next_extent = NULL; - map.m_seg_type = NO_CHECK_TYPE; - map.m_may_create = true; - 
- if (direct_io) { - map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); - flag = f2fs_force_buffered_io(inode, iocb, from) ? - F2FS_GET_BLOCK_PRE_AIO : - F2FS_GET_BLOCK_PRE_DIO; - goto map_blocks; - } - if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } - if (f2fs_has_inline_data(inode)) - return err; - - flag = F2FS_GET_BLOCK_PRE_AIO; - -map_blocks: - err = f2fs_map_blocks(inode, &map, 1, flag); - if (map.m_len > 0 && err == -ENOSPC) { - if (!direct_io) - set_inode_flag(inode, FI_NO_PREALLOC); - err = 0; - } - return err; -} - void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { @@ -3212,12 +3165,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, int flag; /* - * we already allocated all the blocks, so we don't need to get - * the block addresses when there is no need to fill the page. + * If a whole page is being written and we already preallocated all the + * blocks, then there is no need to get a block address now. 
*/ - if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && - !is_inode_flag_set(inode, FI_NO_PREALLOC) && - !f2fs_verity_in_progress(inode)) + if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL)) return 0; /* f2fs_lock_op avoids race between write CP and convert_inline_page */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8459b6d5a2f8f..506382fa3c776 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -699,7 +699,7 @@ enum { FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_DO_DEFRAG, /* indicate defragment is running */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */ FI_HOT_DATA, /* indicate file is hot */ FI_EXTRA_ATTR, /* indicate file has extra attribute */ FI_PROJ_INHERIT, /* indicate file inherits projectid */ @@ -3609,7 +3609,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b1cb5b50faac2..9b12004e78c68 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4218,10 +4218,72 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) return ret; } +/* + * Preallocate blocks for a write request, if it is possible and helpful to do + * so. Returns a positive number if blocks may have been preallocated, 0 if no + * blocks were preallocated, or a negative errno value if something went + * seriously wrong. 
Also sets FI_PREALLOCATED_ALL on the inode if *all* the + * requested blocks (not just some of them) have been allocated. + */ +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + const loff_t pos = iocb->ki_pos; + const size_t count = iov_iter_count(iter); + struct f2fs_map_blocks map = {}; + bool dio = (iocb->ki_flags & IOCB_DIRECT) && + !f2fs_force_buffered_io(inode, iocb, iter); + int flag; + int ret; + + /* If it will be an in-place direct write, don't bother. */ + if (dio && !f2fs_lfs_mode(sbi)) + return 0; + + /* No-wait I/O can't allocate blocks. */ + if (iocb->ki_flags & IOCB_NOWAIT) + return 0; + + /* If it will be a short write, don't bother. */ + if (iov_iter_fault_in_readable(iter, count) != 0) + return 0; + + if (f2fs_has_inline_data(inode)) { + /* If the data will fit inline, don't bother. */ + if (pos + count <= MAX_INLINE_DATA(inode)) + return 0; + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } + + map.m_lblk = (pos >> inode->i_blkbits); + map.m_len = ((pos + count - 1) >> inode->i_blkbits) - map.m_lblk + 1; + map.m_may_create = true; + if (dio) { + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + flag = F2FS_GET_BLOCK_PRE_DIO; + } else { + map.m_seg_type = NO_CHECK_TYPE; + flag = F2FS_GET_BLOCK_PRE_AIO; + } + + ret = f2fs_map_blocks(inode, &map, 1, flag); + /* -ENOSPC is only a fatal error if no blocks could be allocated. 
*/ + if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0)) + return ret; + if (ret == 0) + set_inode_flag(inode, FI_PREALLOCATED_ALL); + return map.m_len; +} + static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + loff_t target_size; + int preallocated; ssize_t ret; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { @@ -4245,84 +4307,59 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (unlikely(IS_IMMUTABLE(inode))) { ret = -EPERM; - goto unlock; + goto out_unlock; } if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EPERM; - goto unlock; + goto out_unlock; } ret = generic_write_checks(iocb, from); if (ret > 0) { - bool preallocated = false; - size_t target_size = 0; - int err; - - if (iov_iter_fault_in_readable(from, iov_iter_count(from))) - set_inode_flag(inode, FI_NO_PREALLOC); - - if ((iocb->ki_flags & IOCB_NOWAIT)) { + if (iocb->ki_flags & IOCB_NOWAIT) { if (!f2fs_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)) || f2fs_has_inline_data(inode) || f2fs_force_buffered_io(inode, iocb, from)) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); ret = -EAGAIN; - goto out; + goto out_unlock; } - goto write; } - - if (is_inode_flag_set(inode, FI_NO_PREALLOC)) - goto write; - if (iocb->ki_flags & IOCB_DIRECT) { /* * Convert inline data for Direct I/O before entering * f2fs_direct_IO(). */ - err = f2fs_convert_inline_inode(inode); - if (err) - goto out_err; - /* - * If force_buffere_io() is true, we have to allocate - * blocks all the time, since f2fs_direct_IO will fall - * back to buffered IO. 
- */ - if (!f2fs_force_buffered_io(inode, iocb, from) && - f2fs_lfs_mode(F2FS_I_SB(inode))) - goto write; + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto out_unlock; } - preallocated = true; - target_size = iocb->ki_pos + iov_iter_count(from); - err = f2fs_preallocate_blocks(iocb, from); - if (err) { -out_err: - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - ret = err; - goto out; + /* Possibly preallocate the blocks for the write. */ + target_size = iocb->ki_pos + iov_iter_count(from); + preallocated = f2fs_preallocate_blocks(iocb, from); + if (preallocated < 0) { + ret = preallocated; + goto out_unlock; } -write: + ret = __generic_file_write_iter(iocb, from); - clear_inode_flag(inode, FI_NO_PREALLOC); - /* if we couldn't write data, we should deallocate blocks. */ - if (preallocated && i_size_read(inode) < target_size) { + /* Don't leave any preallocated blocks around past i_size. */ + if (preallocated > 0 && inode->i_size < target_size) { down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); f2fs_truncate(inode); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } + clear_inode_flag(inode, FI_PREALLOCATED_ALL); if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } -unlock: +out_unlock: inode_unlock(inode); out: trace_f2fs_file_write_iter(inode, iocb->ki_pos, From 5f2632fa147124bb033eca0b58e9d5df65db6936 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Jul 2021 09:39:14 -0500 Subject: [PATCH 661/851] f2fs: reduce indentation in f2fs_file_write_iter() Replace 'if (ret > 0)' with 'if (ret <= 0) goto out_unlock;'. No change in behavior. 
Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 73 +++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9b12004e78c68..878b2460f79b7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4316,49 +4316,50 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } ret = generic_write_checks(iocb, from); - if (ret > 0) { - if (iocb->ki_flags & IOCB_NOWAIT) { - if (!f2fs_overwrite_io(inode, iocb->ki_pos, - iov_iter_count(from)) || - f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, iocb, from)) { - ret = -EAGAIN; - goto out_unlock; - } - } - if (iocb->ki_flags & IOCB_DIRECT) { - /* - * Convert inline data for Direct I/O before entering - * f2fs_direct_IO(). - */ - ret = f2fs_convert_inline_inode(inode); - if (ret) - goto out_unlock; - } + if (ret <= 0) + goto out_unlock; - /* Possibly preallocate the blocks for the write. */ - target_size = iocb->ki_pos + iov_iter_count(from); - preallocated = f2fs_preallocate_blocks(iocb, from); - if (preallocated < 0) { - ret = preallocated; + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, + iov_iter_count(from)) || + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, iocb, from)) { + ret = -EAGAIN; goto out_unlock; } + } + if (iocb->ki_flags & IOCB_DIRECT) { + /* + * Convert inline data for Direct I/O before entering + * f2fs_direct_IO(). + */ + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto out_unlock; + } - ret = __generic_file_write_iter(iocb, from); + /* Possibly preallocate the blocks for the write. */ + target_size = iocb->ki_pos + iov_iter_count(from); + preallocated = f2fs_preallocate_blocks(iocb, from); + if (preallocated < 0) { + ret = preallocated; + goto out_unlock; + } - /* Don't leave any preallocated blocks around past i_size. 
*/ - if (preallocated > 0 && inode->i_size < target_size) { - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); - f2fs_truncate(inode); - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - } - clear_inode_flag(inode, FI_PREALLOCATED_ALL); + ret = __generic_file_write_iter(iocb, from); - if (ret > 0) - f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); + /* Don't leave any preallocated blocks around past i_size. */ + if (preallocated > 0 && inode->i_size < target_size) { + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_truncate(inode); + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } + clear_inode_flag(inode, FI_PREALLOCATED_ALL); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); out_unlock: inode_unlock(inode); out: From 00359b2a8bbbcde6b2e0ad23320e2ec57557acbd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 16 Jul 2021 09:39:15 -0500 Subject: [PATCH 662/851] f2fs: fix the f2fs_file_write_iter tracepoint Pass in the original position and count rather than the position and count that were updated by the write. Also use the correct types for all arguments, in particular the file offset which was being truncated to 32 bits on 32-bit platforms. 
Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 +++-- include/trace/events/f2fs.h | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 878b2460f79b7..279252c7f7bc9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4282,6 +4282,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + const loff_t orig_pos = iocb->ki_pos; + const size_t orig_count = iov_iter_count(from); loff_t target_size; int preallocated; ssize_t ret; @@ -4363,8 +4365,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) out_unlock: inode_unlock(inode); out: - trace_f2fs_file_write_iter(inode, iocb->ki_pos, - iov_iter_count(from), ret); + trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 56b113e3cd6aa..bffb38622e9b6 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -540,17 +540,17 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, TRACE_EVENT(f2fs_file_write_iter, - TP_PROTO(struct inode *inode, unsigned long offset, - unsigned long length, int ret), + TP_PROTO(struct inode *inode, loff_t offset, size_t length, + ssize_t ret), TP_ARGS(inode, offset, length, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(unsigned long, offset) - __field(unsigned long, length) - __field(int, ret) + __field(loff_t, offset) + __field(size_t, length) + __field(ssize_t, ret) ), TP_fast_assign( @@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_file_write_iter, ), TP_printk("dev = (%d,%d), ino = %lu, " - "offset = %lu, length = %lu, written(err) = %d", + "offset = %lld, length = %zu, written(err) = %zd", show_dev_ino(__entry), __entry->offset, __entry->length, From fbc515e1c049ab40616d4fbc8363a885071a0324 Mon 
Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 22 Jul 2021 10:30:58 -0700 Subject: [PATCH 663/851] f2fs: don't sleep while grabing nat_tree_lock This tries to fix priority inversion in the below condition resulting in long checkpoint delay. f2fs_get_node_info() - nat_tree_lock -> sleep to grab journal_rwsem by contention checkpoint - waiting for nat_tree_lock In order to let checkpoint go, let's release nat_tree_lock, if there's a journal_rwsem contention. Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0be9e2d7120e3..c60ba4179bb25 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -552,7 +552,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, int i; ni->nid = nid; - +retry: /* Check nat cache */ down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); @@ -564,10 +564,16 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, return 0; } - memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + /* + * Check current segment summary by trying to grab journal_rwsem first. + * This sem is on the critical path on the checkpoint requiring the above + * nat_tree_lock. Therefore, we should retry, if we failed to grab here. 
+ */ + if (!down_read_trylock(&curseg->journal_rwsem)) { + up_read(&nm_i->nat_tree_lock); + goto retry; + } - /* Check current segment summary */ - down_read(&curseg->journal_rwsem); i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0); if (i >= 0) { ne = nat_in_journal(journal, i); From 3f3bb788dc0fe01344a91b6d5ed734066405ee29 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 23 Jul 2021 14:58:25 +1000 Subject: [PATCH 664/851] fix for "drm: Introduce the DP AUX bus" interaction with "bus: Make remove callback return void" Signed-off-by: Stephen Rothwell --- drivers/gpu/drm/drm_dp_aux_bus.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_aux_bus.c b/drivers/gpu/drm/drm_dp_aux_bus.c index e49a70f3691b2..298ea7a495913 100644 --- a/drivers/gpu/drm/drm_dp_aux_bus.c +++ b/drivers/gpu/drm/drm_dp_aux_bus.c @@ -67,9 +67,8 @@ static int dp_aux_ep_probe(struct device *dev) * * Calls through to the endpoint driver remove. * - * Return: 0 if no error or negative error code. 
*/ -static int dp_aux_ep_remove(struct device *dev) +static void dp_aux_ep_remove(struct device *dev) { struct dp_aux_ep_driver *aux_ep_drv = to_dp_aux_ep_drv(dev->driver); struct dp_aux_ep_device *aux_ep = to_dp_aux_ep_dev(dev); @@ -77,8 +76,6 @@ static int dp_aux_ep_remove(struct device *dev) if (aux_ep_drv->remove) aux_ep_drv->remove(aux_ep); dev_pm_domain_detach(dev, true); - - return 0; } /** From 90d856e71443a2fcacca8e7539bac44d9cb3f7ab Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 23 Jul 2021 18:04:35 +1000 Subject: [PATCH 665/851] Add linux-next specific files for 20210723 Signed-off-by: Stephen Rothwell --- Next/SHA1s | 334 +++ Next/Trees | 336 +++ Next/merge.log | 4991 +++++++++++++++++++++++++++++++++++++++++++++ localversion-next | 1 + 4 files changed, 5662 insertions(+) create mode 100644 Next/SHA1s create mode 100644 Next/Trees create mode 100644 Next/merge.log create mode 100644 localversion-next diff --git a/Next/SHA1s b/Next/SHA1s new file mode 100644 index 0000000000000..f91a74244258f --- /dev/null +++ b/Next/SHA1s @@ -0,0 +1,334 @@ +Name SHA1 +---- ---- +origin 9bead1b58c4c4a85319d0fc8a5c83e7374977d28 +fixes 614124bea77e452aa6df7a8714e8bc820b489922 +kbuild-current 5e60f363b38fd40e4d8838b5d6f4d4ecee92c777 +arc-current 3eada7b4bd85d0f73b34108db74d212fc8ad1e8c +arm-current dad7b9896a5dbac5da8275d5a6147c65c81fb5f2 +arm64-fixes d8a719059b9dc963aa190598778ac804ff3e6a87 +arm-soc-fixes 82a1c67554dff610d6be4e1982c425717b3c6a23 +drivers-memory-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +m68k-current 21ed49265986931b8921a2404394426870245bd2 +powerpc-fixes bc4188a2f56e821ea057aca6bf444e138d06c252 +s390-fixes 0cde560a8bfc3cb790715f39d4535129cca9e6ae +sparc 05a59d79793d482f628a31753c671f2e92178a21 +fscrypt-current d19d8d345eecd9247cbe6cbf27aef271bd88aba7 +net 9f42f674a89200d4f465a7db6070e079f3c6145f +bpf d6371c76e20d7d3f61b05fd67b596af4d14a8886 +ipsec 7cb745800df9d352db83f163778fdfc301457625 +netfilter cfbe3650dd3ef2ea9a4420ca89d9a4df98af3fb6 
+ipvs cfbe3650dd3ef2ea9a4420ca89d9a4df98af3fb6 +wireless-drivers 09cfae9f13d51700b0fecf591dcd658fc5375428 +mac80211 f4b29d2ee903f15b5e3f9bbb485079b2a7fe6616 +rdma-fixes dc6afef7e14252c5ca5b8a8444946cb4b75b0aa0 +sound-current b0084afde27fe8a504377dee65f55bc6aa776937 +sound-asoc-fixes a7b7e43d4747177a8c7508391b64b8a9f1829362 +regmap-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +regulator-fixes 0ffa19b4209906366d693c52c48fdd271d6c504e +spi-fixes c6edba9759132f99b6b306daed2e4012f561ebcf +pci-current e73f0f0ee7541171d89f2e2491130c7771ba58d3 +driver-core.current e64daad660a0c9ace3acdc57099fffe5ed83f977 +tty.current 7f0909db761535aefafa77031062603a71557267 +usb.current 1d1b97d5e7636d72890a5bdd0b40e980e54b2d34 +usb-gadget-fixes e49d033bddf5b565044e2abe4241353959bc9120 +usb-serial-fixes d6a206e60124a9759dd7f6dfb86b0e1d3b1df82e +usb-chipidea-fixes 42c4417937beaf68a9ab07e95cf8634c7080174c +phy e73f0f0ee7541171d89f2e2491130c7771ba58d3 +staging.current cb7abd1db6e5f99a05f1a00b65be29029a6a152a +iio-fixes 7e77ef8b8d600cf8448a2bbd32f682c28884551f +char-misc.current c453db6cd96418c79702eaf38259002755ab23ff +soundwire-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +thunderbolt-fixes 13311e74253fe64329390df80bed3f07314ddd61 +input-current 818b26588994d9d95743fca0a427f08ec6c1c41d +crypto-current 66192b2e3fd8ab97ed518d6c0240e26655a20b4b +vfio-fixes dc51ff91cf2d1e9a2d941da483602f71d4a51472 +kselftest-fixes 2734d6c1b1a089fb593ef6a23d4b70903526fe0c +modules-fixes 055f23b74b20f2824ce33047b4cf2e2aa856bf3b +dmaengine-fixes 6b4b87f2c31ac1af4f244990a7cbfb50d3f3e33f +backlight-fixes a38fd8748464831584a19438cbb3082b5a2dab15 +mtd-fixes ba356289261975ff061580ff30d1026448b9ffd2 +mfd-fixes a61f4661fba404418a7c77e86586dc52a58a93c6 +v4l-dvb-fixes 61c6f04a988e420a1fc5e8e81cf9aebf142a7bd6 +reset-fixes 1435f82689e1d195e56ce8b19c81aa7b1a3bd0b0 +mips-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +at91-fixes 6efb943b8616ec53a5e444193dccf1af9ad627b5 +omap-fixes 3ff340e24c9dd5cff9fc07d67914c5adf67f80d6 
+kvm-fixes 7025098af33430b302d3c2d78ef12327c60ee8f9 +kvms390-fixes cd4220d23bf3f43cf720e82bdee681f383433ae2 +hwmon-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +nvdimm-fixes b05d4c576b697b9f462b9c532c997171d5c3b067 +cxl-fixes fae8817ae804a682c6823ad1672438f39fc46c28 +btrfs-fixes a34d068248b72080732dae356d26301b5b7c5f17 +vfs-fixes 173e84953eaaf17864a707efa2b8cd4c233b3129 +dma-mapping-fixes 18a3c5f7abfdf97f88536d35338ebbee119c355c +i3c-fixes fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8 +drivers-x86-fixes f7e506ec4a9966be8b2a87d3324302f0f5dd5a29 +samsung-krzk-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +pinctrl-samsung-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +devicetree-fixes 50d8d7e19c4398da74d028f367754e73547b078b +scsi-fixes a47fa41381a09e5997afd762664db4f5f6657e03 +drm-fixes 995a1460f26f159bf98908543f7311ece0019399 +amdgpu-fixes 2c409ba81be25516afe05ae27a4a15da01740b01 +drm-intel-fixes 6e0b6528d783b2b87bd9e1bea97cf4dac87540d7 +mmc-fixes 10252bae863d09b9648bed2e035572d207200ca1 +rtc-fixes bd33335aa93d615cac77d991c448b986761e7a8d +gnss-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +hyperv-fixes f5a11c69b69923a4367d24365ad4dff6d4f3fc42 +soc-fsl-fixes 2663b3388551230cbc4606a40fabf3331ceb59e4 +risc-v-fixes c79e89ecaa246c880292ba68cbe08c9c30db77e3 +pidfd-fixes 03ba0fe4d09f2eb0a91888caaa057ed67462ae2d +fpga-fixes 2e8496f31d0be8f43849b2980b069f3a9805d047 +spdx de5540965853e514a85d3b775e9049deb85a2ff3 +gpio-brgl-fixes ec7099fdea8025988710ee6fecfd4e4210c29ab5 +gpio-intel-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +pinctrl-intel-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +erofs-fixes 0852b6ca941ef3ff75076e85738877bd3271e1cd +integrity-fixes 8433856947217ebb5697a8ff9c4c9cad4639a2cf +kunit-fixes 2734d6c1b1a089fb593ef6a23d4b70903526fe0c +ubifs-fixes 78c7d49f55d8631b67c09f9bfbe8155211a9ea06 +memblock-fixes 024591f9a6e0164ec23301784d1e6d8f6cacbe59 +cel-fixes 9f4ad9e425a1d3b6a34617b8ea226d56a119a717 +irqchip-fixes 1fee9db9b42d821e8007289d4eea74bdf85b1543 
+renesas-fixes 432b52eea3dcf48083bafa4b2b6ef5b054ead609 +drm-misc-fixes 69de4421bb4c103ef42a32bafc596e23918c106f +kspp-gustavo 802dad70a535ee5c804fe96e449031a9e6f0589a +kbuild 27932b6a2088eac7a5afa5471963b926cfbb4de7 +compiler-attributes 7ed012969bbcdbd7aef5778a061681e6cbc4b402 +dma-mapping 40ac971eab89330d6153e7721e88acd2d98833f9 +asm-generic 14462376858e35b83932f94616effc2f49fd8494 +arc def9d2780727cec3313ed3522d0123158d87224d +arm 3bd1461d1691d5405c59c4415c54fcf674eaa5ac +arm64 3d1bf78c7b13a0ad88e77dce94df88400f47e0dd +arm-perf d96b1b8c9f79b6bb234a31c80972a6f422079376 +arm-soc 82a1c67554dff610d6be4e1982c425717b3c6a23 +actions 444d018d8d3874c9c3784a5df3ad2e5f554fbdb6 +amlogic b03485fe99f23d95bb43b5b77df7b83121905a06 +aspeed 00e9e776fa197592addc1f3002c63585f884a5dd +at91 b2a7f104a03540e24b11ee5718f9e9ef0d2dd137 +drivers-memory e73f0f0ee7541171d89f2e2491130c7771ba58d3 +imx-mxs 2332f8e991981f6f7d08ec0df5b06497472f8a22 +keystone 9d2e21ed98a24584daa3c90698ce81f5cc307a27 +mediatek a1c8c49de3d7859db3f5dea72a049b92d7ad2e20 +mvebu 72a0a49b7c77b77ca7a1d03c38138166136d1a88 +omap 91ee322340fb88eacf437367562a3b9a0428a362 +qcom 3f8b3dfa5522b8aa4c12c717a590bb52207cefe0 +raspberrypi 21c6bf8304f0141af6460cfe404dbbdeb96bdd62 +renesas 2a022240886d918d1f09a50b86a4065256caac42 +reset 28edf1d77cd5b5f35828dcc3ac2bfa912bf04a3f +rockchip 3d5667e52c374f9d1e8799501b3c141fd05d9581 +samsung-krzk bb2c20c976aa1018851f1c619a9049dff9331d64 +scmi 7929b794edc738b37d5c3fcfffdf43fd11d48371 +stm32 b462e2fc78f098ab732c7d1aca6ecab879a1d103 +sunxi 3f1c53207cf051d9aec85e9139bd35ada6184143 +tegra 51b89b49954fae783a7d9216899e7b9b901ee99b +ti-k3 1e3d655fe7b48c2341d63f981774742e21744f53 +ti-k3-new e73f0f0ee7541171d89f2e2491130c7771ba58d3 +xilinx d98fb9ab6255136813af65d17524cb224ddd9829 +clk 67ce7b523a9de7dd755bd0c9b1dfd90fbefd5a47 +clk-imx c586f53ae159c6c1390f093a1ec94baef2df9f3a +clk-renesas 1b87d5bba32c1f25a12ba0625546e5375e3f998d +clk-samsung a38fd8748464831584a19438cbb3082b5a2dab15 +csky 
90dc8c0e664efcb14e2f133309d84bfdcb0b3d24 +h8300 6e5e55227c959716103945e7140614c4c2faf439 +m68k a0b22464ce9352c4a3e100f10f976c85b0ae6690 +m68knommu 64151620227a2fcb13dae0b99b6a1003edb38c67 +microblaze 14a832498c23cf480243222189066a8006182b9d +mips d656132d2a2abc06917d822f7adcda86fd6dd192 +nds32 40e0dd851e7b7afe219820fb270b09016e41d4fc +nios2 7f7bc20bc41a4fbcd2db75b375ac95e5faf958ae +openrisc ad4e600cbf897f47525b342cd4b02e88ed300a83 +parisc-hd ca2b19a4bdb6200dc1d64619ea4b8de2a84a0439 +powerpc e73f0f0ee7541171d89f2e2491130c7771ba58d3 +soc-fsl 242b0b398ccd34f73772120bd27a87a1bf00e30b +risc-v 67979e927dd053bde3b71128495f651256b3161c +s390 2f5f912c5b60501646a06cfbcefa902f5f640f6c +sh 2882b7626f4903a8e9250b328cdf7396a6deecac +sparc-next dd0d718152e4c65b173070d48ea9dfc06894c3e5 +uml 1aee020155f364ef538370d3392969f1077b9bae +xtensa ac637a0ada717bdf798f9903d94e3ac65c357423 +pidfd 5ddf9602d7112188cb20aa5a33fd8440fb4567bd +fscrypt 2fc2b430f559fdf32d5d1dd5ceaa40e12fb77bdf +fscache 5193f26aef591d289b74fcf2055fef002bd25432 +afs 7af08140979a6e7e12b78c93b8625c8d25b084e2 +btrfs 808c6838dfeed706bda4757b1a2c2dd2cfc4d986 +ceph 0077a50082729c3f9ea2836f59e35d9b7dacfb12 +cifs 7b09d4e0be94968b7c6c117e34ca90cea9c6d986 +cifsd 9946aa630ae71d15dc304e92e55bc655603c942d +configfs 420405ecde061fde76d67bd3a67577a563ea758e +ecryptfs 682a8e2b41effcaf2e80697e395d47f77c91273f +erofs 8215d5b7f15f8643bf12fe005b2bc0cc322aff62 +exfat 50be9417e23af5a8ac860d998e1e3f06b8fd79d7 +ext3 f97d9dea9404720a899d5cc5a8c30d4b1a0219b3 +ext4 0705e8d1e2207ceeb83dc6e1751b6b82718b353a +f2fs fbc515e1c049ab40616d4fbc8363a885071a0324 +fsverity 07c99001312cbf90a357d4877a358f796eede65b +fuse c4e0cd4e0c16544ff0afecf07a5fe17de6077233 +gfs2 a6579cbfd7216b071008db13360c322a6b21400b +jfs 5d299f44d7658f4423e33a0b9915bc8d81687511 +nfs 2734d6c1b1a089fb593ef6a23d4b70903526fe0c +nfs-anna 4f8be1f53bf615102d103c0509ffa9596f65b718 +nfsd ab1016d39cc052064e32f25ad18ef8767a0ee3b8 +cel 9b5b7a72fdc4da142b7487eea4326a5bcebed181 +orangefs 
0fdec1b3c9fbb5e856a40db5993c9eaf91c74a83 +overlayfs cd94017fb9fa721bc71791024af2983698b88af3 +ubifs a801fcfeef96702fa3f9b22ad56c5eb1989d9221 +v9fs cba7dcd8878e04647c47c829846bbde91e67ca2f +xfs b102a46ce16fd5550aed882c3c5b95f50da7992c +zonefs 95b115332a835fb0cbd36dfabacf1c57d915e705 +iomap 229adf3c64dbeae4e2f45fb561907ada9fcc0d0c +djw-vfs 9b8523423b23ee3dfd88e32f5b7207be56a4e782 +file-locks e1cc6e8c1969a598119b7a5578a08f6d9d0d14c4 +vfs 303392fd5c160822bf778270b28ec5ea50cab2b4 +printk fbdc7b96a5aeca3978adb7944d1501e72366cea6 +pci e73f0f0ee7541171d89f2e2491130c7771ba58d3 +pstore c5d4fb2539cad2e62c5a3f0d8237613c394f297e +hid 1b97ec646386cea5b4be139f7685b4a4b4d3799a +i2c 4a8ac5e45cdaa88884b4ce05303e304cbabeb367 +i3c 3e5feb11a82945c88a1518fd2682ca8de66c37d3 +dmi 3cb4d29a2633170208c96240c7e85148679ceee3 +hwmon-staging e2c744af9cd3f1949a01a11ca97bd76b455ce3a9 +jc_docs 4b3abe1fde4799ed92c378e545271ca7a7828a41 +v4l-dvb e73f0f0ee7541171d89f2e2491130c7771ba58d3 +v4l-dvb-next c27479d762de4eda72ba9e0aa150d439970f2077 +pm d713e0c8339876fb5070b53cb1bec248c37fbf38 +cpufreq-arm e73f0f0ee7541171d89f2e2491130c7771ba58d3 +cpupower 5499f2b80b562c07b5aee157dee51d083083ab97 +devfreq e73f0f0ee7541171d89f2e2491130c7771ba58d3 +opp e73f0f0ee7541171d89f2e2491130c7771ba58d3 +thermal fe6a6de6692e7f7159c1ff42b07ecd737df712b4 +ieee1394 54b3bd99f094b3b919de4078f60d722e62a767e3 +dlm 62699b3f0a62435fceb8debf295e90a5ea259e04 +swiotlb 868c9ddc182bc6728bb380cbfb3170734f72c599 +rdma 923232bbea88a29f18a2361790582a6474a538fc +net-next 4431531c482a2c05126caaa9fcc5053a4a5c495b +bpf-next 9907442fcddbdacf55fa22e31f2306ae0d6172d6 +ipsec-next 2d151d39073aff498358543801fca0f670fea981 +mlx5-next 96cd2dd65bb0b94c908f2df32bba7350fc1b954e +netfilter-next 84fe73996c2e7407006002ef92d7354a56b69fed +ipvs-next 84fe73996c2e7407006002ef92d7354a56b69fed +wireless-drivers-next 0d6835ffe50c9c1f098b5704394331710b67af48 +bluetooth 64832df2ac056f111b51aaebbe54a996e0fce7f1 +mac80211-next 0d6835ffe50c9c1f098b5704394331710b67af48 
+mtd ff44b90b325dcd585cdba6ded6c9c52ea8ddead0 +nand c5b9ee9c361f52cd319135b9ec7fe684d5e2e026 +spi-nor 2734d6c1b1a089fb593ef6a23d4b70903526fe0c +crypto f03a3cab26c1b7f628a3be6d33ae1b483829b630 +drm 588b3eee528873d73bf777f329d35b2e65e24777 +drm-misc 85fd4a8a84316166640102676a356755ddec80e0 +amdgpu d91a713ed367868ad8ddb50f7927545073bf38b9 +drm-intel e73db72732dcb1bf3d8b1428f16616bbc263e509 +drm-tegra 8874e3a7baec1ee0dc5c20b8e0bf1cade3512b24 +drm-msm e88bbc91849b2bf57683119c339e52916d34433f +imx-drm fc1e985b67f9f318bc5797c9370715f5d020dac3 +etnaviv 81fd23e2b3ccf71c807e671444e8accaba98ca53 +regmap d63aa09f7c53bdeb83edb4d84c07d759a92223bb +sound f976e8a941763d37cddb365a755b27bbcf5de72e +sound-asoc 86db346793f98efe9fee9795f653b378949e60e5 +modules 4c5afb74d9450edc2e2e37243b469cc278b120d4 +input 7d3370e506ec5cd781ef6b938cf29c046eb77585 +block 60b8191c4777cd257d0e90bf571c9f2bf3dec5fd +device-mapper e820ba87f9d15399fa565ceba4a92b902c879d29 +pcmcia e9d503fef7da2cc0610ce9cd056d0347ec9cafc4 +mmc 49fc2be70e7f5c6b8fe542271277c794f080b1e1 +mfd 495fb48dbd9bcbe15859e086edd24519a6bd2961 +backlight 1181f2164135d770bdad297290b73d274787389b +battery 56d629af09b9d4db9792257165844287ecce0a98 +regulator 336e3a8679c43a5923a408dddd0c7b92901319b8 +security 047843bdb3160e8fb225f3752616ac7257033fe4 +apparmor d108370c644b153382632b3e5511ade575c91c86 +integrity 907a399de7b0566236c480d0c01ff52220532fb1 +keys e377c31f788fc98815e1ab90b5a35704ce35843a +safesetid 1b8b719229197b7afa1b1191e083fb41ace095c5 +selinux 893c47d1964f5c9b00c5468f2cd1a1e9351fbb6a +smack bfc3cac0c76126995737f1b858d2cdb476be5b1d +tomoyo 4fb9c588398fde1536b219a229e231a9f501c168 +tpmdd 0178f9d0f60ba07e09bab57381a3ef18e2c1fd7f +watchdog cf813c67d9619fd474c785698cbed543b94209dd +iommu 4a5c155a5ab372516a1a5ddd29473f8f696feb79 +audit d97e99386ad0dcae08cb0f0c70efa806a2d4811c +devicetree 1c14c1695e78f63a93a1347e15a6e363d7325b43 +mailbox 4f197188da668180d5ea7d808ae6221ce66cfe33 +spi 199ec5db2dc5598e45a1e1277812afceee9dd8d8 +tip 
de35038b40c12814cb736cb495d9a15c7a0b0979 +clockevents 6f64c8159af9a4c46aea60344d5ab66a9bb24bc0 +edac e1ca90b7cc5cb5d3a38321cbb65ad36a59fcb574 +irqchip c51e96dace68a67f1fcfa49d4ad1577875f50bf1 +ftrace 704adfb5a9978462cd861f170201ae2b5e3d3a80 +rcu fa31b17088f945653bbc8a1efba1e7311bb7266f +kvm 7025098af33430b302d3c2d78ef12327c60ee8f9 +kvm-arm 188982cda00ebfe28b50c2905d9bbaa2e9a001b9 +kvm-ppc 72476aaa469179222b92c380de60c76b4cb9a318 +kvms390 1f703d2cf20464338c3d5279dddfb65ac79b8782 +xen-tip 83f877a09516bcb82e34df621cc3a794509a11a3 +percpu a81a52b325ec886eb004ca28b943480dae0353c7 +workqueues b42b0bddcbc87b4c66f6497f66fc72d52b712aa7 +drivers-x86 f7e506ec4a9966be8b2a87d3324302f0f5dd5a29 +chrome-platform 6efb943b8616ec53a5e444193dccf1af9ad627b5 +hsi e73f0f0ee7541171d89f2e2491130c7771ba58d3 +leds e642197562cd9781453f835e1406cfe0feeb917e +ipmi 1a2055cd5091f814444a1128df96fb190a5c98be +driver-core e7deeb9d79d8691f1e6c4c6707471ec3d7b9886b +usb 8e6cb5d27e8246d9c986ec162d066a502d2b602b +usb-gadget e49d033bddf5b565044e2abe4241353959bc9120 +usb-serial 2734d6c1b1a089fb593ef6a23d4b70903526fe0c +usb-chipidea-next 956df1bb0ab8bb823541e66d186ed65559541b69 +tty e679004dec37566f658a255157d3aed9d762a2b7 +char-misc 03b1292d1c0ea195e025e667555d74db7da82026 +extcon e73f0f0ee7541171d89f2e2491130c7771ba58d3 +phy-next c1302e8ce517ed9c417aa4eb19776df4efbbebf4 +soundwire e73f0f0ee7541171d89f2e2491130c7771ba58d3 +thunderbolt b18f901382fdb74a138a0bf30458c54a023a1d86 +vfio 6a45ece4c9af473555f01f0f8b97eba56e3c7d0d +staging 334201d503d5903f38f6e804263fc291ce8f451a +iio 73380b9ad2478fd80fcb3056edebb61f37a89ec9 +mux 3516bd729358a2a9b090c1905bd2a3fa926e24c6 +icc 2092cdb412f0586b90591c55121f77050ba702f8 +dmaengine 0e96454ca26cc5c594ec792f7e5168cce726f7cf +cgroup 1e7107c5ef44431bc1ebbd4c353f1d7c22e5f2ec +scsi c18a4e657ce67416954fa4097782e951f1d6e0d2 +scsi-mkp e15f669cd996b85bb07b0e787fa78806477bf211 +vhost db7b337709a15d33cc5e901d2ee35d3bb3e42b2f +rpmsg 7486f29e5e6003c0672020be02011b0eab87a56d +gpio 
7ac554888233468a9fd7c4f28721396952dd9959 +gpio-brgl f3f1017a98f91355671feb0e741391999a43b55d +gpio-intel e73f0f0ee7541171d89f2e2491130c7771ba58d3 +pinctrl bfa50166cd9d5d190b20dc33d1ec7ae19ced7022 +pinctrl-intel e73f0f0ee7541171d89f2e2491130c7771ba58d3 +pinctrl-renesas e9d66bdbc5abecaf705bf5a2f4f6279b9e313b0c +pinctrl-samsung e73f0f0ee7541171d89f2e2491130c7771ba58d3 +pwm f4a8e31ed84ec646c158824f423cb22d1f362bbf +userns 5e6b8a50a7cec5686ee2c4bda1d49899c79a7eae +ktest 170f4869e66275f498ae4736106fb54c0fdcd036 +kselftest 2734d6c1b1a089fb593ef6a23d4b70903526fe0c +livepatching c150bbbb1731c5b6c0b232320c450dcd3b5c9fde +coresight 1efbcec2ef8c037f1e801c76e4b9434ee2400be7 +rtc 4aa90c036df670b8757140e0dae2a94e7b0d42b4 +nvdimm 30c10d32152dd09c2158e9cab8a6c7adaa09278e +at24 6efb943b8616ec53a5e444193dccf1af9ad627b5 +ntb 28293b6c68cd93178f021f51b1b504f3ec33f463 +seccomp 19d67694745c5c9ed085d0d8332fa02d835a07d0 +kspp 6160d948cc6f83d429813449a032e64dfadd1d39 +cisco 9e98c678c2d6ae3a17cb2de55d17f69dddaa231b +gnss 0f79ce970e79ffb771733f9634d5918d0eb3e30a +fsi 9ab1428dfe2c66b51e0b41337cd0164da0ab6080 +slimbus e5c578adcdd974f73380f0e5a5595354fcab0d60 +nvmem e73f0f0ee7541171d89f2e2491130c7771ba58d3 +xarray 2c7e57a02708a69d0194f9ef2a7b7e54f5a0484a +hyperv 63fb60c2fcc94d595a184fa187bdfb25e5ecd4a2 +auxdisplay 24ebc044c72ee6e88dc902a0041bac672f012537 +kgdb c8daba4640ac9619f9cb34ca7c314ff1eaff5f33 +hmm fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8 +fpga c485d3bf3cc7790faed2b90c799a38caa2f69268 +kunit e73f0f0ee7541171d89f2e2491130c7771ba58d3 +cfi 6efb943b8616ec53a5e444193dccf1af9ad627b5 +kunit-next 2734d6c1b1a089fb593ef6a23d4b70903526fe0c +trivial 9ff9b0d392ea08090cd1780fb196f36dbb586529 +mhi b8c95616d154830be38482557d8eef6100f3c3dd +memblock a4d5613c4dc6d413e0733e37db9d116a2a36b9f3 +init 38b082236e77d403fed23ac2d30d570598744ec3 +counters e71ba9452f0b5b2e8dc8aa5445198cd9214a6a62 +rust 5d3986cf8ed63ff8d86270e578649e71143112d6 +cxl 4ad6181e4b216ed0cb52f45d3c6d2c70c8ae9243 +folio 
a4757d06df0838a9bf9ed54cfa2522a610499d1c +akpm-current ace6e27b90197751ad3e6baadae3b8b065fd4e2d +akpm 1141df70117a3f1ab07b5da5f937fd153e2fb91a diff --git a/Next/Trees b/Next/Trees new file mode 100644 index 0000000000000..113246f897e88 --- /dev/null +++ b/Next/Trees @@ -0,0 +1,336 @@ +Trees included into this release: + +Name Type URL +---- ---- --- +origin git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git#master +fixes git git://git.kernel.org/pub/scm/linux/kernel/git/sfr/next-fixes.git#fixes +kbuild-current git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git#fixes +arc-current git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git#for-curr +arm-current git git://git.armlinux.org.uk/~rmk/linux-arm.git#fixes +arm64-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux#for-next/fixes +arm-soc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git#arm/fixes +drivers-memory-fixes git https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git#fixes +m68k-current git git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git#for-linus +powerpc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git#fixes +s390-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git#fixes +sparc git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git#master +fscrypt-current git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git#for-stable +net git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git#master +bpf git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git#master +ipsec git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git#master +netfilter git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git#master +ipvs git git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git#master +wireless-drivers git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git#master +mac80211 git 
git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git#master +rdma-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git#for-rc +sound-current git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git#for-linus +sound-asoc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git#for-linus +regmap-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git#for-linus +regulator-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git#for-linus +spi-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git#for-linus +pci-current git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git#for-linus +driver-core.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git#driver-core-linus +tty.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git#tty-linus +usb.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git#usb-linus +usb-gadget-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git#fixes +usb-serial-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git#usb-linus +usb-chipidea-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git#for-usb-fixes +phy git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git#fixes +staging.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git#staging-linus +iio-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git#fixes-togreg +char-misc.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git#char-misc-linus +soundwire-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git#fixes +thunderbolt-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git#fixes +input-current git git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git#for-linus +crypto-current git 
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git#master +vfio-fixes git git://github.com/awilliam/linux-vfio.git#for-linus +kselftest-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#fixes +modules-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git#modules-linus +dmaengine-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git#fixes +backlight-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git#for-backlight-fixes +mtd-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#mtd/fixes +mfd-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git#for-mfd-fixes +v4l-dvb-fixes git git://linuxtv.org/mchehab/media-next.git#fixes +reset-fixes git https://git.pengutronix.de/git/pza/linux#reset/fixes +mips-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git#mips-fixes +at91-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git#at91-fixes +omap-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git#fixes +kvm-fixes git git://git.kernel.org/pub/scm/virt/kvm/kvm.git#master +kvms390-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git#master +hwmon-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git#hwmon +nvdimm-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git#libnvdimm-fixes +cxl-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git#fixes +btrfs-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git#next-fixes +vfs-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git#fixes +dma-mapping-fixes git git://git.infradead.org/users/hch/dma-mapping.git#for-linus +i3c-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git#i3c/fixes +drivers-x86-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git#fixes 
+samsung-krzk-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git#fixes +pinctrl-samsung-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git#fixes +devicetree-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git#dt/linus +scsi-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git#fixes +drm-fixes git git://git.freedesktop.org/git/drm/drm.git#drm-fixes +amdgpu-fixes git git://people.freedesktop.org/~agd5f/linux#drm-fixes +drm-intel-fixes git git://anongit.freedesktop.org/drm-intel#for-linux-next-fixes +mmc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git#fixes +rtc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git#rtc-fixes +gnss-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/johan/gnss.git#gnss-linus +hyperv-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git#hyperv-fixes +soc-fsl-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/leo/linux.git#fix +risc-v-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git#fixes +pidfd-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git#fixes +fpga-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git#fixes +spdx git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx.git#spdx-linus +gpio-brgl-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git#gpio/for-current +gpio-intel-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git#fixes +pinctrl-intel-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git#fixes +erofs-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git#fixes +integrity-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity#fixes +kunit-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#kunit-fixes +ubifs-fixes git 
git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git#fixes +memblock-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git#fixes +cel-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux#for-rc +irqchip-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git#irq/irqchip-fixes +renesas-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git#fixes +drm-misc-fixes git git://anongit.freedesktop.org/drm/drm-misc#for-linux-next-fixes +kspp-gustavo git git://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git#for-next/kspp +kbuild git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git#for-next +compiler-attributes git https://github.com/ojeda/linux.git#compiler-attributes +dma-mapping git git://git.infradead.org/users/hch/dma-mapping.git#for-next +asm-generic git git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git#master +arc git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git#for-next +arm git git://git.armlinux.org.uk/~rmk/linux-arm.git#for-next +arm64 git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux#for-next/core +arm-perf git git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git#for-next/perf +arm-soc git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git#for-next +actions git git://git.kernel.org/pub/scm/linux/kernel/git/mani/linux-actions.git#for-next +amlogic git git://git.kernel.org/pub/scm/linux/kernel/git/amlogic/linux.git#for-next +aspeed git git://git.kernel.org/pub/scm/linux/kernel/git/joel/bmc.git#for-next +at91 git git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git#at91-next +drivers-memory git https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git#for-next +imx-mxs git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git#for-next +keystone git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git#next +mediatek git 
git://git.kernel.org/pub/scm/linux/kernel/git/matthias.bgg/linux.git#for-next +mvebu git git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu.git#for-next +omap git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git#for-next +qcom git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git#for-next +raspberrypi git git://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git#for-next +renesas git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git#next +reset git https://git.pengutronix.de/git/pza/linux#reset/next +rockchip git git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip.git#for-next +samsung-krzk git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git#for-next +scmi git git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux.git#for-linux-next +stm32 git git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git#stm32-next +sunxi git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git#sunxi/for-next +tegra git git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux.git#for-next +ti-k3 git git://git.kernel.org/pub/scm/linux/kernel/git/kristo/linux.git#ti-k3-next +ti-k3-new git git://git.kernel.org/pub/scm/linux/kernel/git/nmenon/linux.git#ti-k3-next +xilinx git git://github.com/Xilinx/linux-xlnx.git#for-next +clk git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git#clk-next +clk-imx git git://git.kernel.org/pub/scm/linux/kernel/git/abelvesa/linux.git#for-next +clk-renesas git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git#renesas-clk +clk-samsung git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git#for-next +csky git git://github.com/c-sky/csky-linux.git#linux-next +h8300 git git://git.sourceforge.jp/gitroot/uclinux-h8/linux.git#h8300-next +m68k git git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git#for-next +m68knommu git 
git://git.kernel.org/pub/scm/linux/kernel/git/gerg/m68knommu.git#for-next +microblaze git git://git.monstr.eu/linux-2.6-microblaze.git#next +mips git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git#mips-next +nds32 git git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux.git#next +nios2 git git://git.kernel.org/pub/scm/linux/kernel/git/lftan/nios2.git#for-next +openrisc git git://github.com/openrisc/linux.git#for-next +parisc-hd git git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git#for-next +powerpc git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git#next +soc-fsl git git://git.kernel.org/pub/scm/linux/kernel/git/leo/linux.git#next +risc-v git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git#for-next +s390 git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git#for-next +sh git git://git.libc.org/linux-sh#for-next +sparc-next git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next.git#master +uml git git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git#linux-next +xtensa git git://github.com/jcmvbkbc/linux-xtensa.git#xtensa-for-next +pidfd git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git#for-next +fscrypt git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git#master +fscache git git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git#fscache-next +afs git git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git#afs-next +btrfs git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git#for-next +ceph git git://github.com/ceph/ceph-client.git#master +cifs git git://git.samba.org/sfrench/cifs-2.6.git#for-next +cifsd git https://github.com/smfrench/smb3-kernel.git#cifsd-for-next +configfs git git://git.infradead.org/users/hch/configfs.git#for-next +ecryptfs git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git#next +erofs git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git#dev +exfat git 
git://git.kernel.org/pub/scm/linux/kernel/git/linkinjeon/exfat.git#dev +ext3 git git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git#for_next +ext4 git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git#dev +f2fs git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git#dev +fsverity git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git#fsverity +fuse git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git#for-next +gfs2 git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git#for-next +jfs git git://github.com/kleikamp/linux-shaggy.git#jfs-next +nfs git git://git.linux-nfs.org/projects/trondmy/nfs-2.6.git#linux-next +nfs-anna git git://git.linux-nfs.org/projects/anna/linux-nfs.git#linux-next +nfsd git git://git.linux-nfs.org/~bfields/linux.git#nfsd-next +cel git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux#for-next +orangefs git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux#for-next +overlayfs git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git#overlayfs-next +ubifs git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git#next +v9fs git git://github.com/martinetd/linux#9p-next +xfs git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git#for-next +zonefs git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git#for-next +iomap git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git#iomap-for-next +djw-vfs git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git#vfs-for-next +file-locks git git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git#locks-next +vfs git git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git#for-next +printk git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git#for-next +pci git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git#next +pstore git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git#for-next/pstore +hid git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git#for-next 
+i2c git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git#i2c/for-next +i3c git git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git#i3c/next +dmi git git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging.git#dmi-for-next +hwmon-staging git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git#hwmon-next +jc_docs git git://git.lwn.net/linux.git#docs-next +v4l-dvb git git://linuxtv.org/media_tree.git#master +v4l-dvb-next git git://linuxtv.org/mchehab/media-next.git#master +pm git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git#linux-next +cpufreq-arm git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git#cpufreq/arm/linux-next +cpupower git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux.git#cpupower +devfreq git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git#devfreq-next +opp git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git#opp/linux-next +thermal git git://git.kernel.org/pub/scm/linux/kernel/git/thermal/linux.git#thermal/linux-next +ieee1394 git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394.git#for-next +dlm git git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm.git#next +swiotlb git git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git#linux-next +rdma git git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git#for-next +net-next git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git#master +bpf-next git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git#for-next +ipsec-next git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git#master +mlx5-next git git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux.git#mlx5-next +netfilter-next git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git#master +ipvs-next git git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git#master +wireless-drivers-next git 
git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git#master +bluetooth git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git#master +mac80211-next git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git#master +mtd git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#mtd/next +nand git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#nand/next +spi-nor git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#spi-nor/next +crypto git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git#master +drm git git://git.freedesktop.org/git/drm/drm.git#drm-next +drm-misc git git://anongit.freedesktop.org/drm/drm-misc#for-linux-next +amdgpu git https://gitlab.freedesktop.org/agd5f/linux#drm-next +drm-intel git git://anongit.freedesktop.org/drm-intel#for-linux-next +drm-tegra git git://anongit.freedesktop.org/tegra/linux.git#drm/tegra/for-next +drm-msm git https://gitlab.freedesktop.org/drm/msm.git#msm-next +imx-drm git https://git.pengutronix.de/git/pza/linux#imx-drm/next +etnaviv git https://git.pengutronix.de/git/lst/linux#etnaviv/next +regmap git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git#for-next +sound git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git#for-next +sound-asoc git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git#for-next +modules git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git#modules-next +input git git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git#next +block git git://git.kernel.dk/linux-block.git#for-next +device-mapper git git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git#for-next +pcmcia git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux.git#pcmcia-next +mmc git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git#next +mfd git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git#for-mfd-next +backlight git 
git://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git#for-backlight-next +battery git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git#for-next +regulator git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git#for-next +security git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git#next-testing +apparmor git git://git.kernel.org/pub/scm/linux/kernel/git/jj/linux-apparmor#apparmor-next +integrity git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity#next-integrity +keys git git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git#keys-next +safesetid git https://github.com/micah-morton/linux.git#safesetid-next +selinux git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git#next +smack git git://github.com/cschaufler/smack-next#next +tomoyo git https://scm.osdn.net/gitroot/tomoyo/tomoyo-test1.git#master +tpmdd git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git#next +watchdog git git://www.linux-watchdog.org/linux-watchdog-next.git#master +iommu git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git#next +audit git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git#next +devicetree git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git#for-next +mailbox git git://git.linaro.org/landing-teams/working/fujitsu/integration.git#mailbox-for-next +spi git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git#for-next +tip git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git#auto-latest +clockevents git git://git.linaro.org/people/daniel.lezcano/linux.git#timers/drivers/next +edac git git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git#edac-for-next +irqchip git git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git#irq/irqchip-next +ftrace git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git#for-next +rcu git 
git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git#rcu/next +kvm git git://git.kernel.org/pub/scm/virt/kvm/kvm.git#next +kvm-arm git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git#next +kvm-ppc git git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc.git#kvm-ppc-next +kvms390 git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git#next +xen-tip git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git#linux-next +percpu git git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git#for-next +workqueues git git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git#for-next +drivers-x86 git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git#for-next +chrome-platform git git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git#for-next +hsi git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-hsi.git#for-next +leds git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git#for-next +ipmi git git://github.com/cminyard/linux-ipmi.git#for-next +driver-core git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git#driver-core-next +usb git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git#usb-next +usb-gadget git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git#next +usb-serial git git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git#usb-next +usb-chipidea-next git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git#for-usb-next +tty git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git#tty-next +char-misc git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git#char-misc-next +extcon git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon.git#extcon-next +phy-next git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git#next +soundwire git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git#next +thunderbolt git 
git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git#next +vfio git git://github.com/awilliam/linux-vfio.git#next +staging git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git#staging-next +iio git git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git#togreg +mux git https://gitlab.com/peda-linux/mux.git#for-next +icc git git://git.kernel.org/pub/scm/linux/kernel/git/djakov/icc.git#icc-next +dmaengine git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git#next +cgroup git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git#for-next +scsi git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git#for-next +scsi-mkp git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git#for-next +vhost git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git#linux-next +rpmsg git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git#for-next +gpio git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git#for-next +gpio-brgl git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git#gpio/for-next +gpio-intel git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git#for-next +pinctrl git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git#for-next +pinctrl-intel git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git#for-next +pinctrl-renesas git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git#renesas-pinctrl +pinctrl-samsung git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git#for-next +pwm git git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git#for-next +userns git git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git#for-next +ktest git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-ktest.git#for-next +kselftest git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#next +livepatching git 
git://git.kernel.org/pub/scm/linux/kernel/git/livepatching/livepatching#for-next +coresight git git://git.linaro.org/kernel/coresight.git#next +rtc git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git#rtc-next +nvdimm git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git#libnvdimm-for-next +at24 git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git#at24/for-next +ntb git https://github.com/jonmason/ntb.git#ntb-next +seccomp git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git#for-next/seccomp +kspp git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git#for-next/kspp +cisco git https://github.com/daniel-walker/cisco-linux.git#for-next +gnss git git://git.kernel.org/pub/scm/linux/kernel/git/johan/gnss.git#gnss-next +fsi git git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi.git#next +slimbus git git://git.kernel.org/pub/scm/linux/kernel/git/srini/slimbus.git#for-next +nvmem git git://git.kernel.org/pub/scm/linux/kernel/git/srini/nvmem.git#for-next +xarray git git://git.infradead.org/users/willy/xarray.git#main +hyperv git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git#hyperv-next +auxdisplay git https://github.com/ojeda/linux.git#auxdisplay +kgdb git git://git.kernel.org/pub/scm/linux/kernel/git/danielt/linux.git#kgdb/for-next +hmm git git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git#hmm +fpga git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git#for-next +kunit git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#test +cfi git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#cfi/next +kunit-next git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#kunit +trivial git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git#for-next +mhi git git://git.kernel.org/pub/scm/linux/kernel/git/mani/mhi.git#mhi-next +memblock git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git#for-next 
+init git git://git.infradead.org/users/hch/misc.git#init-user-pointers +counters git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux.git#counters +rust git https://github.com/Rust-for-Linux/linux.git#rust-next +cxl git git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git#next +folio git git://git.infradead.org/users/willy/pagecache.git#for-next +akpm-current mmotm https://www.ozlabs.org/~akpm/mmotm/ +akpm mmotm https://www.ozlabs.org/~akpm/mmotm/ diff --git a/Next/merge.log b/Next/merge.log new file mode 100644 index 0000000000000..603a8801ba23d --- /dev/null +++ b/Next/merge.log @@ -0,0 +1,4991 @@ +$ date -R +Fri, 23 Jul 2021 09:07:36 +1000 +$ git checkout master +Already on 'master' +$ git reset --hard stable +HEAD is now at 3d5895cd3517 Merge tag 's390-5.14-3' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux +Merging origin/master (9bead1b58c4c Merge tag 'array-bounds-fixes-5.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux) +$ git merge origin/master +Updating 3d5895cd3517..9bead1b58c4c +Fast-forward + .../devicetree/bindings/net/imx-dwmac.txt | 56 ----- + .../devicetree/bindings/net/nxp,dwmac-imx.yaml | 93 ++++++++ + .../devicetree/bindings/net/snps,dwmac.yaml | 3 + + Documentation/networking/af_xdp.rst | 6 +- + Documentation/networking/ip-sysctl.rst | 2 +- + MAINTAINERS | 1 + + arch/arm64/boot/dts/freescale/imx8mp.dtsi | 6 +- + arch/arm64/kernel/smccc-call.S | 9 +- + arch/arm64/mm/mmu.c | 20 +- + arch/powerpc/mm/nohash/8xx.c | 10 + + arch/s390/net/bpf_jit_comp.c | 2 +- + arch/x86/kernel/cpu/mshyperv.c | 2 +- + arch/x86/mm/pgtable.c | 34 ++- + drivers/hv/channel_mgmt.c | 96 +++++--- + drivers/media/pci/ngene/ngene-core.c | 2 +- + drivers/media/pci/ngene/ngene.h | 14 +- + drivers/mmc/core/block.c | 35 +-- + drivers/mmc/core/host.c | 20 +- + drivers/net/bonding/bond_main.c | 2 + + drivers/net/dsa/mt7530.c | 2 + + drivers/net/dsa/mt7530.h | 1 + + drivers/net/dsa/mv88e6xxx/Kconfig | 2 +- + 
drivers/net/dsa/sja1105/sja1105_main.c | 6 + + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 85 +++++-- + drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 10 +- + drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 24 +- + drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 1 - + drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 9 +- + .../ethernet/cavium/liquidio/cn23xx_pf_device.c | 2 +- + .../net/ethernet/freescale/dpaa2/dpaa2-switch.c | 16 +- + drivers/net/ethernet/freescale/fman/mac.c | 1 + + drivers/net/ethernet/hisilicon/hip04_eth.c | 6 +- + drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 7 +- + .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 +- + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 1 + + .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 10 + + .../ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 19 ++ + drivers/net/ethernet/ibm/ibmvnic.c | 2 +- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +- + drivers/net/ethernet/marvell/octeontx2/af/Makefile | 2 +- + drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 10 +- + drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 21 ++ + .../net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 3 + + .../ethernet/marvell/octeontx2/af/rvu_debugfs.c | 5 +- + .../ethernet/marvell/octeontx2/af/rvu_devlink.c | 48 +++- + .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 36 +++ + .../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 47 +++- + .../net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 29 ++- + .../net/ethernet/marvell/octeontx2/af/rvu_switch.c | 258 +++++++++++++++++++++ + drivers/net/ethernet/microchip/sparx5/Kconfig | 1 + + drivers/net/ethernet/realtek/r8169_main.c | 3 +- + drivers/net/ethernet/renesas/ravb.h | 2 +- + drivers/net/ethernet/renesas/ravb_main.c | 2 +- + drivers/net/ethernet/xscale/ptp_ixp46x.c | 2 + + drivers/net/usb/hso.c | 33 ++- + drivers/net/usb/r8152.c | 30 ++- + include/linux/pgtable.h | 26 +-- + include/net/tcp.h | 1 - + include/trace/events/net.h | 2 +- + include/trace/events/qdisc.h | 28 ++- + 
kernel/bpf/verifier.c | 2 + + net/bpf/test_run.c | 3 + + net/bridge/br_fdb.c | 2 +- + net/caif/caif_socket.c | 3 +- + net/core/dev.c | 34 ++- + net/core/skbuff.c | 18 +- + net/core/skmsg.c | 16 +- + net/decnet/af_decnet.c | 27 +-- + net/dsa/slave.c | 14 +- + net/dsa/tag_ksz.c | 9 + + net/ipv4/tcp_bpf.c | 2 +- + net/ipv4/tcp_fastopen.c | 28 ++- + net/ipv4/tcp_ipv4.c | 2 +- + net/ipv4/udp.c | 25 +- + net/ipv4/udp_bpf.c | 2 +- + net/ipv6/ip6_output.c | 2 +- + net/ipv6/route.c | 2 +- + net/ipv6/udp.c | 25 +- + net/netrom/nr_timer.c | 20 +- + net/sched/act_skbmod.c | 12 +- + net/sched/cls_api.c | 2 +- + net/sched/cls_tcindex.c | 5 +- + net/sctp/auth.c | 2 + + net/sctp/output.c | 4 +- + net/sctp/socket.c | 4 + + tools/bpf/bpftool/common.c | 5 + + tools/testing/selftests/net/nettest.c | 55 ++++- + tools/testing/selftests/net/pmtu.sh | 212 ++++++++++++++++- + 88 files changed, 1362 insertions(+), 392 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/net/imx-dwmac.txt + create mode 100644 Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml + create mode 100644 drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c +Merging fixes/fixes (614124bea77e Linux 5.13-rc5) +$ git merge fixes/fixes +Already up to date. +Merging kbuild-current/fixes (5e60f363b38f Documentation: Fix intiramfs script name) +$ git merge kbuild-current/fixes +Already up to date. +Merging arc-current/for-curr (3eada7b4bd85 ARC: fp: set FPU_STATUS.FWE to enable FPU_STATUS update on context switch) +$ git merge arc-current/for-curr +Merge made by the 'recursive' strategy. 
+ arch/arc/Kconfig | 2 +- + arch/arc/include/asm/checksum.h | 2 +- + arch/arc/include/asm/perf_event.h | 2 +- + arch/arc/kernel/fpu.c | 9 ++++++--- + arch/arc/kernel/unwind.c | 10 +++++----- + arch/arc/kernel/vmlinux.lds.S | 2 ++ + 6 files changed, 16 insertions(+), 11 deletions(-) +Merging arm-current/fixes (dad7b9896a5d ARM: 9081/1: fix gcc-10 thumb2-kernel regression) +$ git merge arm-current/fixes +Already up to date. +Merging arm64-fixes/for-next/fixes (d8a719059b9d Revert "mm/pgtable: add stubs for {pmd/pub}_{set/clear}_huge") +$ git merge arm64-fixes/for-next/fixes +Already up to date. +Merging arm-soc-fixes/arm/fixes (82a1c67554df ARM: dts: versatile: Fix up interrupt controller node names) +$ git merge arm-soc-fixes/arm/fixes +Already up to date. +Merging drivers-memory-fixes/fixes (e73f0f0ee754 Linux 5.14-rc1) +$ git merge drivers-memory-fixes/fixes +Already up to date. +Merging m68k-current/for-linus (21ed49265986 m68k: MAC should select HAVE_PATA_PLATFORM) +$ git merge m68k-current/for-linus +Merge made by the 'recursive' strategy. + arch/m68k/Kconfig.machine | 1 + + 1 file changed, 1 insertion(+) +Merging powerpc-fixes/fixes (bc4188a2f56e KVM: PPC: Fix kvm_arch_vcpu_ioctl vcpu_load leak) +$ git merge powerpc-fixes/fixes +Merge made by the 'recursive' strategy. + arch/powerpc/kvm/book3s_hv.c | 2 ++ + arch/powerpc/kvm/book3s_hv_p9_entry.c | 25 ++++++++++++++++++++++--- + arch/powerpc/kvm/powerpc.c | 4 ++-- + 3 files changed, 26 insertions(+), 5 deletions(-) +Merging s390-fixes/fixes (0cde560a8bfc s390: update defconfigs) +$ git merge s390-fixes/fixes +Already up to date. +Merging sparc/master (05a59d79793d Merge git://git.kernel.org:/pub/scm/linux/kernel/git/netdev/net) +$ git merge sparc/master +Already up to date. +Merging fscrypt-current/for-stable (d19d8d345eec fscrypt: fix inline encryption not used on new files) +$ git merge fscrypt-current/for-stable +Already up to date. 
+Merging net/master (9f42f674a892 Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux) +$ git merge net/master +Already up to date. +Merging bpf/master (d6371c76e20d bpf: Fix OOB read when printing XDP link fdinfo) +$ git merge bpf/master +Merge made by the 'recursive' strategy. + include/linux/bpf_types.h | 1 + + include/linux/bpf_verifier.h | 1 + + kernel/bpf/verifier.c | 61 ++---- + .../selftests/bpf/verifier/value_ptr_arith.c | 229 +++++++++++++++++++++ + 4 files changed, 248 insertions(+), 44 deletions(-) +Merging ipsec/master (7cb745800df9 Merge branch 'xfrm/compat: Fix xfrm_spdattr_type_t copying') +$ git merge ipsec/master +Auto-merging net/xfrm/xfrm_policy.c +Auto-merging include/net/netns/xfrm.h +Merge made by the 'recursive' strategy. + include/net/netns/xfrm.h | 1 + + net/xfrm/xfrm_compat.c | 49 +++++++++-- + net/xfrm/xfrm_policy.c | 32 +++---- + net/xfrm/xfrm_user.c | 10 +++ + tools/testing/selftests/net/ipsec.c | 165 +++++++++++++++++++++++++++++++++++- + 5 files changed, 230 insertions(+), 27 deletions(-) +Merging netfilter/master (cfbe3650dd3e netfilter: nf_tables: fix audit memory leak in nf_tables_commit) +$ git merge netfilter/master +Merge made by the 'recursive' strategy. + net/netfilter/nf_tables_api.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) +Merging ipvs/master (cfbe3650dd3e netfilter: nf_tables: fix audit memory leak in nf_tables_commit) +$ git merge ipvs/master +Already up to date. +Merging wireless-drivers/master (09cfae9f13d5 ixgbe: Fix packet corruption due to missing DMA sync) +$ git merge wireless-drivers/master +Already up to date. +Merging mac80211/master (f4b29d2ee903 Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf) +$ git merge mac80211/master +Already up to date. +Merging rdma-fixes/for-rc (dc6afef7e142 RDMA/irdma: Change returned type of irdma_setup_virt_qp to void) +$ git merge rdma-fixes/for-rc +Merge made by the 'recursive' strategy. 
+ drivers/infiniband/hw/bnxt_re/main.c | 4 +++- + drivers/infiniband/hw/bnxt_re/qplib_res.c | 10 ++++------ + drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 + + drivers/infiniband/hw/irdma/ctrl.c | 13 +++++-------- + drivers/infiniband/hw/irdma/hw.c | 11 ++--------- + drivers/infiniband/hw/irdma/main.c | 9 ++++++--- + drivers/infiniband/hw/irdma/type.h | 3 +-- + drivers/infiniband/hw/irdma/uk.c | 5 +---- + drivers/infiniband/hw/irdma/verbs.c | 6 ++---- + drivers/infiniband/sw/rxe/rxe_mr.c | 27 +++++++++++++++++---------- + include/uapi/rdma/irdma-abi.h | 2 +- + 11 files changed, 43 insertions(+), 48 deletions(-) +Merging sound-current/for-linus (b0084afde27f ALSA: usb-audio: Add registration quirk for JBL Quantum headsets) +$ git merge sound-current/for-linus +Merge made by the 'recursive' strategy. + .../devicetree/bindings/sound/renesas,rsnd.yaml | 2 +- + include/sound/soc.h | 6 ++ + sound/core/pcm_native.c | 27 ++++++-- + sound/hda/intel-dsp-config.c | 4 ++ + sound/isa/sb/sb16_csp.c | 4 ++ + sound/pci/hda/patch_hdmi.c | 2 + + sound/pci/hda/patch_realtek.c | 1 + + sound/soc/amd/acp-da7219-max98357a.c | 5 ++ + sound/soc/codecs/Kconfig | 8 +-- + sound/soc/codecs/rt5631.c | 2 + + sound/soc/codecs/rt5682.c | 8 ++- + sound/soc/codecs/tlv320aic31xx.c | 2 + + sound/soc/codecs/tlv320aic31xx.h | 4 +- + sound/soc/codecs/tlv320aic32x4.c | 27 ++++---- + sound/soc/codecs/wcd938x.c | 18 ++--- + sound/soc/codecs/wm_adsp.c | 6 +- + sound/soc/intel/boards/sof_sdw_max98373.c | 81 ++++++++++++++-------- + sound/soc/soc-pcm.c | 22 ++++-- + sound/soc/sof/intel/pci-tgl.c | 1 + + sound/soc/tegra/tegra_pcm.c | 30 ++++---- + sound/soc/ti/j721e-evm.c | 18 +++-- + sound/usb/mixer.c | 10 ++- + sound/usb/quirks.c | 3 + + 23 files changed, 195 insertions(+), 96 deletions(-) +Merging sound-asoc-fixes/for-linus (a7b7e43d4747 Merge remote-tracking branch 'asoc/for-5.14' into asoc-linus) +$ git merge sound-asoc-fixes/for-linus +Merge made by the 'recursive' strategy. 
+ sound/soc/amd/acp-da7219-max98357a.c | 5 +++++ + sound/soc/amd/renoir/rn-pci-acp3x.c | 2 ++ + sound/soc/codecs/Kconfig | 1 + + sound/soc/codecs/Makefile | 5 ++++- + sound/soc/codecs/rt5682.c | 1 + + sound/soc/codecs/tlv320aic32x4.c | 33 ++++++++++++++++++++++++++------- + 6 files changed, 39 insertions(+), 8 deletions(-) +Merging regmap-fixes/for-linus (e73f0f0ee754 Linux 5.14-rc1) +$ git merge regmap-fixes/for-linus +Already up to date. +Merging regulator-fixes/for-linus (0ffa19b42099 Merge remote-tracking branch 'regulator/for-5.14' into regulator-linus) +$ git merge regulator-fixes/for-linus +Already up to date! +Merge made by the 'recursive' strategy. +Merging spi-fixes/for-linus (c6edba975913 Merge remote-tracking branch 'spi/for-5.14' into spi-linus) +$ git merge spi-fixes/for-linus +Merge made by the 'recursive' strategy. + drivers/spi/spi-meson-spicc.c | 2 ++ + drivers/spi/spi-mux.c | 8 ++++++++ + drivers/spi/spi.c | 4 ++++ + 3 files changed, 14 insertions(+) +Merging pci-current/for-linus (e73f0f0ee754 Linux 5.14-rc1) +$ git merge pci-current/for-linus +Already up to date. +Merging driver-core.current/driver-core-linus (e64daad660a0 driver core: Prevent warning when removing a device link from unregistered consumer) +$ git merge driver-core.current/driver-core-linus +Merge made by the 'recursive' strategy. + drivers/base/auxiliary.c | 8 +++++++- + drivers/base/core.c | 6 ++++-- + 2 files changed, 11 insertions(+), 3 deletions(-) +Merging tty.current/tty-linus (7f0909db7615 serial: 8250_pci: Enumerate Elkhart Lake UARTs via dedicated driver) +$ git merge tty.current/tty-linus +Merge made by the 'recursive' strategy. 
+ arch/mips/mti-malta/malta-platform.c | 3 ++- + drivers/tty/serial/8250/8250_aspeed_vuart.c | 5 +++-- + drivers/tty/serial/8250/8250_fsl.c | 5 +++-- + drivers/tty/serial/8250/8250_pci.c | 6 ++++++ + drivers/tty/serial/8250/8250_port.c | 17 ++++++++++++----- + drivers/tty/serial/max310x.c | 3 ++- + drivers/tty/serial/serial-tegra.c | 6 ++++-- + include/linux/serial_core.h | 24 ++++++++++++++++++++++++ + 8 files changed, 56 insertions(+), 13 deletions(-) +Merging usb.current/usb-linus (1d1b97d5e763 Merge tag 'usb-serial-5.14-rc3' of https://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial into usb-linus) +$ git merge usb.current/usb-linus +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + MAINTAINERS | 2 +- + arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 4 +- + arch/arm64/boot/dts/qcom/ipq8074.dtsi | 4 +- + arch/arm64/boot/dts/qcom/msm8996.dtsi | 4 +- + arch/arm64/boot/dts/qcom/msm8998.dtsi | 2 +- + arch/arm64/boot/dts/qcom/qcs404-evb.dtsi | 2 +- + arch/arm64/boot/dts/qcom/qcs404.dtsi | 4 +- + arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 +- + arch/arm64/boot/dts/qcom/sdm845.dtsi | 4 +- + arch/arm64/boot/dts/qcom/sm8150.dtsi | 2 +- + drivers/usb/class/cdc-wdm.c | 6 +- + drivers/usb/core/devio.c | 2 +- + drivers/usb/core/hub.c | 120 +++++++++++++++++++-------- + drivers/usb/core/quirks.c | 4 - + drivers/usb/dwc2/core.h | 4 + + drivers/usb/dwc2/core_intr.c | 3 +- + drivers/usb/dwc2/gadget.c | 31 ++++++- + drivers/usb/dwc2/hcd.c | 6 +- + drivers/usb/dwc2/params.c | 1 + + drivers/usb/dwc3/core.h | 1 + + drivers/usb/dwc3/ep0.c | 10 ++- + drivers/usb/dwc3/gadget.c | 21 +++-- + drivers/usb/gadget/function/u_serial.c | 2 +- + drivers/usb/gadget/udc/tegra-xudc.c | 1 + + drivers/usb/host/ehci-hcd.c | 18 +++- + drivers/usb/host/max3421-hcd.c | 44 ++++------ + drivers/usb/host/xhci-hub.c | 3 +- + drivers/usb/host/xhci-pci-renesas.c | 16 ++-- + drivers/usb/host/xhci-pci.c | 7 ++ + drivers/usb/phy/phy.c | 10 ++- + drivers/usb/renesas_usbhs/fifo.c | 7 ++ + 
drivers/usb/serial/cp210x.c | 5 +- + drivers/usb/serial/option.c | 3 + + drivers/usb/storage/unusual_uas.h | 7 ++ + drivers/usb/typec/stusb160x.c | 20 ++++- + drivers/usb/typec/tipd/core.c | 9 ++ + 36 files changed, 264 insertions(+), 127 deletions(-) +Merging usb-gadget-fixes/fixes (e49d033bddf5 Linux 5.12-rc6) +$ git merge usb-gadget-fixes/fixes +Already up to date. +Merging usb-serial-fixes/usb-linus (d6a206e60124 USB: serial: cp210x: add ID for CEL EM3588 USB ZigBee stick) +$ git merge usb-serial-fixes/usb-linus +Already up to date. +Merging usb-chipidea-fixes/for-usb-fixes (42c4417937be usb: cdnsp: Fix the IMAN_IE_SET and IMAN_IE_CLEAR macro.) +$ git merge usb-chipidea-fixes/for-usb-fixes +Auto-merging drivers/usb/cdns3/cdnsp-gadget.c +Auto-merging drivers/usb/cdns3/cdns3-ep0.c +Merge made by the 'recursive' strategy. + drivers/usb/cdns3/cdns3-ep0.c | 1 + + drivers/usb/cdns3/cdnsp-gadget.c | 2 +- + drivers/usb/cdns3/cdnsp-gadget.h | 4 ++-- + drivers/usb/cdns3/cdnsp-ring.c | 18 ++++++++---------- + 4 files changed, 12 insertions(+), 13 deletions(-) +Merging phy/fixes (e73f0f0ee754 Linux 5.14-rc1) +$ git merge phy/fixes +Already up to date. +Merging staging.current/staging-linus (cb7abd1db6e5 staging: rtl8723bs: select CONFIG_CRYPTO_LIB_ARC4) +$ git merge staging.current/staging-linus +Merge made by the 'recursive' strategy. + drivers/staging/rtl8723bs/Kconfig | 1 + + drivers/staging/rtl8723bs/hal/sdio_ops.c | 2 ++ + 2 files changed, 3 insertions(+) +Merging iio-fixes/fixes-togreg (7e77ef8b8d60 iio: adis: set GPIO reset pin direction) +$ git merge iio-fixes/fixes-togreg +Merge made by the 'recursive' strategy. 
+ drivers/iio/accel/fxls8962af-core.c | 2 +- + drivers/iio/adc/ti-ads7950.c | 1 - + drivers/iio/imu/adis.c | 3 +-- + 3 files changed, 2 insertions(+), 4 deletions(-) +Merging char-misc.current/char-misc-linus (c453db6cd964 nds32: fix up stack guard gap) +$ git merge char-misc.current/char-misc-linus +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + MAINTAINERS | 10 +++++++++- + arch/nds32/mm/mmap.c | 2 +- + drivers/bus/mhi/core/main.c | 17 +++++++++++----- + drivers/bus/mhi/pci_generic.c | 45 ++++++++++++++++++++++++++++++++++--------- + 4 files changed, 58 insertions(+), 16 deletions(-) +Merging soundwire-fixes/fixes (e73f0f0ee754 Linux 5.14-rc1) +$ git merge soundwire-fixes/fixes +Already up to date. +Merging thunderbolt-fixes/fixes (13311e74253f Linux 5.13-rc7) +$ git merge thunderbolt-fixes/fixes +Already up to date. +Merging input-current/for-linus (818b26588994 Merge branch 'next' into for-linus) +$ git merge input-current/for-linus +Already up to date. +Merging crypto-current/master (66192b2e3fd8 crypto: hisilicon/sec - fix the process of disabling sva prefetching) +$ git merge crypto-current/master +Already up to date. +Merging vfio-fixes/for-linus (dc51ff91cf2d vfio/platform: fix module_put call in error flow) +$ git merge vfio-fixes/for-linus +Already up to date. +Merging kselftest-fixes/fixes (2734d6c1b1a0 Linux 5.14-rc2) +$ git merge kselftest-fixes/fixes +Already up to date. +Merging modules-fixes/modules-linus (055f23b74b20 module: check for exit sections in layout_sections() instead of module_init_section()) +$ git merge modules-fixes/modules-linus +Already up to date. +Merging dmaengine-fixes/fixes (6b4b87f2c31a dmaengine: idxd: fix submission race window) +$ git merge dmaengine-fixes/fixes +Merge made by the 'recursive' strategy. 
+ drivers/dma/idxd/idxd.h | 14 +++++++ + drivers/dma/idxd/init.c | 30 +++++++++----- + drivers/dma/idxd/irq.c | 27 ++++++++---- + drivers/dma/idxd/submit.c | 92 +++++++++++++++++++++++++++++++---------- + drivers/dma/idxd/sysfs.c | 2 - + drivers/dma/sh/usb-dmac.c | 2 +- + drivers/dma/xilinx/xilinx_dma.c | 12 ++++++ + 7 files changed, 135 insertions(+), 44 deletions(-) +Merging backlight-fixes/for-backlight-fixes (a38fd8748464 Linux 5.12-rc2) +$ git merge backlight-fixes/for-backlight-fixes +Already up to date. +Merging mtd-fixes/mtd/fixes (ba3562892619 mtd: devices: mchp48l640: Fix memory leak on cmd) +$ git merge mtd-fixes/mtd/fixes +Merge made by the 'recursive' strategy. + drivers/mtd/chips/cfi_cmdset_0002.c | 2 +- + drivers/mtd/devices/mchp48l640.c | 5 +++-- + drivers/mtd/mtd_blkdevs.c | 9 +++------ + drivers/mtd/mtdcore.c | 4 +++- + drivers/mtd/nand/raw/nand_base.c | 4 ++-- + 5 files changed, 12 insertions(+), 12 deletions(-) +Merging mfd-fixes/for-mfd-fixes (a61f4661fba4 mfd: intel_quark_i2c_gpio: Revert "Constify static struct resources") +$ git merge mfd-fixes/for-mfd-fixes +Already up to date. +Merging v4l-dvb-fixes/fixes (61c6f04a988e media: s5p-mfc: Fix display delay control creation) +$ git merge v4l-dvb-fixes/fixes +Already up to date. +Merging reset-fixes/reset/fixes (1435f82689e1 reset: RESET_MCHP_SPARX5 should depend on ARCH_SPARX5) +$ git merge reset-fixes/reset/fixes +Merge made by the 'recursive' strategy. + drivers/reset/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +Merging mips-fixes/mips-fixes (e73f0f0ee754 Linux 5.14-rc1) +$ git merge mips-fixes/mips-fixes +Already up to date. +Merging at91-fixes/at91-fixes (6efb943b8616 Linux 5.13-rc1) +$ git merge at91-fixes/at91-fixes +Already up to date. +Merging omap-fixes/fixes (3ff340e24c9d bus: ti-sysc: Fix gpt12 system timer issue with reserved status) +$ git merge omap-fixes/fixes +Merge made by the 'recursive' strategy. 
+ drivers/bus/ti-sysc.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) +Merging kvm-fixes/master (7025098af334 Merge tag 'kvmarm-fixes-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD) +$ git merge kvm-fixes/master +Merge made by the 'recursive' strategy. + arch/arm64/kvm/mmu.c | 2 +- + tools/testing/selftests/kvm/aarch64/get-reg-list.c | 3 ++- + tools/testing/selftests/kvm/steal_time.c | 2 +- + 3 files changed, 4 insertions(+), 3 deletions(-) +Merging kvms390-fixes/master (cd4220d23bf3 KVM: selftests: do not require 64GB in set_memory_region_test) +$ git merge kvms390-fixes/master +Already up to date. +Merging hwmon-fixes/hwmon (e73f0f0ee754 Linux 5.14-rc1) +$ git merge hwmon-fixes/hwmon +Already up to date. +Merging nvdimm-fixes/libnvdimm-fixes (b05d4c576b69 dax: Ensure errno is returned from dax_direct_access) +$ git merge nvdimm-fixes/libnvdimm-fixes +Auto-merging fs/fuse/dax.c +Auto-merging fs/dax.c +Merge made by the 'recursive' strategy. + drivers/dax/super.c | 2 +- + fs/dax.c | 2 +- + fs/fuse/dax.c | 6 ++---- + 3 files changed, 4 insertions(+), 6 deletions(-) +Merging cxl-fixes/fixes (fae8817ae804 cxl/mem: Fix memory device capacity probing) +$ git merge cxl-fixes/fixes +Already up to date. +Merging btrfs-fixes/next-fixes (a34d068248b7 Merge branch 'misc-5.14' into next-fixes) +$ git merge btrfs-fixes/next-fixes +Merge made by the 'recursive' strategy. 
+ fs/btrfs/backref.c | 6 +++--- + fs/btrfs/backref.h | 3 ++- + fs/btrfs/delayed-ref.c | 4 ++-- + fs/btrfs/extent-tree.c | 3 +++ + fs/btrfs/inode.c | 2 +- + fs/btrfs/ordered-data.c | 2 -- + fs/btrfs/ordered-data.h | 3 +-- + fs/btrfs/qgroup.c | 38 ++++++++++++++++++++++++++++++-------- + fs/btrfs/qgroup.h | 2 +- + fs/btrfs/tests/qgroup-tests.c | 20 ++++++++++---------- + fs/btrfs/tree-log.c | 31 ++++++++++++++++++++++--------- + fs/btrfs/zoned.c | 12 ++++-------- + 12 files changed, 79 insertions(+), 47 deletions(-) +Merging vfs-fixes/fixes (173e84953eaa fs: fix reporting supported extra file attributes for statx()) +$ git merge vfs-fixes/fixes +Merge made by the 'recursive' strategy. +Merging dma-mapping-fixes/for-linus (18a3c5f7abfd Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost) +$ git merge dma-mapping-fixes/for-linus +Already up to date. +Merging i3c-fixes/i3c/fixes (fe07bfda2fb9 Linux 5.12-rc1) +$ git merge i3c-fixes/i3c/fixes +Already up to date. +Merging drivers-x86-fixes/fixes (f7e506ec4a99 platform/x86: think-lmi: Fix possible mem-leaks on tlmi_analyze() error-exit) +$ git merge drivers-x86-fixes/fixes +Merge made by the 'recursive' strategy. + drivers/platform/x86/amd-pmc.c | 249 +++++++++++++++++++++++++++++---- + drivers/platform/x86/think-lmi.c | 45 ++++-- + drivers/platform/x86/think-lmi.h | 1 + + drivers/platform/x86/wireless-hotkey.c | 2 +- + 4 files changed, 263 insertions(+), 34 deletions(-) +Merging samsung-krzk-fixes/fixes (e73f0f0ee754 Linux 5.14-rc1) +$ git merge samsung-krzk-fixes/fixes +Already up to date. +Merging pinctrl-samsung-fixes/fixes (e73f0f0ee754 Linux 5.14-rc1) +$ git merge pinctrl-samsung-fixes/fixes +Already up to date. +Merging devicetree-fixes/dt/linus (50d8d7e19c43 dt-bindings: display: renesas,du: Make resets optional on R-Car H1) +$ git merge devicetree-fixes/dt/linus +Already up to date. 
+Merging scsi-fixes/fixes (a47fa41381a0 scsi: target: Fix NULL dereference on XCOPY completion) +$ git merge scsi-fixes/fixes +Merge made by the 'recursive' strategy. + drivers/scsi/mpt3sas/mpt3sas_base.c | 32 ++++++------ + drivers/scsi/mpt3sas/mpt3sas_base.h | 4 ++ + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 7 ++- + drivers/scsi/scsi_transport_iscsi.c | 90 +++++++++++++--------------------- + drivers/target/target_core_sbc.c | 35 +++++++------ + drivers/target/target_core_transport.c | 2 +- + 6 files changed, 78 insertions(+), 92 deletions(-) +Merging drm-fixes/drm-fixes (995a1460f26f Merge tag 'amd-drm-fixes-5.14-2021-07-21' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes) +$ git merge drm-fixes/drm-fixes +Merge made by the 'recursive' strategy. + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 + + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 + + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 + + drivers/gpu/drm/amd/amdgpu/nv.c | 248 +++++---------------- + drivers/gpu/drm/amd/amdgpu/soc15.c | 176 ++------------- + .../amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 4 + + .../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 59 ++++- + .../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h | 54 ----- + drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 12 +- + drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- + drivers/gpu/drm/amd/display/dc/dc.h | 10 +- + drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h | 4 +- + .../gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c | 7 +- + .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 50 +++-- + drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 16 -- + drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h | 3 +- + .../drm/amd/display/dc/dcn302/dcn302_resource.c | 13 +- + .../drm/amd/display/dc/dcn303/dcn303_resource.c | 13 +- + drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 18 ++ + drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h | 1 + + drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 3 +- + 
.../gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 4 + + .../amd/display/dc/dml/dcn21/display_mode_vba_21.c | 2 +- + drivers/gpu/drm/amd/display/dc/inc/hw/transform.h | 3 + + .../drm/amd/display/dc/inc/hw_sequencer_private.h | 1 + + drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h | 3 +- + drivers/gpu/drm/amd/pm/inc/smu_types.h | 3 +- + drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 2 +- + drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 46 +++- + 29 files changed, 288 insertions(+), 483 deletions(-) +Merging amdgpu-fixes/drm-fixes (2c409ba81be2 drm/radeon: fix si_enable_smc_cac() failed issue) +$ git merge amdgpu-fixes/drm-fixes +Already up to date. +Merging drm-intel-fixes/for-linux-next-fixes (6e0b6528d783 drm/i915: Correct the docs for intel_engine_cmd_parser) +$ git merge drm-intel-fixes/for-linux-next-fixes +Merge made by the 'recursive' strategy. + drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 227 ++------------------- + .../drm/i915/gem/selftests/i915_gem_execbuffer.c | 4 + + drivers/gpu/drm/i915/gvt/handlers.c | 15 ++ + drivers/gpu/drm/i915/i915_cmd_parser.c | 136 ++++++------ + drivers/gpu/drm/i915/i915_drv.h | 7 +- + drivers/gpu/drm/i915/i915_request.c | 8 +- + 6 files changed, 109 insertions(+), 288 deletions(-) +Merging mmc-fixes/fixes (10252bae863d mmc: core: Don't allocate IDA for OF aliases) +$ git merge mmc-fixes/fixes +Already up to date. +Merging rtc-fixes/rtc-fixes (bd33335aa93d rtc: cmos: Disable irq around direct invocation of cmos_interrupt()) +$ git merge rtc-fixes/rtc-fixes +Merge made by the 'recursive' strategy. + drivers/rtc/rtc-cmos.c | 2 ++ + 1 file changed, 2 insertions(+) +Merging gnss-fixes/gnss-linus (e73f0f0ee754 Linux 5.14-rc1) +$ git merge gnss-fixes/gnss-linus +Already up to date. +Merging hyperv-fixes/hyperv-fixes (f5a11c69b699 Revert "x86/hyperv: fix logical processor creation") +$ git merge hyperv-fixes/hyperv-fixes +Already up to date. 
+Merging soc-fsl-fixes/fix (2663b3388551 soc: fsl: dpio: Get the cpumask through cpumask_of(cpu)) +$ git merge soc-fsl-fixes/fix +Already up to date. +Merging risc-v-fixes/fixes (c79e89ecaa24 RISC-V: load initrd wherever it fits into memory) +$ git merge risc-v-fixes/fixes +Merge made by the 'recursive' strategy. + arch/riscv/include/asm/efi.h | 4 ++-- + arch/riscv/mm/init.c | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) +Merging pidfd-fixes/fixes (03ba0fe4d09f file: simplify logic in __close_range()) +$ git merge pidfd-fixes/fixes +Auto-merging fs/file.c +Merge made by the 'recursive' strategy. + fs/file.c | 64 +++++++++++++++++++++++++++++++++++++++------------------------ + 1 file changed, 40 insertions(+), 24 deletions(-) +Merging fpga-fixes/fixes (2e8496f31d0b firmware: stratix10-svc: reset COMMAND_RECONFIG_FLAG_PARTIAL to 0) +$ git merge fpga-fixes/fixes +Already up to date. +Merging spdx/spdx-linus (de5540965853 Merge tag 'rtc-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux) +$ git merge spdx/spdx-linus +Already up to date. +Merging gpio-brgl-fixes/gpio/for-current (ec7099fdea80 Revert "gpio: mpc8xxx: change the gpio interrupt flags.") +$ git merge gpio-brgl-fixes/gpio/for-current +Merge made by the 'recursive' strategy. + drivers/gpio/gpio-mpc8xxx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +Merging gpio-intel-fixes/fixes (e73f0f0ee754 Linux 5.14-rc1) +$ git merge gpio-intel-fixes/fixes +Already up to date. +Merging pinctrl-intel-fixes/fixes (e73f0f0ee754 Linux 5.14-rc1) +$ git merge pinctrl-intel-fixes/fixes +Already up to date. +Merging erofs-fixes/fixes (0852b6ca941e erofs: fix 1 lcluster-sized pcluster for big pcluster) +$ git merge erofs-fixes/fixes +Already up to date. +Merging integrity-fixes/fixes (843385694721 evm: Fix a small race in init_desc()) +$ git merge integrity-fixes/fixes +Already up to date. 
+Merging kunit-fixes/kunit-fixes (2734d6c1b1a0 Linux 5.14-rc2) +$ git merge kunit-fixes/kunit-fixes +Already up to date. +Merging ubifs-fixes/fixes (78c7d49f55d8 ubifs: journal: Make sure to not dirty twice for auth nodes) +$ git merge ubifs-fixes/fixes +Already up to date. +Merging memblock-fixes/fixes (024591f9a6e0 arm: ioremap: don't abuse pfn_valid() to check if pfn is in RAM) +$ git merge memblock-fixes/fixes +Already up to date. +Merging cel-fixes/for-rc (9f4ad9e425a1 Linux 5.12) +$ git merge cel-fixes/for-rc +Already up to date. +Merging irqchip-fixes/irq/irqchip-fixes (1fee9db9b42d irqchip/mips: Fix RCU violation when using irqdomain lookup on interrupt entry) +$ git merge irqchip-fixes/irq/irqchip-fixes +Already up to date. +Merging renesas-fixes/fixes (432b52eea3dc ARM: shmobile: defconfig: Restore graphical consoles) +$ git merge renesas-fixes/fixes +Already up to date. +Merging drm-misc-fixes/for-linux-next-fixes (69de4421bb4c drm/ttm: Initialize debugfs from ttm_global_init()) +$ git merge drm-misc-fixes/for-linux-next-fixes +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +Merge made by the 'recursive' strategy. + drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 9 ++++ + drivers/gpu/drm/drm_ioctl.c | 3 ++ + drivers/gpu/drm/nouveau/nouveau_bo.c | 6 +++ + .../gpu/drm/panel/panel-raspberrypi-touchscreen.c | 1 - + drivers/gpu/drm/ttm/ttm_bo.c | 3 ++ + drivers/gpu/drm/ttm/ttm_bo_util.c | 3 ++ + drivers/gpu/drm/ttm/ttm_device.c | 14 +++++++ + drivers/gpu/drm/ttm/ttm_module.c | 16 ------- + drivers/gpu/drm/vc4/vc4_hdmi.c | 49 +++++++++++++++------- + include/drm/drm_ioctl.h | 1 + + 10 files changed, 72 insertions(+), 33 deletions(-) +Merging kspp-gustavo/for-next/kspp (802dad70a535 Merge branch 'for-next/array-bounds' into for-next/kspp) +$ git merge kspp-gustavo/for-next/kspp +Merge made by the 'recursive' strategy. 
+ arch/powerpc/platforms/pasemi/idle.c | 1 + + 1 file changed, 1 insertion(+) +Merging kbuild/for-next (27932b6a2088 scripts: add generic syscallnr.sh) +$ git merge kbuild/for-next +Already up to date. +Merging compiler-attributes/compiler-attributes (7ed012969bbc Compiler Attributes: fix __has_attribute(__no_sanitize_coverage__) for GCC 4) +$ git merge compiler-attributes/compiler-attributes +Auto-merging include/linux/compiler_attributes.h +CONFLICT (content): Merge conflict in include/linux/compiler_attributes.h +Resolved 'include/linux/compiler_attributes.h' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master d273adf3d5ce] Merge remote-tracking branch 'compiler-attributes/compiler-attributes' +$ git diff -M --stat --summary HEAD^.. + include/linux/compiler_attributes.h | 1 + + 1 file changed, 1 insertion(+) +Merging dma-mapping/for-next (40ac971eab89 dma-mapping: handle vmalloc addresses in dma_common_{mmap,get_sgtable}) +$ git merge dma-mapping/for-next +Merge made by the 'recursive' strategy. + kernel/dma/ops_helpers.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) +Merging asm-generic/master (14462376858e Merge branch 'asm-generic-unaligned' into asm-generic) +$ git merge asm-generic/master +Auto-merging arch/riscv/include/asm/io.h +Merge made by the 'recursive' strategy. + arch/riscv/include/asm/io.h | 5 ++-- + arch/sparc/include/asm/io.h | 8 ++++++ + include/asm-generic/io.h | 68 ++++++++++++++++++++++++++++++++++++++++++--- + 3 files changed, 75 insertions(+), 6 deletions(-) +Merging arc/for-next (def9d2780727 Linux 5.5-rc7) +$ git merge arc/for-next +Already up to date. +Merging arm/for-next (3bd1461d1691 Merge branches 'fixes' and 'misc' into for-next) +$ git merge arm/for-next +Already up to date! +Merge made by the 'recursive' strategy. 
+Merging arm64/for-next/core (3d1bf78c7b13 Merge branch 'for-next/sve' into for-next/core) +$ git merge arm64/for-next/core +Already up to date. +Merging arm-perf/for-next/perf (d96b1b8c9f79 drivers/perf: fix the missed ida_simple_remove() in ddr_perf_probe()) +$ git merge arm-perf/for-next/perf +Already up to date. +Merging arm-soc/for-next (82a1c67554df ARM: dts: versatile: Fix up interrupt controller node names) +$ git merge arm-soc/for-next +Already up to date. +Merging actions/for-next (444d018d8d38 ARM: dts: owl-s500-roseapplepi: Add ATC2603C PMIC) +$ git merge actions/for-next +Merge made by the 'recursive' strategy. +Merging amlogic/for-next (b03485fe99f2 Merge tags 'amlogic-arm64-dt-for-v5.14-v2' and 'amlogic-arm-dt-for-v5.14' into for-next) +$ git merge amlogic/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging aspeed/for-next (00e9e776fa19 ARM: dts: aspeed: wedge100: Enable ADC channels) +$ git merge aspeed/for-next +Merge made by the 'recursive' strategy. + arch/arm/boot/dts/Makefile | 1 + + .../arm/boot/dts/aspeed-bmc-facebook-galaxy100.dts | 4 - + arch/arm/boot/dts/aspeed-bmc-facebook-wedge100.dts | 5 + + arch/arm/boot/dts/aspeed-bmc-facebook-wedge40.dts | 4 - + arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts | 2 +- + arch/arm/boot/dts/aspeed-bmc-inspur-nf5280m6.dts | 691 +++++++++++++++++++++ + arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi | 4 +- + .../boot/dts/ast2400-facebook-netbmc-common.dtsi | 4 + + 8 files changed, 704 insertions(+), 11 deletions(-) + create mode 100644 arch/arm/boot/dts/aspeed-bmc-inspur-nf5280m6.dts +Merging at91/at91-next (b2a7f104a035 Merge branches 'at91-dt' and 'at91-defconfig' into at91-next) +$ git merge at91/at91-next +Auto-merging arch/arm/configs/multi_v7_defconfig +Auto-merging arch/arm/boot/dts/Makefile +Merge made by the 'recursive' strategy. 
+ .../devicetree/bindings/arm/atmel-at91.yaml | 5 + + .../devicetree/bindings/arm/atmel-sysregs.txt | 14 +- + arch/arm/Kconfig.debug | 10 + + arch/arm/boot/dts/Makefile | 2 + + arch/arm/boot/dts/at91-sama5d4_xplained.dts | 10 - + arch/arm/boot/dts/at91-sama7g5ek.dts | 656 +++++++++++++++ + arch/arm/boot/dts/sama7g5-pinfunc.h | 923 +++++++++++++++++++++ + arch/arm/boot/dts/sama7g5.dtsi | 528 ++++++++++++ + arch/arm/configs/multi_v7_defconfig | 2 + + arch/arm/configs/sama7_defconfig | 209 +++++ + arch/arm/mach-at91/Kconfig | 18 + + arch/arm/mach-at91/Makefile | 1 + + arch/arm/mach-at91/generic.h | 2 + + arch/arm/mach-at91/pm.c | 343 ++++++-- + arch/arm/mach-at91/pm.h | 3 + + arch/arm/mach-at91/pm_data-offsets.c | 2 + + arch/arm/mach-at91/pm_suspend.S | 827 +++++++++++++----- + arch/arm/mach-at91/sama7.c | 33 + + include/linux/clk/at91_pmc.h | 26 + + include/soc/at91/sama7-ddr.h | 80 ++ + include/soc/at91/sama7-sfrbu.h | 34 + + 21 files changed, 3428 insertions(+), 300 deletions(-) + create mode 100644 arch/arm/boot/dts/at91-sama7g5ek.dts + create mode 100644 arch/arm/boot/dts/sama7g5-pinfunc.h + create mode 100644 arch/arm/boot/dts/sama7g5.dtsi + create mode 100644 arch/arm/configs/sama7_defconfig + create mode 100644 arch/arm/mach-at91/sama7.c + create mode 100644 include/soc/at91/sama7-ddr.h + create mode 100644 include/soc/at91/sama7-sfrbu.h +Merging drivers-memory/for-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge drivers-memory/for-next +Already up to date. +Merging imx-mxs/for-next (2332f8e99198 Merge branch 'imx/dt64' into for-next) +$ git merge imx-mxs/for-next +Auto-merging arch/arm/boot/dts/Makefile +Merge made by the 'recursive' strategy. 
+ Documentation/devicetree/bindings/arm/fsl.yaml | 4 + + .../devicetree/bindings/media/nxp,imx8-jpeg.yaml | 19 +- + .../devicetree/bindings/vendor-prefixes.yaml | 2 + + arch/arm/boot/dts/Makefile | 2 + + arch/arm/boot/dts/imx6dl-solidsense.dts | 54 ++++ + arch/arm/boot/dts/imx6q-solidsense.dts | 54 ++++ + arch/arm/boot/dts/imx6qdl-solidsense.dtsi | 160 ++++++++++++ + arch/arm/boot/dts/imx6qdl-sr-som.dtsi | 8 +- + arch/arm/mach-imx/common.h | 2 +- + arch/arm/mach-imx/mmdc.c | 15 +- + arch/arm64/boot/dts/freescale/Makefile | 1 + + arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +- + arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts | 9 + + arch/arm64/boot/dts/freescale/fsl-ls2088a-rdb.dts | 4 + + arch/arm64/boot/dts/freescale/fsl-lx2160a-rdb.dts | 4 + + arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 12 +- + arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi | 80 ++++++ + .../boot/dts/freescale/imx8mq-mnt-reform2.dts | 164 ++++++++++++ + .../boot/dts/freescale/imx8mq-nitrogen-som.dtsi | 275 +++++++++++++++++++++ + arch/arm64/boot/dts/freescale/imx8qm-ss-img.dtsi | 12 + + arch/arm64/boot/dts/freescale/imx8qm.dtsi | 2 + + arch/arm64/boot/dts/freescale/imx8qxp-ai_ml.dts | 2 +- + arch/arm64/boot/dts/freescale/imx8qxp-ss-img.dtsi | 13 + + arch/arm64/boot/dts/freescale/imx8qxp.dtsi | 2 + + drivers/soc/imx/soc-imx8m.c | 84 +------ + 25 files changed, 893 insertions(+), 93 deletions(-) + create mode 100644 arch/arm/boot/dts/imx6dl-solidsense.dts + create mode 100644 arch/arm/boot/dts/imx6q-solidsense.dts + create mode 100644 arch/arm/boot/dts/imx6qdl-solidsense.dtsi + create mode 100644 arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi + create mode 100644 arch/arm64/boot/dts/freescale/imx8mq-mnt-reform2.dts + create mode 100644 arch/arm64/boot/dts/freescale/imx8mq-nitrogen-som.dtsi + create mode 100644 arch/arm64/boot/dts/freescale/imx8qm-ss-img.dtsi + create mode 100644 arch/arm64/boot/dts/freescale/imx8qxp-ss-img.dtsi +Merging keystone/next (9d2e21ed98a2 Merge branch 
'for_5.12/drivers-soc' into next) +$ git merge keystone/next +Already up to date! +Merge made by the 'recursive' strategy. +Merging mediatek/for-next (a1c8c49de3d7 Merge branch 'v5.13-next/soc' into for-next) +$ git merge mediatek/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging mvebu/for-next (72a0a49b7c77 Merge branch 'mvebu/dt64' into mvebu/for-next) +$ git merge mvebu/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging omap/for-next (91ee322340fb Merge branch 'omap-for-v5.14/dt' into for-next) +$ git merge omap/for-next +Merge made by the 'recursive' strategy. + drivers/bus/ti-sysc.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) +Merging qcom/for-next (3f8b3dfa5522 Merge remote-tracking branch 'refs/remotes/origin/for-next' into for-next) +$ git merge qcom/for-next +Auto-merging arch/arm64/boot/dts/qcom/sm8150.dtsi +Auto-merging arch/arm64/boot/dts/qcom/sc7180.dtsi +Auto-merging arch/arm64/boot/dts/qcom/ipq8074.dtsi +Auto-merging arch/arm/boot/dts/Makefile +Removing Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.txt +Merge made by the 'recursive' strategy. 
+ Documentation/devicetree/bindings/arm/qcom.yaml | 6 + + .../devicetree/bindings/power/qcom,rpmpd.yaml | 1 + + .../devicetree/bindings/soc/qcom/qcom,aoss-qmp.txt | 87 ----- + .../bindings/soc/qcom/qcom,aoss-qmp.yaml | 114 +++++++ + .../devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml | 1 + + arch/arm/boot/dts/Makefile | 1 + + arch/arm/boot/dts/qcom-apq8064.dtsi | 6 +- + arch/arm/boot/dts/qcom-msm8226-samsung-s3ve3g.dts | 25 ++ + arch/arm/boot/dts/qcom-msm8226.dtsi | 147 +++++++++ + arch/arm/boot/dts/qcom-msm8974-samsung-klte.dts | 11 + + arch/arm64/boot/dts/qcom/Makefile | 1 + + arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts | 8 + + arch/arm64/boot/dts/qcom/ipq6018.dtsi | 50 ++- + arch/arm64/boot/dts/qcom/ipq8074.dtsi | 6 + + .../boot/dts/qcom/msm8916-wingtech-wt88047.dts | 313 ++++++++++++++++++ + .../boot/dts/qcom/msm8992-bullhead-rev-101.dts | 12 + + arch/arm64/boot/dts/qcom/msm8994.dtsi | 2 +- + arch/arm64/boot/dts/qcom/sa8155p-adp.dts | 60 +++- + .../arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi | 2 +- + arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 15 +- + arch/arm64/boot/dts/qcom/sc7180.dtsi | 76 +++++ + arch/arm64/boot/dts/qcom/sc7280-idp.dts | 110 +++++++ + arch/arm64/boot/dts/qcom/sc7280.dtsi | 352 +++++++++++++++++++- + .../boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 4 +- + arch/arm64/boot/dts/qcom/sm8150.dtsi | 358 +++++++++++++++++++-- + arch/arm64/boot/dts/qcom/sm8250.dtsi | 6 +- + arch/arm64/boot/dts/qcom/sm8350.dtsi | 4 +- + drivers/firmware/Kconfig | 2 +- + drivers/firmware/Makefile | 3 +- + drivers/firmware/qcom_scm.c | 8 +- + drivers/iommu/Kconfig | 2 + + drivers/net/wireless/ath/ath10k/Kconfig | 1 + + drivers/soc/qcom/qcom_aoss.c | 9 +- + drivers/soc/qcom/rpmhpd.c | 5 +- + drivers/soc/qcom/rpmpd.c | 28 ++ + drivers/soc/qcom/socinfo.c | 4 +- + include/dt-bindings/power/qcom-rpmpd.h | 10 + + 37 files changed, 1707 insertions(+), 143 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.txt + create mode 100644 
Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml + create mode 100644 arch/arm/boot/dts/qcom-msm8226-samsung-s3ve3g.dts + create mode 100644 arch/arm/boot/dts/qcom-msm8226.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/msm8916-wingtech-wt88047.dts +Merging raspberrypi/for-next (21c6bf8304f0 arm64: dts: broadcom: Add reference to RPi 400) +$ git merge raspberrypi/for-next +Already up to date. +Merging renesas/next (2a022240886d Merge branches 'renesas-arm-defconfig-for-v5.15' and 'renesas-arm-dt-for-v5.15' into renesas-next) +$ git merge renesas/next +Merge made by the 'recursive' strategy. + arch/arm/boot/dts/r8a73a4.dtsi | 44 -------- + arch/arm/boot/dts/r8a7742.dtsi | 4 +- + arch/arm/boot/dts/r8a7743.dtsi | 4 +- + arch/arm/boot/dts/r8a7744.dtsi | 4 +- + .../boot/dts/renesas/beacon-renesom-baseboard.dtsi | 8 ++ + arch/arm64/boot/dts/renesas/r8a774a1.dtsi | 4 +- + arch/arm64/boot/dts/renesas/r8a774b1.dtsi | 2 +- + arch/arm64/boot/dts/renesas/r8a774c0.dtsi | 8 +- + .../arm64/boot/dts/renesas/r8a77950-salvator-x.dts | 73 ------------ + arch/arm64/boot/dts/renesas/r8a77950-ulcb-kf.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77950-ulcb.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77950.dtsi | 2 + + .../arm64/boot/dts/renesas/r8a77951-salvator-x.dts | 73 ------------ + .../boot/dts/renesas/r8a77951-salvator-xs.dts | 122 --------------------- + arch/arm64/boot/dts/renesas/r8a77951-ulcb-kf.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77951-ulcb.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77951.dtsi | 5 + + arch/arm64/boot/dts/renesas/r8a77960-ulcb-kf.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77960-ulcb.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77961-ulcb-kf.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77961-ulcb.dts | 3 +- + .../boot/dts/renesas/r8a77965-salvator-xs.dts | 14 --- + arch/arm64/boot/dts/renesas/r8a77965-ulcb-kf.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77965-ulcb.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77965.dtsi | 2 + + 
arch/arm64/boot/dts/renesas/r8a77970-eagle.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77980-condor.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts | 50 ++++++++- + arch/arm64/boot/dts/renesas/r8a77990.dtsi | 6 +- + arch/arm64/boot/dts/renesas/r8a77995-draak.dts | 48 +++++++- + arch/arm64/boot/dts/renesas/r8a779a0-falcon.dts | 2 +- + arch/arm64/boot/dts/renesas/r8a779a0.dtsi | 26 ++--- + arch/arm64/boot/dts/renesas/r9a07g044.dtsi | 80 ++++++++++++++ + arch/arm64/boot/dts/renesas/salvator-common.dtsi | 76 ++++++++++++- + arch/arm64/boot/dts/renesas/salvator-xs.dtsi | 56 ++++++++++ + arch/arm64/configs/defconfig | 1 + + 36 files changed, 368 insertions(+), 371 deletions(-) +Merging reset/reset/next (28edf1d77cd5 dt-bindings: reset: Convert UniPhier glue reset to json-schema) +$ git merge reset/reset/next +Removing Documentation/devicetree/bindings/reset/uniphier-reset.txt +Merge made by the 'recursive' strategy. + .../devicetree/bindings/reset/qcom,aoss-reset.yaml | 5 ++ + .../devicetree/bindings/reset/qcom,pdc-global.yaml | 4 + + .../reset/socionext,uniphier-glue-reset.yaml | 88 ++++++++++++++++++++++ + .../devicetree/bindings/reset/uniphier-reset.txt | 61 --------------- + drivers/reset/reset-qcom-pdc.c | 62 ++++++++++++--- + include/dt-bindings/reset/qcom,sdm845-pdc.h | 2 + + 6 files changed, 150 insertions(+), 72 deletions(-) + create mode 100644 Documentation/devicetree/bindings/reset/socionext,uniphier-glue-reset.yaml + delete mode 100644 Documentation/devicetree/bindings/reset/uniphier-reset.txt +Merging rockchip/for-next (3d5667e52c37 Merge branch 'v5.15-armsoc/dts64' into for-next) +$ git merge rockchip/for-next +Removing Documentation/devicetree/bindings/power/rockchip-io-domain.txt +Merge made by the 'recursive' strategy. 
+ .../devicetree/bindings/arm/rockchip.yaml | 15 +- + .../devicetree/bindings/arm/rockchip/pmu.yaml | 2 + + .../bindings/power/rockchip-io-domain.txt | 135 --------- + .../bindings/power/rockchip-io-domain.yaml | 330 +++++++++++++++++++++ + .../devicetree/bindings/soc/rockchip/grf.yaml | 18 +- + arch/arm/boot/dts/rk3066a.dtsi | 10 +- + arch/arm/boot/dts/rk3188.dtsi | 13 +- + arch/arm/boot/dts/rk3288.dtsi | 6 - + arch/arm/boot/dts/rv1108.dtsi | 12 +- + arch/arm64/boot/dts/rockchip/Makefile | 4 + + arch/arm64/boot/dts/rockchip/px30.dtsi | 13 + + arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts | 3 - + arch/arm64/boot/dts/rockchip/rk3328-roc-pc.dts | 110 +++++++ + arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 2 +- + arch/arm64/boot/dts/rockchip/rk3328.dtsi | 5 - + arch/arm64/boot/dts/rockchip/rk3368.dtsi | 5 - + arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 2 +- + .../boot/dts/rockchip/rk3399-kobol-helios64.dts | 36 +++ + .../boot/dts/rockchip/rk3399-pinebook-pro.dts | 4 - + .../arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts | 218 ++++++++++++++ + arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 54 ++++ + .../boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts | 14 + + .../boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts | 47 +++ + arch/arm64/boot/dts/rockchip/rk3399.dtsi | 8 - + arch/arm64/boot/dts/rockchip/rk3568.dtsi | 241 +++++++++++++++ + drivers/clk/rockchip/clk-rk3036.c | 5 +- + drivers/soc/rockchip/Kconfig | 4 +- + include/dt-bindings/clock/rk3036-cru.h | 1 + + 28 files changed, 1128 insertions(+), 189 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/power/rockchip-io-domain.txt + create mode 100644 Documentation/devicetree/bindings/power/rockchip-io-domain.yaml + create mode 100644 arch/arm64/boot/dts/rockchip/rk3328-roc-pc.dts + create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts + create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts + create mode 100644 
arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts +Merging samsung-krzk/for-next (bb2c20c976aa Merge branch 'next/soc' into for-next) +$ git merge samsung-krzk/for-next +Merge made by the 'recursive' strategy. + arch/arm/mach-s3c/mach-gta02.c | 3 +- + arch/arm64/boot/dts/exynos/exynos5433.dtsi | 70 ++++++++++++++++++++++++++++++ + arch/arm64/boot/dts/exynos/exynos7.dtsi | 35 +++++++++++++++ + 3 files changed, 106 insertions(+), 2 deletions(-) +Merging scmi/for-linux-next (7929b794edc7 Merge branch 'for-next/juno' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux into for-linux-next) +$ git merge scmi/for-linux-next +Merge made by the 'recursive' strategy. + arch/arm64/boot/dts/arm/fvp-base-revc.dts | 12 ------------ + 1 file changed, 12 deletions(-) +Merging stm32/stm32-next (b462e2fc78f0 ARM: dts: stm32: Update AV96 adv7513 node per dtbs_check) +$ git merge stm32/stm32-next +Merge made by the 'recursive' strategy. + arch/arm/boot/dts/stm32mp157c-ed1.dts | 4 ++-- + arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi | 4 ++++ + arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 25 ++++++++++++++++++---- + arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 4 +++- + arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi | 6 ++---- + arch/arm/boot/dts/stm32mp15xx-dkx.dtsi | 12 +++++------ + 6 files changed, 38 insertions(+), 17 deletions(-) +Merging sunxi/sunxi/for-next (3f1c53207cf0 Merge branches 'sunxi/dt-for-5.14' and 'sunxi/fixes-for-5.13' into sunxi/for-next) +$ git merge sunxi/sunxi/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging tegra/for-next (51b89b49954f Merge branch for-5.14/arm64/dt into for-next) +$ git merge tegra/for-next +Merge made by the 'recursive' strategy. 
+ arch/arm64/boot/dts/nvidia/tegra194.dtsi | 60 ++++++++++++++++++++++++++++---- + 1 file changed, 54 insertions(+), 6 deletions(-) +Merging ti-k3/ti-k3-next (1e3d655fe7b4 Merge branch 'ti-k3-config-next' into ti-k3-next) +$ git merge ti-k3/ti-k3-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging ti-k3-new/ti-k3-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge ti-k3-new/ti-k3-next +Already up to date. +Merging xilinx/for-next (d98fb9ab6255 Merge branch 'zynq/dt' into for-next) +$ git merge xilinx/for-next +Auto-merging arch/arm/configs/multi_v7_defconfig +Merge made by the 'recursive' strategy. + arch/arm/boot/dts/zynq-7000.dtsi | 21 +++++++++++++++++++++ + arch/arm/boot/dts/zynq-ebaz4205.dts | 12 ++++++++++++ + arch/arm/configs/multi_v7_defconfig | 1 + + 3 files changed, 34 insertions(+) +Merging clk/clk-next (67ce7b523a9d Merge branch 'clk-qcom' into clk-next) +$ git merge clk/clk-next +Merge made by the 'recursive' strategy. + .../devicetree/bindings/clock/qcom,gpucc.yaml | 6 +- + .../bindings/clock/qcom,sc7280-dispcc.yaml | 94 +++ + .../devicetree/bindings/clock/qcom,videocc.yaml | 6 +- + drivers/clk/qcom/Kconfig | 25 + + drivers/clk/qcom/Makefile | 3 + + drivers/clk/qcom/dispcc-sc7280.c | 908 +++++++++++++++++++++ + drivers/clk/qcom/gpucc-sc7280.c | 491 +++++++++++ + drivers/clk/qcom/videocc-sc7280.c | 325 ++++++++ + include/dt-bindings/clock/qcom,dispcc-sc7280.h | 55 ++ + include/dt-bindings/clock/qcom,gcc-sc7280.h | 2 +- + include/dt-bindings/clock/qcom,gpucc-sc7280.h | 35 + + include/dt-bindings/clock/qcom,videocc-sc7280.h | 27 + + 12 files changed, 1972 insertions(+), 5 deletions(-) + create mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml + create mode 100644 drivers/clk/qcom/dispcc-sc7280.c + create mode 100644 drivers/clk/qcom/gpucc-sc7280.c + create mode 100644 drivers/clk/qcom/videocc-sc7280.c + create mode 100644 include/dt-bindings/clock/qcom,dispcc-sc7280.h + create mode 100644 
include/dt-bindings/clock/qcom,gpucc-sc7280.h + create mode 100644 include/dt-bindings/clock/qcom,videocc-sc7280.h +Merging clk-imx/for-next (c586f53ae159 clk: imx8mq: remove SYS PLL 1/2 clock gates) +$ git merge clk-imx/for-next +Already up to date. +Merging clk-renesas/renesas-clk (1b87d5bba32c clk: renesas: r9a07g044: Add clock and reset entries for ADC) +$ git merge clk-renesas/renesas-clk +Merge made by the 'recursive' strategy. + drivers/clk/renesas/Makefile | 2 +- + drivers/clk/renesas/r8a774a1-cpg-mssr.c | 2 +- + drivers/clk/renesas/r8a774b1-cpg-mssr.c | 2 +- + drivers/clk/renesas/r8a774c0-cpg-mssr.c | 2 +- + drivers/clk/renesas/r8a774e1-cpg-mssr.c | 2 +- + drivers/clk/renesas/r8a779a0-cpg-mssr.c | 5 +- + drivers/clk/renesas/r9a07g044-cpg.c | 69 +++++++++++++++++++++- + .../renesas/{renesas-rzg2l-cpg.c => rzg2l-cpg.c} | 22 +++---- + .../renesas/{renesas-rzg2l-cpg.h => rzg2l-cpg.h} | 0 + 9 files changed, 84 insertions(+), 22 deletions(-) + rename drivers/clk/renesas/{renesas-rzg2l-cpg.c => rzg2l-cpg.c} (98%) + rename drivers/clk/renesas/{renesas-rzg2l-cpg.h => rzg2l-cpg.h} (100%) +Merging clk-samsung/for-next (a38fd8748464 Linux 5.12-rc2) +$ git merge clk-samsung/for-next +Already up to date. +Merging csky/linux-next (90dc8c0e664e csky: Kconfig: Remove unused selects) +$ git merge csky/linux-next +Already up to date. +Merging h8300/h8300-next (6e5e55227c95 Merge tag 'v5.11' into h8300-next) +$ git merge h8300/h8300-next +Auto-merging drivers/tty/serial/sh-sci.c +Auto-merging drivers/net/ethernet/smsc/smc91x.c +Auto-merging arch/h8300/kernel/setup.c +Merge made by the 'recursive' strategy. 
+ arch/h8300/Kconfig.cpu | 4 ++ + arch/h8300/boot/dts/edosk2674.dts | 10 ++++- + arch/h8300/boot/dts/h8300h_sim.dts | 2 +- + arch/h8300/boot/dts/h8s_sim.dts | 2 +- + arch/h8300/configs/edosk2674_defconfig | 10 ++--- + arch/h8300/configs/h8300h-sim_defconfig | 8 +--- + arch/h8300/configs/h8s-sim_defconfig | 8 +--- + arch/h8300/kernel/setup.c | 2 +- + arch/h8300/lib/memset.S | 17 +++++---- + drivers/clocksource/h8300_timer8.c | 20 +++++----- + drivers/irqchip/irq-renesas-h8300h.c | 19 +++++++++- + drivers/irqchip/irq-renesas-h8s.c | 67 +++++++++++++++++++++++++-------- + drivers/net/ethernet/smsc/Kconfig | 1 - + drivers/net/ethernet/smsc/smc91x.c | 10 +++++ + drivers/tty/serial/sh-sci.c | 5 +-- + 15 files changed, 123 insertions(+), 62 deletions(-) +Merging m68k/for-next (a0b22464ce93 m68k: defconfig: Update defconfigs for v5.14-rc1:) +$ git merge m68k/for-next +Merge made by the 'recursive' strategy. + arch/m68k/configs/amiga_defconfig | 6 ++++-- + arch/m68k/configs/apollo_defconfig | 4 +++- + arch/m68k/configs/atari_defconfig | 6 ++++-- + arch/m68k/configs/bvme6000_defconfig | 4 +++- + arch/m68k/configs/hp300_defconfig | 4 +++- + arch/m68k/configs/mac_defconfig | 6 ++++-- + arch/m68k/configs/multi_defconfig | 6 ++++-- + arch/m68k/configs/mvme147_defconfig | 4 +++- + arch/m68k/configs/mvme16x_defconfig | 4 +++- + arch/m68k/configs/q40_defconfig | 6 ++++-- + arch/m68k/configs/sun3_defconfig | 4 +++- + arch/m68k/configs/sun3x_defconfig | 4 +++- + arch/m68k/emu/nfeth.c | 4 ++-- + 13 files changed, 43 insertions(+), 19 deletions(-) +Merging m68knommu/for-next (64151620227a m68k: m5441x: add flexcan support) +$ git merge m68knommu/for-next +Auto-merging arch/m68k/Kconfig.machine +Merge made by the 'recursive' strategy. 
+ arch/m68k/Kconfig.machine | 1 + + arch/m68k/coldfire/device.c | 44 +++++++++++++++++++++++++++++++++++++++ + arch/m68k/coldfire/m525x.c | 2 +- + arch/m68k/coldfire/m5441x.c | 8 +++---- + arch/m68k/coldfire/stmark2.c | 6 ++++-- + arch/m68k/include/asm/m5441xsim.h | 19 +++++++++++++++++ + 6 files changed, 73 insertions(+), 7 deletions(-) +Merging microblaze/next (14a832498c23 arch: microblaze: Fix spelling mistake "vesion" -> "version") +$ git merge microblaze/next +Already up to date. +Merging mips/mips-next (d656132d2a2a mips: clean up kvm Makefile) +$ git merge mips/mips-next +Merge made by the 'recursive' strategy. + arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c | 21 +++--------- + .../executive/cvmx-interrupt-decodes.c | 17 +++++++--- + arch/mips/cavium-octeon/setup.c | 38 +--------------------- + arch/mips/generic/board-ingenic.c | 21 ++++++++++++ + arch/mips/include/asm/bootinfo.h | 3 ++ + arch/mips/include/asm/cpu.h | 4 +-- + arch/mips/kvm/Makefile | 19 +++++------ + arch/mips/netlogic/xlr/fmn-config.c | 15 +++++---- + 8 files changed, 60 insertions(+), 78 deletions(-) +Merging nds32/next (40e0dd851e7b nds32: Fix bogus reference to ) +$ git merge nds32/next +Already up to date. +Merging nios2/for-next (7f7bc20bc41a nios2: Don't use _end for calculating min_low_pfn) +$ git merge nios2/for-next +Already up to date. +Merging openrisc/for-next (ad4e600cbf89 drivers/soc/litex: remove 8-bit subregister option) +$ git merge openrisc/for-next +Already up to date. +Merging parisc-hd/for-next (ca2b19a4bdb6 parisc: Increase gcc stack frame check to 2048 for 32- and 64-bit) +$ git merge parisc-hd/for-next +Auto-merging lib/Kconfig.debug +Merge made by the 'recursive' strategy. + drivers/parisc/led.c | 4 ++-- + lib/Kconfig.debug | 5 ++--- + 2 files changed, 4 insertions(+), 5 deletions(-) +Merging powerpc/next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge powerpc/next +Already up to date. 
+Merging soc-fsl/next (242b0b398ccd soc: fsl: enable acpi support in RCPM driver) +$ git merge soc-fsl/next +Already up to date. +Merging risc-v/for-next (67979e927dd0 riscv: kprobes: implement the branch instructions) +$ git merge risc-v/for-next +Merge made by the 'recursive' strategy. + arch/riscv/kernel/probes/decode-insn.c | 5 +- + arch/riscv/kernel/probes/simulate-insn.c | 112 +++++++++++++++++++++++++++++++ + 2 files changed, 114 insertions(+), 3 deletions(-) +Merging s390/for-next (2f5f912c5b60 Merge branch 'fixes' into for-next) +$ git merge s390/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging sh/for-next (2882b7626f49 sh: kernel: traps: remove unused variable) +$ git merge sh/for-next +Auto-merging drivers/sh/maple/maple.c +Merge made by the 'recursive' strategy. + arch/sh/boards/mach-landisk/irq.c | 4 ++-- + arch/sh/boot/Makefile | 4 ++-- + arch/sh/boot/compressed/.gitignore | 5 ----- + arch/sh/boot/compressed/Makefile | 32 +++++++++++++------------------- + arch/sh/boot/compressed/ashiftrt.S | 2 ++ + arch/sh/boot/compressed/ashldi3.c | 2 ++ + arch/sh/boot/compressed/ashlsi3.S | 2 ++ + arch/sh/boot/compressed/ashrsi3.S | 2 ++ + arch/sh/boot/compressed/lshrsi3.S | 2 ++ + arch/sh/include/asm/checksum_32.h | 5 +++-- + arch/sh/include/asm/uaccess.h | 4 ++-- + arch/sh/kernel/cpu/sh4a/smp-shx3.c | 5 +++-- + arch/sh/kernel/crash_dump.c | 4 ++-- + arch/sh/kernel/traps_32.c | 8 ++++---- + arch/sh/math-emu/math.c | 4 ++-- + arch/sh/mm/nommu.c | 4 ++-- + drivers/sh/maple/maple.c | 5 ++++- + 17 files changed, 49 insertions(+), 45 deletions(-) + create mode 100644 arch/sh/boot/compressed/ashiftrt.S + create mode 100644 arch/sh/boot/compressed/ashldi3.c + create mode 100644 arch/sh/boot/compressed/ashlsi3.S + create mode 100644 arch/sh/boot/compressed/ashrsi3.S + create mode 100644 arch/sh/boot/compressed/lshrsi3.S +Merging sparc-next/master (dd0d718152e4 Merge tag 'spi-fix-v5.8-rc2' of 
git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi) +$ git merge sparc-next/master +Already up to date. +Merging uml/linux-next (1aee020155f3 um: remove unneeded semicolon in um_arch.c) +$ git merge uml/linux-next +Already up to date. +Merging xtensa/xtensa-for-next (ac637a0ada71 xtensa: add fairness to IRQ handling) +$ git merge xtensa/xtensa-for-next +Merge made by the 'recursive' strategy. + arch/xtensa/Kconfig | 2 +- + arch/xtensa/kernel/traps.c | 7 +++++++ + 2 files changed, 8 insertions(+), 1 deletion(-) +Merging pidfd/for-next (5ddf9602d711 Merge branch 'fs.mount_setattr.nosymfollow' into for-next) +$ git merge pidfd/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging fscrypt/master (2fc2b430f559 fscrypt: fix derivation of SipHash keys on big endian CPUs) +$ git merge fscrypt/master +Already up to date. +Merging fscache/fscache-next (5193f26aef59 fscache: Use refcount_t for the cookie refcount instead of atomic_t) +$ git merge fscache/fscache-next +Removing fs/fscache/object-list.c +Removing fs/fscache/histogram.c +Removing fs/cachefiles/proc.c +Merge made by the 'recursive' strategy. 
+ fs/cachefiles/Kconfig | 19 -- + fs/cachefiles/Makefile | 2 - + fs/cachefiles/bind.c | 2 - + fs/cachefiles/interface.c | 6 +- + fs/cachefiles/internal.h | 25 --- + fs/cachefiles/io.c | 6 +- + fs/cachefiles/key.c | 2 +- + fs/cachefiles/main.c | 7 - + fs/cachefiles/namei.c | 61 ++---- + fs/cachefiles/proc.c | 114 ----------- + fs/cachefiles/xattr.c | 4 +- + fs/fscache/Kconfig | 25 +-- + fs/fscache/Makefile | 2 - + fs/fscache/cache.c | 11 +- + fs/fscache/cookie.c | 201 +++++++++++++----- + fs/fscache/fsdef.c | 3 +- + fs/fscache/histogram.c | 87 -------- + fs/fscache/internal.h | 57 ++---- + fs/fscache/main.c | 39 ++++ + fs/fscache/netfs.c | 2 +- + fs/fscache/object-list.c | 414 -------------------------------------- + fs/fscache/object.c | 8 - + fs/fscache/operation.c | 3 - + fs/fscache/page.c | 6 - + fs/fscache/proc.c | 20 +- + include/linux/fscache-cache.h | 4 - + include/linux/fscache.h | 4 +- + include/linux/netfs.h | 2 +- + include/trace/events/cachefiles.h | 68 +++---- + include/trace/events/fscache.h | 160 +++++++-------- + include/trace/events/netfs.h | 2 +- + 31 files changed, 368 insertions(+), 998 deletions(-) + delete mode 100644 fs/cachefiles/proc.c + delete mode 100644 fs/fscache/histogram.c + delete mode 100644 fs/fscache/object-list.c +Merging afs/afs-next (7af08140979a Revert "gcov: clang: fix clang-11+ build") +$ git merge afs/afs-next +Already up to date. +Merging btrfs/for-next (808c6838dfee Merge branch 'for-next-current-v5.12-20210624' into for-next-20210624) +$ git merge btrfs/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging ceph/master (0077a5008272 rbd: resurrect setting of disk->private_data in rbd_init_disk()) +$ git merge ceph/master +Merge made by the 'recursive' strategy. 
+ drivers/block/rbd.c | 33 +++++++++++++-------------------- + fs/ceph/mds_client.c | 2 +- + 2 files changed, 14 insertions(+), 21 deletions(-) +Merging cifs/for-next (7b09d4e0be94 CIFS: Clarify SMB1 code for POSIX delete file) +$ git merge cifs/for-next +Merge made by the 'recursive' strategy. + fs/cifs/cifssmb.c | 10 ++- + fs/cifs/connect.c | 4 +- + fs/cifs/dfs_cache.c | 229 ++++++++++++++++++++++++++++++++++++++++++--------- + fs/cifs/dfs_cache.h | 3 + + fs/cifs/fs_context.c | 7 ++ + fs/cifs/smb2ops.c | 26 ++++-- + 6 files changed, 229 insertions(+), 50 deletions(-) +Merging cifsd/cifsd-for-next (9946aa630ae7 Merge pull request #61 from namjaejeon/cifsd-for-next) +$ git merge cifsd/cifsd-for-next +Auto-merging fs/Kconfig +Auto-merging MAINTAINERS +Auto-merging Documentation/filesystems/index.rst +Merge made by the 'recursive' strategy. + Documentation/filesystems/cifs/index.rst | 10 + + Documentation/filesystems/cifs/ksmbd.rst | 164 + + Documentation/filesystems/index.rst | 2 +- + MAINTAINERS | 11 +- + fs/Kconfig | 1 + + fs/Makefile | 1 + + fs/ksmbd/Kconfig | 69 + + fs/ksmbd/Makefile | 20 + + fs/ksmbd/asn1.c | 343 ++ + fs/ksmbd/asn1.h | 21 + + fs/ksmbd/auth.c | 1364 +++++ + fs/ksmbd/auth.h | 67 + + fs/ksmbd/connection.c | 413 ++ + fs/ksmbd/connection.h | 211 + + fs/ksmbd/crypto_ctx.c | 282 + + fs/ksmbd/crypto_ctx.h | 74 + + fs/ksmbd/glob.h | 49 + + fs/ksmbd/ksmbd_netlink.h | 395 ++ + fs/ksmbd/ksmbd_spnego_negtokeninit.asn1 | 31 + + fs/ksmbd/ksmbd_spnego_negtokentarg.asn1 | 19 + + fs/ksmbd/ksmbd_work.c | 80 + + fs/ksmbd/ksmbd_work.h | 117 + + fs/ksmbd/mgmt/ksmbd_ida.c | 46 + + fs/ksmbd/mgmt/ksmbd_ida.h | 34 + + fs/ksmbd/mgmt/share_config.c | 238 + + fs/ksmbd/mgmt/share_config.h | 81 + + fs/ksmbd/mgmt/tree_connect.c | 121 + + fs/ksmbd/mgmt/tree_connect.h | 56 + + fs/ksmbd/mgmt/user_config.c | 69 + + fs/ksmbd/mgmt/user_config.h | 66 + + fs/ksmbd/mgmt/user_session.c | 369 ++ + fs/ksmbd/mgmt/user_session.h | 106 + + fs/ksmbd/misc.c | 338 ++ + fs/ksmbd/misc.h | 35 + + 
fs/ksmbd/ndr.c | 338 ++ + fs/ksmbd/ndr.h | 22 + + fs/ksmbd/nterr.h | 543 ++ + fs/ksmbd/ntlmssp.h | 169 + + fs/ksmbd/oplock.c | 1709 ++++++ + fs/ksmbd/oplock.h | 131 + + fs/ksmbd/server.c | 633 +++ + fs/ksmbd/server.h | 70 + + fs/ksmbd/smb2misc.c | 433 ++ + fs/ksmbd/smb2ops.c | 308 ++ + fs/ksmbd/smb2pdu.c | 8299 ++++++++++++++++++++++++++++++ + fs/ksmbd/smb2pdu.h | 1684 ++++++ + fs/ksmbd/smb_common.c | 655 +++ + fs/ksmbd/smb_common.h | 543 ++ + fs/ksmbd/smbacl.c | 1344 +++++ + fs/ksmbd/smbacl.h | 212 + + fs/ksmbd/smbfsctl.h | 91 + + fs/ksmbd/smbstatus.h | 1822 +++++++ + fs/ksmbd/transport_ipc.c | 874 ++++ + fs/ksmbd/transport_ipc.h | 47 + + fs/ksmbd/transport_rdma.c | 2057 ++++++++ + fs/ksmbd/transport_rdma.h | 63 + + fs/ksmbd/transport_tcp.c | 619 +++ + fs/ksmbd/transport_tcp.h | 13 + + fs/ksmbd/unicode.c | 384 ++ + fs/ksmbd/unicode.h | 357 ++ + fs/ksmbd/uniupr.h | 268 + + fs/ksmbd/vfs.c | 1886 +++++++ + fs/ksmbd/vfs.h | 197 + + fs/ksmbd/vfs_cache.c | 725 +++ + fs/ksmbd/vfs_cache.h | 178 + + fs/ksmbd/xattr.h | 122 + + 66 files changed, 32097 insertions(+), 2 deletions(-) + create mode 100644 Documentation/filesystems/cifs/index.rst + create mode 100644 Documentation/filesystems/cifs/ksmbd.rst + create mode 100644 fs/ksmbd/Kconfig + create mode 100644 fs/ksmbd/Makefile + create mode 100644 fs/ksmbd/asn1.c + create mode 100644 fs/ksmbd/asn1.h + create mode 100644 fs/ksmbd/auth.c + create mode 100644 fs/ksmbd/auth.h + create mode 100644 fs/ksmbd/connection.c + create mode 100644 fs/ksmbd/connection.h + create mode 100644 fs/ksmbd/crypto_ctx.c + create mode 100644 fs/ksmbd/crypto_ctx.h + create mode 100644 fs/ksmbd/glob.h + create mode 100644 fs/ksmbd/ksmbd_netlink.h + create mode 100644 fs/ksmbd/ksmbd_spnego_negtokeninit.asn1 + create mode 100644 fs/ksmbd/ksmbd_spnego_negtokentarg.asn1 + create mode 100644 fs/ksmbd/ksmbd_work.c + create mode 100644 fs/ksmbd/ksmbd_work.h + create mode 100644 fs/ksmbd/mgmt/ksmbd_ida.c + create mode 100644 fs/ksmbd/mgmt/ksmbd_ida.h + 
create mode 100644 fs/ksmbd/mgmt/share_config.c + create mode 100644 fs/ksmbd/mgmt/share_config.h + create mode 100644 fs/ksmbd/mgmt/tree_connect.c + create mode 100644 fs/ksmbd/mgmt/tree_connect.h + create mode 100644 fs/ksmbd/mgmt/user_config.c + create mode 100644 fs/ksmbd/mgmt/user_config.h + create mode 100644 fs/ksmbd/mgmt/user_session.c + create mode 100644 fs/ksmbd/mgmt/user_session.h + create mode 100644 fs/ksmbd/misc.c + create mode 100644 fs/ksmbd/misc.h + create mode 100644 fs/ksmbd/ndr.c + create mode 100644 fs/ksmbd/ndr.h + create mode 100644 fs/ksmbd/nterr.h + create mode 100644 fs/ksmbd/ntlmssp.h + create mode 100644 fs/ksmbd/oplock.c + create mode 100644 fs/ksmbd/oplock.h + create mode 100644 fs/ksmbd/server.c + create mode 100644 fs/ksmbd/server.h + create mode 100644 fs/ksmbd/smb2misc.c + create mode 100644 fs/ksmbd/smb2ops.c + create mode 100644 fs/ksmbd/smb2pdu.c + create mode 100644 fs/ksmbd/smb2pdu.h + create mode 100644 fs/ksmbd/smb_common.c + create mode 100644 fs/ksmbd/smb_common.h + create mode 100644 fs/ksmbd/smbacl.c + create mode 100644 fs/ksmbd/smbacl.h + create mode 100644 fs/ksmbd/smbfsctl.h + create mode 100644 fs/ksmbd/smbstatus.h + create mode 100644 fs/ksmbd/transport_ipc.c + create mode 100644 fs/ksmbd/transport_ipc.h + create mode 100644 fs/ksmbd/transport_rdma.c + create mode 100644 fs/ksmbd/transport_rdma.h + create mode 100644 fs/ksmbd/transport_tcp.c + create mode 100644 fs/ksmbd/transport_tcp.h + create mode 100644 fs/ksmbd/unicode.c + create mode 100644 fs/ksmbd/unicode.h + create mode 100644 fs/ksmbd/uniupr.h + create mode 100644 fs/ksmbd/vfs.c + create mode 100644 fs/ksmbd/vfs.h + create mode 100644 fs/ksmbd/vfs_cache.c + create mode 100644 fs/ksmbd/vfs_cache.h + create mode 100644 fs/ksmbd/xattr.h +Merging configfs/for-next (420405ecde06 configfs: fix the read and write iterators) +$ git merge configfs/for-next +Already up to date. 
+Merging ecryptfs/next (682a8e2b41ef Merge tag 'ecryptfs-5.13-rc1-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs) +$ git merge ecryptfs/next +Already up to date. +Merging erofs/dev (8215d5b7f15f MAINTAINERS: erofs: update my email address) +$ git merge erofs/dev +Already up to date. +Merging exfat/dev (50be9417e23a Merge tag 'io_uring-5.14-2021-07-09' of git://git.kernel.dk/linux-block) +$ git merge exfat/dev +Already up to date. +Merging ext3/for_next (f97d9dea9404 Merge hole punch fixes.) +$ git merge ext3/for_next +Auto-merging mm/rmap.c +Auto-merging fs/zonefs/super.c +Auto-merging fs/xfs/xfs_inode.c +Auto-merging fs/fuse/dax.c +Auto-merging fs/cifs/smb2ops.c +Merge made by the 'recursive' strategy. + Documentation/filesystems/locking.rst | 77 ++++++++++++------ + fs/ceph/addr.c | 9 ++- + fs/ceph/file.c | 2 + + fs/cifs/smb2ops.c | 2 + + fs/ext2/dir.c | 12 +-- + fs/ext2/ext2.h | 14 +--- + fs/ext2/file.c | 7 +- + fs/ext2/inode.c | 12 +-- + fs/ext2/namei.c | 4 +- + fs/ext2/super.c | 3 - + fs/ext4/ext4.h | 10 --- + fs/ext4/extents.c | 25 +++--- + fs/ext4/file.c | 13 +-- + fs/ext4/inode.c | 47 ++++------- + fs/ext4/ioctl.c | 4 +- + fs/ext4/super.c | 13 ++- + fs/ext4/truncate.h | 8 +- + fs/f2fs/data.c | 8 +- + fs/f2fs/f2fs.h | 1 - + fs/f2fs/file.c | 62 +++++++-------- + fs/f2fs/super.c | 1 - + fs/fuse/dax.c | 50 ++++++------ + fs/fuse/dir.c | 11 +-- + fs/fuse/file.c | 10 +-- + fs/fuse/fuse_i.h | 7 -- + fs/fuse/inode.c | 1 - + fs/inode.c | 2 + + fs/reiserfs/stree.c | 31 ++++++-- + fs/reiserfs/super.c | 8 ++ + fs/xfs/xfs_bmap_util.c | 15 ++-- + fs/xfs/xfs_file.c | 13 +-- + fs/xfs/xfs_inode.c | 121 ++++++++++++++-------------- + fs/xfs/xfs_inode.h | 3 +- + fs/xfs/xfs_super.c | 2 - + fs/zonefs/super.c | 23 ++---- + fs/zonefs/zonefs.h | 7 +- + include/linux/fs.h | 39 +++++++++ + mm/filemap.c | 145 ++++++++++++++++++++++++++++------ + mm/madvise.c | 2 +- + mm/memory-failure.c | 2 +- + mm/readahead.c | 2 + + mm/rmap.c | 41 +++++----- + mm/shmem.c 
| 20 ++--- + mm/truncate.c | 9 ++- + 44 files changed, 525 insertions(+), 373 deletions(-) +Merging ext4/dev (0705e8d1e220 ext4: inline jbd2_journal_[un]register_shrinker()) +$ git merge ext4/dev +Already up to date. +Merging f2fs/dev (fbc515e1c049 f2fs: don't sleep while grabing nat_tree_lock) +$ git merge f2fs/dev +Auto-merging fs/f2fs/super.c +Auto-merging fs/f2fs/file.c +CONFLICT (content): Merge conflict in fs/f2fs/file.c +Auto-merging fs/f2fs/f2fs.h +Auto-merging fs/f2fs/data.c +Recorded preimage for 'fs/f2fs/file.c' +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +Recorded resolution for 'fs/f2fs/file.c'. +[master 54451046bbf4] Merge remote-tracking branch 'f2fs/dev' +$ git diff -M --stat --summary HEAD^.. + Documentation/ABI/testing/sysfs-fs-f2fs | 14 +++ + fs/f2fs/data.c | 66 ++---------- + fs/f2fs/debug.c | 9 ++ + fs/f2fs/f2fs.h | 21 ++-- + fs/f2fs/file.c | 173 +++++++++++++++++++------------- + fs/f2fs/gc.c | 3 +- + fs/f2fs/node.c | 14 ++- + fs/f2fs/segment.c | 2 +- + fs/f2fs/super.c | 84 +++++++++------- + fs/f2fs/sysfs.c | 30 +++++- + include/trace/events/f2fs.h | 12 +-- + 11 files changed, 242 insertions(+), 186 deletions(-) +$ git am -3 ../patches/0001-fxup-for-f2fs-Convert-to-using-invalidate_lock.patch +Applying: fxup for "f2fs: Convert to using invalidate_lock" +$ git reset HEAD^ +Unstaged changes after reset: +M fs/f2fs/data.c +$ git add -A . +$ git commit -v -a --amend +[master 6a76d53a9418] Merge remote-tracking branch 'f2fs/dev' + Date: Fri Jul 23 10:20:12 2021 +1000 +Merging fsverity/fsverity (07c99001312c fs-verity: support reading signature with ioctl) +$ git merge fsverity/fsverity +Already up to date. +Merging fuse/for-next (c4e0cd4e0c16 virtiofs: Fix spelling mistakes) +$ git merge fuse/for-next +Already up to date. +Merging gfs2/for-next (a6579cbfd721 gfs2: Fix memory leak of object lsi on error return path) +$ git merge gfs2/for-next +Merge made by the 'recursive' strategy. 
+ fs/gfs2/ops_fstype.c | 1 + + 1 file changed, 1 insertion(+) +Merging jfs/jfs-next (5d299f44d765 jfs: Avoid field-overflowing memcpy()) +$ git merge jfs/jfs-next +Already up to date. +Merging nfs/linux-next (2734d6c1b1a0 Linux 5.14-rc2) +$ git merge nfs/linux-next +Already up to date. +Merging nfs-anna/linux-next (4f8be1f53bf6 nfs: we don't support removing system.nfs4_acl) +$ git merge nfs-anna/linux-next +Already up to date. +Merging nfsd/nfsd-next (ab1016d39cc0 nfsd: fix NULL dereference in nfs3svc_encode_getaclres) +$ git merge nfsd/nfsd-next +Already up to date. +Merging cel/for-next (9b5b7a72fdc4 NFSD: Use new __string_len C macros for nfsd_clid_class) +$ git merge cel/for-next +Merge made by the 'recursive' strategy. + fs/nfsd/trace.h | 17 +++++++---------- + fs/nfsd/vfs.c | 20 +++++--------------- + include/linux/sunrpc/svc.h | 4 ++++ + include/trace/trace_events.h | 22 ++++++++++++++++++++++ + net/sunrpc/svc.c | 21 +++++++++++++++++++++ + net/sunrpc/svc_xprt.c | 3 +++ + samples/trace_events/trace-events-sample.h | 27 +++++++++++++++++++++++++++ + 7 files changed, 89 insertions(+), 25 deletions(-) +Merging orangefs/for-next (0fdec1b3c9fb orangefs: fix orangefs df output.) +$ git merge orangefs/for-next +Already up to date. +Merging overlayfs/overlayfs-next (cd94017fb9fa ovl: use kvalloc in xattr copy-up) +$ git merge overlayfs/overlayfs-next +Auto-merging mm/util.c +Auto-merging include/linux/mm.h +Auto-merging include/linux/fs.h +Merge made by the 'recursive' strategy. 
+ Documentation/filesystems/overlayfs.rst | 3 + + fs/orangefs/inode.c | 7 +-- + fs/overlayfs/copy_up.c | 83 +++++++++++++++++++++------ + fs/overlayfs/dir.c | 10 +++- + fs/overlayfs/file.c | 4 +- + fs/overlayfs/inode.c | 99 ++++++++++++++++++++++++++------- + fs/overlayfs/namei.c | 4 +- + fs/overlayfs/overlayfs.h | 42 ++++++++++++-- + fs/overlayfs/readdir.c | 5 ++ + fs/overlayfs/super.c | 4 +- + fs/overlayfs/util.c | 92 ++++++++++++++++++++++++++++-- + fs/stat.c | 18 ++++++ + include/linux/fs.h | 1 + + include/linux/mm.h | 2 +- + include/linux/stat.h | 4 ++ + mm/mmap.c | 2 +- + mm/util.c | 27 ++++++++- + 17 files changed, 345 insertions(+), 62 deletions(-) +Merging ubifs/next (a801fcfeef96 ubifs: Set/Clear I_LINKABLE under i_lock for whiteout inode) +$ git merge ubifs/next +Already up to date. +Merging v9fs/9p-next (cba7dcd8878e 9p/trans_virtio: Remove sysfs file on probe failure) +$ git merge v9fs/9p-next +Auto-merging net/9p/trans_virtio.c +Merge made by the 'recursive' strategy. + net/9p/trans_virtio.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) +Merging xfs/for-next (b102a46ce16f xfs: detect misaligned rtinherit directory extent size hints) +$ git merge xfs/for-next +Already up to date. +Merging zonefs/for-next (95b115332a83 zonefs: remove redundant null bio check) +$ git merge zonefs/for-next +Auto-merging fs/zonefs/super.c +Merge made by the 'recursive' strategy. +Merging iomap/iomap-for-next (229adf3c64db iomap: Don't create iomap_page objects in iomap_page_mkwrite_actor) +$ git merge iomap/iomap-for-next +Already up to date. +Merging djw-vfs/vfs-for-next (9b8523423b23 vfs: move __sb_{start,end}_write* to fs.h) +$ git merge djw-vfs/vfs-for-next +Already up to date. +Merging file-locks/locks-next (e1cc6e8c1969 fcntl: fix potential deadlock for &fasync_struct.fa_lock) +$ git merge file-locks/locks-next +Auto-merging fs/fcntl.c +Merge made by the 'recursive' strategy. 
+ fs/fcntl.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) +Merging vfs/for-next (303392fd5c16 Merge tag 'leds-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds) +$ git merge vfs/for-next +Already up to date. +Merging printk/for-next (fbdc7b96a5ae Merge branch 'for-5.15-printk-index' into for-next) +$ git merge printk/for-next +Auto-merging init/Kconfig +Auto-merging drivers/base/core.c +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + MAINTAINERS | 5 + + arch/arm/kernel/entry-v7m.S | 2 +- + arch/arm/lib/backtrace-clang.S | 2 +- + arch/arm/lib/backtrace.S | 2 +- + arch/arm/mach-rpc/io-acorn.S | 2 +- + arch/arm/vfp/vfphw.S | 6 +- + arch/ia64/include/uapi/asm/cmpxchg.h | 4 +- + arch/openrisc/kernel/entry.S | 6 +- + arch/powerpc/kernel/head_fsl_booke.S | 2 +- + arch/um/include/shared/user.h | 3 +- + arch/x86/kernel/head_32.S | 2 +- + drivers/base/core.c | 6 +- + include/asm-generic/vmlinux.lds.h | 13 +++ + include/linux/dev_printk.h | 66 +++++++++--- + include/linux/module.h | 5 + + include/linux/printk.h | 95 ++++++++++++++++- + init/Kconfig | 14 +++ + kernel/module.c | 5 + + kernel/printk/Makefile | 1 + + kernel/printk/index.c | 195 +++++++++++++++++++++++++++++++++++ + kernel/printk/internal.h | 8 ++ + kernel/printk/printk.c | 67 ++++++------ + lib/string_helpers.c | 4 + + lib/test-string_helpers.c | 14 +-- + 24 files changed, 451 insertions(+), 78 deletions(-) + create mode 100644 kernel/printk/index.c +Merging pci/next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge pci/next +Already up to date. +Merging pstore/for-next/pstore (c5d4fb2539ca pstore/blk: Use "%lu" to format unsigned long) +$ git merge pstore/for-next/pstore +Merge made by the 'recursive' strategy. + fs/pstore/blk.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +Merging hid/for-next (1b97ec646386 Merge branch 'for-5.15/goodix' into for-next) +$ git merge hid/for-next +Merge made by the 'recursive' strategy. 
+ drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 2 +- + drivers/hid/hid-asus.c | 3 - + drivers/hid/hid-input.c | 2 - + drivers/hid/hid-magicmouse.c | 52 ++++++++++++++++ + drivers/hid/i2c-hid/i2c-hid-of-goodix.c | 92 ++++++++++++++++++++++++---- + drivers/hid/intel-ish-hid/ishtp-hid-client.c | 15 ++++- + drivers/hid/intel-ish-hid/ishtp-hid.h | 1 + + drivers/hid/intel-ish-hid/ishtp/bus.c | 29 ++++++--- + include/linux/intel-ish-client-if.h | 2 + + 9 files changed, 171 insertions(+), 27 deletions(-) +Merging i2c/i2c/for-next (4a8ac5e45cda i2c: mpc: Poll for MCF) +$ git merge i2c/i2c/for-next +Merge made by the 'recursive' strategy. + drivers/i2c/busses/i2c-mpc.c | 4 ++-- + drivers/misc/eeprom/at24.c | 17 +++++++---------- + 2 files changed, 9 insertions(+), 12 deletions(-) +Merging i3c/i3c/next (3e5feb11a829 i3c: master: svc: fix doc warning in svc-i3c-master.c) +$ git merge i3c/i3c/next +Already up to date. +Merging dmi/dmi-for-next (3cb4d29a2633 MAINTAINERS: The DMI/SMBIOS tree has moved) +$ git merge dmi/dmi-for-next +Already up to date. +Merging hwmon-staging/hwmon-next (e2c744af9cd3 hwmon: (w83627ehf) Switch to SIMPLE_DEV_PM_OPS) +$ git merge hwmon-staging/hwmon-next +Merge made by the 'recursive' strategy. + Documentation/hwmon/scpi-hwmon.rst | 2 +- + drivers/hwmon/intel-m10-bmc-hwmon.c | 116 ++++++++++++++++++++++++++++++++++ + drivers/hwmon/k10temp.c | 2 + + drivers/hwmon/ntc_thermistor.c | 20 +++--- + drivers/hwmon/pmbus/bpa-rs600.c | 22 ++++--- + drivers/hwmon/w83627ehf.c | 120 +++++++----------------------------- + 6 files changed, 166 insertions(+), 116 deletions(-) +Merging jc_docs/docs-next (4b3abe1fde47 docs/zh_CN: add core api protection keys translation) +$ git merge jc_docs/docs-next +Merge made by the 'recursive' strategy. 
+ Documentation/admin-guide/sysrq.rst | 12 +- + Documentation/arm/marvell.rst | 18 + + Documentation/core-api/cpu_hotplug.rst | 38 +- + .../translations/zh_CN/core-api/cpu_hotplug.rst | 348 ++++++++++++++++++ + .../translations/zh_CN/core-api/genericirq.rst | 409 +++++++++++++++++++++ + .../translations/zh_CN/core-api/index.rst | 7 +- + .../translations/zh_CN/core-api/memory-hotplug.rst | 126 +++++++ + .../zh_CN/core-api/protection-keys.rst | 99 +++++ + .../translations/zh_CN/dev-tools/index.rst | 5 + + .../zh_CN/dev-tools/testing-overview.rst | 108 ++++++ + .../translations/zh_CN/process/coding-style.rst | 3 +- + 11 files changed, 1150 insertions(+), 23 deletions(-) + create mode 100644 Documentation/translations/zh_CN/core-api/cpu_hotplug.rst + create mode 100644 Documentation/translations/zh_CN/core-api/genericirq.rst + create mode 100644 Documentation/translations/zh_CN/core-api/memory-hotplug.rst + create mode 100644 Documentation/translations/zh_CN/core-api/protection-keys.rst + create mode 100644 Documentation/translations/zh_CN/dev-tools/testing-overview.rst +Merging v4l-dvb/master (e73f0f0ee754 Linux 5.14-rc1) +$ git merge v4l-dvb/master +Already up to date. +Merging v4l-dvb-next/master (c27479d762de media: atomisp: pci: reposition braces as per coding style) +$ git merge v4l-dvb-next/master +Merge made by the 'recursive' strategy. 
+ .../devicetree/bindings/media/i2c/adv7180.yaml | 8 + + .../devicetree/bindings/media/rockchip-vpu.yaml | 1 + + Documentation/driver-api/media/cec-core.rst | 9 +- + .../userspace-api/media/cec.h.rst.exceptions | 2 +- + .../userspace-api/media/v4l/ext-ctrls-codec.rst | 3 + + drivers/media/cec/platform/stm32/stm32-cec.c | 26 +- + drivers/media/dvb-frontends/cx24117.c | 1 - + drivers/media/dvb-frontends/dib8000.c | 58 ++- + drivers/media/i2c/adv7180.c | 66 ++- + drivers/media/i2c/tda1997x.c | 1 + + drivers/media/i2c/tvp5150.c | 2 +- + drivers/media/pci/ivtv/ivtv-cards.h | 68 ++- + drivers/media/pci/ivtv/ivtv-i2c.c | 16 +- + drivers/media/pci/saa7134/saa7134-alsa.c | 4 +- + drivers/media/pci/saa7134/saa7134-core.c | 7 +- + drivers/media/pci/saa7164/saa7164-cmd.c | 3 - + drivers/media/pci/tw5864/tw5864-reg.h | 2 +- + drivers/media/platform/atmel/atmel-sama5d2-isc.c | 17 + + drivers/media/platform/coda/coda-bit.c | 18 +- + drivers/media/platform/davinci/vpfe_capture.c | 2 +- + drivers/media/platform/marvell-ccic/mcam-core.c | 2 +- + drivers/media/platform/rcar-vin/rcar-v4l2.c | 4 +- + drivers/media/platform/rockchip/rga/rga.c | 27 +- + .../platform/rockchip/rkisp1/rkisp1-capture.c | 12 +- + .../media/platform/rockchip/rkisp1/rkisp1-common.h | 6 +- + .../media/platform/rockchip/rkisp1/rkisp1-params.c | 3 +- + .../media/platform/rockchip/rkisp1/rkisp1-stats.c | 6 +- + drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c | 2 +- + drivers/media/platform/sti/delta/delta-ipc.c | 3 +- + drivers/media/platform/ti-vpe/cal-camerarx.c | 247 ++++++----- + drivers/media/platform/ti-vpe/cal-video.c | 176 ++++++-- + drivers/media/platform/ti-vpe/cal.c | 278 ++++++++---- + drivers/media/platform/ti-vpe/cal.h | 66 ++- + drivers/media/platform/ti-vpe/cal_regs.h | 53 +-- + drivers/media/platform/vsp1/vsp1_entity.c | 4 +- + drivers/media/rc/ene_ir.c | 2 +- + drivers/media/rc/lirc_dev.c | 6 +- + drivers/media/rc/mceusb.c | 2 +- + drivers/media/rc/rc-loopback.c | 82 ++-- + 
drivers/media/rc/rc-main.c | 2 +- + drivers/media/rc/redrat3.c | 2 +- + drivers/media/rc/streamzap.c | 2 - + drivers/media/spi/cxd2880-spi.c | 7 +- + drivers/media/test-drivers/vivid/vivid-cec.c | 2 +- + drivers/media/usb/dvb-usb-v2/Kconfig | 2 + + drivers/media/usb/dvb-usb-v2/af9035.c | 1 + + drivers/media/usb/dvb-usb-v2/dvbsky.c | 37 +- + drivers/media/usb/dvb-usb/dvb-usb-i2c.c | 9 +- + drivers/media/usb/dvb-usb/dvb-usb-init.c | 2 +- + drivers/media/usb/dvb-usb/nova-t-usb2.c | 6 +- + drivers/media/usb/dvb-usb/vp702x.c | 12 +- + drivers/media/usb/go7007/go7007-driver.c | 26 -- + drivers/media/usb/go7007/go7007-usb.c | 2 +- + drivers/media/usb/gspca/Kconfig | 1 - + drivers/media/usb/gspca/vc032x.c | 6 +- + drivers/media/usb/gspca/zc3xx.c | 134 +++--- + drivers/media/usb/stkwebcam/stk-webcam.c | 6 +- + drivers/media/usb/uvc/uvc_v4l2.c | 34 +- + drivers/staging/media/atomisp/i2c/atomisp-gc2235.c | 13 +- + .../staging/media/atomisp/i2c/atomisp-mt9m114.c | 22 +- + drivers/staging/media/atomisp/i2c/ov2680.h | 2 +- + .../media/atomisp/i2c/ov5693/atomisp-ov5693.c | 2 +- + drivers/staging/media/atomisp/pci/atomisp_compat.h | 2 - + .../media/atomisp/pci/atomisp_compat_css20.c | 38 +- + drivers/staging/media/atomisp/pci/atomisp_csi2.c | 3 +- + drivers/staging/media/atomisp/pci/atomisp_drvfs.c | 6 +- + .../media/atomisp/pci/atomisp_gmin_platform.c | 8 +- + drivers/staging/media/atomisp/pci/atomisp_v4l2.c | 4 +- + .../pci/hive_isp_css_common/host/input_system.c | 62 --- + .../media/atomisp/pci/input_system_ctrl_defs.h | 2 +- + .../pci/isp/kernels/anr/anr_1.0/ia_css_anr_types.h | 4 +- + .../media/atomisp/pci/isp2400_input_system_local.h | 32 +- + .../media/atomisp/pci/isp_acquisition_defs.h | 2 +- + .../staging/media/atomisp/pci/isp_capture_defs.h | 2 +- + .../media/atomisp/pci/runtime/isys/src/rx.c | 14 +- + drivers/staging/media/atomisp/pci/sh_css.c | 47 +- + .../staging/media/atomisp/pci/sh_css_firmware.c | 6 +- + drivers/staging/media/atomisp/pci/sh_css_mipi.c | 69 ++- + 
drivers/staging/media/atomisp/pci/sh_css_params.c | 171 ++++--- + drivers/staging/media/atomisp/pci/sh_css_sp.c | 108 ++--- + drivers/staging/media/atomisp/pci/sh_css_version.c | 3 +- + drivers/staging/media/hantro/Makefile | 1 + + drivers/staging/media/hantro/hantro.h | 4 +- + drivers/staging/media/hantro/hantro_drv.c | 11 +- + drivers/staging/media/hantro/hantro_g1_h264_dec.c | 48 +- + drivers/staging/media/hantro/hantro_g1_vp8_dec.c | 31 +- + drivers/staging/media/hantro/hantro_h264.c | 24 + + drivers/staging/media/hantro/hantro_hw.h | 8 + + .../media/hantro/rockchip_vpu2_hw_h264_dec.c | 491 +++++++++++++++++++++ + .../media/hantro/rockchip_vpu2_hw_vp8_dec.c | 32 +- + drivers/staging/media/hantro/rockchip_vpu_hw.c | 43 +- + drivers/staging/media/hantro/sama5d4_vdec_hw.c | 6 - + drivers/staging/media/sunxi/cedrus/cedrus_h265.c | 26 +- + drivers/staging/media/sunxi/cedrus/cedrus_video.c | 1 + + drivers/staging/media/tegra-video/vi.c | 4 +- + include/media/dvb-usb-ids.h | 2 + + include/media/hevc-ctrls.h | 3 +- + include/media/rc-core.h | 5 +- + include/uapi/linux/cec.h | 2 +- + 99 files changed, 1889 insertions(+), 1063 deletions(-) + create mode 100644 drivers/staging/media/hantro/rockchip_vpu2_hw_h264_dec.c +Merging pm/linux-next (d713e0c83398 Merge branch 'pm-pci' into linux-next) +$ git merge pm/linux-next +Auto-merging drivers/base/core.c +Merge made by the 'recursive' strategy. 
+ drivers/acpi/Kconfig | 6 +-- + drivers/acpi/acpi_configfs.c | 54 +++++++++++++------------- + drivers/acpi/bus.c | 12 +++--- + drivers/acpi/glue.c | 51 +++++++------------------ + drivers/acpi/numa/Kconfig | 2 +- + drivers/acpi/numa/srat.c | 2 +- + drivers/acpi/pmic/intel_pmic_xpower.c | 47 ++++++++++++++++++++--- + drivers/acpi/utils.c | 7 ++-- + drivers/base/base.h | 3 ++ + drivers/base/core.c | 35 ++++++++--------- + drivers/base/swnode.c | 61 ++++++++++++++++-------------- + drivers/firmware/efi/dev-path-parser.c | 1 - + drivers/media/pci/intel/ipu3/cio2-bridge.c | 6 +-- + drivers/pci/pci.c | 6 +-- + include/acpi/acpi_bus.h | 5 --- + include/linux/acpi.h | 12 +++--- + include/linux/property.h | 2 - + 17 files changed, 156 insertions(+), 156 deletions(-) +Merging cpufreq-arm/cpufreq/arm/linux-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge cpufreq-arm/cpufreq/arm/linux-next +Already up to date. +Merging cpupower/cpupower (5499f2b80b56 tools: cpupower: fix typo in cpupower-idle-set(1) manpage) +$ git merge cpupower/cpupower +Merge made by the 'recursive' strategy. + tools/power/cpupower/man/cpupower-idle-set.1 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +Merging devfreq/devfreq-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge devfreq/devfreq-next +Already up to date. +Merging opp/opp/linux-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge opp/opp/linux-next +Already up to date. +Merging thermal/thermal/linux-next (fe6a6de6692e thermal/drivers/int340x/processor_thermal: Fix tcc setting) +$ git merge thermal/thermal/linux-next +Already up to date. +Merging ieee1394/for-next (54b3bd99f094 firewire: nosy: switch from 'pci_' to 'dma_' API) +$ git merge ieee1394/for-next +Merge made by the 'recursive' strategy. + drivers/firewire/net.c | 4 ---- + 1 file changed, 4 deletions(-) +Merging dlm/next (62699b3f0a62 fs: dlm: move receive loop into receive handler) +$ git merge dlm/next +Merge made by the 'recursive' strategy. 
+ fs/dlm/dlm_internal.h | 2 +- + fs/dlm/lockspace.c | 2 +- + fs/dlm/lowcomms.c | 769 +++++++++++++++++++++++++------------------------- + fs/dlm/rcom.c | 29 +- + 4 files changed, 398 insertions(+), 404 deletions(-) +Merging swiotlb/linux-next (868c9ddc182b swiotlb: add overflow checks to swiotlb_bounce) +$ git merge swiotlb/linux-next +Auto-merging drivers/base/core.c +Merge made by the 'recursive' strategy. + .../bindings/reserved-memory/reserved-memory.txt | 36 ++- + drivers/base/core.c | 4 + + drivers/gpu/drm/i915/gem/i915_gem_internal.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_ttm.c | 2 +- + drivers/iommu/dma-iommu.c | 12 +- + drivers/of/address.c | 33 +++ + drivers/of/device.c | 3 + + drivers/of/of_private.h | 6 + + drivers/pci/xen-pcifront.c | 2 +- + drivers/xen/swiotlb-xen.c | 4 +- + include/linux/device.h | 4 + + include/linux/swiotlb.h | 53 +++- + kernel/dma/Kconfig | 14 + + kernel/dma/direct.c | 59 +++-- + kernel/dma/direct.h | 8 +- + kernel/dma/swiotlb.c | 282 ++++++++++++++++----- + 16 files changed, 418 insertions(+), 106 deletions(-) +Merging rdma/for-next (923232bbea88 RDMA/rxe: Fix types in rxe_icrc.c) +$ git merge rdma/for-next +Auto-merging drivers/infiniband/sw/rxe/rxe_mr.c +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ MAINTAINERS | 2 +- + drivers/infiniband/core/cache.c | 10 +-- + drivers/infiniband/core/device.c | 10 ++- + drivers/infiniband/hw/efa/efa_verbs.c | 118 ++++++++++++++++++------------- + drivers/infiniband/sw/rxe/rxe.h | 22 ------ + drivers/infiniband/sw/rxe/rxe_comp.c | 4 +- + drivers/infiniband/sw/rxe/rxe_icrc.c | 124 +++++++++++++++++++++++++++++++-- + drivers/infiniband/sw/rxe/rxe_loc.h | 61 +++------------- + drivers/infiniband/sw/rxe/rxe_mr.c | 23 ++---- + drivers/infiniband/sw/rxe/rxe_net.c | 59 +++++++++++++--- + drivers/infiniband/sw/rxe/rxe_recv.c | 23 +----- + drivers/infiniband/sw/rxe/rxe_req.c | 13 +--- + drivers/infiniband/sw/rxe/rxe_resp.c | 33 +++------ + drivers/infiniband/sw/rxe/rxe_verbs.c | 11 +-- + drivers/infiniband/sw/siw/siw_qp_tx.c | 44 +++++++----- + drivers/infiniband/ulp/rtrs/rtrs-clt.c | 11 ++- + drivers/infiniband/ulp/rtrs/rtrs-clt.h | 1 - + drivers/infiniband/ulp/rtrs/rtrs-pri.h | 6 +- + drivers/infiniband/ulp/rtrs/rtrs-srv.c | 19 ++--- + drivers/infiniband/ulp/rtrs/rtrs-srv.h | 2 - + drivers/infiniband/ulp/rtrs/rtrs.c | 23 ++++-- + 21 files changed, 346 insertions(+), 273 deletions(-) +Merging net-next/master (4431531c482a nfp: fix return statement in nfp_net_parse_meta()) +$ git merge net-next/master +Auto-merging net/sched/cls_api.c +Auto-merging net/ipv6/route.c +Auto-merging net/ipv6/ip6_output.c +Auto-merging net/ipv4/udp_bpf.c +Auto-merging net/ipv4/tcp_ipv4.c +CONFLICT (content): Merge conflict in net/ipv4/tcp_ipv4.c +Auto-merging net/ipv4/tcp_fastopen.c +Auto-merging net/dsa/slave.c +Auto-merging net/core/dev.c +Auto-merging net/bridge/br_fdb.c +Auto-merging net/bpf/test_run.c +CONFLICT (content): Merge conflict in net/bpf/test_run.c +Auto-merging kernel/bpf/verifier.c +Auto-merging include/linux/bpf_verifier.h +Auto-merging drivers/s390/net/qeth_l3_main.c +Auto-merging drivers/net/usb/hso.c +Auto-merging drivers/net/ethernet/netronome/nfp/flower/conntrack.c +Auto-merging drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c 
+Auto-merging drivers/net/dsa/sja1105/sja1105_main.c +Auto-merging drivers/bus/mhi/pci_generic.c +CONFLICT (content): Merge conflict in drivers/bus/mhi/pci_generic.c +Auto-merging arch/arm64/boot/dts/qcom/sc7280.dtsi +Auto-merging arch/arm64/boot/dts/qcom/sc7180.dtsi +Auto-merging MAINTAINERS +Auto-merging Documentation/networking/ip-sysctl.rst +Removing Documentation/devicetree/bindings/net/fsl-fec.txt +Resolved 'drivers/bus/mhi/pci_generic.c' using previous resolution. +Resolved 'net/bpf/test_run.c' using previous resolution. +Recorded preimage for 'net/ipv4/tcp_ipv4.c' +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +Recorded resolution for 'net/ipv4/tcp_ipv4.c'. +[master 861c7995866d] Merge remote-tracking branch 'net-next/master' +$ git diff -M --stat --summary HEAD^.. + Documentation/devicetree/bindings/net/fsl,fec.yaml | 217 +++ + Documentation/devicetree/bindings/net/fsl-fec.txt | 95 -- + .../devicetree/bindings/net/qcom,ipa.yaml | 18 +- + Documentation/networking/ioam6-sysctl.rst | 26 + + Documentation/networking/ip-sysctl.rst | 17 + + MAINTAINERS | 7 + + arch/arm/boot/dts/imx35.dtsi | 2 +- + arch/arm/boot/dts/imx6q-novena.dts | 34 +- + arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi | 18 +- + arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi | 34 +- + arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi | 34 +- + arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi | 34 +- + arch/arm/boot/dts/imx6qdl-sabrelite.dtsi | 34 +- + arch/arm/boot/dts/imx7-mba7.dtsi | 1 - + arch/arm/boot/dts/imx7d-mba7.dts | 1 - + arch/arm64/boot/dts/qcom/sc7180.dtsi | 5 + + arch/arm64/boot/dts/qcom/sc7280.dtsi | 42 + + arch/s390/include/asm/ccwgroup.h | 2 - + arch/x86/net/bpf_jit_comp.c | 19 + + drivers/atm/idt77252.c | 2 +- + drivers/bus/mhi/pci_generic.c | 4 + + drivers/net/dsa/ocelot/felix.c | 34 +- + drivers/net/dsa/ocelot/felix.h | 1 - + drivers/net/dsa/sja1105/sja1105.h | 14 +- + drivers/net/dsa/sja1105/sja1105_devlink.c | 114 +- + 
drivers/net/dsa/sja1105/sja1105_main.c | 668 +------- + drivers/net/dsa/sja1105/sja1105_vl.c | 14 +- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 6 - + .../net/ethernet/freescale/dpaa2/dpaa2-switch.c | 67 +- + drivers/net/ethernet/google/gve/gve_adminq.c | 6 +- + drivers/net/ethernet/intel/e1000e/ethtool.c | 2 + + drivers/net/ethernet/intel/e1000e/hw.h | 9 + + drivers/net/ethernet/intel/e1000e/ich8lan.c | 13 +- + drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 + + drivers/net/ethernet/intel/e1000e/netdev.c | 370 +++-- + drivers/net/ethernet/intel/e1000e/ptp.c | 1 + + drivers/net/ethernet/intel/e1000e/regs.h | 1 + + drivers/net/ethernet/intel/i40e/i40e.h | 78 + + drivers/net/ethernet/intel/i40e/i40e_main.c | 28 +- + drivers/net/ethernet/intel/i40e/i40e_ptp.c | 756 ++++++++- + drivers/net/ethernet/intel/i40e/i40e_register.h | 29 + + drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 23 +- + drivers/net/ethernet/intel/iavf/iavf_main.c | 58 +- + drivers/net/ethernet/intel/igc/igc.h | 38 +- + drivers/net/ethernet/intel/igc/igc_base.c | 10 +- + drivers/net/ethernet/intel/igc/igc_defines.h | 52 +- + drivers/net/ethernet/intel/igc/igc_ethtool.c | 41 +- + drivers/net/ethernet/intel/igc/igc_main.c | 347 +++- + drivers/net/ethernet/intel/igc/igc_phy.c | 6 +- + drivers/net/ethernet/intel/igc/igc_regs.h | 17 + + drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +- + drivers/net/ethernet/marvell/mvneta.c | 22 +- + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 9 + + .../net/ethernet/marvell/prestera/prestera_main.c | 3 +- + .../ethernet/marvell/prestera/prestera_switchdev.c | 12 +- + .../ethernet/marvell/prestera/prestera_switchdev.h | 3 +- + .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 32 +- + .../ethernet/microchip/sparx5/sparx5_switchdev.c | 24 +- + drivers/net/ethernet/mscc/ocelot_net.c | 104 +- + drivers/net/ethernet/netronome/nfp/flower/action.c | 35 +- + .../net/ethernet/netronome/nfp/flower/conntrack.c | 616 +++++++- + 
.../net/ethernet/netronome/nfp/flower/conntrack.h | 26 + + drivers/net/ethernet/netronome/nfp/flower/main.h | 79 +- + drivers/net/ethernet/netronome/nfp/flower/match.c | 333 ++-- + .../net/ethernet/netronome/nfp/flower/metadata.c | 7 +- + .../net/ethernet/netronome/nfp/flower/offload.c | 51 +- + .../net/ethernet/netronome/nfp/nfp_net_common.c | 2 +- + .../net/ethernet/pensando/ionic/ionic_bus_pci.c | 3 - + .../net/ethernet/pensando/ionic/ionic_devlink.c | 14 +- + drivers/net/ethernet/rocker/rocker.h | 3 +- + drivers/net/ethernet/rocker/rocker_main.c | 9 +- + drivers/net/ethernet/rocker/rocker_ofdpa.c | 19 +- + drivers/net/ethernet/ti/am65-cpsw-nuss.c | 18 +- + drivers/net/ethernet/ti/cpsw_new.c | 16 +- + drivers/net/ipa/ipa_data-v4.11.c | 13 +- + drivers/net/mhi/net.c | 1 + + drivers/net/mhi/proto_mbim.c | 4 +- + drivers/net/netdevsim/bus.c | 17 +- + drivers/net/netdevsim/netdev.c | 6 +- + drivers/net/netdevsim/netdevsim.h | 1 + + drivers/net/phy/Kconfig | 6 + + drivers/net/phy/Makefile | 1 + + drivers/net/phy/at803x.c | 18 +- + drivers/net/phy/dp83822.c | 8 +- + drivers/net/phy/intel-xway.c | 76 + + drivers/net/phy/marvell10g.c | 89 ++ + drivers/net/phy/mxl-gpy.c | 728 +++++++++ + drivers/net/phy/phy_device.c | 14 + + drivers/net/phy/phylink.c | 21 +- + drivers/net/usb/asix_devices.c | 6 +- + drivers/net/usb/hso.c | 6 +- + drivers/net/veth.c | 305 +++- + drivers/net/vmxnet3/Makefile | 2 +- + drivers/net/vmxnet3/upt1_defs.h | 2 +- + drivers/net/vmxnet3/vmxnet3_defs.h | 50 +- + drivers/net/vmxnet3/vmxnet3_drv.c | 221 ++- + drivers/net/vmxnet3/vmxnet3_ethtool.c | 20 + + drivers/net/vmxnet3/vmxnet3_int.h | 22 +- + drivers/net/wwan/iosm/iosm_ipc_pcie.c | 19 +- + drivers/s390/cio/ccwgroup.c | 22 - + drivers/s390/net/Kconfig | 9 - + drivers/s390/net/qeth_core.h | 46 - + drivers/s390/net/qeth_core_main.c | 154 +- + drivers/s390/net/qeth_core_mpc.c | 3 - + drivers/s390/net/qeth_core_mpc.h | 23 +- + drivers/s390/net/qeth_core_sys.c | 5 - + drivers/s390/net/qeth_ethtool.c | 
7 - + drivers/s390/net/qeth_l2_main.c | 172 +- + drivers/s390/net/qeth_l3_main.c | 7 +- + include/linux/bitops.h | 50 + + include/linux/bpf.h | 100 +- + include/linux/bpf_verifier.h | 19 +- + include/linux/btf.h | 1 + + include/linux/dsa/8021q.h | 34 +- + include/linux/dsa/sja1105.h | 1 - + include/linux/filter.h | 3 +- + include/linux/if_bridge.h | 57 +- + include/linux/ioam6.h | 13 + + include/linux/ioam6_genl.h | 13 + + include/linux/ioam6_iptunnel.h | 13 + + include/linux/ipv6.h | 3 + + include/linux/mhi.h | 2 + + include/linux/netdevice.h | 2 + + include/linux/phy.h | 1 + + include/linux/skbuff.h | 10 +- + include/linux/typecheck.h | 9 + + include/net/af_unix.h | 12 + + include/net/dsa.h | 10 + + include/net/ioam6.h | 67 + + include/net/ip.h | 22 +- + include/net/ip6_route.h | 5 +- + include/net/netns/ipv4.h | 1 - + include/net/netns/ipv6.h | 3 + + include/net/switchdev.h | 62 + + include/net/xdp.h | 5 + + include/uapi/linux/bpf.h | 85 +- + include/uapi/linux/if_bridge.h | 18 + + include/uapi/linux/in6.h | 1 + + include/uapi/linux/ioam6.h | 133 ++ + include/uapi/linux/ioam6_genl.h | 52 + + include/uapi/linux/ioam6_iptunnel.h | 20 + + include/uapi/linux/ipv6.h | 3 + + include/uapi/linux/lwtunnel.h | 1 + + include/uapi/linux/openvswitch.h | 8 + + kernel/bpf/Kconfig | 2 +- + kernel/bpf/arraymap.c | 21 + + kernel/bpf/btf.c | 77 +- + kernel/bpf/cpumap.c | 116 +- + kernel/bpf/devmap.c | 49 +- + kernel/bpf/hashtab.c | 105 +- + kernel/bpf/helpers.c | 340 +++- + kernel/bpf/local_storage.c | 4 +- + kernel/bpf/map_in_map.c | 8 + + kernel/bpf/syscall.c | 53 +- + kernel/bpf/trampoline.c | 12 +- + kernel/bpf/verifier.c | 379 ++++- + kernel/trace/bpf_trace.c | 33 +- + mm/memcontrol.c | 2 +- + net/8021q/vlan.c | 2 +- + net/bpf/test_run.c | 109 +- + net/bridge/br.c | 9 +- + net/bridge/br_device.c | 14 +- + net/bridge/br_fdb.c | 4 +- + net/bridge/br_forward.c | 7 +- + net/bridge/br_if.c | 11 +- + net/bridge/br_input.c | 17 +- + net/bridge/br_mdb.c | 109 +- + 
net/bridge/br_multicast.c | 1670 +++++++++++++------- + net/bridge/br_multicast_eht.c | 92 +- + net/bridge/br_netlink.c | 41 +- + net/bridge/br_private.h | 418 +++-- + net/bridge/br_private_mcast_eht.h | 3 +- + net/bridge/br_switchdev.c | 227 ++- + net/bridge/br_sysfs_br.c | 38 +- + net/bridge/br_sysfs_if.c | 2 +- + net/bridge/br_vlan.c | 89 +- + net/bridge/br_vlan_options.c | 216 +++ + net/core/Makefile | 2 - + net/core/dev.c | 109 +- + net/core/fib_rules.c | 4 +- + net/core/filter.c | 10 +- + net/core/lwtunnel.c | 2 + + net/core/rtnetlink.c | 9 +- + net/core/scm.c | 4 +- + net/core/selftests.c | 12 + + net/core/sock_map.c | 22 +- + net/dccp/proto.c | 2 +- + net/dsa/Kconfig | 12 - + net/dsa/Makefile | 3 +- + net/dsa/dsa_priv.h | 47 +- + net/dsa/port.c | 118 +- + net/dsa/slave.c | 209 ++- + net/dsa/switch.c | 30 +- + net/dsa/tag_8021q.c | 569 ++++--- + net/dsa/tag_ocelot_8021q.c | 4 +- + net/dsa/tag_sja1105.c | 28 +- + net/ipv4/devinet.c | 12 +- + net/ipv4/fib_trie.c | 4 +- + net/ipv4/igmp.c | 2 + + net/ipv4/route.c | 21 +- + net/ipv4/tcp.c | 4 +- + net/ipv4/tcp_fastopen.c | 17 +- + net/ipv4/tcp_input.c | 40 +- + net/ipv4/tcp_ipv4.c | 1 - + net/ipv4/udp_bpf.c | 1 - + net/ipv6/Kconfig | 11 + + net/ipv6/Makefile | 3 +- + net/ipv6/addrconf.c | 44 +- + net/ipv6/af_inet6.c | 10 + + net/ipv6/exthdrs.c | 61 + + net/ipv6/ioam6.c | 910 +++++++++++ + net/ipv6/ioam6_iptunnel.c | 274 ++++ + net/ipv6/ip6_fib.c | 4 +- + net/ipv6/ip6_output.c | 2 +- + net/ipv6/route.c | 22 +- + net/ipv6/sit.c | 5 +- + net/ipv6/sysctl_net_ipv6.c | 19 + + net/netfilter/nf_flow_table_core.c | 2 +- + net/netlink/af_netlink.c | 4 +- + net/openvswitch/actions.c | 6 +- + net/openvswitch/datapath.c | 72 +- + net/openvswitch/datapath.h | 20 + + net/sched/act_api.c | 12 +- + net/sched/cls_api.c | 15 +- + net/sched/sch_api.c | 10 +- + net/switchdev/switchdev.c | 260 +++ + net/tipc/socket.c | 36 +- + net/unix/Makefile | 1 + + net/unix/af_unix.c | 85 +- + net/unix/unix_bpf.c | 122 ++ + 
samples/bpf/xdp_redirect_cpu_user.c | 22 +- + scripts/bpf_doc.py | 2 + + tools/include/uapi/linux/bpf.h | 85 +- + tools/lib/bpf/libbpf.c | 104 +- + .../selftests/bpf/prog_tests/get_func_ip_test.c | 53 + + .../selftests/bpf/prog_tests/sockmap_listen.c | 406 ++++- + tools/testing/selftests/bpf/prog_tests/timer.c | 55 + + tools/testing/selftests/bpf/prog_tests/timer_mim.c | 69 + + .../bpf/prog_tests/xdp_context_test_run.c | 105 ++ + .../selftests/bpf/prog_tests/xdp_cpumap_attach.c | 43 +- + .../selftests/bpf/prog_tests/xdp_devmap_attach.c | 39 +- + .../testing/selftests/bpf/progs/get_func_ip_test.c | 73 + + tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 1 - + .../bpf/progs/test_xdp_context_test_run.c | 20 + + tools/testing/selftests/bpf/progs/timer.c | 297 ++++ + tools/testing/selftests/bpf/progs/timer_mim.c | 88 ++ + .../testing/selftests/bpf/progs/timer_mim_reject.c | 74 + + tools/testing/selftests/net/Makefile | 2 + + tools/testing/selftests/net/config | 1 + + tools/testing/selftests/net/ioam6.sh | 297 ++++ + tools/testing/selftests/net/ioam6_parser.c | 402 +++++ + tools/testing/selftests/net/veth.sh | 183 ++- + 252 files changed, 13786 insertions(+), 4059 deletions(-) + create mode 100644 Documentation/devicetree/bindings/net/fsl,fec.yaml + delete mode 100644 Documentation/devicetree/bindings/net/fsl-fec.txt + create mode 100644 Documentation/networking/ioam6-sysctl.rst + create mode 100644 drivers/net/phy/mxl-gpy.c + create mode 100644 include/linux/ioam6.h + create mode 100644 include/linux/ioam6_genl.h + create mode 100644 include/linux/ioam6_iptunnel.h + create mode 100644 include/net/ioam6.h + create mode 100644 include/uapi/linux/ioam6.h + create mode 100644 include/uapi/linux/ioam6_genl.h + create mode 100644 include/uapi/linux/ioam6_iptunnel.h + create mode 100644 net/ipv6/ioam6.c + create mode 100644 net/ipv6/ioam6_iptunnel.c + create mode 100644 net/unix/unix_bpf.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c + 
create mode 100644 tools/testing/selftests/bpf/prog_tests/timer.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/timer_mim.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c + create mode 100644 tools/testing/selftests/bpf/progs/get_func_ip_test.c + create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c + create mode 100644 tools/testing/selftests/bpf/progs/timer.c + create mode 100644 tools/testing/selftests/bpf/progs/timer_mim.c + create mode 100644 tools/testing/selftests/bpf/progs/timer_mim_reject.c + create mode 100644 tools/testing/selftests/net/ioam6.sh + create mode 100644 tools/testing/selftests/net/ioam6_parser.c +Merging bpf-next/for-next (9907442fcddb selftests/bpf: Mute expected invalid map creation error msg) +$ git merge bpf-next/for-next +Auto-merging net/bpf/test_run.c +Merge made by the 'recursive' strategy. + Documentation/networking/filter.rst | 23 +- + include/linux/bpf-cgroup.h | 54 -- + include/linux/bpf.h | 54 +- + include/linux/sched.h | 3 + + kernel/bpf/helpers.c | 16 +- + kernel/bpf/local_storage.c | 5 +- + kernel/fork.c | 1 + + kernel/trace/bpf_trace.c | 2 +- + net/bpf/test_run.c | 23 +- + tools/lib/bpf/btf.h | 19 + + tools/lib/bpf/btf_dump.c | 871 ++++++++++++++++++++- + tools/lib/bpf/libbpf.c | 44 +- + tools/lib/bpf/libbpf.h | 9 +- + tools/lib/bpf/libbpf.map | 1 + + tools/testing/selftests/bpf/prog_tests/btf_dump.c | 615 +++++++++++++++ + .../selftests/bpf/prog_tests/core_autosize.c | 22 +- + .../testing/selftests/bpf/prog_tests/core_reloc.c | 25 +- + .../selftests/bpf/progs/test_map_in_map_invalid.c | 26 + + tools/testing/selftests/bpf/test_maps.c | 72 +- + tools/testing/selftests/bpf/test_progs.h | 12 + + tools/testing/selftests/bpf/test_tc_tunnel.sh | 2 +- + 21 files changed, 1742 insertions(+), 157 deletions(-) + create mode 100644 tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c +Merging ipsec-next/master (2d151d39073a xfrm: Add possibility to set 
the default to block if we have no policy) +$ git merge ipsec-next/master +Auto-merging net/xfrm/xfrm_user.c +Auto-merging net/xfrm/xfrm_policy.c +Auto-merging include/net/netns/xfrm.h +Merge made by the 'recursive' strategy. + include/net/netns/xfrm.h | 7 +++++++ + include/net/xfrm.h | 36 ++++++++++++++++++++++++++------ + include/uapi/linux/xfrm.h | 10 +++++++++ + net/ipv4/esp4.c | 4 +--- + net/xfrm/xfrm_policy.c | 16 +++++++++++++++ + net/xfrm/xfrm_user.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++ + 6 files changed, 116 insertions(+), 9 deletions(-) +Merging mlx5-next/mlx5-next (96cd2dd65bb0 net/mlx5: Add DCS caps & fields support) +$ git merge mlx5-next/mlx5-next +Merge made by the 'recursive' strategy. + include/linux/mlx5/mlx5_ifc.h | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) +Merging netfilter-next/master (84fe73996c2e Merge branch 'do_once_lite') +$ git merge netfilter-next/master +Already up to date. +Merging ipvs-next/master (84fe73996c2e Merge branch 'do_once_lite') +$ git merge ipvs-next/master +Already up to date. +Merging wireless-drivers-next/master (0d6835ffe50c net: phy: Fix data type in DP83822 dp8382x_disable_wol()) +$ git merge wireless-drivers-next/master +Already up to date. +Merging bluetooth/master (64832df2ac05 Bluetooth: btusb: Add support for Foxconn Mediatek Chip) +$ git merge bluetooth/master +Merge made by the 'recursive' strategy. + drivers/bluetooth/btrsi.c | 2 +- + drivers/bluetooth/btrtl.c | 10 +++++++--- + drivers/bluetooth/btusb.c | 29 +++++++++++++++++++++-------- + drivers/bluetooth/hci_h5.c | 7 +++++++ + drivers/bluetooth/hci_serdev.c | 3 +++ + drivers/bluetooth/hci_uart.h | 7 ++++--- + net/6lowpan/debugfs.c | 3 ++- + net/bluetooth/sco.c | 5 +++++ + 8 files changed, 50 insertions(+), 16 deletions(-) +Merging mac80211-next/master (0d6835ffe50c net: phy: Fix data type in DP83822 dp8382x_disable_wol()) +$ git merge mac80211-next/master +Already up to date. 
+Merging mtd/mtd/next (ff44b90b325d dt_bindings: mtd: partitions: redboot: convert to YAML) +$ git merge mtd/mtd/next +Removing Documentation/devicetree/bindings/mtd/partitions/redboot-fis.txt +Merge made by the 'recursive' strategy. + .../bindings/mtd/partitions/redboot-fis.txt | 27 -------------- + .../bindings/mtd/partitions/redboot-fis.yaml | 42 ++++++++++++++++++++++ + 2 files changed, 42 insertions(+), 27 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/mtd/partitions/redboot-fis.txt + create mode 100644 Documentation/devicetree/bindings/mtd/partitions/redboot-fis.yaml +Merging nand/nand/next (c5b9ee9c361f mtd: rawnand: Fix a couple of spelling mistakes in Kconfig) +$ git merge nand/nand/next +Merge made by the 'recursive' strategy. + .../devicetree/bindings/mtd/gpmc-nand.txt | 2 +- + drivers/mtd/nand/raw/Kconfig | 4 +-- + drivers/mtd/nand/raw/nand_bbt.c | 33 ++++++++++++++++++++++ + drivers/mtd/nand/spi/core.c | 2 +- + 4 files changed, 37 insertions(+), 4 deletions(-) +Merging spi-nor/spi-nor/next (2734d6c1b1a0 Linux 5.14-rc2) +$ git merge spi-nor/spi-nor/next +Already up to date. +Merging crypto/master (f03a3cab26c1 crypto: lib - rename 'mod_init' & 'mod_exit' functions to be module-specific) +$ git merge crypto/master +Merge made by the 'recursive' strategy. 
+ arch/arm/crypto/curve25519-glue.c | 8 ++-- + crypto/Kconfig | 2 +- + drivers/char/hw_random/amd-rng.c | 8 ++-- + drivers/char/hw_random/geode-rng.c | 8 ++-- + drivers/char/hw_random/intel-rng.c | 8 ++-- + drivers/char/hw_random/via-rng.c | 8 ++-- + drivers/crypto/mxs-dcp.c | 81 +++++++++++++++++++++----------------- + lib/crypto/blake2s.c | 8 ++-- + lib/crypto/chacha20poly1305.c | 8 ++-- + lib/crypto/curve25519.c | 8 ++-- + 10 files changed, 77 insertions(+), 70 deletions(-) +Merging drm/drm-next (588b3eee5288 Merge tag 'drm-misc-next-2021-07-16' of git://anongit.freedesktop.org/drm/drm-misc into drm-next) +$ git merge drm/drm-next +Removing drivers/gpu/drm/vmwgfx/device_include/vmware_pack_end.h +Removing drivers/gpu/drm/vmwgfx/device_include/vmware_pack_begin.h +Removing drivers/gpu/drm/vmwgfx/device_include/svga_types.h +Removing drivers/gpu/drm/vmwgfx/device_include/svga3d_caps.h +Removing drivers/gpu/drm/vmwgfx/device_include/includeCheck.h +Auto-merging drivers/gpu/drm/vc4/vc4_hdmi.c +Auto-merging drivers/gpu/drm/ttm/ttm_bo.c +Auto-merging drivers/gpu/drm/i915/i915_drv.h +Auto-merging drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +Removing drivers/gpu/drm/bochs/bochs_mm.c +Removing drivers/gpu/drm/bochs/bochs_kms.c +Removing drivers/gpu/drm/bochs/bochs_hw.c +Removing drivers/gpu/drm/bochs/bochs_drv.c +Removing drivers/gpu/drm/bochs/bochs.h +Removing drivers/gpu/drm/bochs/Makefile +Removing drivers/gpu/drm/bochs/Kconfig +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +Auto-merging arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ .../ABI/testing/sysfs-kernel-dmabuf-buffers | 52 + + .../bindings/display/bridge/ti,sn65dsi86.yaml | 21 +- + .../devicetree/bindings/display/dp-aux-bus.yaml | 37 + + .../bindings/display/panel/innolux,ej030na.yaml | 62 + + .../bindings/display/panel/panel-simple.yaml | 10 + + .../bindings/display/panel/samsung,lms397kf04.yaml | 18 +- + Documentation/driver-api/dma-buf.rst | 13 + + Documentation/gpu/drm-uapi.rst | 13 + + Documentation/gpu/vkms.rst | 38 +- + MAINTAINERS | 12 +- + arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 30 +- + drivers/dma-buf/Kconfig | 11 + + drivers/dma-buf/Makefile | 1 + + drivers/dma-buf/dma-buf-sysfs-stats.c | 337 ++ + drivers/dma-buf/dma-buf-sysfs-stats.h | 62 + + drivers/dma-buf/dma-buf.c | 40 + + drivers/dma-buf/dma-fence-chain.c | 2 +- + drivers/dma-buf/dma-resv.c | 33 +- + drivers/dma-buf/st-dma-fence-chain.c | 16 +- + drivers/dma-buf/udmabuf.c | 59 +- + drivers/gpu/drm/Kconfig | 7 +- + drivers/gpu/drm/Makefile | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 66 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 - + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 26 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 6 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 - + drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +- + drivers/gpu/drm/arm/display/komeda/komeda_kms.c | 5 - + drivers/gpu/drm/arm/malidp_drv.c | 4 - + drivers/gpu/drm/armada/armada_drv.c | 4 +- + drivers/gpu/drm/armada/armada_overlay.c | 2 - + drivers/gpu/drm/armada/armada_plane.c | 29 - + drivers/gpu/drm/armada/armada_plane.h | 2 - + drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c | 1 - + drivers/gpu/drm/ast/ast_drv.c | 2 +- + 
drivers/gpu/drm/ast/ast_mode.c | 21 +- + drivers/gpu/drm/bochs/Kconfig | 11 - + drivers/gpu/drm/bochs/Makefile | 4 - + drivers/gpu/drm/bochs/bochs.h | 98 - + drivers/gpu/drm/bochs/bochs_drv.c | 205 -- + drivers/gpu/drm/bochs/bochs_hw.c | 323 -- + drivers/gpu/drm/bochs/bochs_kms.c | 178 - + drivers/gpu/drm/bochs/bochs_mm.c | 24 - + drivers/gpu/drm/bridge/Kconfig | 1 + + drivers/gpu/drm/bridge/analogix/anx7625.c | 5 +- + drivers/gpu/drm/bridge/nwl-dsi.c | 6 - + drivers/gpu/drm/bridge/ti-sn65dsi83.c | 200 +- + drivers/gpu/drm/bridge/ti-sn65dsi86.c | 744 ++-- + drivers/gpu/drm/drm_aperture.c | 19 +- + drivers/gpu/drm/drm_atomic_helper.c | 10 + + drivers/gpu/drm/drm_debugfs_crc.c | 8 +- + drivers/gpu/drm/drm_dp_aux_bus.c | 326 ++ + drivers/gpu/drm/drm_dp_helper.c | 462 +++ + drivers/gpu/drm/drm_gem.c | 12 +- + drivers/gpu/drm/drm_gem_atomic_helper.c | 58 +- + drivers/gpu/drm/drm_gem_shmem_helper.c | 4 +- + drivers/gpu/drm/drm_gem_vram_helper.c | 9 +- + drivers/gpu/drm/drm_irq.c | 13 +- + drivers/gpu/drm/drm_mipi_dbi.c | 55 +- + drivers/gpu/drm/drm_of.c | 6 +- + drivers/gpu/drm/drm_simple_kms_helper.c | 12 +- + drivers/gpu/drm/drm_syncobj.c | 6 +- + drivers/gpu/drm/drm_vblank.c | 15 +- + drivers/gpu/drm/etnaviv/etnaviv_sched.c | 3 +- + drivers/gpu/drm/exynos/exynos_drm_drv.c | 10 - + drivers/gpu/drm/gma500/oaktrail_lvds.c | 6 +- + drivers/gpu/drm/gud/gud_drv.c | 61 +- + drivers/gpu/drm/gud/gud_internal.h | 2 + + drivers/gpu/drm/gud/gud_pipe.c | 62 +- + drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c | 3 +- + drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 16 +- + drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c | 2 - + drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 4 +- + drivers/gpu/drm/i915/Makefile | 1 + + drivers/gpu/drm/i915/display/icl_dsi.c | 46 +- + drivers/gpu/drm/i915/display/intel_crt.c | 3 + + drivers/gpu/drm/i915/display/intel_crtc.c | 8 +- + drivers/gpu/drm/i915/display/intel_ddi.c | 210 +- + drivers/gpu/drm/i915/display/intel_ddi.h | 4 +- + 
drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c | 2461 +++++++------ + drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h | 68 +- + drivers/gpu/drm/i915/display/intel_display.c | 170 +- + .../gpu/drm/i915/display/intel_display_debugfs.c | 35 +- + drivers/gpu/drm/i915/display/intel_display_power.c | 5 +- + drivers/gpu/drm/i915/display/intel_display_types.h | 12 +- + drivers/gpu/drm/i915/display/intel_dmc.c | 165 +- + drivers/gpu/drm/i915/display/intel_dmc.h | 23 +- + drivers/gpu/drm/i915/display/intel_dp.c | 10 +- + .../gpu/drm/i915/display/intel_dp_aux_backlight.c | 329 +- + drivers/gpu/drm/i915/display/intel_dp_mst.c | 15 +- + drivers/gpu/drm/i915/display/intel_fbc.c | 215 +- + drivers/gpu/drm/i915/display/intel_fbdev.c | 96 +- + drivers/gpu/drm/i915/display/intel_fdi.c | 25 +- + drivers/gpu/drm/i915/display/intel_hdmi.c | 42 +- + drivers/gpu/drm/i915/display/intel_lvds.c | 4 +- + drivers/gpu/drm/i915/display/intel_psr.c | 203 +- + drivers/gpu/drm/i915/display/intel_quirks.c | 34 + + drivers/gpu/drm/i915/display/intel_sdvo.c | 4 +- + drivers/gpu/drm/i915/display/intel_tc.c | 34 +- + drivers/gpu/drm/i915/display/intel_tc.h | 2 + + drivers/gpu/drm/i915/display/intel_tv.c | 13 +- + drivers/gpu/drm/i915/display/intel_vga.c | 3 + + drivers/gpu/drm/i915/display/skl_scaler.c | 27 +- + drivers/gpu/drm/i915/display/vlv_dsi.c | 13 +- + drivers/gpu/drm/i915/gem/i915_gem_create.c | 9 +- + drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 6 +- + drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 126 +- + drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 5 - + drivers/gpu/drm/i915/gem/i915_gem_mman.c | 83 +- + drivers/gpu/drm/i915/gem/i915_gem_object.c | 143 +- + drivers/gpu/drm/i915/gem/i915_gem_object.h | 19 +- + drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 30 +- + drivers/gpu/drm/i915/gem/i915_gem_pages.c | 3 +- + drivers/gpu/drm/i915/gem/i915_gem_region.c | 6 +- + drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 647 ++++ + drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 48 + + 
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 90 +- + drivers/gpu/drm/i915/gt/intel_region_lmem.c | 3 +- + drivers/gpu/drm/i915/i915_debugfs.c | 2 +- + drivers/gpu/drm/i915/i915_drv.c | 2 +- + drivers/gpu/drm/i915/i915_drv.h | 29 +- + drivers/gpu/drm/i915/i915_gem.c | 5 +- + drivers/gpu/drm/i915/i915_irq.c | 30 +- + drivers/gpu/drm/i915/i915_pci.c | 52 +- + drivers/gpu/drm/i915/i915_reg.h | 18 +- + drivers/gpu/drm/i915/intel_dram.c | 3 +- + drivers/gpu/drm/i915/intel_memory_region.c | 1 - + drivers/gpu/drm/i915/intel_memory_region.h | 1 - + drivers/gpu/drm/i915/intel_pm.c | 119 +- + drivers/gpu/drm/i915/intel_region_ttm.c | 8 +- + drivers/gpu/drm/i915/intel_region_ttm.h | 11 +- + drivers/gpu/drm/i915/intel_uncore.c | 2 +- + drivers/gpu/drm/i915/selftests/igt_mmap.c | 25 +- + drivers/gpu/drm/i915/selftests/igt_mmap.h | 12 +- + drivers/gpu/drm/imx/dcss/dcss-kms.c | 3 - + drivers/gpu/drm/imx/dcss/dcss-plane.c | 1 - + drivers/gpu/drm/imx/imx-drm-core.c | 11 - + drivers/gpu/drm/imx/ipuv3-plane.c | 1 - + drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 1 - + drivers/gpu/drm/ingenic/ingenic-ipu.c | 1 - + drivers/gpu/drm/lima/lima_sched.c | 3 +- + drivers/gpu/drm/mcde/mcde_display.c | 1 - + drivers/gpu/drm/mediatek/mtk_drm_drv.c | 6 - + drivers/gpu/drm/mediatek/mtk_drm_plane.c | 1 - + drivers/gpu/drm/meson/meson_drv.c | 12 +- + drivers/gpu/drm/meson/meson_overlay.c | 1 - + drivers/gpu/drm/meson/meson_plane.c | 1 - + drivers/gpu/drm/mgag200/mgag200_drv.c | 64 +- + drivers/gpu/drm/mgag200/mgag200_drv.h | 14 - + drivers/gpu/drm/mgag200/mgag200_mode.c | 16 +- + drivers/gpu/drm/msm/msm_fbdev.c | 2 +- + drivers/gpu/drm/msm/msm_gem.c | 16 +- + drivers/gpu/drm/msm/msm_gem_submit.c | 6 +- + drivers/gpu/drm/mxsfb/mxsfb_kms.c | 2 - + drivers/gpu/drm/nouveau/dispnv50/disp.c | 28 + + drivers/gpu/drm/nouveau/nouveau_backlight.c | 166 +- + drivers/gpu/drm/nouveau/nouveau_connector.h | 9 +- + drivers/gpu/drm/nouveau/nouveau_drm.c | 5 +- + drivers/gpu/drm/nouveau/nouveau_encoder.h | 1 + 
+ drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- + drivers/gpu/drm/omapdrm/omap_drv.h | 2 + + drivers/gpu/drm/omapdrm/omap_irq.c | 13 +- + drivers/gpu/drm/omapdrm/omap_plane.c | 3 + + drivers/gpu/drm/panel/Kconfig | 21 + + drivers/gpu/drm/panel/Makefile | 2 + + drivers/gpu/drm/panel/panel-innolux-ej030na.c | 289 ++ + drivers/gpu/drm/panel/panel-samsung-db7430.c | 347 ++ + drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c | 10 +- + drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c | 83 +- + drivers/gpu/drm/panel/panel-samsung-s6e63m0.c | 41 +- + drivers/gpu/drm/panel/panel-samsung-s6e63m0.h | 33 +- + drivers/gpu/drm/panel/panel-simple.c | 208 +- + drivers/gpu/drm/panfrost/panfrost_device.c | 142 +- + drivers/gpu/drm/panfrost/panfrost_device.h | 99 +- + drivers/gpu/drm/panfrost/panfrost_drv.c | 91 +- + drivers/gpu/drm/panfrost/panfrost_gem.c | 20 +- + drivers/gpu/drm/panfrost/panfrost_gpu.c | 2 +- + drivers/gpu/drm/panfrost/panfrost_job.c | 695 ++-- + drivers/gpu/drm/panfrost/panfrost_job.h | 8 +- + drivers/gpu/drm/panfrost/panfrost_mmu.c | 203 +- + drivers/gpu/drm/panfrost/panfrost_mmu.h | 5 +- + drivers/gpu/drm/panfrost/panfrost_regs.h | 3 - + drivers/gpu/drm/pl111/pl111_display.c | 1 - + drivers/gpu/drm/qxl/qxl_drv.c | 4 +- + drivers/gpu/drm/qxl/qxl_drv.h | 3 - + drivers/gpu/drm/qxl/qxl_irq.c | 9 +- + drivers/gpu/drm/qxl/qxl_prime.c | 6 - + drivers/gpu/drm/radeon/radeon_drv.c | 2 +- + drivers/gpu/drm/radeon/radeon_fence.c | 2 +- + drivers/gpu/drm/radeon/radeon_irq_kms.c | 16 +- + drivers/gpu/drm/rcar-du/rcar_du_drv.c | 2 - + drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 8 +- + drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 1 - + drivers/gpu/drm/scheduler/sched_main.c | 21 +- + drivers/gpu/drm/sti/sti_compositor.c | 2 - + drivers/gpu/drm/stm/ltdc.c | 4 - + drivers/gpu/drm/sun4i/sun4i_drv.c | 4 +- + drivers/gpu/drm/sun4i/sun4i_layer.c | 1 - + drivers/gpu/drm/sun4i/sun8i_ui_layer.c | 1 - + drivers/gpu/drm/sun4i/sun8i_vi_layer.c | 1 - + drivers/gpu/drm/tegra/drm.c | 9 
+- + drivers/gpu/drm/tidss/tidss_irq.c | 3 - + drivers/gpu/drm/tidss/tidss_plane.c | 1 - + drivers/gpu/drm/tiny/Kconfig | 13 + + drivers/gpu/drm/tiny/Makefile | 1 + + drivers/gpu/drm/tiny/bochs.c | 733 ++++ + drivers/gpu/drm/tiny/cirrus.c | 2 +- + drivers/gpu/drm/tiny/hx8357d.c | 1 - + drivers/gpu/drm/tiny/ili9225.c | 1 - + drivers/gpu/drm/tiny/ili9341.c | 1 - + drivers/gpu/drm/tiny/ili9486.c | 1 - + drivers/gpu/drm/tiny/mi0283qt.c | 1 - + drivers/gpu/drm/tiny/repaper.c | 1 - + drivers/gpu/drm/tiny/st7586.c | 1 - + drivers/gpu/drm/tiny/st7735r.c | 1 - + drivers/gpu/drm/ttm/ttm_bo.c | 66 +- + drivers/gpu/drm/tve200/tve200_display.c | 1 - + drivers/gpu/drm/v3d/v3d_sched.c | 10 +- + drivers/gpu/drm/vboxvideo/vbox_drv.c | 3 +- + drivers/gpu/drm/vboxvideo/vbox_drv.h | 1 - + drivers/gpu/drm/vboxvideo/vbox_irq.c | 16 +- + drivers/gpu/drm/vboxvideo/vbox_mode.c | 3 +- + drivers/gpu/drm/vc4/Kconfig | 1 + + drivers/gpu/drm/vc4/vc4_drv.c | 2 +- + drivers/gpu/drm/vc4/vc4_hdmi.c | 424 +-- + drivers/gpu/drm/vc4/vc4_hdmi.h | 3 +- + drivers/gpu/drm/vc4/vc4_kms.c | 1 - + drivers/gpu/drm/vc4/vc4_regs.h | 30 + + drivers/gpu/drm/virtio/virtgpu_drv.c | 2 +- + drivers/gpu/drm/virtio/virtgpu_drv.h | 4 +- + drivers/gpu/drm/virtio/virtgpu_plane.c | 64 +- + drivers/gpu/drm/virtio/virtgpu_vq.c | 7 +- + drivers/gpu/drm/vkms/vkms_composer.c | 26 +- + drivers/gpu/drm/vkms/vkms_drv.c | 32 +- + drivers/gpu/drm/vkms/vkms_drv.h | 6 +- + drivers/gpu/drm/vkms/vkms_plane.c | 57 +- + drivers/gpu/drm/vmwgfx/Kconfig | 8 + + drivers/gpu/drm/vmwgfx/Makefile | 2 +- + .../gpu/drm/vmwgfx/device_include/includeCheck.h | 3 - + .../gpu/drm/vmwgfx/device_include/svga3d_caps.h | 111 - + drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h | 3680 ++++++++------------ + .../gpu/drm/vmwgfx/device_include/svga3d_devcaps.h | 793 ++--- + drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h | 3503 +++++++++---------- + .../gpu/drm/vmwgfx/device_include/svga3d_limits.h | 101 +- + 
drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h | 15 +- + .../drm/vmwgfx/device_include/svga3d_surfacedefs.h | 3132 ++++++++--------- + .../gpu/drm/vmwgfx/device_include/svga3d_types.h | 3020 +++++++--------- + .../gpu/drm/vmwgfx/device_include/svga_escape.h | 68 +- + .../gpu/drm/vmwgfx/device_include/svga_overlay.h | 203 +- + drivers/gpu/drm/vmwgfx/device_include/svga_reg.h | 2847 ++++----------- + drivers/gpu/drm/vmwgfx/device_include/svga_types.h | 51 - + .../gpu/drm/vmwgfx/device_include/vm_basic_types.h | 131 +- + .../drm/vmwgfx/device_include/vmware_pack_begin.h | 2 - + .../drm/vmwgfx/device_include/vmware_pack_end.h | 2 - + drivers/gpu/drm/vmwgfx/ttm_memory.c | 2 + + drivers/gpu/drm/vmwgfx/vmw_surface_cache.h | 539 +++ + drivers/gpu/drm/vmwgfx/vmwgfx_binding.c | 24 +- + drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 6 +- + drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 2 +- + drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 4 +- + drivers/gpu/drm/vmwgfx/vmwgfx_context.c | 4 +- + drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c | 142 + + drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.h | 50 + + drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 68 +- + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 42 +- + drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 17 +- + drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 111 +- + drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 24 +- + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 6 +- + drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h | 144 + + drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 40 +- + drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 579 ++- + drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 8 +- + drivers/gpu/drm/vmwgfx/vmwgfx_so.c | 3 +- + drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 2 +- + drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 80 +- + drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 35 - + drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 4 +- + drivers/gpu/drm/xen/xen_drm_front_kms.c | 1 - + drivers/gpu/drm/xlnx/zynqmp_dpsub.c | 2 - + drivers/gpu/drm/zte/zx_drm_drv.c | 6 - + drivers/video/fbdev/omap2/omapfb/dss/apply.c | 4 +- + 
include/drm/drm_aperture.h | 14 +- + include/drm/drm_bridge.h | 26 +- + include/drm/drm_connector.h | 5 + + include/drm/drm_dp_aux_bus.h | 57 + + include/drm/drm_dp_helper.h | 175 +- + include/drm/drm_gem_atomic_helper.h | 6 + + include/drm/drm_gem_vram_helper.h | 16 +- + include/drm/drm_mipi_dbi.h | 7 +- + include/drm/drm_mipi_dsi.h | 5 + + include/drm/drm_modeset_helper_vtables.h | 7 +- + include/drm/drm_panel.h | 8 +- + include/drm/drm_simple_kms_helper.h | 7 +- + include/drm/drm_vma_manager.h | 2 +- + include/drm/gpu_scheduler.h | 37 +- + include/drm/ttm/ttm_placement.h | 7 +- + include/linux/dma-buf.h | 177 +- + include/linux/dma-fence-chain.h | 52 +- + include/uapi/drm/drm.h | 4 +- + include/uapi/drm/drm_mode.h | 32 + + include/uapi/drm/vmwgfx_drm.h | 41 + + include/uapi/linux/dma-buf.h | 50 +- + 311 files changed, 18729 insertions(+), 15990 deletions(-) + create mode 100644 Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers + create mode 100644 Documentation/devicetree/bindings/display/dp-aux-bus.yaml + create mode 100644 Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml + create mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.c + create mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.h + delete mode 100644 drivers/gpu/drm/bochs/Kconfig + delete mode 100644 drivers/gpu/drm/bochs/Makefile + delete mode 100644 drivers/gpu/drm/bochs/bochs.h + delete mode 100644 drivers/gpu/drm/bochs/bochs_drv.c + delete mode 100644 drivers/gpu/drm/bochs/bochs_hw.c + delete mode 100644 drivers/gpu/drm/bochs/bochs_kms.c + delete mode 100644 drivers/gpu/drm/bochs/bochs_mm.c + create mode 100644 drivers/gpu/drm/drm_dp_aux_bus.c + create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm.c + create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm.h + create mode 100644 drivers/gpu/drm/panel/panel-innolux-ej030na.c + create mode 100644 drivers/gpu/drm/panel/panel-samsung-db7430.c + create mode 100644 drivers/gpu/drm/tiny/bochs.c + delete mode 100644 
drivers/gpu/drm/vmwgfx/device_include/includeCheck.h + delete mode 100644 drivers/gpu/drm/vmwgfx/device_include/svga3d_caps.h + delete mode 100644 drivers/gpu/drm/vmwgfx/device_include/svga_types.h + delete mode 100644 drivers/gpu/drm/vmwgfx/device_include/vmware_pack_begin.h + delete mode 100644 drivers/gpu/drm/vmwgfx/device_include/vmware_pack_end.h + create mode 100644 drivers/gpu/drm/vmwgfx/vmw_surface_cache.h + create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c + create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.h + create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h + create mode 100644 include/drm/drm_dp_aux_bus.h +Merging drm-misc/for-linux-next (85fd4a8a8431 drm/ttm: Fix COW check) +$ git merge drm-misc/for-linux-next +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +Merge made by the 'recursive' strategy. + drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 ++- + drivers/gpu/drm/nouveau/nouveau_gem.c | 3 ++- + drivers/gpu/drm/radeon/radeon_gem.c | 3 ++- + drivers/gpu/drm/ttm/ttm_bo_vm.c | 14 +++++++++++++- + drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 1 + + include/drm/ttm/ttm_bo_api.h | 4 ++++ + 6 files changed, 24 insertions(+), 4 deletions(-) +Merging amdgpu/drm-next (d91a713ed367 drm/amdgpu/display: add support for multiple backlights) +$ git merge amdgpu/drm-next +Auto-merging drivers/usb/host/xhci-pci.c +Auto-merging drivers/pci/quirks.c +Auto-merging drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +Auto-merging drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +Auto-merging drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +CONFLICT (content): Merge conflict in drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +Auto-merging drivers/gpu/drm/amd/pm/inc/smu_types.h +Auto-merging drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +Auto-merging drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +Auto-merging drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +Auto-merging drivers/gpu/drm/amd/display/dc/dc.h +Auto-merging 
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +Auto-merging drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +Auto-merging drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +Auto-merging drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +Auto-merging drivers/gpu/drm/amd/amdkfd/kfd_svm.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/amd/amdkfd/kfd_svm.c +Auto-merging drivers/gpu/drm/amd/amdgpu/soc15.c +Auto-merging drivers/gpu/drm/amd/amdgpu/nv.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/amd/amdgpu/nv.c +Auto-merging drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +CONFLICT (rename/delete): drivers/gpu/drm/amd/pm/inc/smu_v13_0_1.h deleted in HEAD and renamed to drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h in amdgpu/drm-next. Version amdgpu/drm-next of drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h left in tree. +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu.h +Auto-merging drivers/gpu/drm/Kconfig +Resolved 'drivers/gpu/drm/amd/amdgpu/nv.c' using previous resolution. +Resolved 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c' using previous resolution. 
+Resolved 'drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c' using previous resolution. +Resolved 'drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c' using previous resolution. +Resolved 'drivers/gpu/drm/amd/pm/inc/smu_v11_0.h' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master c7f09be79268] Merge remote-tracking branch 'amdgpu/drm-next' +$ git diff -M --stat --summary HEAD^.. + drivers/gpu/drm/Kconfig | 1 + + drivers/gpu/drm/amd/amdgpu/Makefile | 6 +- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 54 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 83 ++ + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 127 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 12 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c | 239 ++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h | 37 + + drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c | 32 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 24 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 243 ++-- + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 39 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 116 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 1258 +++++++++++++------- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 70 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | 4 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 90 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 85 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 2 + + drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 147 +-- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 10 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 
12 + + drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 2 + + .../gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c | 51 + + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 162 ++- + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +- + drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 + + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13 + + drivers/gpu/drm/amd/amdgpu/nv.c | 27 +- + drivers/gpu/drm/amd/amdgpu/nv.h | 1 + + drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 49 +- + drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c | 208 ++++ + drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h | 30 + + drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 11 +- + drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 72 +- + drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 4 +- + drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 50 +- + drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c | 324 ++--- + drivers/gpu/drm/amd/amdgpu/soc15.c | 7 +- + drivers/gpu/drm/amd/amdgpu/soc15_common.h | 8 +- + drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h | 14 +- + drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 15 + + drivers/gpu/drm/amd/amdgpu/umc_v6_7.h | 15 + + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +- + drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +- + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 +- + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 17 + + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 30 +- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 39 +- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 1 + + drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 5 + + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 10 +- + drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 19 +- + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 3 +- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 256 ++-- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 4 +- + .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 18 +- + 
.../drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c | 12 +- + .../dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c | 4 +- + .../amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 12 +- + .../dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c | 16 +- + .../amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 25 +- + .../drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c | 10 +- + .../drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c | 2 +- + .../drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c | 10 +- + drivers/gpu/drm/amd/display/dc/core/dc_link.c | 8 + + drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 187 +-- + drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c | 25 + + drivers/gpu/drm/amd/display/dc/core/dc_stat.c | 24 + + drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 3 + + drivers/gpu/drm/amd/display/dc/dc.h | 2 +- + drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 1 + + drivers/gpu/drm/amd/display/dc/dc_stat.h | 1 + + drivers/gpu/drm/amd/display/dc/dc_types.h | 2 +- + .../gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h | 19 +- + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 29 + + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h | 4 + + .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 42 +- + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 6 + + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 2 + + .../gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c | 21 + + .../gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h | 33 +- + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 17 + + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 6 + + .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 10 +- + .../gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c | 1 + + .../gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h | 14 +- + drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 2 +- + .../gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 3 +- + .../gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c | 1 + + .../gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h | 14 +- + drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 25 +- 
+ .../gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c | 1 + + .../gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c | 25 +- + .../gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h | 15 +- + drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 5 +- + .../amd/display/dc/dml/dcn21/display_mode_vba_21.c | 11 +- + .../amd/display/dc/dml/dcn30/display_mode_vba_30.c | 18 +- + .../amd/display/dc/dml/dcn31/display_mode_vba_31.c | 15 +- + .../drm/amd/display/dc/dml/display_mode_enums.h | 4 +- + .../drm/amd/display/dc/dml/display_mode_structs.h | 2 + + .../gpu/drm/amd/display/dc/dml/display_mode_vba.c | 12 +- + .../gpu/drm/amd/display/dc/dml/display_mode_vba.h | 4 +- + drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h | 1 - + .../drm/amd/display/dc/inc/hw/clk_mgr_internal.h | 5 + + drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 14 + + drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h | 25 + + drivers/gpu/drm/amd/display/dc/irq_types.h | 2 +- + drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 18 + + drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 47 +- + drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 15 + + drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h | 10 +- + drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 17 + + drivers/gpu/drm/amd/include/amd_shared.h | 1 + + .../amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h | 4 + + .../drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h | 352 ++++++ + .../amd/include/asic_reg/umc/umc_8_7_0_sh_mask.h | 179 +-- + .../gpu/drm/amd/include/cyan_skillfish_ip_offset.h | 714 +++++++++++ + drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 1 + + .../amd/pm/inc/smu11_driver_if_cyan_skillfish.h | 95 ++ + .../amd/pm/inc/smu11_driver_if_sienna_cichlid.h | 65 +- + drivers/gpu/drm/amd/pm/inc/smu_types.h | 2 +- + drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 3 +- + drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h | 2 +- + drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h | 152 +++ + drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h | 70 ++ + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 4 + + 
drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile | 1 + + drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 246 ++-- + .../drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 76 ++ + .../drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h | 29 + + drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 118 ++ + .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 457 ++++--- + drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 3 + + drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +- + drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h | 1 - + drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 243 ++-- + drivers/gpu/drm/radeon/radeon.h | 3 + + drivers/gpu/drm/radeon/radeon_audio.c | 95 ++ + drivers/pci/quirks.c | 1 + + drivers/usb/host/xhci-pci.c | 3 + + include/drm/amd_asic_type.h | 17 +- + include/uapi/linux/kfd_ioctl.h | 16 +- + 155 files changed, 5924 insertions(+), 2111 deletions(-) + create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c + create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h + create mode 100644 drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c + create mode 100644 drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c + create mode 100644 drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h + create mode 100644 drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h + create mode 100644 drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h + create mode 100644 drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h + create mode 100644 drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h + create mode 100644 drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h + create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c + create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h +Merging drm-intel/for-linux-next (e73db72732dc drm/i915/firmware: Update to DMC v2.03 on RKL) +$ git merge drm-intel/for-linux-next +Auto-merging drivers/gpu/drm/i915/i915_irq.c +Auto-merging drivers/gpu/drm/i915/i915_drv.h +Auto-merging drivers/gpu/drm/i915/i915_drv.c 
+Auto-merging drivers/gpu/drm/i915/gt/intel_region_lmem.c +Auto-merging drivers/gpu/drm/i915/display/intel_display_types.h +Auto-merging drivers/gpu/drm/i915/display/intel_display.c +Merge made by the 'recursive' strategy. + drivers/gpu/drm/i915/display/i9xx_plane.c | 2 +- + drivers/gpu/drm/i915/display/intel_acpi.c | 19 +++ + drivers/gpu/drm/i915/display/intel_acpi.h | 3 + + drivers/gpu/drm/i915/display/intel_atomic_plane.c | 7 +- + drivers/gpu/drm/i915/display/intel_atomic_plane.h | 3 +- + drivers/gpu/drm/i915/display/intel_bw.c | 65 ++++++++- + drivers/gpu/drm/i915/display/intel_cdclk.c | 35 +++-- + drivers/gpu/drm/i915/display/intel_cursor.c | 4 +- + drivers/gpu/drm/i915/display/intel_display.c | 23 +++- + .../gpu/drm/i915/display/intel_display_debugfs.c | 83 +++++++++++- + drivers/gpu/drm/i915/display/intel_display_power.c | 38 ++++-- + drivers/gpu/drm/i915/display/intel_display_power.h | 10 ++ + drivers/gpu/drm/i915/display/intel_display_types.h | 1 + + drivers/gpu/drm/i915/display/intel_dmc.c | 58 ++------ + drivers/gpu/drm/i915/display/intel_dp.c | 17 +++ + drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 7 +- + drivers/gpu/drm/i915/display/intel_fbc.c | 4 +- + drivers/gpu/drm/i915/display/intel_opregion.c | 3 + + drivers/gpu/drm/i915/display/intel_psr.c | 77 +++++++---- + drivers/gpu/drm/i915/display/intel_sprite.c | 2 +- + drivers/gpu/drm/i915/display/skl_universal_plane.c | 12 +- + drivers/gpu/drm/i915/gt/intel_region_lmem.c | 2 +- + drivers/gpu/drm/i915/gt/intel_workarounds.c | 111 ++-------------- + drivers/gpu/drm/i915/i915_drv.c | 9 +- + drivers/gpu/drm/i915/i915_drv.h | 123 ++++++++--------- + drivers/gpu/drm/i915/i915_irq.c | 141 +++++++++++++------- + drivers/gpu/drm/i915/i915_pci.c | 65 ++++++++- + drivers/gpu/drm/i915/i915_reg.h | 42 ++++-- + drivers/gpu/drm/i915/intel_device_info.c | 20 ++- + drivers/gpu/drm/i915/intel_device_info.h | 12 +- + drivers/gpu/drm/i915/intel_dram.c | 6 +- + drivers/gpu/drm/i915/intel_pch.c | 3 + + 
drivers/gpu/drm/i915/intel_pch.h | 2 + + drivers/gpu/drm/i915/intel_pm.c | 124 ++++++++++++++++- + drivers/gpu/drm/i915/intel_step.c | 148 ++++++++++++++++++--- + drivers/gpu/drm/i915/intel_step.h | 33 +++-- + 36 files changed, 909 insertions(+), 405 deletions(-) +Merging drm-tegra/drm/tegra/for-next (8874e3a7baec drm/tegra: Fix compilation of variadic macro) +$ git merge drm-tegra/drm/tegra/for-next +Auto-merging drivers/gpu/drm/tegra/drm.c +Merge made by the 'recursive' strategy. + drivers/gpu/drm/tegra/Makefile | 4 + + drivers/gpu/drm/tegra/drm.c | 82 +++-- + drivers/gpu/drm/tegra/drm.h | 12 + + drivers/gpu/drm/tegra/firewall.c | 254 ++++++++++++++ + drivers/gpu/drm/tegra/gather_bo.c | 81 +++++ + drivers/gpu/drm/tegra/gather_bo.h | 22 ++ + drivers/gpu/drm/tegra/gem.c | 13 + + drivers/gpu/drm/tegra/gem.h | 2 + + drivers/gpu/drm/tegra/submit.c | 527 +++++++++++++++++++++++++++++ + drivers/gpu/drm/tegra/submit.h | 21 ++ + drivers/gpu/drm/tegra/uapi.c | 387 +++++++++++++++++++++ + drivers/gpu/drm/tegra/uapi.h | 58 ++++ + drivers/gpu/drm/tegra/vic.c | 112 +++--- + drivers/gpu/host1x/Makefile | 1 + + drivers/gpu/host1x/cdma.c | 58 +++- + drivers/gpu/host1x/fence.c | 209 ++++++++++++ + drivers/gpu/host1x/fence.h | 13 + + drivers/gpu/host1x/hw/channel_hw.c | 87 +++-- + drivers/gpu/host1x/hw/debug_hw.c | 9 +- + drivers/gpu/host1x/hw/hw_host1x02_uclass.h | 12 + + drivers/gpu/host1x/hw/hw_host1x04_uclass.h | 12 + + drivers/gpu/host1x/hw/hw_host1x05_uclass.h | 12 + + drivers/gpu/host1x/hw/hw_host1x06_uclass.h | 12 + + drivers/gpu/host1x/hw/hw_host1x07_uclass.h | 12 + + drivers/gpu/host1x/intr.c | 9 + + drivers/gpu/host1x/intr.h | 2 + + drivers/gpu/host1x/job.c | 77 +++-- + drivers/gpu/host1x/job.h | 16 + + drivers/gpu/host1x/syncpt.c | 2 + + drivers/gpu/host1x/syncpt.h | 12 + + include/linux/host1x.h | 22 +- + include/uapi/drm/tegra_drm.h | 425 +++++++++++++++++++++-- + 32 files changed, 2408 insertions(+), 169 deletions(-) + create mode 100644 
drivers/gpu/drm/tegra/firewall.c + create mode 100644 drivers/gpu/drm/tegra/gather_bo.c + create mode 100644 drivers/gpu/drm/tegra/gather_bo.h + create mode 100644 drivers/gpu/drm/tegra/submit.c + create mode 100644 drivers/gpu/drm/tegra/submit.h + create mode 100644 drivers/gpu/drm/tegra/uapi.c + create mode 100644 drivers/gpu/drm/tegra/uapi.h + create mode 100644 drivers/gpu/host1x/fence.c + create mode 100644 drivers/gpu/host1x/fence.h +Merging drm-msm/msm-next (e88bbc91849b Revert "drm/msm/mdp5: provide dynamic bandwidth management") +$ git merge drm-msm/msm-next +Already up to date. +Merging imx-drm/imx-drm/next (fc1e985b67f9 drm/imx: ipuv3-plane: add color encoding and range properties) +$ git merge imx-drm/imx-drm/next +Already up to date. +Merging etnaviv/etnaviv/next (81fd23e2b3cc drm/etnaviv: Implement mmap as GEM object function) +$ git merge etnaviv/etnaviv/next +Merge made by the 'recursive' strategy. + drivers/gpu/drm/etnaviv/etnaviv_drv.c | 14 ++----------- + drivers/gpu/drm/etnaviv/etnaviv_drv.h | 3 --- + drivers/gpu/drm/etnaviv/etnaviv_gem.c | 18 +++++------------ + drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 13 ------------ + drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 6 ++++++ + drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 31 +++++++++++++++++++++++++++++ + 6 files changed, 44 insertions(+), 41 deletions(-) +Merging regmap/for-next (d63aa09f7c53 regmap: Prefer unsigned int to bare use of unsigned) +$ git merge regmap/for-next +Merge made by the 'recursive' strategy. + drivers/base/regmap/regmap-debugfs.c | 2 +- + drivers/base/regmap/regmap-mmio.c | 2 +- + drivers/base/regmap/regmap.c | 10 +++++----- + 3 files changed, 7 insertions(+), 7 deletions(-) +Merging sound/for-next (f976e8a94176 ALSA: sc6000: Use explicit cast for __iomem pointer) +$ git merge sound/for-next +Auto-merging sound/pci/hda/patch_hdmi.c +Merge made by the 'recursive' strategy. 
+ .../sound/kernel-api/writing-an-alsa-driver.rst | 33 ++++ + include/sound/compress_driver.h | 2 - + include/sound/core.h | 6 + + include/sound/emu10k1.h | 6 +- + include/sound/emu8000.h | 3 - + include/sound/es1688.h | 1 - + include/sound/memalloc.h | 4 + + sound/core/compress_offload.c | 69 +------ + sound/core/init.c | 99 +++++++++- + sound/core/isadma.c | 38 ++++ + sound/core/memalloc.c | 46 +++++ + sound/drivers/Kconfig | 2 +- + sound/drivers/aloop.c | 26 +-- + sound/drivers/dummy.c | 24 +-- + sound/drivers/mpu401/mpu401.c | 34 +--- + sound/drivers/mtpav.c | 30 +-- + sound/drivers/pcsp/pcsp.c | 49 ++--- + sound/drivers/pcsp/pcsp_input.c | 14 +- + sound/drivers/pcsp/pcsp_input.h | 1 - + sound/drivers/serial-u16550.c | 57 +----- + sound/drivers/virmidi.c | 21 +-- + sound/drivers/vx/vx_core.c | 12 +- + sound/isa/ad1816a/ad1816a.c | 41 +--- + sound/isa/ad1816a/ad1816a_lib.c | 49 +---- + sound/isa/ad1848/ad1848.c | 19 +- + sound/isa/adlib.c | 28 +-- + sound/isa/als100.c | 41 +--- + sound/isa/azt2320.c | 49 ++--- + sound/isa/cmi8328.c | 31 +-- + sound/isa/cmi8330.c | 27 +-- + sound/isa/cs423x/cs4231.c | 21 +-- + sound/isa/cs423x/cs4236.c | 52 +---- + sound/isa/cs423x/cs4236_lib.c | 2 - + sound/isa/es1688/es1688.c | 33 +--- + sound/isa/es1688/es1688_lib.c | 29 +-- + sound/isa/es18xx.c | 112 ++--------- + sound/isa/galaxy/galaxy.c | 82 ++++---- + sound/isa/gus/gus_main.c | 44 ++--- + sound/isa/gus/gusclassic.c | 28 +-- + sound/isa/gus/gusextreme.c | 39 ++-- + sound/isa/gus/gusmax.c | 65 ++----- + sound/isa/gus/interwave.c | 53 ++---- + sound/isa/msnd/msnd_pinnacle.c | 119 +++--------- + sound/isa/opl3sa2.c | 60 ++---- + sound/isa/opti9xx/miro.c | 76 +++----- + sound/isa/opti9xx/opti92x-ad1848.c | 77 +++----- + sound/isa/sb/emu8000.c | 48 +---- + sound/isa/sb/jazz16.c | 39 ++-- + sound/isa/sb/sb16.c | 42 +---- + sound/isa/sb/sb8.c | 48 ++--- + sound/isa/sb/sb_common.c | 64 ++----- + sound/isa/sc6000.c | 84 +++------ + sound/isa/sscape.c | 92 ++------- + 
sound/isa/wavefront/wavefront.c | 46 +---- + sound/isa/wss/wss_lib.c | 67 ++----- + sound/pci/ad1889.c | 144 +++----------- + sound/pci/ali5451/ali5451.c | 90 +++------ + sound/pci/als300.c | 80 ++------ + sound/pci/als4000.c | 59 ++---- + sound/pci/atiixp.c | 92 ++------- + sound/pci/atiixp_modem.c | 92 ++------- + sound/pci/au88x0/au88x0.c | 134 +++---------- + sound/pci/aw2/aw2-alsa.c | 102 ++-------- + sound/pci/azt3328.c | 124 +++--------- + sound/pci/bt87x.c | 98 +++------- + sound/pci/ca0106/ca0106.h | 3 +- + sound/pci/ca0106/ca0106_main.c | 114 +++-------- + sound/pci/cmipci.c | 104 +++------- + sound/pci/cs4281.c | 114 +++-------- + sound/pci/cs46xx/cs46xx.c | 51 ++--- + sound/pci/cs46xx/cs46xx.h | 4 +- + sound/pci/cs46xx/cs46xx_lib.c | 111 +++-------- + sound/pci/cs5530.c | 86 ++------- + sound/pci/cs5535audio/cs5535audio.c | 94 ++------- + sound/pci/cs5535audio/cs5535audio_olpc.c | 7 +- + sound/pci/echoaudio/echoaudio.c | 168 +++++------------ + sound/pci/echoaudio/echoaudio.h | 2 +- + sound/pci/emu10k1/emu10k1.c | 53 +++--- + sound/pci/emu10k1/emu10k1_main.c | 102 +++------- + sound/pci/emu10k1/emu10k1x.c | 128 +++---------- + sound/pci/emu10k1/p16v.c | 22 +-- + sound/pci/ens1370.c | 115 +++--------- + sound/pci/es1938.c | 97 +++------- + sound/pci/es1968.c | 112 +++-------- + sound/pci/fm801.c | 103 ++-------- + sound/pci/hda/hda_controller.h | 1 - + sound/pci/hda/hda_intel.c | 26 +-- + sound/pci/hda/patch_ca0132.c | 2 +- + sound/pci/hda/patch_hdmi.c | 7 + + sound/pci/ice1712/ice1712.c | 133 +++---------- + sound/pci/ice1712/ice1724.c | 167 ++++------------ + sound/pci/intel8x0.c | 142 +++++--------- + sound/pci/intel8x0m.c | 139 ++++---------- + sound/pci/korg1212/korg1212.c | 209 ++++++--------------- + sound/pci/lola/lola.c | 127 ++++--------- + sound/pci/lola/lola.h | 5 +- + sound/pci/lola/lola_pcm.c | 20 +- + sound/pci/lx6464es/lx6464es.c | 112 +++-------- + sound/pci/maestro3.c | 106 +++-------- + sound/pci/nm256/nm256.c | 131 +++---------- + 
sound/pci/oxygen/oxygen.c | 1 - + sound/pci/oxygen/oxygen.h | 1 - + sound/pci/oxygen/oxygen_lib.c | 66 +++---- + sound/pci/oxygen/se6x.c | 1 - + sound/pci/oxygen/virtuoso.c | 1 - + sound/pci/riptide/riptide.c | 89 +++------ + sound/pci/rme32.c | 49 ++--- + sound/pci/rme96.c | 57 ++---- + sound/pci/rme9652/hdsp.c | 89 +++------ + sound/pci/rme9652/hdspm.c | 64 ++----- + sound/pci/rme9652/rme9652.c | 85 +++------ + sound/pci/sis7019.c | 87 ++------- + sound/pci/sonicvibes.c | 117 +++--------- + sound/pci/trident/trident.c | 39 +--- + sound/pci/trident/trident.h | 7 +- + sound/pci/trident/trident_main.c | 90 +++------ + sound/pci/trident/trident_memory.c | 8 +- + sound/pci/via82xx.c | 116 +++--------- + sound/pci/via82xx_modem.c | 88 ++------- + sound/pci/vx222/vx222.c | 69 ++----- + sound/pci/ymfpci/ymfpci.c | 66 +++---- + sound/pci/ymfpci/ymfpci.h | 8 +- + sound/pci/ymfpci/ymfpci_main.c | 149 +++++---------- + sound/pcmcia/vx/vxpocket.c | 22 --- + sound/soc/soc-compress.c | 1 - + sound/x86/intel_hdmi_audio.c | 88 +++------ + sound/x86/intel_hdmi_audio.h | 1 - + 127 files changed, 2036 insertions(+), 5639 deletions(-) +Merging sound-asoc/for-next (86db346793f9 Merge remote-tracking branch 'asoc/for-5.15' into asoc-next) +$ git merge sound-asoc/for-next +Merge made by the 'recursive' strategy. 
+ .../bindings/sound/davinci-mcasp-audio.txt | 1 + + include/linux/platform_data/davinci_asp.h | 1 + + include/sound/sof.h | 1 + + sound/soc/amd/Kconfig | 9 + + sound/soc/amd/Makefile | 1 + + sound/soc/amd/acp-pcm-dma.c | 12 +- + sound/soc/amd/raven/acp3x-pcm-dma.c | 2 +- + sound/soc/amd/vangogh/Makefile | 9 + + sound/soc/amd/vangogh/acp5x-i2s.c | 427 +++++++++++++++++ + sound/soc/amd/vangogh/acp5x-pcm-dma.c | 525 +++++++++++++++++++++ + sound/soc/amd/vangogh/acp5x.h | 193 ++++++++ + sound/soc/amd/vangogh/pci-acp5x.c | 326 +++++++++++++ + sound/soc/amd/vangogh/vg_chip_offset_byte.h | 337 +++++++++++++ + sound/soc/atmel/Kconfig | 1 - + sound/soc/atmel/mchp-spdifrx.c | 6 +- + sound/soc/atmel/mchp-spdiftx.c | 2 +- + sound/soc/atmel/tse850-pcm5142.c | 2 +- + sound/soc/codecs/Kconfig | 18 + + sound/soc/codecs/Makefile | 2 + + sound/soc/codecs/lpass-rx-macro.c | 65 +-- + sound/soc/codecs/sdw-mockup.c | 310 ++++++++++++ + sound/soc/codecs/tlv320aic32x4.c | 2 +- + sound/soc/codecs/wcd938x.c | 11 +- + sound/soc/codecs/wcd938x.h | 1 - + sound/soc/codecs/wm_adsp.c | 7 +- + sound/soc/fsl/fsl_xcvr.c | 4 +- + sound/soc/intel/boards/Kconfig | 1 + + sound/soc/intel/boards/sof_cs42l42.c | 361 +++++++++----- + sound/soc/intel/boards/sof_maxim_common.c | 17 +- + sound/soc/intel/boards/sof_maxim_common.h | 4 +- + sound/soc/intel/boards/sof_rt5682.c | 52 +- + sound/soc/intel/boards/sof_sdw.c | 75 ++- + sound/soc/intel/boards/sof_sdw_common.h | 27 +- + sound/soc/intel/common/Makefile | 3 +- + sound/soc/intel/common/soc-acpi-intel-cnl-match.c | 15 + + sound/soc/intel/common/soc-acpi-intel-jsl-match.c | 8 + + .../intel/common/soc-acpi-intel-sdw-mockup-match.c | 166 +++++++ + .../intel/common/soc-acpi-intel-sdw-mockup-match.h | 17 + + sound/soc/intel/common/soc-acpi-intel-tgl-match.c | 38 ++ + sound/soc/mediatek/mt8183/mt8183-afe-pcm.c | 43 +- + sound/soc/mediatek/mt8192/mt8192-afe-pcm.c | 27 +- + sound/soc/sh/rcar/dma.c | 2 +- + sound/soc/sof/sof-audio.c | 42 +- + sound/soc/ti/Kconfig | 1 
+ + sound/soc/ti/davinci-mcasp.c | 176 ++++++- + sound/soc/ti/j721e-evm.c | 30 +- + sound/soc/ti/omap-abe-twl6040.c | 5 - + 47 files changed, 3053 insertions(+), 332 deletions(-) + create mode 100644 sound/soc/amd/vangogh/Makefile + create mode 100644 sound/soc/amd/vangogh/acp5x-i2s.c + create mode 100644 sound/soc/amd/vangogh/acp5x-pcm-dma.c + create mode 100644 sound/soc/amd/vangogh/acp5x.h + create mode 100644 sound/soc/amd/vangogh/pci-acp5x.c + create mode 100644 sound/soc/amd/vangogh/vg_chip_offset_byte.h + create mode 100644 sound/soc/codecs/sdw-mockup.c + create mode 100644 sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.c + create mode 100644 sound/soc/intel/common/soc-acpi-intel-sdw-mockup-match.h +Merging modules/modules-next (4c5afb74d945 module: combine constructors in module linker script) +$ git merge modules/modules-next +Merge made by the 'recursive' strategy. + scripts/module.lds.S | 1 + + 1 file changed, 1 insertion(+) +Merging input/next (7d3370e506ec Revert "Input: serio - make write method mandatory") +$ git merge input/next +Removing drivers/input/misc/ixp4xx-beeper.c +Merge made by the 'recursive' strategy. + drivers/input/misc/Kconfig | 12 --- + drivers/input/misc/Makefile | 1 - + drivers/input/misc/ixp4xx-beeper.c | 183 ------------------------------------- + drivers/input/serio/parkbd.c | 14 +-- + 4 files changed, 1 insertion(+), 209 deletions(-) + delete mode 100644 drivers/input/misc/ixp4xx-beeper.c +Merging block/for-next (60b8191c4777 Merge branch 'for-5.15/block' into for-next) +$ git merge block/for-next +Merge made by the 'recursive' strategy. 
+ include/linux/ioprio.h | 41 +--------------------------------------- + include/uapi/linux/ioprio.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 47 insertions(+), 40 deletions(-) + create mode 100644 include/uapi/linux/ioprio.h +Merging device-mapper/for-next (e820ba87f9d1 dm: add documentation for IMA measurement support) +$ git merge device-mapper/for-next +Merge made by the 'recursive' strategy. + Documentation/admin-guide/device-mapper/dm-ima.rst | 306 +++++++++ + Documentation/admin-guide/device-mapper/index.rst | 1 + + drivers/md/Makefile | 4 + + drivers/md/dm-cache-target.c | 24 + + drivers/md/dm-clone-target.c | 5 + + drivers/md/dm-core.h | 5 + + drivers/md/dm-crypt.c | 27 + + drivers/md/dm-delay.c | 4 + + drivers/md/dm-dust.c | 4 + + drivers/md/dm-ebs-target.c | 3 + + drivers/md/dm-era-target.c | 4 + + drivers/md/dm-flakey.c | 4 + + drivers/md/dm-ima.c | 704 +++++++++++++++++++++ + drivers/md/dm-ima.h | 68 ++ + drivers/md/dm-integrity.c | 25 + + drivers/md/dm-ioctl.c | 24 +- + drivers/md/dm-linear.c | 10 +- + drivers/md/dm-log-userspace-base.c | 3 + + drivers/md/dm-log-writes.c | 4 + + drivers/md/dm-log.c | 10 + + drivers/md/dm-mpath.c | 28 + + drivers/md/dm-ps-historical-service-time.c | 3 + + drivers/md/dm-ps-io-affinity.c | 3 + + drivers/md/dm-ps-queue-length.c | 3 + + drivers/md/dm-ps-round-robin.c | 4 + + drivers/md/dm-ps-service-time.c | 3 + + drivers/md/dm-raid.c | 38 ++ + drivers/md/dm-raid1.c | 17 + + drivers/md/dm-snap-persistent.c | 4 + + drivers/md/dm-snap-transient.c | 4 + + drivers/md/dm-snap.c | 13 + + drivers/md/dm-stripe.c | 15 + + drivers/md/dm-switch.c | 4 + + drivers/md/dm-thin.c | 8 + + drivers/md/dm-unstripe.c | 4 + + drivers/md/dm-verity-target.c | 41 ++ + drivers/md/dm-writecache.c | 408 ++++++------ + drivers/md/dm-zoned-target.c | 3 + + drivers/md/dm.c | 3 + + include/linux/device-mapper.h | 6 +- + include/uapi/linux/dm-ioctl.h | 6 + + security/integrity/ima/ima_main.c | 1 + + 42 files changed, 1673 
insertions(+), 185 deletions(-) + create mode 100644 Documentation/admin-guide/device-mapper/dm-ima.rst + create mode 100644 drivers/md/dm-ima.c + create mode 100644 drivers/md/dm-ima.h +Merging pcmcia/pcmcia-next (e9d503fef7da pcmcia: rsrc_nonstatic: Fix call-back function as reference formatting) +$ git merge pcmcia/pcmcia-next +Already up to date. +Merging mmc/next (49fc2be70e7f Merge branch 'fixes' into next) +$ git merge mmc/next +Merge made by the 'recursive' strategy. + .../devicetree/bindings/mmc/sdhci-msm.txt | 1 + + drivers/mmc/core/core.c | 3 +- + drivers/mmc/core/host.c | 13 +++++- + drivers/mmc/core/host.h | 6 +++ + drivers/mmc/host/mmc_spi.c | 15 ++----- + drivers/mmc/host/mmci.c | 3 ++ + drivers/mmc/host/renesas_sdhi_sys_dmac.c | 4 +- + drivers/mmc/host/sdhci-of-arasan.c | 51 +++++++++++++++++++--- + drivers/mmc/host/sh_mmcif.c | 4 +- + drivers/mmc/host/usdhi6rol0.c | 4 +- + 10 files changed, 76 insertions(+), 28 deletions(-) +Merging mfd/for-mfd-next (495fb48dbd9b mfd: wm831x: Use DEFINE_RES_IRQ_NAMED() and DEFINE_RES_IRQ() to simplify code) +$ git merge mfd/for-mfd-next +Merge made by the 'recursive' strategy. +Merging backlight/for-backlight-next (1181f2164135 backlight: lm3630a: Convert to atomic PWM API and check for errors) +$ git merge backlight/for-backlight-next +Already up to date. +Merging battery/for-next (56d629af09b9 power: supply: PCHG: Peripheral device charger) +$ git merge battery/for-next +Auto-merging drivers/power/supply/ab8500_fg.c +Auto-merging drivers/power/supply/ab8500_chargalg.c +Merge made by the 'recursive' strategy. 
+ drivers/power/supply/Kconfig | 10 + + drivers/power/supply/Makefile | 3 +- + drivers/power/supply/ab8500-bm.h | 219 ++------ + drivers/power/supply/ab8500_bmdata.c | 34 +- + drivers/power/supply/ab8500_btemp.c | 22 +- + .../{abx500_chargalg.c => ab8500_chargalg.c} | 583 +++++++++++---------- + drivers/power/supply/ab8500_charger.c | 4 +- + drivers/power/supply/ab8500_fg.c | 20 +- + drivers/power/supply/cros_peripheral_charger.c | 386 ++++++++++++++ + include/linux/platform_data/cros_ec_commands.h | 67 +++ + 10 files changed, 830 insertions(+), 518 deletions(-) + rename drivers/power/supply/{abx500_chargalg.c => ab8500_chargalg.c} (75%) + create mode 100644 drivers/power/supply/cros_peripheral_charger.c +Merging regulator/for-next (336e3a8679c4 Merge remote-tracking branch 'regulator/for-5.15' into regulator-next) +$ git merge regulator/for-next +Removing Documentation/devicetree/bindings/regulator/uniphier-regulator.txt +Merge made by the 'recursive' strategy. + .../regulator/richtek,rtq2134-regulator.yaml | 106 ++++++ + .../regulator/richtek,rtq6752-regulator.yaml | 76 +++++ + .../regulator/socionext,uniphier-regulator.yaml | 85 +++++ + .../bindings/regulator/uniphier-regulator.txt | 58 ---- + drivers/regulator/Kconfig | 24 +- + drivers/regulator/Makefile | 2 + + drivers/regulator/bd718x7-regulator.c | 369 ++++++++++++++------ + drivers/regulator/da9063-regulator.c | 132 +++++++- + drivers/regulator/dbx500-prcmu.c | 4 +- + drivers/regulator/devres.c | 69 +--- + drivers/regulator/fixed.c | 5 +- + drivers/regulator/hi6421v600-regulator.c | 2 +- + drivers/regulator/mt6358-regulator.c | 87 +++-- + drivers/regulator/mt6359-regulator.c | 19 +- + drivers/regulator/mt6397-regulator.c | 15 +- + drivers/regulator/rt5033-regulator.c | 21 +- + drivers/regulator/rt6245-regulator.c | 2 +- + drivers/regulator/rtq2134-regulator.c | 373 +++++++++++++++++++++ + drivers/regulator/rtq6752-regulator.c | 289 ++++++++++++++++ + drivers/regulator/tps65910-regulator.c | 10 +- + 
include/linux/mfd/rt5033-private.h | 4 +- + include/linux/regulator/consumer.h | 15 - + include/linux/regulator/driver.h | 1 - + include/linux/regulator/machine.h | 2 +- + 24 files changed, 1430 insertions(+), 340 deletions(-) + create mode 100644 Documentation/devicetree/bindings/regulator/richtek,rtq2134-regulator.yaml + create mode 100644 Documentation/devicetree/bindings/regulator/richtek,rtq6752-regulator.yaml + create mode 100644 Documentation/devicetree/bindings/regulator/socionext,uniphier-regulator.yaml + delete mode 100644 Documentation/devicetree/bindings/regulator/uniphier-regulator.txt + create mode 100644 drivers/regulator/rtq2134-regulator.c + create mode 100644 drivers/regulator/rtq6752-regulator.c +Merging security/next-testing (047843bdb316 Merge branch 'landlock_lsm_v34' into next-testing) +$ git merge security/next-testing +Already up to date! +Merge made by the 'recursive' strategy. +Merging apparmor/apparmor-next (d108370c644b apparmor: fix error check) +$ git merge apparmor/apparmor-next +Auto-merging security/apparmor/policy_unpack.c +Auto-merging security/apparmor/policy.c +Auto-merging security/apparmor/lsm.c +Auto-merging security/apparmor/label.c +Auto-merging security/apparmor/include/label.h +Auto-merging security/apparmor/include/file.h +Auto-merging security/apparmor/apparmorfs.c +Merge made by the 'recursive' strategy. + security/apparmor/apparmorfs.c | 4 +-- + security/apparmor/include/file.h | 2 +- + security/apparmor/include/label.h | 1 + + security/apparmor/include/policy.h | 6 ++-- + security/apparmor/label.c | 4 +-- + security/apparmor/lsm.c | 22 +++++++------- + security/apparmor/path.c | 2 +- + security/apparmor/policy.c | 59 ++++++++++++++++++++++++++++++++------ + security/apparmor/policy_unpack.c | 2 +- + 9 files changed, 73 insertions(+), 29 deletions(-) +Merging integrity/next-integrity (907a399de7b0 evm: Check xattr size discrepancy between kernel and user) +$ git merge integrity/next-integrity +Already up to date. 
+Merging keys/keys-next (e377c31f788f integrity: Load mokx variables into the blacklist keyring) +$ git merge keys/keys-next +Auto-merging scripts/Makefile +Auto-merging include/keys/system_keyring.h +Auto-merging certs/system_keyring.c +CONFLICT (content): Merge conflict in certs/system_keyring.c +Auto-merging certs/Makefile +Auto-merging certs/Kconfig +Resolved 'certs/system_keyring.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master 37a5e8094924] Merge remote-tracking branch 'keys/keys-next' +$ git diff -M --stat --summary HEAD^.. +Merging safesetid/safesetid-next (1b8b71922919 LSM: SafeSetID: Mark safesetid_initialized as __initdata) +$ git merge safesetid/safesetid-next +Already up to date. +Merging selinux/next (893c47d1964f selinux: return early for possible NULL audit buffers) +$ git merge selinux/next +Merge made by the 'recursive' strategy. + security/selinux/hooks.c | 4 ++++ + security/selinux/ss/services.c | 2 ++ + 2 files changed, 6 insertions(+) +Merging smack/next (bfc3cac0c761 smack: mark 'smack_enabled' global variable as __initdata) +$ git merge smack/next +Merge made by the 'recursive' strategy. + security/smack/smack.h | 2 +- + security/smack/smack_access.c | 17 ++++++++--------- + security/smack/smack_lsm.c | 2 +- + 3 files changed, 10 insertions(+), 11 deletions(-) +Merging tomoyo/master (4fb9c588398f Bluetooth: call lock_sock() outside of spinlock section) +$ git merge tomoyo/master +Merge made by the 'recursive' strategy. + drivers/block/loop.c | 128 +++++++++++++++++++++++++++++++++++------------ + net/bluetooth/hci_sock.c | 30 ++++++++++- + 2 files changed, 126 insertions(+), 32 deletions(-) +Merging tpmdd/next (0178f9d0f60b tpm: Replace WARN_ONCE() with dev_err_once() in tpm_tis_status()) +$ git merge tpmdd/next +Already up to date. 
+Merging watchdog/master (cf813c67d961 watchdog: iTCO_wdt: use dev_err() instead of pr_err()) +$ git merge watchdog/master +Already up to date. +Merging iommu/next (4a5c155a5ab3 MAINTAINERS: Add Suravee Suthikulpanit as Reviewer for AMD IOMMU (AMD-Vi)) +$ git merge iommu/next +Already up to date. +Merging audit/next (d97e99386ad0 audit: add header protection to kernel/audit.h) +$ git merge audit/next +Merge made by the 'recursive' strategy. + kernel/audit.h | 5 +++++ + 1 file changed, 5 insertions(+) +Merging devicetree/for-next (1c14c1695e78 dt-bindings: PCI: remove designware-pcie.txt) +$ git merge devicetree/for-next +Auto-merging MAINTAINERS +Removing Documentation/devicetree/bindings/pci/faraday,ftpci100.txt +Removing Documentation/devicetree/bindings/pci/designware-pcie.txt +Removing Documentation/devicetree/bindings/net/micrel-ks8851.txt +Removing Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.txt +Merge made by the 'recursive' strategy. + .../devicetree/bindings/display/fsl,lcdif.yaml | 1 + + .../devicetree/bindings/i2c/brcm,iproc-i2c.txt | 46 ------ + .../devicetree/bindings/i2c/brcm,iproc-i2c.yaml | 71 +++++++++ + .../interrupt-controller/ti,pruss-intc.yaml | 4 +- + .../devicetree/bindings/net/micrel,ks8851.yaml | 96 +++++++++++ + .../devicetree/bindings/net/micrel-ks8851.txt | 18 --- + .../devicetree/bindings/pci/amlogic,meson-pcie.txt | 4 +- + .../devicetree/bindings/pci/axis,artpec6-pcie.txt | 2 +- + .../devicetree/bindings/pci/designware-pcie.txt | 77 --------- + .../devicetree/bindings/pci/faraday,ftpci100.txt | 135 ---------------- + .../devicetree/bindings/pci/faraday,ftpci100.yaml | 176 +++++++++++++++++++++ + .../devicetree/bindings/pci/fsl,imx6q-pcie.txt | 2 +- + .../bindings/pci/hisilicon-histb-pcie.txt | 2 +- + .../devicetree/bindings/pci/kirin-pcie.txt | 2 +- + .../devicetree/bindings/pci/layerscape-pci.txt | 2 +- + .../bindings/pci/nvidia,tegra194-pcie.txt | 5 +- + .../devicetree/bindings/pci/pci-armada8k.txt | 2 +- + 
Documentation/devicetree/bindings/pci/pcie-al.txt | 2 +- + .../devicetree/bindings/pci/qcom,pcie.txt | 14 +- + .../bindings/pci/samsung,exynos-pcie.yaml | 4 +- + .../devicetree/bindings/pci/sifive,fu740-pcie.yaml | 4 +- + .../devicetree/bindings/pci/snps,dw-pcie-ep.yaml | 90 +++++++++++ + .../devicetree/bindings/pci/snps,dw-pcie.yaml | 101 ++++++++++++ + .../bindings/pci/socionext,uniphier-pcie-ep.yaml | 4 +- + Documentation/devicetree/bindings/pci/ti-pci.txt | 4 +- + .../devicetree/bindings/pci/uniphier-pcie.txt | 2 +- + MAINTAINERS | 3 +- + drivers/of/base.c | 4 +- + drivers/of/fdt.c | 3 - + drivers/of/kexec.c | 40 ++--- + 30 files changed, 584 insertions(+), 336 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.txt + create mode 100644 Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.yaml + create mode 100644 Documentation/devicetree/bindings/net/micrel,ks8851.yaml + delete mode 100644 Documentation/devicetree/bindings/net/micrel-ks8851.txt + delete mode 100644 Documentation/devicetree/bindings/pci/designware-pcie.txt + delete mode 100644 Documentation/devicetree/bindings/pci/faraday,ftpci100.txt + create mode 100644 Documentation/devicetree/bindings/pci/faraday,ftpci100.yaml + create mode 100644 Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml + create mode 100644 Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +Merging mailbox/mailbox-for-next (4f197188da66 MAINTAINERS: add entry for polarfire soc mailbox) +$ git merge mailbox/mailbox-for-next +Already up to date. +Merging spi/for-next (199ec5db2dc5 Merge remote-tracking branch 'spi/for-5.15' into spi-next) +$ git merge spi/for-next +Removing Documentation/devicetree/bindings/spi/omap-spi.txt +Merge made by the 'recursive' strategy. 
+ .../devicetree/bindings/fsi/ibm,fsi2spi.yaml | 1 - + Documentation/devicetree/bindings/spi/omap-spi.txt | 48 -------- + .../devicetree/bindings/spi/omap-spi.yaml | 117 +++++++++++++++++++ + .../devicetree/bindings/spi/spi-mt65xx.txt | 1 + + drivers/spi/spi-fsi.c | 125 ++++----------------- + drivers/spi/spi-geni-qcom.c | 6 - + drivers/spi/spi-imx.c | 11 +- + drivers/spi/spi-mt65xx.c | 52 +++++++-- + drivers/spi/spi-pxa2xx.c | 35 +++--- + drivers/spi/spi-stm32.c | 122 ++++++++------------ + include/linux/platform_data/spi-mt65xx.h | 1 + + include/linux/spi/spi.h | 3 +- + 12 files changed, 251 insertions(+), 271 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/spi/omap-spi.txt + create mode 100644 Documentation/devicetree/bindings/spi/omap-spi.yaml +Merging tip/auto-latest (de35038b40c1 Merge branch 'timers/urgent') +$ git merge tip/auto-latest +Merge made by the 'recursive' strategy. + .../testing/sysfs-bus-event_source-devices-uncore | 13 + + Documentation/atomic_t.txt | 41 ++ + arch/x86/events/intel/uncore.c | 45 +- + arch/x86/events/intel/uncore.h | 4 + + arch/x86/events/intel/uncore_discovery.c | 42 +- + arch/x86/events/intel/uncore_discovery.h | 21 + + arch/x86/events/intel/uncore_snbep.c | 543 ++++++++++++++++++++- + arch/x86/kernel/jump_label.c | 7 +- + drivers/firmware/efi/efi.c | 13 +- + drivers/firmware/efi/libstub/efi-stub-helper.c | 4 +- + drivers/firmware/efi/mokvar-table.c | 5 +- + drivers/firmware/efi/tpm.c | 8 +- + include/linux/sched/sysctl.h | 18 - + include/linux/wait.h | 2 +- + kernel/locking/mutex.c | 124 ++--- + kernel/sched/core.c | 6 +- + kernel/sched/debug.c | 7 + + kernel/sched/fair.c | 6 +- + kernel/sched/sched.h | 15 + + kernel/smpboot.c | 2 +- + kernel/time/posix-cpu-timers.c | 10 +- + kernel/time/timer.c | 8 +- + 22 files changed, 789 insertions(+), 155 deletions(-) + create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-uncore +Merging clockevents/timers/drivers/next (6f64c8159af9 
clocksource/drivers/arm_global_timer: Remove duplicated argument in arm_global_timer) +$ git merge clockevents/timers/drivers/next +Auto-merging drivers/clocksource/Kconfig +Merge made by the 'recursive' strategy. + drivers/clocksource/ingenic-sysost.c | 315 ++++++++++++++++++++++++++--------- + 1 file changed, 236 insertions(+), 79 deletions(-) +Merging edac/edac-for-next (e1ca90b7cc5c EDAC/mc: Add new HBM2 memory type) +$ git merge edac/edac-for-next +Merge made by the 'recursive' strategy. + drivers/edac/amd64_edac.c | 21 ++++++++------------- + drivers/edac/edac_mc.c | 1 + + include/linux/edac.h | 3 +++ + 3 files changed, 12 insertions(+), 13 deletions(-) +Merging irqchip/irq/irqchip-next (c51e96dace68 Merge branch irq/irqchip-driver-updates into irq/irqchip-next) +$ git merge irqchip/irq/irqchip-next +Already up to date. +Merging ftrace/for-next (704adfb5a997 tracing: Do not reference char * as a string in histograms) +$ git merge ftrace/for-next +Already up to date. +Merging rcu/rcu/next (fa31b17088f9 Merge branch 'clocksource.2021.07.20c' into HEAD) +$ git merge rcu/rcu/next +Auto-merging lib/Kconfig.debug +Auto-merging kernel/sched/core.c +Removing kernel/kcsan/atomic.h +Merge made by the 'recursive' strategy. 
+ .../Memory-Ordering/Tree-RCU-Memory-Ordering.rst | 29 + + .../RCU/Design/Requirements/Requirements.rst | 8 +- + Documentation/RCU/checklist.rst | 24 +- + Documentation/RCU/rcu_dereference.rst | 6 +- + Documentation/RCU/stallwarn.rst | 31 +- + Documentation/dev-tools/kcsan.rst | 12 + + .../litmus-tests/locking/DCL-broken.litmus | 55 + + .../litmus-tests/locking/DCL-fixed.litmus | 56 + + .../litmus-tests/locking/RM-broken.litmus | 42 + + Documentation/litmus-tests/locking/RM-fixed.litmus | 42 + + include/linux/rculist.h | 35 +- + include/linux/rcupdate.h | 4 +- + include/linux/rcutiny.h | 3 - + include/linux/srcutiny.h | 8 +- + kernel/kcsan/atomic.h | 23 - + kernel/kcsan/core.c | 77 +- + kernel/kcsan/kcsan_test.c | 32 + + kernel/kcsan/permissive.h | 94 ++ + kernel/locking/locktorture.c | 25 +- + kernel/rcu/rcuscale.c | 4 +- + kernel/rcu/rcutorture.c | 7 +- + kernel/rcu/refscale.c | 37 +- + kernel/rcu/srcutiny.c | 2 +- + kernel/rcu/tasks.h | 36 +- + kernel/rcu/tree.c | 119 +- + kernel/rcu/tree.h | 2 +- + kernel/rcu/tree_nocb.h | 1496 +++++++++++++++++++ + kernel/rcu/tree_plugin.h | 1506 +------------------- + kernel/rcu/tree_stall.h | 111 +- + kernel/scftorture.c | 78 +- + kernel/sched/core.c | 11 + + lib/Kconfig.debug | 1 + + lib/Kconfig.kcsan | 42 +- + tools/include/nolibc/nolibc.h | 15 +- + .../memory-model/Documentation/access-marking.txt | 152 +- + tools/memory-model/Documentation/locking.txt | 320 +++++ + tools/memory-model/litmus-tests/.gitignore | 2 +- + tools/memory-model/scripts/README | 16 +- + tools/memory-model/scripts/checkalllitmus.sh | 29 +- + tools/memory-model/scripts/checkghlitmus.sh | 11 +- + tools/memory-model/scripts/checklitmus.sh | 25 +- + tools/memory-model/scripts/checklitmushist.sh | 2 +- + tools/memory-model/scripts/checktheselitmus.sh | 43 + + tools/memory-model/scripts/cmplitmushist.sh | 49 +- + tools/memory-model/scripts/hwfnseg.sh | 20 + + tools/memory-model/scripts/initlitmushist.sh | 2 +- + tools/memory-model/scripts/judgelitmus.sh 
| 120 +- + tools/memory-model/scripts/newlitmushist.sh | 4 +- + tools/memory-model/scripts/parseargs.sh | 21 +- + tools/memory-model/scripts/runlitmus.sh | 80 ++ + tools/memory-model/scripts/runlitmushist.sh | 29 +- + tools/memory-model/scripts/simpletest.sh | 35 + + tools/testing/selftests/rcutorture/bin/jitter.sh | 10 +- + .../selftests/rcutorture/bin/kcsan-collapse.sh | 2 +- + .../testing/selftests/rcutorture/bin/kvm-again.sh | 4 +- + .../selftests/rcutorture/bin/kvm-assign-cpus.sh | 105 ++ + .../rcutorture/bin/kvm-get-cpus-script.sh | 87 ++ + .../selftests/rcutorture/bin/kvm-recheck-lock.sh | 2 +- + .../selftests/rcutorture/bin/kvm-recheck-scf.sh | 2 +- + .../selftests/rcutorture/bin/kvm-recheck.sh | 5 +- + .../selftests/rcutorture/bin/kvm-remote-noreap.sh | 30 + + .../testing/selftests/rcutorture/bin/kvm-remote.sh | 20 +- + .../rcutorture/bin/kvm-test-1-run-batch.sh | 24 + + .../rcutorture/bin/kvm-test-1-run-qemu.sh | 49 +- + .../selftests/rcutorture/bin/kvm-test-1-run.sh | 2 + + tools/testing/selftests/rcutorture/bin/kvm.sh | 39 +- + tools/testing/selftests/rcutorture/bin/torture.sh | 37 +- + .../selftests/rcutorture/configs/rcu/RUDE01 | 2 +- + .../selftests/rcutorture/configs/rcu/TASKS01 | 2 +- + .../selftests/rcutorture/configs/rcu/TASKS03 | 2 +- + 70 files changed, 3516 insertions(+), 1941 deletions(-) + create mode 100644 Documentation/litmus-tests/locking/DCL-broken.litmus + create mode 100644 Documentation/litmus-tests/locking/DCL-fixed.litmus + create mode 100644 Documentation/litmus-tests/locking/RM-broken.litmus + create mode 100644 Documentation/litmus-tests/locking/RM-fixed.litmus + delete mode 100644 kernel/kcsan/atomic.h + create mode 100644 kernel/kcsan/permissive.h + create mode 100644 kernel/rcu/tree_nocb.h + create mode 100644 tools/memory-model/Documentation/locking.txt + create mode 100755 tools/memory-model/scripts/checktheselitmus.sh + create mode 100755 tools/memory-model/scripts/hwfnseg.sh + create mode 100755 
tools/memory-model/scripts/runlitmus.sh + create mode 100755 tools/memory-model/scripts/simpletest.sh + create mode 100755 tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh + create mode 100755 tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh + create mode 100755 tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh +Merging kvm/next (7025098af334 Merge tag 'kvmarm-fixes-5.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD) +$ git merge kvm/next +Already up to date. +Merging kvm-arm/next (188982cda00e Merge branch kvm-arm64/mmu/mte into kvmarm-master/next) +$ git merge kvm-arm/next +Already up to date. +Merging kvm-ppc/kvm-ppc-next (72476aaa4691 KVM: PPC: Book3S HV: Fix host radix SLB optimisation with hash guests) +$ git merge kvm-ppc/kvm-ppc-next +Already up to date. +Merging kvms390/next (1f703d2cf204 KVM: s390: allow facility 192 (vector-packed-decimal-enhancement facility 2)) +$ git merge kvms390/next +Already up to date. +Merging xen-tip/linux-next (83f877a09516 xen/events: remove redundant initialization of variable irq) +$ git merge xen-tip/linux-next +Merge made by the 'recursive' strategy. + drivers/xen/events/events_base.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +Merging percpu/for-next (a81a52b325ec Merge branch 'for-5.14-fixes' into for-next) +$ git merge percpu/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging workqueues/for-next (b42b0bddcbc8 workqueue: fix UAF in pwq_unbound_release_workfn()) +$ git merge workqueues/for-next +Merge made by the 'recursive' strategy. + kernel/workqueue.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) +Merging drivers-x86/for-next (f7e506ec4a99 platform/x86: think-lmi: Fix possible mem-leaks on tlmi_analyze() error-exit) +$ git merge drivers-x86/for-next +Already up to date. 
+Merging chrome-platform/for-next (6efb943b8616 Linux 5.13-rc1) +$ git merge chrome-platform/for-next +Already up to date. +Merging hsi/for-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge hsi/for-next +Already up to date. +Merging leds/for-next (e642197562cd leds: is31fl32xx: Fix missing error code in is31fl32xx_parse_dt()) +$ git merge leds/for-next +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + MAINTAINERS | 2 +- + drivers/leds/Kconfig | 57 -------------------------------- + drivers/leds/Makefile | 6 ---- + drivers/leds/flash/Kconfig | 53 +++++++++++++++++++++++++++++ + drivers/leds/flash/Makefile | 6 ++++ + drivers/leds/{ => flash}/leds-aat1290.c | 0 + drivers/leds/{ => flash}/leds-as3645a.c | 0 + drivers/leds/{ => flash}/leds-ktd2692.c | 0 + drivers/leds/{ => flash}/leds-lm3601x.c | 0 + drivers/leds/{ => flash}/leds-max77693.c | 0 + drivers/leds/{ => flash}/leds-sgm3140.c | 0 + drivers/leds/leds-is31fl32xx.c | 1 + + 12 files changed, 61 insertions(+), 64 deletions(-) + rename drivers/leds/{ => flash}/leds-aat1290.c (100%) + rename drivers/leds/{ => flash}/leds-as3645a.c (100%) + rename drivers/leds/{ => flash}/leds-ktd2692.c (100%) + rename drivers/leds/{ => flash}/leds-lm3601x.c (100%) + rename drivers/leds/{ => flash}/leds-max77693.c (100%) + rename drivers/leds/{ => flash}/leds-sgm3140.c (100%) +Merging ipmi/for-next (1a2055cd5091 bindings: ipmi: Add binding for SSIF BMC driver) +$ git merge ipmi/for-next +Merge made by the 'recursive' strategy. 
+ .../devicetree/bindings/ipmi/ssif-bmc.yaml | 38 + + drivers/char/ipmi/Kconfig | 11 + + drivers/char/ipmi/Makefile | 1 + + drivers/char/ipmi/ssif_bmc.c | 781 +++++++++++++++++++++ + drivers/char/ipmi/ssif_bmc.h | 106 +++ + drivers/i2c/busses/i2c-aspeed.c | 20 + + include/linux/i2c.h | 2 + + 7 files changed, 959 insertions(+) + create mode 100644 Documentation/devicetree/bindings/ipmi/ssif-bmc.yaml + create mode 100644 drivers/char/ipmi/ssif_bmc.c + create mode 100644 drivers/char/ipmi/ssif_bmc.h +Merging driver-core/driver-core-next (e7deeb9d79d8 driver: base: Prefer unsigned int to bare use of unsigned) +$ git merge driver-core/driver-core-next +Auto-merging drivers/spi/spi.c +Auto-merging drivers/s390/cio/ccwgroup.c +Auto-merging drivers/net/netdevsim/bus.c +Auto-merging drivers/hid/intel-ish-hid/ishtp/bus.c +Auto-merging drivers/dma/idxd/sysfs.c +Auto-merging drivers/base/auxiliary.c +Auto-merging drivers/acpi/bus.c +Merge made by the 'recursive' strategy. + arch/arm/common/locomo.c | 3 +-- + arch/arm/common/sa1111.c | 4 +--- + arch/arm/mach-rpc/ecard.c | 4 +--- + arch/mips/sgi-ip22/ip22-gio.c | 3 +-- + arch/parisc/kernel/drivers.c | 5 ++--- + arch/powerpc/platforms/ps3/system-bus.c | 3 +-- + arch/powerpc/platforms/pseries/ibmebus.c | 3 +-- + arch/powerpc/platforms/pseries/vio.c | 3 +-- + arch/s390/include/asm/eadm.h | 2 +- + arch/sparc/kernel/vio.c | 4 +--- + drivers/acpi/bus.c | 3 +-- + drivers/amba/bus.c | 4 +--- + drivers/base/auxiliary.c | 4 +--- + drivers/base/bus.c | 6 +++--- + drivers/base/cacheinfo.c | 1 + + drivers/base/cpu.c | 4 ++-- + drivers/base/dd.c | 16 ++++++++++------ + drivers/base/firmware_loader/main.c | 20 ++++++++------------ + drivers/base/isa.c | 4 +--- + drivers/base/map.c | 12 ++++++------ + drivers/base/node.c | 8 ++++---- + drivers/base/platform.c | 4 +--- + drivers/base/power/wakeup.c | 2 +- + drivers/bcma/main.c | 6 ++---- + drivers/bus/sunxi-rsb.c | 4 +--- + drivers/cxl/core.c | 3 +-- + drivers/dax/bus.c | 4 +--- + 
drivers/dma/idxd/sysfs.c | 4 +--- + drivers/firewire/core-device.c | 4 +--- + drivers/firmware/arm_scmi/bus.c | 4 +--- + drivers/firmware/google/coreboot_table.c | 4 +--- + drivers/fpga/dfl.c | 4 +--- + drivers/hid/hid-core.c | 4 +--- + drivers/hid/intel-ish-hid/ishtp/bus.c | 4 +--- + drivers/hv/vmbus_drv.c | 5 +---- + drivers/hwtracing/intel_th/core.c | 4 +--- + drivers/i2c/i2c-core-base.c | 5 +---- + drivers/i3c/master.c | 4 +--- + drivers/input/gameport/gameport.c | 3 +-- + drivers/input/serio/serio.c | 3 +-- + drivers/ipack/ipack.c | 4 +--- + drivers/macintosh/macio_asic.c | 4 +--- + drivers/mcb/mcb-core.c | 4 +--- + drivers/media/pci/bt8xx/bttv-gpio.c | 3 +-- + drivers/memstick/core/memstick.c | 3 +-- + drivers/mfd/mcp-core.c | 3 +-- + drivers/misc/mei/bus.c | 4 +--- + drivers/misc/tifm_core.c | 3 +-- + drivers/mmc/core/bus.c | 4 +--- + drivers/mmc/core/sdio_bus.c | 4 +--- + drivers/net/netdevsim/bus.c | 3 +-- + drivers/ntb/core.c | 4 +--- + drivers/ntb/ntb_transport.c | 4 +--- + drivers/nubus/bus.c | 6 ++---- + drivers/nvdimm/bus.c | 3 +-- + drivers/pci/endpoint/pci-epf-core.c | 7 ++----- + drivers/pci/pci-driver.c | 3 +-- + drivers/pcmcia/ds.c | 4 +--- + drivers/platform/surface/aggregator/bus.c | 4 +--- + drivers/platform/x86/wmi.c | 4 +--- + drivers/pnp/driver.c | 3 +-- + drivers/rapidio/rio-driver.c | 4 +--- + drivers/rpmsg/rpmsg_core.c | 7 ++----- + drivers/s390/block/scm_drv.c | 4 +--- + drivers/s390/cio/ccwgroup.c | 6 +----- + drivers/s390/cio/chsc_sch.c | 3 +-- + drivers/s390/cio/css.c | 7 +++---- + drivers/s390/cio/css.h | 2 +- + drivers/s390/cio/device.c | 9 +++------ + drivers/s390/cio/eadm_sch.c | 4 +--- + drivers/s390/cio/scm.c | 5 +++-- + drivers/s390/cio/vfio_ccw_drv.c | 3 +-- + drivers/s390/crypto/ap_bus.c | 4 +--- + drivers/scsi/scsi_debug.c | 3 +-- + drivers/sh/superhyway/superhyway.c | 8 ++------ + drivers/siox/siox-core.c | 4 +--- + drivers/slimbus/core.c | 4 +--- + drivers/soc/qcom/apr.c | 4 +--- + drivers/spi/spi.c | 4 +--- + 
drivers/spmi/spmi.c | 3 +-- + drivers/ssb/main.c | 4 +--- + drivers/staging/fieldbus/anybuss/host.c | 4 +--- + drivers/staging/greybus/gbphy.c | 4 +--- + drivers/target/loopback/tcm_loop.c | 5 ++--- + drivers/thunderbolt/domain.c | 4 +--- + drivers/tty/serdev/core.c | 4 +--- + drivers/usb/common/ulpi.c | 4 +--- + drivers/usb/serial/bus.c | 4 +--- + drivers/usb/typec/bus.c | 4 +--- + drivers/vdpa/vdpa.c | 4 +--- + drivers/vfio/mdev/mdev_driver.c | 4 +--- + drivers/virtio/virtio.c | 3 +-- + drivers/vlynq/vlynq.c | 4 +--- + drivers/vme/vme.c | 4 +--- + drivers/xen/xenbus/xenbus.h | 2 +- + drivers/xen/xenbus/xenbus_probe.c | 4 +--- + drivers/zorro/zorro-driver.c | 3 +-- + fs/sysfs/group.c | 2 +- + include/linux/device/bus.h | 2 +- + include/linux/pci-epf.h | 2 +- + sound/ac97/bus.c | 6 ++---- + sound/aoa/soundbus/core.c | 4 +--- + tools/testing/selftests/firmware/fw_namespace.c | 3 ++- + 103 files changed, 145 insertions(+), 299 deletions(-) +$ git am -3 ../patches/0001-fix-for-drm-Introduce-the-DP-AUX-bus.patch +Applying: fix for "drm: Introduce the DP AUX bus" +Merging usb/usb-next (8e6cb5d27e82 usb: dwc3: dwc3-qcom: Fix typo in the dwc3 vbus override API) +$ git merge usb/usb-next +Auto-merging drivers/usb/host/xhci-pci.c +Auto-merging drivers/usb/host/xhci-pci-renesas.c +Auto-merging drivers/usb/dwc3/gadget.c +CONFLICT (content): Merge conflict in drivers/usb/dwc3/gadget.c +Auto-merging drivers/usb/dwc3/ep0.c +Auto-merging drivers/usb/dwc3/core.h +Auto-merging drivers/usb/dwc2/gadget.c +Auto-merging drivers/usb/dwc2/core.h +Auto-merging drivers/of/base.c +Auto-merging arch/arm64/boot/dts/qcom/sc7280.dtsi +CONFLICT (content): Merge conflict in arch/arm64/boot/dts/qcom/sc7280.dtsi +Auto-merging arch/arm64/boot/dts/qcom/sc7280-idp.dts +Recorded preimage for 'arch/arm64/boot/dts/qcom/sc7280.dtsi' +Resolved 'drivers/usb/dwc3/gadget.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. 
+$ git commit --no-edit -v -a +Recorded resolution for 'arch/arm64/boot/dts/qcom/sc7280.dtsi'. +[master 5b20259d88e7] Merge remote-tracking branch 'usb/usb-next' +$ git diff -M --stat --summary HEAD^.. + Documentation/ABI/testing/configfs-usb-gadget-uac1 | 10 + + Documentation/ABI/testing/configfs-usb-gadget-uac2 | 10 + + .../bindings/phy/qcom,qmp-usb3-dp-phy.yaml | 1 + + .../devicetree/bindings/usb/generic-ehci.yaml | 5 + + .../devicetree/bindings/usb/generic-ohci.yaml | 5 + + .../devicetree/bindings/usb/snps,dwc3.yaml | 15 +- + Documentation/usb/gadget-testing.rst | 38 +- + drivers/of/base.c | 1 + + drivers/usb/dwc2/core.h | 2 + + drivers/usb/dwc2/gadget.c | 28 +- + drivers/usb/dwc3/core.c | 15 + + drivers/usb/dwc3/core.h | 16 + + drivers/usb/dwc3/dwc3-qcom.c | 23 +- + drivers/usb/dwc3/ep0.c | 2 + + drivers/usb/dwc3/gadget.c | 232 +++++++ + drivers/usb/gadget/composite.c | 8 +- + drivers/usb/gadget/configfs.c | 4 + + drivers/usb/gadget/function/f_ncm.c | 50 +- + drivers/usb/gadget/function/f_uac1.c | 674 ++++++++++++++++++++- + drivers/usb/gadget/function/f_uac2.c | 656 ++++++++++++++++++-- + drivers/usb/gadget/function/u_audio.c | 369 ++++++++++- + drivers/usb/gadget/function/u_audio.h | 22 + + drivers/usb/gadget/function/u_ether.c | 5 +- + drivers/usb/gadget/function/u_uac1.h | 20 + + drivers/usb/gadget/function/u_uac2.h | 23 +- + drivers/usb/gadget/udc/core.c | 19 + + drivers/usb/host/ehci-mv.c | 23 +- + drivers/usb/host/fotg210-hcd.c | 48 +- + drivers/usb/host/fotg210.h | 5 - + drivers/usb/host/ohci-spear.c | 2 +- + drivers/usb/host/xhci-pci-renesas.c | 7 +- + drivers/usb/host/xhci-pci.c | 2 - + drivers/usb/host/xhci-pci.h | 3 - + drivers/usb/phy/phy-isp1301-omap.c | 2 +- + include/linux/of.h | 5 + + include/linux/usb/audio-v2.h | 14 + + include/linux/usb/gadget.h | 4 + + 37 files changed, 2171 insertions(+), 197 deletions(-) +Merging usb-gadget/next (e49d033bddf5 Linux 5.12-rc6) +$ git merge usb-gadget/next +Already up to date. 
+Merging usb-serial/usb-next (2734d6c1b1a0 Linux 5.14-rc2) +$ git merge usb-serial/usb-next +Already up to date. +Merging usb-chipidea-next/for-usb-next (956df1bb0ab8 usb: chipidea: host: fix port index underflow and UBSAN complains) +$ git merge usb-chipidea-next/for-usb-next +Auto-merging drivers/usb/cdns3/cdnsp-gadget.c +Merge made by the 'recursive' strategy. + drivers/usb/chipidea/host.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) +Merging tty/tty-next (e679004dec37 tty: hvc: replace BUG_ON() with negative return value) +$ git merge tty/tty-next +Removing Documentation/devicetree/bindings/serial/cdns,uart.txt +Merge made by the 'recursive' strategy. + .../devicetree/bindings/serial/cdns,uart.txt | 27 --- + .../devicetree/bindings/serial/cdns,uart.yaml | 66 ++++++ + drivers/tty/amiserial.c | 236 ++++++++------------- + drivers/tty/hvc/hvc_xen.c | 17 +- + drivers/tty/serial/8250/8250_bcm7271.c | 2 +- + drivers/tty/serial/8250/8250_exar.c | 36 ++-- + drivers/tty/serial/8250/8250_omap.c | 2 +- + drivers/tty/serial/8250/8250_pnp.c | 2 + + drivers/tty/serial/8250/Kconfig | 10 +- + drivers/tty/serial/Kconfig | 2 +- + drivers/tty/serial/amba-pl011.c | 197 ++++++++++++++++- + drivers/tty/serial/jsm/jsm_neo.c | 2 + + drivers/tty/serial/jsm/jsm_tty.c | 3 + + drivers/tty/serial/samsung_tty.c | 46 ++-- + drivers/tty/serial/serial_core.c | 8 + + drivers/tty/serial/uartlite.c | 61 +++++- + drivers/tty/vt/vt.c | 21 +- + 17 files changed, 502 insertions(+), 236 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/serial/cdns,uart.txt + create mode 100644 Documentation/devicetree/bindings/serial/cdns,uart.yaml +Merging char-misc/char-misc-next (03b1292d1c0e scripts/spdxcheck-test.sh: Drop python2) +$ git merge char-misc/char-misc-next +Removing Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt +Merge made by the 'recursive' strategy. 
+ Documentation/admin-guide/binderfs.rst | 13 ++ + .../firmware/xilinx/xlnx,zynqmp-firmware.txt | 44 ------- + .../firmware/xilinx/xlnx,zynqmp-firmware.yaml | 89 ++++++++++++++ + .../devicetree/bindings/fpga/xlnx,versal-fpga.yaml | 33 +++++ + drivers/accessibility/speakup/i18n.c | 14 +-- + drivers/accessibility/speakup/speakup_soft.c | 15 ++- + drivers/android/binderfs.c | 39 ++++++ + drivers/bus/fsl-mc/fsl-mc-bus.c | 134 ++++++++++++++++++--- + drivers/dio/dio.c | 2 +- + drivers/firmware/xilinx/zynqmp.c | 17 +++ + drivers/fpga/Kconfig | 9 ++ + drivers/fpga/Makefile | 1 + + drivers/fpga/versal-fpga.c | 96 +++++++++++++++ + drivers/misc/lkdtm/core.c | 1 - + drivers/misc/pci_endpoint_test.c | 1 + + drivers/misc/pvpanic/pvpanic-pci.c | 2 + + drivers/misc/sgi-gru/grumain.c | 6 +- + drivers/misc/sgi-gru/grutables.h | 3 +- + drivers/most/most_cdev.c | 8 +- + include/linux/firmware/xlnx-zynqmp.h | 10 ++ + scripts/spdxcheck-test.sh | 16 ++- + .../selftests/filesystems/binderfs/binderfs_test.c | 17 +++ + 22 files changed, 479 insertions(+), 91 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt + create mode 100644 Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml + create mode 100644 Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml + create mode 100644 drivers/fpga/versal-fpga.c +Merging extcon/extcon-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge extcon/extcon-next +Already up to date. +Merging phy-next/next (c1302e8ce517 phy: tegra: xusb: mark PM functions as __maybe_unused) +$ git merge phy-next/next +Merge made by the 'recursive' strategy. 
+ ...keembay-usb.yaml => intel,keembay-phy-usb.yaml} | 2 +- + .../devicetree/bindings/phy/qcom,qmp-phy.yaml | 2 + + .../devicetree/bindings/phy/samsung,ufs-phy.yaml | 1 + + drivers/phy/qualcomm/phy-qcom-qmp.c | 140 +++++++++++++++++++++ + drivers/phy/samsung/Makefile | 5 +- + .../{phy-exynos7-ufs.h => phy-exynos7-ufs.c} | 8 +- + drivers/phy/samsung/phy-exynosautov9-ufs.c | 67 ++++++++++ + drivers/phy/samsung/phy-samsung-ufs.c | 3 + + drivers/phy/samsung/phy-samsung-ufs.h | 13 +- + drivers/phy/tegra/xusb.c | 4 +- + 10 files changed, 232 insertions(+), 13 deletions(-) + rename Documentation/devicetree/bindings/phy/{intel,phy-keembay-usb.yaml => intel,keembay-phy-usb.yaml} (93%) + rename drivers/phy/samsung/{phy-exynos7-ufs.h => phy-exynos7-ufs.c} (93%) + create mode 100644 drivers/phy/samsung/phy-exynosautov9-ufs.c +Merging soundwire/next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge soundwire/next +Already up to date. +Merging thunderbolt/next (b18f901382fd thunderbolt: Fix DROM handling for USB4 DROM) +$ git merge thunderbolt/next +Already up to date. +Merging vfio/next (6a45ece4c9af vfio/pci: Handle concurrent vma faults) +$ git merge vfio/next +Already up to date. +Merging staging/staging-next (334201d503d5 staging: hisilicon,hi6421-spmi-pmic.yaml: fix patternProperties) +$ git merge staging/staging-next +Merge made by the 'recursive' strategy. 
+ drivers/misc/Kconfig | 10 + + drivers/misc/Makefile | 1 + + drivers/misc/hi6421v600-irq.c | 307 +++++++++++++++++++++ + drivers/staging/board/board.c | 7 +- + drivers/staging/hikey9xx/hi6421-spmi-pmic.c | 257 +---------------- + .../hikey9xx/hisilicon,hi6421-spmi-pmic.yaml | 16 +- + drivers/staging/qlge/qlge_dbg.c | 4 +- + drivers/staging/rtl8188eu/Makefile | 2 +- + drivers/staging/rtl8188eu/{hal => core}/mac_cfg.c | 8 +- + drivers/staging/rtl8188eu/core/rtw_efuse.c | 32 +-- + drivers/staging/rtl8188eu/core/rtw_ieee80211.c | 4 +- + drivers/staging/rtl8188eu/hal/pwrseq.c | 52 ---- + drivers/staging/rtl8188eu/hal/pwrseqcmd.c | 80 +++--- + drivers/staging/rtl8188eu/hal/rtl8188e_hal_init.c | 12 +- + drivers/staging/rtl8188eu/hal/usb_halinit.c | 37 +-- + drivers/staging/rtl8188eu/include/hal_intf.h | 20 -- + drivers/staging/rtl8188eu/include/phy.h | 2 +- + drivers/staging/rtl8188eu/include/pwrseq.h | 168 ++--------- + drivers/staging/rtl8188eu/include/pwrseqcmd.h | 6 +- + drivers/staging/rtl8188eu/include/rtl8188e_hal.h | 9 - + drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 75 ----- + drivers/staging/rtl8188eu/os_dep/usb_intf.c | 8 +- + drivers/staging/rtl8712/os_intfs.c | 2 +- + drivers/staging/rtl8712/osdep_service.h | 2 +- + drivers/staging/rtl8712/wifi.h | 48 ---- + drivers/staging/rtl8723bs/core/rtw_ap.c | 2 +- + drivers/staging/rtl8723bs/core/rtw_io.c | 13 +- + drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 2 +- + drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 2 +- + drivers/staging/rtl8723bs/core/rtw_wlan_util.c | 4 +- + drivers/staging/rtl8723bs/hal/hal_btcoex.c | 2 +- + drivers/staging/rtl8723bs/hal/hal_com.c | 4 +- + drivers/staging/rtl8723bs/include/ieee80211.h | 16 +- + drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 53 +++- + drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 6 +- + .../staging/vc04_services/bcm2835-audio/bcm2835.h | 2 +- + include/linux/mfd/hi6421-spmi-pmic.h | 5 - + 37 files changed, 505 insertions(+), 775 deletions(-) + create 
mode 100644 drivers/misc/hi6421v600-irq.c + rename drivers/staging/rtl8188eu/{hal => core}/mac_cfg.c (93%) +Merging iio/togreg (73380b9ad247 iio: st-sensors: Remove some unused includes and add some that should be there) +$ git merge iio/togreg +Removing Documentation/devicetree/bindings/iio/accel/bosch,bma180.yaml +Merge made by the 'recursive' strategy. + .../bindings/iio/accel/bosch,bma180.yaml | 61 ----- + .../bindings/iio/accel/bosch,bma255.yaml | 23 +- + .../bindings/iio/adc/rockchip-saradc.yaml | 1 + + .../devicetree/bindings/iio/dac/adi,ad5064.yaml | 268 +++++++++++++++++++++ + .../devicetree/bindings/iio/dac/adi,ad5360.yaml | 79 ++++++ + .../devicetree/bindings/iio/dac/adi,ad5380.yaml | 70 ++++++ + .../devicetree/bindings/iio/dac/adi,ad5421.yaml | 51 ++++ + .../devicetree/bindings/iio/dac/adi,ad5449.yaml | 97 ++++++++ + .../devicetree/bindings/iio/dac/adi,ad5504.yaml | 50 ++++ + .../devicetree/bindings/iio/dac/adi,ad5624r.yaml | 47 ++++ + .../devicetree/bindings/iio/dac/adi,ad5686.yaml | 75 ++++++ + .../devicetree/bindings/iio/dac/adi,ad5761.yaml | 60 +++++ + .../devicetree/bindings/iio/dac/adi,ad5764.yaml | 62 +++++ + .../devicetree/bindings/iio/dac/adi,ad5791.yaml | 52 ++++ + .../devicetree/bindings/iio/dac/adi,ad8801.yaml | 60 +++++ + .../bindings/iio/dac/microchip,mcp4922.yaml | 46 ++++ + drivers/counter/intel-qep.c | 1 - + drivers/iio/accel/adxl345.h | 1 - + drivers/iio/accel/adxl345_core.c | 29 +-- + drivers/iio/accel/adxl345_i2c.c | 6 - + drivers/iio/accel/adxl345_spi.c | 6 - + drivers/iio/accel/bma220_spi.c | 79 +++--- + drivers/iio/accel/da280.c | 26 +- + drivers/iio/accel/da311.c | 26 +- + drivers/iio/accel/dmard10.c | 27 +-- + drivers/iio/accel/hid-sensor-accel-3d.c | 10 +- + drivers/iio/accel/st_accel_buffer.c | 7 +- + drivers/iio/accel/st_accel_core.c | 8 +- + drivers/iio/accel/st_accel_i2c.c | 3 +- + drivers/iio/accel/st_accel_spi.c | 2 +- + drivers/iio/adc/rockchip_saradc.c | 22 +- + drivers/iio/common/st_sensors/st_sensors_buffer.c | 2 - + 
drivers/iio/common/st_sensors/st_sensors_core.c | 1 + + drivers/iio/common/st_sensors/st_sensors_core.h | 1 + + drivers/iio/common/st_sensors/st_sensors_i2c.c | 3 +- + drivers/iio/common/st_sensors/st_sensors_spi.c | 3 +- + drivers/iio/common/st_sensors/st_sensors_trigger.c | 1 - + drivers/iio/dac/ad5624r_spi.c | 18 +- + drivers/iio/gyro/hid-sensor-gyro-3d.c | 11 +- + drivers/iio/gyro/st_gyro_buffer.c | 7 +- + drivers/iio/gyro/st_gyro_core.c | 9 +- + drivers/iio/gyro/st_gyro_i2c.c | 2 +- + drivers/iio/gyro/st_gyro_spi.c | 2 +- + drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 22 +- + drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h | 18 +- + drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c | 15 +- + drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c | 1 + + drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c | 3 +- + drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c | 3 +- + drivers/iio/industrialio-buffer.c | 9 +- + drivers/iio/industrialio-core.c | 33 ++- + drivers/iio/light/adjd_s311.c | 53 +--- + drivers/iio/light/hid-sensor-als.c | 11 +- + drivers/iio/light/hid-sensor-prox.c | 11 +- + drivers/iio/light/si1145.c | 1 - + drivers/iio/light/tcs3414.c | 48 ++-- + drivers/iio/magnetometer/st_magn_buffer.c | 7 +- + drivers/iio/magnetometer/st_magn_core.c | 11 +- + drivers/iio/magnetometer/st_magn_i2c.c | 2 +- + drivers/iio/magnetometer/st_magn_spi.c | 2 +- + drivers/iio/orientation/hid-sensor-incl-3d.c | 11 +- + drivers/iio/potentiometer/max5481.c | 22 +- + drivers/iio/pressure/hid-sensor-press.c | 11 +- + drivers/iio/pressure/st_pressure_buffer.c | 7 +- + drivers/iio/pressure/st_pressure_core.c | 10 +- + drivers/iio/pressure/st_pressure_i2c.c | 3 +- + drivers/iio/pressure/st_pressure_spi.c | 2 +- + drivers/iio/proximity/rfd77402.c | 83 +++---- + drivers/iio/temperature/tmp006.c | 53 ++-- + 69 files changed, 1372 insertions(+), 495 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/iio/accel/bosch,bma180.yaml + create mode 100644 
Documentation/devicetree/bindings/iio/dac/adi,ad5064.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5360.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5380.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5421.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5449.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5504.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5624r.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5686.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5761.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5764.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad5791.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/adi,ad8801.yaml + create mode 100644 Documentation/devicetree/bindings/iio/dac/microchip,mcp4922.yaml +Merging mux/for-next (3516bd729358 Merge tag 's390-5.11-3' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux) +$ git merge mux/for-next +Already up to date. +Merging icc/icc-next (2092cdb412f0 Merge branch 'icc-fixes' into icc-next) +$ git merge icc/icc-next +Merge made by the 'recursive' strategy. 
+ drivers/interconnect/core.c | 10 ++++ + drivers/interconnect/qcom/icc-rpmh.c | 113 +++++++++++++++++++++++++++++++---- + drivers/interconnect/qcom/icc-rpmh.h | 2 + + drivers/interconnect/qcom/sc7180.c | 96 +---------------------------- + drivers/interconnect/qcom/sc7280.c | 96 +---------------------------- + drivers/interconnect/qcom/sdm845.c | 99 +----------------------------- + drivers/interconnect/qcom/sdx55.c | 96 +---------------------------- + drivers/interconnect/qcom/sm8150.c | 96 +---------------------------- + drivers/interconnect/qcom/sm8250.c | 96 +---------------------------- + drivers/interconnect/qcom/sm8350.c | 97 +----------------------------- + 10 files changed, 129 insertions(+), 672 deletions(-) +Merging dmaengine/next (0e96454ca26c dmaengine: idxd: remove fault processing code) +$ git merge dmaengine/next +Auto-merging drivers/dma/idxd/sysfs.c +CONFLICT (content): Merge conflict in drivers/dma/idxd/sysfs.c +Recorded preimage for 'drivers/dma/idxd/sysfs.c' +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +Recorded resolution for 'drivers/dma/idxd/sysfs.c'. +[master 6a5aa976418a] Merge remote-tracking branch 'dmaengine/next' +$ git diff -M --stat --summary HEAD^.. 
+ Documentation/ABI/stable/sysfs-driver-dma-idxd | 7 + + drivers/dma/Kconfig | 23 +- + drivers/dma/Makefile | 2 +- + drivers/dma/dw/idma32.c | 138 +++++- + drivers/dma/dw/internal.h | 16 + + drivers/dma/dw/pci.c | 6 +- + drivers/dma/dw/platform.c | 6 +- + drivers/dma/idxd/Makefile | 8 + + drivers/dma/idxd/bus.c | 92 ++++ + drivers/dma/idxd/cdev.c | 65 ++- + drivers/dma/idxd/compat.c | 114 +++++ + drivers/dma/idxd/device.c | 291 +++++++++++-- + drivers/dma/idxd/dma.c | 82 +++- + drivers/dma/idxd/idxd.h | 144 +++++-- + drivers/dma/idxd/init.c | 135 +++--- + drivers/dma/idxd/irq.c | 101 +---- + drivers/dma/idxd/registers.h | 3 + + drivers/dma/idxd/submit.c | 23 +- + drivers/dma/idxd/sysfs.c | 571 ++++++------------------- + drivers/dma/sprd-dma.c | 1 + + include/linux/platform_data/dma-dw.h | 3 + + 21 files changed, 1144 insertions(+), 687 deletions(-) + create mode 100644 drivers/dma/idxd/bus.c + create mode 100644 drivers/dma/idxd/compat.c +$ git am -3 ../patches/0001-fixup-for-bus-Make-remove-callback-return-void.patch +Applying: fixup for "bus: Make remove callback return void" +$ git reset HEAD^ +Unstaged changes after reset: +M drivers/dma/idxd/bus.c +$ git add -A . +$ git commit -v -a --amend +[master 3691022714de] Merge remote-tracking branch 'dmaengine/next' + Date: Fri Jul 23 15:45:57 2021 +1000 +Merging cgroup/for-next (1e7107c5ef44 cgroup1: fix leaked context root causing sporadic NULL deref in LTP) +$ git merge cgroup/for-next +Merge made by the 'recursive' strategy. + fs/internal.h | 1 - + include/linux/fs_context.h | 1 + + kernel/cgroup/cgroup-v1.c | 4 +--- + 3 files changed, 2 insertions(+), 4 deletions(-) +Merging scsi/for-next (c18a4e657ce6 Merge branch 'fixes' into for-next) +$ git merge scsi/for-next +Already up to date! +Merge made by the 'recursive' strategy. 
+Merging scsi-mkp/for-next (e15f669cd996 scsi: libsas: Allow libsas to include SCSI header files directly) +$ git merge scsi-mkp/for-next +Auto-merging drivers/scsi/scsi_debug.c +Merge made by the 'recursive' strategy. + drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- + drivers/s390/scsi/zfcp_fsf.c | 4 +- + drivers/scsi/aacraid/aachba.c | 1 - + drivers/scsi/be2iscsi/be_mgmt.c | 84 ++++++----- + drivers/scsi/libsas/Makefile | 2 +- + drivers/scsi/libsas/sas_ata.c | 4 +- + drivers/scsi/libsas/sas_discover.c | 2 +- + drivers/scsi/libsas/sas_expander.c | 2 +- + drivers/scsi/libsas/sas_host_smp.c | 2 +- + drivers/scsi/libsas/sas_init.c | 2 +- + drivers/scsi/libsas/sas_phy.c | 2 +- + drivers/scsi/libsas/sas_port.c | 2 +- + drivers/scsi/libsas/sas_scsi_host.c | 6 +- + drivers/scsi/lpfc/lpfc.h | 1 - + drivers/scsi/lpfc/lpfc_attr.c | 4 +- + drivers/scsi/lpfc/lpfc_crtn.h | 2 + + drivers/scsi/lpfc/lpfc_ct.c | 5 +- + drivers/scsi/lpfc/lpfc_disc.h | 9 +- + drivers/scsi/lpfc/lpfc_els.c | 120 ++++++++------- + drivers/scsi/lpfc/lpfc_hbadisc.c | 197 +++++++++++++++++++------ + drivers/scsi/lpfc/lpfc_hw4.h | 20 ++- + drivers/scsi/lpfc/lpfc_init.c | 51 +++++-- + drivers/scsi/lpfc/lpfc_nportdisc.c | 43 +++--- + drivers/scsi/lpfc/lpfc_nvme.c | 10 +- + drivers/scsi/lpfc/lpfc_nvme.h | 6 +- + drivers/scsi/lpfc/lpfc_scsi.c | 68 +++++---- + drivers/scsi/lpfc/lpfc_sli.c | 192 ++++++++++++++++++------ + drivers/scsi/lpfc/lpfc_sli4.h | 4 +- + drivers/scsi/lpfc/lpfc_version.h | 2 +- + drivers/scsi/qla2xxx/qla_iocb.c | 77 ++-------- + drivers/scsi/qla2xxx/qla_sup.c | 1 - + drivers/scsi/scsi_debug.c | 112 ++++++++------ + drivers/scsi/smartpqi/Kconfig | 8 +- + drivers/scsi/smartpqi/smartpqi.h | 6 +- + drivers/scsi/smartpqi/smartpqi_init.c | 64 ++++++-- + drivers/scsi/smartpqi/smartpqi_sas_transport.c | 4 +- + drivers/scsi/smartpqi/smartpqi_sis.c | 4 +- + drivers/scsi/smartpqi/smartpqi_sis.h | 4 +- + drivers/scsi/ufs/ufshcd.c | 3 +- + drivers/scsi/ufs/ufshcd.h | 12 +- + include/scsi/scsi_cmnd.h | 
18 ++- + 41 files changed, 718 insertions(+), 444 deletions(-) +Merging vhost/linux-next (db7b337709a1 virtio-mem: prioritize unplug from ZONE_MOVABLE in Big Block Mode) +$ git merge vhost/linux-next +Already up to date. +Merging rpmsg/for-next (7486f29e5e60 Merge branches 'hwspinlock-next', 'rpmsg-next' and 'rproc-next' into for-next) +$ git merge rpmsg/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging gpio/for-next (7ac554888233 MAINTAINERS: Remove reference to non-existing file) +$ git merge gpio/for-next +Already up to date. +Merging gpio-brgl/gpio/for-next (f3f1017a98f9 docs: gpio: explain GPIOD_OUT_* values and toggling active low) +$ git merge gpio-brgl/gpio/for-next +Merge made by the 'recursive' strategy. + Documentation/driver-api/gpio/consumer.rst | 10 +++++++++- + Documentation/driver-api/gpio/driver.rst | 11 ++++------- + drivers/gpio/gpio-rcar.c | 4 ++-- + 3 files changed, 15 insertions(+), 10 deletions(-) +Merging gpio-intel/for-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge gpio-intel/for-next +Already up to date. +Merging pinctrl/for-next (bfa50166cd9d pinctrl: ralink: rt305x: add missing include) +$ git merge pinctrl/for-next +Already up to date. +Merging pinctrl-intel/for-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge pinctrl-intel/for-next +Already up to date. +Merging pinctrl-renesas/renesas-pinctrl (e9d66bdbc5ab pinctrl: renesas: r8a77995: Add bias pinconf support) +$ git merge pinctrl-renesas/renesas-pinctrl +Merge made by the 'recursive' strategy. + drivers/pinctrl/renesas/pfc-r8a77995.c | 320 ++++++++++++++++++++++++++++++++- + drivers/pinctrl/renesas/pinctrl.c | 16 +- + drivers/pinctrl/renesas/sh_pfc.h | 3 + + 3 files changed, 323 insertions(+), 16 deletions(-) +Merging pinctrl-samsung/for-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge pinctrl-samsung/for-next +Already up to date. 
+Merging pwm/for-next (f4a8e31ed84e pwm: ep93xx: Ensure configuring period and duty_cycle isn't wrongly skipped) +$ git merge pwm/for-next +Already up to date. +Merging userns/for-next (5e6b8a50a7ce cred: add missing return error code when set_cred_ucounts() failed) +$ git merge userns/for-next +Already up to date. +Merging ktest/for-next (170f4869e662 ktest.pl: Fix the logic for truncating the size of the log file for email) +$ git merge ktest/for-next +Already up to date. +Merging kselftest/next (2734d6c1b1a0 Linux 5.14-rc2) +$ git merge kselftest/next +Already up to date. +Merging livepatching/for-next (c150bbbb1731 Merge branch 'for-5.13/signal' into for-next) +$ git merge livepatching/for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging coresight/next (1efbcec2ef8c coresight: cti: Reduce scope for the variable “cs_fwnode” in cti_plat_create_connection()) +$ git merge coresight/next +Merge made by the 'recursive' strategy. +Merging rtc/rtc-next (4aa90c036df6 rtc: pcf8523: rename register and bit defines) +$ git merge rtc/rtc-next +Already up to date. +Merging nvdimm/libnvdimm-for-next (30c10d32152d Merge branch 'for-5.12/cxl' into libnvdimm-for-next) +$ git merge nvdimm/libnvdimm-for-next +Already up to date! +Merge made by the 'recursive' strategy. +Merging at24/at24/for-next (6efb943b8616 Linux 5.13-rc1) +$ git merge at24/at24/for-next +Already up to date. +Merging ntb/ntb-next (28293b6c68cd ntb: ntb_pingpong: remove redundant initialization of variables msg_data and spad_data) +$ git merge ntb/ntb-next +Merge made by the 'recursive' strategy. + drivers/ntb/test/ntb_msi_test.c | 4 +++- + drivers/ntb/test/ntb_perf.c | 1 + + drivers/ntb/test/ntb_pingpong.c | 2 +- + 3 files changed, 5 insertions(+), 2 deletions(-) +Merging seccomp/for-next/seccomp (19d67694745c Documentation: seccomp: Fix typo in user notification) +$ git merge seccomp/for-next/seccomp +Merge made by the 'recursive' strategy. 
+ Documentation/userspace-api/seccomp_filter.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +Merging kspp/for-next/kspp (6160d948cc6f hardening: Clarify Kconfig text for auto-var-init) +$ git merge kspp/for-next/kspp +Merge made by the 'recursive' strategy. + Makefile | 5 ++++ + security/Kconfig.hardening | 71 +++++++++++++++++++++++++++++++++------------- + 2 files changed, 56 insertions(+), 20 deletions(-) +Merging cisco/for-next (9e98c678c2d6 Linux 5.1-rc1) +$ git merge cisco/for-next +Already up to date. +Merging gnss/gnss-next (0f79ce970e79 gnss: drop stray semicolons) +$ git merge gnss/gnss-next +Merge made by the 'recursive' strategy. + drivers/gnss/mtk.c | 2 +- + drivers/gnss/serial.c | 2 +- + drivers/gnss/sirf.c | 2 +- + drivers/gnss/ubx.c | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) +Merging fsi/next (9ab1428dfe2c fsi/sbefifo: Fix reset timeout) +$ git merge fsi/next +Already up to date. +Merging slimbus/for-next (e5c578adcdd9 slimbus: ngd: reset dma setup during runtime pm) +$ git merge slimbus/for-next +Merge made by the 'recursive' strategy. + drivers/slimbus/messaging.c | 7 ++++--- + drivers/slimbus/qcom-ngd-ctrl.c | 22 +++++++++++++--------- + 2 files changed, 17 insertions(+), 12 deletions(-) +Merging nvmem/for-next (e73f0f0ee754 Linux 5.14-rc1) +$ git merge nvmem/for-next +Already up to date. +Merging xarray/main (2c7e57a02708 idr test suite: Improve reporting from idr_find_test_1) +$ git merge xarray/main +Already up to date. +Merging hyperv/hyperv-next (63fb60c2fcc9 hv: hyperv.h: Remove unused inline functions) +$ git merge hyperv/hyperv-next +Auto-merging arch/x86/kernel/cpu/mshyperv.c +Merge made by the 'recursive' strategy. 
+ arch/x86/hyperv/hv_init.c | 101 ++--------------- + arch/x86/include/asm/mshyperv.h | 4 - + arch/x86/kernel/cpu/mshyperv.c | 38 +++---- + drivers/clocksource/hyperv_timer.c | 3 - + drivers/hv/hv_common.c | 219 +++++++++++++++++++++++++++++++++++++ + drivers/hv/hv_util.c | 5 - + include/asm-generic/mshyperv.h | 13 +++ + include/clocksource/hyperv_timer.h | 11 +- + include/linux/hyperv.h | 16 --- + 9 files changed, 268 insertions(+), 142 deletions(-) +Merging auxdisplay/auxdisplay (24ebc044c72e auxdisplay: Replace symbolic permissions with octal permissions) +$ git merge auxdisplay/auxdisplay +Merge made by the 'recursive' strategy. + .../bindings/auxdisplay/hit,hd44780.yaml | 31 +++++++++++++++++++++- + drivers/auxdisplay/cfag12864b.c | 2 +- + drivers/auxdisplay/charlcd.c | 4 +-- + drivers/auxdisplay/hd44780.c | 2 +- + drivers/auxdisplay/ks0108.c | 18 +++---------- + 5 files changed, 36 insertions(+), 21 deletions(-) +Merging kgdb/kgdb/for-next (c8daba4640ac kgdb: Fix fall-through warning for Clang) +$ git merge kgdb/kgdb/for-next +Already up to date. +Merging hmm/hmm (fe07bfda2fb9 Linux 5.12-rc1) +$ git merge hmm/hmm +Already up to date. +Merging fpga/for-next (c485d3bf3cc7 fpga: fpga-mgr: wrap the write_sg() op) +$ git merge fpga/for-next +Merge made by the 'recursive' strategy. 
+ Documentation/driver-api/fpga/fpga-bridge.rst | 10 +- + Documentation/driver-api/fpga/fpga-mgr.rst | 12 +-- + Documentation/driver-api/fpga/fpga-programming.rst | 8 +- + Documentation/driver-api/fpga/fpga-region.rst | 20 ++-- + Documentation/fpga/dfl.rst | 4 +- + drivers/fpga/altera-cvp.c | 2 +- + drivers/fpga/altera-freeze-bridge.c | 2 + + drivers/fpga/dfl-fme-mgr.c | 6 -- + drivers/fpga/dfl-fme-pr.c | 2 +- + drivers/fpga/dfl-n3000-nios.c | 2 +- + drivers/fpga/dfl-pci.c | 5 + + drivers/fpga/dfl.h | 2 +- + drivers/fpga/fpga-bridge.c | 8 +- + drivers/fpga/fpga-mgr.c | 111 ++++++++++++++------- + drivers/fpga/stratix10-soc.c | 6 -- + drivers/fpga/ts73xx-fpga.c | 6 -- + drivers/fpga/xilinx-pr-decoupler.c | 2 + + drivers/fpga/xilinx-spi.c | 2 + + drivers/fpga/zynq-fpga.c | 6 +- + drivers/fpga/zynqmp-fpga.c | 10 +- + include/linux/fpga/fpga-mgr.h | 2 +- + 21 files changed, 127 insertions(+), 101 deletions(-) +Merging kunit/test (e73f0f0ee754 Linux 5.14-rc1) +$ git merge kunit/test +Already up to date. +Merging cfi/cfi/next (6efb943b8616 Linux 5.13-rc1) +$ git merge cfi/cfi/next +Already up to date. +Merging kunit-next/kunit (2734d6c1b1a0 Linux 5.14-rc2) +$ git merge kunit-next/kunit +Already up to date. +Merging trivial/for-next (9ff9b0d392ea Merge tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next) +$ git merge trivial/for-next +Already up to date. +Merging mhi/mhi-next (b8c95616d154 bus: mhi: core: Improve debug messages for power up) +$ git merge mhi/mhi-next +Auto-merging include/linux/mhi.h +Auto-merging drivers/net/mhi/net.c +Auto-merging drivers/bus/mhi/pci_generic.c +Auto-merging drivers/bus/mhi/core/main.c +Merge made by the 'recursive' strategy. 
+ drivers/bus/mhi/core/boot.c | 17 ++++--- + drivers/bus/mhi/core/init.c | 93 ++++++++++++++++++++++------------- + drivers/bus/mhi/core/internal.h | 22 +-------- + drivers/bus/mhi/core/main.c | 15 +++--- + drivers/bus/mhi/core/pm.c | 34 ++++--------- + drivers/bus/mhi/pci_generic.c | 38 ++++++++++++++ + drivers/net/mhi/net.c | 2 +- + drivers/net/wireless/ath/ath11k/mhi.c | 1 + + drivers/net/wwan/mhi_wwan_ctrl.c | 2 +- + include/linux/mhi.h | 9 +++- + net/qrtr/mhi.c | 2 +- + 11 files changed, 136 insertions(+), 99 deletions(-) +Merging memblock/for-next (a4d5613c4dc6 arm: extend pfn_valid to take into account freed memory map alignment) +$ git merge memblock/for-next +Already up to date. +Merging init/init-user-pointers (38b082236e77 initramfs: use vfs_utimes in do_copy) +$ git merge init/init-user-pointers +Already up to date. +Merging counters/counters (e71ba9452f0b Linux 5.11-rc2) +$ git merge counters/counters +Already up to date. +Merging rust/rust-next (5d3986cf8ed6 MAINTAINERS: Rust) +$ git merge rust/rust-next +Auto-merging scripts/Makefile.build +Auto-merging samples/Kconfig +Auto-merging lib/vsprintf.c +Auto-merging lib/Kconfig.debug +Auto-merging kernel/printk/printk.c +Auto-merging kernel/kallsyms.c +Auto-merging init/Kconfig +Auto-merging include/linux/kallsyms.h +CONFLICT (content): Merge conflict in include/linux/kallsyms.h +Auto-merging arch/riscv/Makefile +Auto-merging Makefile +CONFLICT (content): Merge conflict in Makefile +Auto-merging MAINTAINERS +Resolved 'Makefile' using previous resolution. +Resolved 'include/linux/kallsyms.h' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master b94ac51b11b5] Merge remote-tracking branch 'rust/rust-next' +$ git diff -M --stat --summary HEAD^.. 
+ .gitignore | 5 + + .rustfmt.toml | 12 + + Documentation/doc-guide/kernel-doc.rst | 3 + + Documentation/index.rst | 1 + + Documentation/kbuild/kbuild.rst | 4 + + Documentation/process/changes.rst | 13 + + Documentation/rust/arch-support.rst | 35 + + Documentation/rust/assets/favicon-16x16.png | Bin 0 -> 798 bytes + Documentation/rust/assets/favicon-32x32.png | Bin 0 -> 2076 bytes + Documentation/rust/assets/rust-logo.png | Bin 0 -> 53976 bytes + Documentation/rust/coding.rst | 92 + + Documentation/rust/docs.rst | 110 + + Documentation/rust/index.rst | 20 + + Documentation/rust/quick-start.rst | 222 ++ + MAINTAINERS | 14 + + Makefile | 176 +- + arch/arm/rust/target.json | 28 + + arch/arm64/rust/target.json | 35 + + arch/powerpc/rust/target.json | 30 + + arch/riscv/Makefile | 1 + + arch/riscv/rust/rv32ima.json | 37 + + arch/riscv/rust/rv32imac.json | 37 + + arch/riscv/rust/rv64ima.json | 37 + + arch/riscv/rust/rv64imac.json | 37 + + arch/x86/rust/target.json | 37 + + include/linux/kallsyms.h | 2 +- + include/linux/spinlock.h | 17 +- + init/Kconfig | 28 + + kernel/kallsyms.c | 7 + + kernel/livepatch/core.c | 4 +- + kernel/printk/printk.c | 5 +- + lib/Kconfig.debug | 144 ++ + lib/vsprintf.c | 12 + + rust/.gitignore | 6 + + rust/Makefile | 316 +++ + rust/alloc/README.md | 32 + + rust/alloc/alloc.rs | 425 ++++ + rust/alloc/borrow.rs | 493 ++++ + rust/alloc/boxed.rs | 1728 ++++++++++++++ + rust/alloc/collections/mod.rs | 116 + + rust/alloc/fmt.rs | 587 +++++ + rust/alloc/lib.rs | 197 ++ + rust/alloc/macros.rs | 128 ++ + rust/alloc/prelude/mod.rs | 17 + + rust/alloc/prelude/v1.rs | 16 + + rust/alloc/raw_vec.rs | 612 +++++ + rust/alloc/rc.rs | 2539 +++++++++++++++++++++ + rust/alloc/slice.rs | 1271 +++++++++++ + rust/alloc/str.rs | 614 +++++ + rust/alloc/string.rs | 2847 +++++++++++++++++++++++ + rust/alloc/sync.rs | 2631 ++++++++++++++++++++++ + rust/alloc/vec/drain.rs | 157 ++ + rust/alloc/vec/drain_filter.rs | 145 ++ + rust/alloc/vec/into_iter.rs | 296 +++ + 
rust/alloc/vec/is_zero.rs | 106 + + rust/alloc/vec/mod.rs | 3255 +++++++++++++++++++++++++++ + rust/alloc/vec/partial_eq.rs | 49 + + rust/alloc/vec/set_len_on_drop.rs | 30 + + rust/alloc/vec/spec_extend.rs | 170 ++ + rust/bindgen_parameters | 13 + + rust/build_error.rs | 33 + + rust/compiler_builtins.rs | 146 ++ + rust/exports.c | 16 + + rust/helpers.c | 235 ++ + rust/kernel/allocator.rs | 63 + + rust/kernel/bindings.rs | 28 + + rust/kernel/bindings_helper.h | 24 + + rust/kernel/buffer.rs | 39 + + rust/kernel/build_assert.rs | 80 + + rust/kernel/c_types.rs | 119 + + rust/kernel/chrdev.rs | 212 ++ + rust/kernel/error.rs | 272 +++ + rust/kernel/file.rs | 130 ++ + rust/kernel/file_operations.rs | 698 ++++++ + rust/kernel/io_buffer.rs | 153 ++ + rust/kernel/iov_iter.rs | 95 + + rust/kernel/lib.rs | 220 ++ + rust/kernel/linked_list.rs | 245 ++ + rust/kernel/miscdev.rs | 113 + + rust/kernel/module_param.rs | 497 ++++ + rust/kernel/of.rs | 101 + + rust/kernel/pages.rs | 176 ++ + rust/kernel/platdev.rs | 166 ++ + rust/kernel/prelude.rs | 28 + + rust/kernel/print.rs | 412 ++++ + rust/kernel/random.rs | 50 + + rust/kernel/raw_list.rs | 361 +++ + rust/kernel/rbtree.rs | 570 +++++ + rust/kernel/security.rs | 79 + + rust/kernel/static_assert.rs | 39 + + rust/kernel/str.rs | 259 +++ + rust/kernel/sync/arc.rs | 227 ++ + rust/kernel/sync/condvar.rs | 136 ++ + rust/kernel/sync/guard.rs | 82 + + rust/kernel/sync/locked_by.rs | 112 + + rust/kernel/sync/mod.rs | 84 + + rust/kernel/sync/mutex.rs | 101 + + rust/kernel/sync/spinlock.rs | 109 + + rust/kernel/sysctl.rs | 198 ++ + rust/kernel/task.rs | 193 ++ + rust/kernel/traits.rs | 26 + + rust/kernel/types.rs | 249 ++ + rust/kernel/user_ptr.rs | 191 ++ + rust/macros/lib.rs | 127 ++ + rust/macros/module.rs | 754 +++++++ + samples/Kconfig | 2 + + samples/Makefile | 1 + + samples/rust/Kconfig | 113 + + samples/rust/Makefile | 12 + + samples/rust/rust_chrdev.rs | 51 + + samples/rust/rust_minimal.rs | 38 + + samples/rust/rust_miscdev.rs | 150 
++ + samples/rust/rust_module_parameters.rs | 72 + + samples/rust/rust_print.rs | 57 + + samples/rust/rust_random.rs | 61 + + samples/rust/rust_semaphore.rs | 177 ++ + samples/rust/rust_semaphore_c.c | 212 ++ + samples/rust/rust_stack_probing.rs | 40 + + samples/rust/rust_sync.rs | 81 + + scripts/Makefile.build | 22 + + scripts/Makefile.lib | 12 + + scripts/generate_rust_analyzer.py | 143 ++ + scripts/kallsyms.c | 33 +- + scripts/kconfig/confdata.c | 67 +- + scripts/rust-version.sh | 31 + + tools/include/linux/kallsyms.h | 2 +- + tools/include/linux/lockdep.h | 2 +- + tools/lib/perf/include/perf/event.h | 2 +- + tools/lib/symbol/kallsyms.h | 2 +- + 129 files changed, 29600 insertions(+), 32 deletions(-) + create mode 100644 .rustfmt.toml + create mode 100644 Documentation/rust/arch-support.rst + create mode 100644 Documentation/rust/assets/favicon-16x16.png + create mode 100644 Documentation/rust/assets/favicon-32x32.png + create mode 100644 Documentation/rust/assets/rust-logo.png + create mode 100644 Documentation/rust/coding.rst + create mode 100644 Documentation/rust/docs.rst + create mode 100644 Documentation/rust/index.rst + create mode 100644 Documentation/rust/quick-start.rst + create mode 100644 arch/arm/rust/target.json + create mode 100644 arch/arm64/rust/target.json + create mode 100644 arch/powerpc/rust/target.json + create mode 100644 arch/riscv/rust/rv32ima.json + create mode 100644 arch/riscv/rust/rv32imac.json + create mode 100644 arch/riscv/rust/rv64ima.json + create mode 100644 arch/riscv/rust/rv64imac.json + create mode 100644 arch/x86/rust/target.json + create mode 100644 rust/.gitignore + create mode 100644 rust/Makefile + create mode 100644 rust/alloc/README.md + create mode 100644 rust/alloc/alloc.rs + create mode 100644 rust/alloc/borrow.rs + create mode 100644 rust/alloc/boxed.rs + create mode 100644 rust/alloc/collections/mod.rs + create mode 100644 rust/alloc/fmt.rs + create mode 100644 rust/alloc/lib.rs + create mode 100644 
rust/alloc/macros.rs + create mode 100644 rust/alloc/prelude/mod.rs + create mode 100644 rust/alloc/prelude/v1.rs + create mode 100644 rust/alloc/raw_vec.rs + create mode 100644 rust/alloc/rc.rs + create mode 100644 rust/alloc/slice.rs + create mode 100644 rust/alloc/str.rs + create mode 100644 rust/alloc/string.rs + create mode 100644 rust/alloc/sync.rs + create mode 100644 rust/alloc/vec/drain.rs + create mode 100644 rust/alloc/vec/drain_filter.rs + create mode 100644 rust/alloc/vec/into_iter.rs + create mode 100644 rust/alloc/vec/is_zero.rs + create mode 100644 rust/alloc/vec/mod.rs + create mode 100644 rust/alloc/vec/partial_eq.rs + create mode 100644 rust/alloc/vec/set_len_on_drop.rs + create mode 100644 rust/alloc/vec/spec_extend.rs + create mode 100644 rust/bindgen_parameters + create mode 100644 rust/build_error.rs + create mode 100644 rust/compiler_builtins.rs + create mode 100644 rust/exports.c + create mode 100644 rust/helpers.c + create mode 100644 rust/kernel/allocator.rs + create mode 100644 rust/kernel/bindings.rs + create mode 100644 rust/kernel/bindings_helper.h + create mode 100644 rust/kernel/buffer.rs + create mode 100644 rust/kernel/build_assert.rs + create mode 100644 rust/kernel/c_types.rs + create mode 100644 rust/kernel/chrdev.rs + create mode 100644 rust/kernel/error.rs + create mode 100644 rust/kernel/file.rs + create mode 100644 rust/kernel/file_operations.rs + create mode 100644 rust/kernel/io_buffer.rs + create mode 100644 rust/kernel/iov_iter.rs + create mode 100644 rust/kernel/lib.rs + create mode 100644 rust/kernel/linked_list.rs + create mode 100644 rust/kernel/miscdev.rs + create mode 100644 rust/kernel/module_param.rs + create mode 100644 rust/kernel/of.rs + create mode 100644 rust/kernel/pages.rs + create mode 100644 rust/kernel/platdev.rs + create mode 100644 rust/kernel/prelude.rs + create mode 100644 rust/kernel/print.rs + create mode 100644 rust/kernel/random.rs + create mode 100644 rust/kernel/raw_list.rs + create mode 
100644 rust/kernel/rbtree.rs + create mode 100644 rust/kernel/security.rs + create mode 100644 rust/kernel/static_assert.rs + create mode 100644 rust/kernel/str.rs + create mode 100644 rust/kernel/sync/arc.rs + create mode 100644 rust/kernel/sync/condvar.rs + create mode 100644 rust/kernel/sync/guard.rs + create mode 100644 rust/kernel/sync/locked_by.rs + create mode 100644 rust/kernel/sync/mod.rs + create mode 100644 rust/kernel/sync/mutex.rs + create mode 100644 rust/kernel/sync/spinlock.rs + create mode 100644 rust/kernel/sysctl.rs + create mode 100644 rust/kernel/task.rs + create mode 100644 rust/kernel/traits.rs + create mode 100644 rust/kernel/types.rs + create mode 100644 rust/kernel/user_ptr.rs + create mode 100644 rust/macros/lib.rs + create mode 100644 rust/macros/module.rs + create mode 100644 samples/rust/Kconfig + create mode 100644 samples/rust/Makefile + create mode 100644 samples/rust/rust_chrdev.rs + create mode 100644 samples/rust/rust_minimal.rs + create mode 100644 samples/rust/rust_miscdev.rs + create mode 100644 samples/rust/rust_module_parameters.rs + create mode 100644 samples/rust/rust_print.rs + create mode 100644 samples/rust/rust_random.rs + create mode 100644 samples/rust/rust_semaphore.rs + create mode 100644 samples/rust/rust_semaphore_c.c + create mode 100644 samples/rust/rust_stack_probing.rs + create mode 100644 samples/rust/rust_sync.rs + create mode 100755 scripts/generate_rust_analyzer.py + create mode 100755 scripts/rust-version.sh +Merging cxl/next (4ad6181e4b21 cxl/pci: Rename CXL REGLOC ID) +$ git merge cxl/next +Already up to date. 
+Merging folio/for-next (a4757d06df08 mm/filemap: Add FGP_STABLE) +$ git merge folio/for-next +Auto-merging mm/util.c +Auto-merging mm/shmem.c +Auto-merging mm/rmap.c +CONFLICT (content): Merge conflict in mm/rmap.c +Auto-merging mm/memory-failure.c +Auto-merging mm/memcontrol.c +Auto-merging mm/filemap.c +CONFLICT (content): Merge conflict in mm/filemap.c +Auto-merging kernel/bpf/verifier.c +Auto-merging include/linux/netfs.h +Auto-merging include/linux/mm.h +Auto-merging fs/afs/write.c +Resolved 'mm/filemap.c' using previous resolution. +Resolved 'mm/rmap.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master 7c9c6a044f35] Merge remote-tracking branch 'folio/for-next' +$ git diff -M --stat --summary HEAD^.. + Documentation/core-api/cachetlb.rst | 6 + + Documentation/core-api/mm-api.rst | 5 + + Documentation/filesystems/netfs_library.rst | 2 + + arch/arm/include/asm/cacheflush.h | 1 + + arch/nds32/include/asm/cacheflush.h | 1 + + fs/afs/write.c | 9 +- + fs/cachefiles/rdwr.c | 16 +- + fs/io_uring.c | 2 +- + fs/jfs/jfs_metapage.c | 1 + + include/asm-generic/cacheflush.h | 6 + + include/linux/backing-dev.h | 6 +- + include/linux/flex_proportions.h | 9 +- + include/linux/gfp.h | 22 +- + include/linux/highmem-internal.h | 11 + + include/linux/highmem.h | 37 ++ + include/linux/huge_mm.h | 15 - + include/linux/ksm.h | 4 +- + include/linux/memcontrol.h | 228 ++++++----- + include/linux/migrate.h | 4 + + include/linux/mm.h | 233 ++++++++--- + include/linux/mm_inline.h | 101 +++-- + include/linux/mm_types.h | 77 ++++ + include/linux/mmdebug.h | 20 + + include/linux/netfs.h | 77 ++-- + include/linux/page-flags.h | 267 +++++++++---- + include/linux/page_idle.h | 99 +++-- + include/linux/page_owner.h | 8 +- + include/linux/page_ref.h | 158 +++++++- + include/linux/pagemap.h | 579 +++++++++++++++++----------- + include/linux/rmap.h | 10 +- + include/linux/swap.h | 17 +- + include/linux/vmstat.h | 
107 +++++ + include/linux/writeback.h | 9 +- + include/trace/events/pagemap.h | 46 ++- + include/trace/events/writeback.h | 28 +- + kernel/bpf/verifier.c | 2 +- + kernel/events/uprobes.c | 3 +- + lib/flex_proportions.c | 28 +- + mm/Makefile | 2 +- + mm/compaction.c | 4 +- + mm/filemap.c | 565 +++++++++++++-------------- + mm/folio-compat.c | 142 +++++++ + mm/huge_memory.c | 7 +- + mm/hugetlb.c | 2 +- + mm/internal.h | 36 +- + mm/khugepaged.c | 8 +- + mm/ksm.c | 34 +- + mm/memcontrol.c | 327 ++++++++-------- + mm/memory-failure.c | 2 +- + mm/memory.c | 20 +- + mm/mempolicy.c | 10 + + mm/memremap.c | 2 +- + mm/migrate.c | 189 +++++---- + mm/mlock.c | 3 +- + mm/page-writeback.c | 447 +++++++++++---------- + mm/page_alloc.c | 14 +- + mm/page_io.c | 4 +- + mm/page_owner.c | 10 +- + mm/rmap.c | 14 +- + mm/shmem.c | 7 +- + mm/swap.c | 180 ++++----- + mm/swap_state.c | 2 +- + mm/swapfile.c | 8 +- + mm/userfaultfd.c | 2 +- + mm/util.c | 103 ++--- + mm/vmscan.c | 8 +- + mm/workingset.c | 44 +-- + 67 files changed, 2807 insertions(+), 1643 deletions(-) + create mode 100644 mm/folio-compat.c +Merging akpm-current/current (ace6e27b9019 selftests/memfd: remove unused variable) +$ git merge --no-ff akpm-current/current +Auto-merging mm/workingset.c +Auto-merging mm/vmscan.c +Auto-merging mm/truncate.c +Auto-merging mm/swapfile.c +Auto-merging mm/rmap.c +Auto-merging mm/page_owner.c +Auto-merging mm/page_alloc.c +Auto-merging mm/page-writeback.c +CONFLICT (content): Merge conflict in mm/page-writeback.c +Auto-merging mm/migrate.c +Auto-merging mm/memremap.c +Auto-merging mm/mempolicy.c +Auto-merging mm/memory.c +Auto-merging mm/memory-failure.c +Auto-merging mm/memcontrol.c +CONFLICT (content): Merge conflict in mm/memcontrol.c +Auto-merging mm/internal.h +Auto-merging mm/hugetlb.c +Auto-merging mm/huge_memory.c +Auto-merging mm/filemap.c +Auto-merging mm/compaction.c +Auto-merging lib/Kconfig.debug +Auto-merging kernel/sched/sched.h +Auto-merging kernel/sched/core.c +Auto-merging 
kernel/fork.c +Auto-merging include/linux/writeback.h +Auto-merging include/linux/swap.h +Auto-merging include/linux/sched.h +Auto-merging include/linux/rmap.h +Auto-merging include/linux/pagemap.h +Auto-merging include/linux/mm_types.h +Auto-merging include/linux/mm.h +Auto-merging include/linux/migrate.h +Auto-merging include/linux/memcontrol.h +CONFLICT (content): Merge conflict in include/linux/memcontrol.h +Auto-merging include/linux/highmem.h +Auto-merging include/linux/fs.h +Auto-merging include/linux/backing-dev.h +Auto-merging fs/internal.h +Auto-merging fs/inode.c +Auto-merging drivers/of/kexec.c +Auto-merging drivers/mmc/host/mmc_spi.c +Auto-merging drivers/base/node.c +Auto-merging arch/nds32/include/asm/cacheflush.h +Auto-merging arch/arm64/mm/mmu.c +Auto-merging arch/arm/mach-rpc/ecard.c +Auto-merging arch/arm/include/asm/cacheflush.h +Auto-merging Documentation/core-api/cachetlb.rst +Resolved 'include/linux/memcontrol.h' using previous resolution. +Resolved 'mm/memcontrol.c' using previous resolution. +Resolved 'mm/page-writeback.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit -v -a +[master 1141df70117a] Merge branch 'akpm-current/current' +$ git diff -M --stat --summary HEAD^.. 
+ Documentation/ABI/testing/sysfs-kernel-mm-numa | 24 + + Documentation/admin-guide/mm/memory-hotplug.rst | 800 ++++++++++--------- + .../admin-guide/mm/numa_memory_policy.rst | 16 +- + Documentation/arm64/tagged-address-abi.rst | 26 +- + Documentation/core-api/cachetlb.rst | 86 +-- + Documentation/dev-tools/kasan.rst | 13 +- + Documentation/dev-tools/kfence.rst | 98 +-- + .../translations/zh_CN/core-api/cachetlb.rst | 9 - + arch/Kconfig | 30 +- + arch/arm/include/asm/cacheflush.h | 4 +- + arch/arm/mach-rpc/ecard.c | 2 +- + arch/arm/mm/flush.c | 33 - + arch/arm/mm/nommu.c | 6 - + arch/arm64/mm/mmu.c | 3 +- + arch/csky/abiv1/cacheflush.c | 11 - + arch/csky/abiv1/inc/abi/cacheflush.h | 4 +- + arch/csky/kernel/probes/kprobes.c | 3 +- + arch/ia64/mm/init.c | 3 +- + arch/microblaze/include/asm/pgtable.h | 2 - + arch/microblaze/mm/init.c | 12 - + arch/microblaze/mm/pgtable.c | 17 +- + arch/mips/include/asm/cacheflush.h | 8 +- + arch/nds32/include/asm/cacheflush.h | 3 +- + arch/nds32/mm/cacheflush.c | 9 - + arch/parisc/include/asm/cacheflush.h | 8 +- + arch/parisc/kernel/cache.c | 3 +- + arch/powerpc/Kconfig | 1 + + arch/powerpc/kernel/smp.c | 2 +- + arch/powerpc/mm/book3s64/radix_tlb.c | 4 +- + arch/powerpc/mm/mem.c | 3 +- + arch/powerpc/platforms/pseries/hotplug-memory.c | 13 +- + arch/s390/mm/init.c | 3 +- + arch/sh/include/asm/cacheflush.h | 8 +- + arch/sh/mm/init.c | 3 +- + arch/x86/mm/init_32.c | 3 +- + arch/x86/mm/init_64.c | 3 +- + block/blk-map.c | 2 +- + drivers/acpi/acpi_memhotplug.c | 11 +- + drivers/base/memory.c | 4 +- + drivers/base/node.c | 2 - + drivers/block/ps3disk.c | 2 +- + drivers/dax/kmem.c | 3 +- + drivers/mmc/host/jz4740_mmc.c | 4 - + drivers/mmc/host/mmc_spi.c | 2 +- + drivers/of/kexec.c | 1 + + drivers/virtio/virtio_mem.c | 4 +- + fs/buffer.c | 56 +- + fs/drop_caches.c | 3 +- + fs/exec.c | 10 +- + fs/fs-writeback.c | 11 +- + fs/fs_context.c | 4 +- + fs/hfsplus/catalog.c | 16 +- + fs/hfsplus/dir.c | 4 +- + fs/hfsplus/hfsplus_raw.h | 12 +- + 
fs/hfsplus/xattr.c | 18 +- + fs/inode.c | 48 +- + fs/internal.h | 1 - + fs/namei.c | 8 +- + fs/nilfs2/sysfs.c | 26 +- + fs/ocfs2/dlmglue.c | 2 +- + fs/ocfs2/namei.c | 17 +- + fs/proc/base.c | 5 +- + fs/proc/fd.c | 65 +- + fs/proc/kcore.c | 73 ++ + fs/proc/page.c | 40 +- + fs/userfaultfd.c | 26 +- + include/linux/backing-dev-defs.h | 2 + + include/linux/backing-dev.h | 19 + + include/linux/fs.h | 1 + + include/linux/highmem.h | 39 +- + include/linux/memblock.h | 4 +- + include/linux/memcontrol.h | 75 +- + include/linux/memory.h | 2 +- + include/linux/memory_hotplug.h | 17 +- + include/linux/mempolicy.h | 9 +- + include/linux/memremap.h | 6 + + include/linux/migrate.h | 14 +- + include/linux/mm.h | 32 - + include/linux/mm_types.h | 2 + + include/linux/mm_types_task.h | 5 + + include/linux/mmzone.h | 15 +- + include/linux/pagemap.h | 50 ++ + include/linux/rmap.h | 8 +- + include/linux/sched.h | 8 + + include/linux/sched/mm.h | 21 + + include/linux/slab.h | 32 + + include/linux/swap.h | 28 +- + include/linux/threads.h | 2 +- + include/linux/vm_event_item.h | 2 + + include/linux/vmpressure.h | 2 +- + include/linux/writeback.h | 2 +- + include/trace/events/migrate.h | 3 +- + include/uapi/linux/mempolicy.h | 1 + + init/main.c | 25 +- + ipc/shm.c | 10 +- + ipc/util.c | 6 +- + kernel/cpu.c | 2 +- + kernel/exit.c | 2 +- + kernel/fork.c | 51 ++ + kernel/hung_task.c | 44 ++ + kernel/kthread.c | 21 +- + kernel/pid_namespace.c | 3 +- + kernel/sched/core.c | 35 +- + kernel/sched/sched.h | 4 +- + kernel/sys.c | 7 - + kernel/sysctl.c | 8 +- + lib/Kconfig | 3 - + lib/Kconfig.debug | 17 +- + lib/math/Kconfig | 2 +- + lib/math/rational.c | 3 + + lib/scatterlist.c | 5 +- + lib/string.c | 130 +++- + lib/test_sort.c | 40 +- + lib/test_vmalloc.c | 5 +- + mm/Kconfig | 5 +- + mm/backing-dev.c | 13 +- + mm/compaction.c | 22 +- + mm/debug_vm_pgtable.c | 849 ++++++++++++--------- + mm/filemap.c | 23 +- + mm/gup.c | 2 +- + mm/huge_memory.c | 26 - + mm/hugetlb.c | 164 +++- + mm/internal.h | 9 + 
+ mm/kasan/hw_tags.c | 43 -- + mm/kasan/kasan.h | 1 - + mm/kasan/report.c | 29 +- + mm/kfence/core.c | 22 +- + mm/kfence/kfence.h | 2 + + mm/kfence/kfence_test.c | 2 +- + mm/kfence/report.c | 19 +- + mm/memblock.c | 23 +- + mm/memcontrol.c | 186 +++-- + mm/memory-failure.c | 2 +- + mm/memory.c | 2 +- + mm/memory_hotplug.c | 51 +- + mm/mempolicy.c | 152 +++- + mm/memremap.c | 25 +- + mm/migrate.c | 315 +++++++- + mm/mmap_lock.c | 4 +- + mm/mremap.c | 2 +- + mm/oom_kill.c | 7 +- + mm/page-writeback.c | 121 ++- + mm/page_alloc.c | 95 ++- + mm/page_isolation.c | 7 +- + mm/page_owner.c | 14 +- + mm/rmap.c | 14 +- + mm/secretmem.c | 1 + + mm/slub.c | 6 +- + mm/sparse.c | 43 +- + mm/swapfile.c | 8 +- + mm/truncate.c | 47 +- + mm/vmalloc.c | 79 +- + mm/vmpressure.c | 10 +- + mm/vmscan.c | 206 ++++- + mm/vmstat.c | 25 +- + mm/workingset.c | 10 + + mm/zsmalloc.c | 10 +- + tools/testing/scatterlist/linux/mm.h | 1 - + tools/testing/selftests/memfd/memfd_test.c | 2 +- + tools/testing/selftests/vm/.gitignore | 1 + + tools/testing/selftests/vm/Makefile | 3 + + tools/testing/selftests/vm/ksm_tests.c | 516 +++++++++++++ + tools/testing/selftests/vm/run_vmtests.sh | 96 +++ + tools/testing/selftests/vm/userfaultfd.c | 6 +- + 164 files changed, 3993 insertions(+), 1908 deletions(-) + create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm-numa + create mode 100644 tools/testing/selftests/vm/ksm_tests.c +$ git clone -s -l -n -q . ../rebase-tmp +$ cd ../rebase-tmp +$ git checkout -b akpm remotes/origin/akpm/master +Switched to a new branch 'akpm' +$ git rebase --onto master remotes/origin/akpm/master-base + Successfully rebased and updated refs/heads/akpm. +$ cd ../next +$ git fetch -f ../rebase-tmp akpm:akpm/master +From ../rebase-tmp + + d651260ca117...1141df70117a akpm -> akpm/master (forced update) +$ rm -rf ../rebase-tmp +Merging akpm/master (1141df70117a Merge branch 'akpm-current/current') +$ git merge --no-ff akpm/master +Already up to date. 
diff --git a/localversion-next b/localversion-next new file mode 100644 index 0000000000000..5b98b8b0a279b --- /dev/null +++ b/localversion-next @@ -0,0 +1 @@ +-next-20210723 From fcfd75cabcc146924d550597ddd9b66c21e38cdc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 22 Apr 2020 22:20:42 +0300 Subject: [PATCH 666/851] HACK: ARM: seccomp: Enforce whitelisting of clock_gettime64 After updating to Ubuntu 20.04, I found that all chromium-based software got broken. This happens because Ubuntu 20.04 updated GLIBC to 2.31, which now uses clock_gettime64 by default, but chromium doesn't whitelist this syscall for seccomp. The clock_gettime64 is a VDSO function on ARM, and thus, there is no good reason to fail software that is not ready for a newer GLIBC versions. Falkon and Akregator are now working on Ubuntu 20.04 without a need to disable seccomp by software, which isn't always possible (like in a case of Akregator). Signed-off-by: Dmitry Osipenko --- arch/arm/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2771e682220b4..c5ba584673939 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -894,7 +894,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) /* Do seccomp after ptrace; syscall may have changed. */ #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER - if (secure_computing() == -1) + if (scno != __NR_clock_gettime64 && secure_computing() == -1) return -1; #else /* XXX: remove this once OABI gets fixed */ From 0c817025aeeda23841525a3dd6ff48273dfc30c5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 6 Jun 2020 15:25:55 +0300 Subject: [PATCH 667/851] HACK: ARM: seccomp: Enforce whitelisting of clock_nanosleep_time64 Chromium browser crashes sometime because it doesn't whitelist the clock_nanosleep_time64 syscall. 
This is a problem created by GLIBC; it won't be fixed until all userspace software that uses seccomp updates its syscall filtering rules. Signed-off-by: Dmitry Osipenko --- arch/arm/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index c5ba584673939..fe7abb618eaa5 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -894,7 +894,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) /* Do seccomp after ptrace; syscall may have changed. */ #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER - if (scno != __NR_clock_gettime64 && secure_computing() == -1) + if (scno != __NR_clock_gettime64 && + scno != __NR_clock_nanosleep_time64 && + secure_computing() == -1) return -1; #else /* XXX: remove this once OABI gets fixed */ From 88a8ac744cd44de11978561ef33f1b3c13cc8224 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 13 Apr 2021 07:55:03 +0300 Subject: [PATCH 668/851] ARM: tegra: Add SoC thermal sensor to Tegra30 device-trees Add the on-chip SoC thermal sensor to Tegra30 device-trees. Now CPU temperature reporting and thermal throttling are available on all Tegra30 devices universally. 
Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra30.dtsi | 87 ++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index c577c191be4bb..eaf4951d9ff8e 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -5,6 +5,7 @@ #include #include #include +#include #include "tegra30-peripherals-opp.dtsi" @@ -800,6 +801,20 @@ reset-names = "fuse"; }; + tsensor: tsensor@70014000 { + compatible = "nvidia,tegra30-tsensor"; + reg = <0x70014000 0x500>; + interrupts = ; + clocks = <&tegra_car TEGRA30_CLK_TSENSOR>; + resets = <&tegra_car TEGRA30_CLK_TSENSOR>; + + assigned-clocks = <&tegra_car TEGRA30_CLK_TSENSOR>; + assigned-clock-parents = <&tegra_car TEGRA30_CLK_CLK_M>; + assigned-clock-rates = <500000>; + + #thermal-sensor-cells = <1>; + }; + hda@70030000 { compatible = "nvidia,tegra30-hda"; reg = <0x70030000 0x10000>; @@ -1062,32 +1077,36 @@ #address-cells = <1>; #size-cells = <0>; - cpu@0 { + cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0>; clocks = <&tegra_car TEGRA30_CLK_CCLK_G>; + #cooling-cells = <2>; }; - cpu@1 { + cpu1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <1>; clocks = <&tegra_car TEGRA30_CLK_CCLK_G>; + #cooling-cells = <2>; }; - cpu@2 { + cpu2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <2>; clocks = <&tegra_car TEGRA30_CLK_CCLK_G>; + #cooling-cells = <2>; }; - cpu@3 { + cpu3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <3>; clocks = <&tegra_car TEGRA30_CLK_CCLK_G>; + #cooling-cells = <2>; }; }; @@ -1102,4 +1121,64 @@ <&{/cpus/cpu@2}>, <&{/cpus/cpu@3}>; }; + + thermal-zones { + tsensor0-thermal { + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <5000>; /* milliseconds */ + + thermal-sensors = <&tsensor 0>; + + trips { + level1_trip: dvfs-alert { + /* throttle at 80C until temperature drops to 79.8C */ + 
temperature = <80000>; + hysteresis = <200>; + type = "passive"; + }; + + level2_trip: cpu-div2-throttle { + /* hardware CPU x2 freq throttle at 85C */ + temperature = <85000>; + hysteresis = <200>; + type = "hot"; + }; + + level3_trip: soc-critical { + /* hardware shut down at 90C */ + temperature = <90000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&level1_trip>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&actmon THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; + }; + }; + + tsensor1-thermal { + status = "disabled"; + + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <0>; /* milliseconds */ + + thermal-sensors = <&tsensor 1>; + + trips { + dvfs-alert { + temperature = <80000>; + hysteresis = <200>; + type = "passive"; + }; + }; + }; + }; }; From 3ebe34aee48471262a6aab6a1a05a06410715514 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 20 Jun 2021 11:29:12 +0300 Subject: [PATCH 669/851] ARM: tegra: ouya: Add interrupt to temperature sensor node The TEMP_ALERT pin of LM90 temperature sensor is connected to Tegra SoC. Add interrupt property to the temperature sensor and enable it in pinmux, for completeness. Tested-by: Matt Merhar Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra30-ouya.dts | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/tegra30-ouya.dts b/arch/arm/boot/dts/tegra30-ouya.dts index ab8744f3d72da..90db5ff72537b 100644 --- a/arch/arm/boot/dts/tegra30-ouya.dts +++ b/arch/arm/boot/dts/tegra30-ouya.dts @@ -124,12 +124,11 @@ compatible = "onnn,nct1008"; reg = <0x4c>; vcc-supply = <&sys_3v3_reg>; + + interrupt-parent = <&gpio>; + interrupts = ; + #thermal-sensor-cells = <1>; -/* - * The interrupt is bugged, once triggered it never clears. 
- * interrupt-parent = <&gpio>; - * interrupts = ; - */ }; pmic: pmic@2d { @@ -4376,8 +4375,8 @@ nvidia,pins = "pcc2"; nvidia,function = "i2s4"; nvidia,pull = ; - nvidia,tristate = ; - nvidia,enable-input = ; + nvidia,tristate = ; + nvidia,enable-input = ; }; sdmmc4_rst_n_pcc3 { nvidia,pins = "sdmmc4_rst_n_pcc3"; From 7328ac16365b2c86343fb10546a4fa080a30b03a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 18 Jun 2021 19:44:08 +0300 Subject: [PATCH 670/851] ARM: tegra: paz00: Add interrupt to temperature sensor node The TEMP_ALERT pin of LM90 temperature sensor is connected to Tegra SoC. Add interrupt property to the temperature sensor for completeness. Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra20-paz00.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index 3180bff90756f..acc816bfd2333 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -503,6 +503,10 @@ adt7461: temperature-sensor@4c { compatible = "adi,adt7461"; reg = <0x4c>; + + interrupt-parent = <&gpio>; + interrupts = ; + #thermal-sensor-cells = <1>; }; }; From a1a53c257c09615d72e642589b2499b31f06363b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 16 Jun 2021 16:58:11 +0300 Subject: [PATCH 671/851] ARM: tegra: nexus7: Add interrupt to temperature sensor node The TEMP_ALERT pin of LM90 temperature sensor is connected to Tegra SoC. Add interrupt property to the temperature sensor for completeness. 
Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi index ae8300baa2d47..33985fca956fb 100644 --- a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi @@ -920,6 +920,10 @@ compatible = "onnn,nct1008"; reg = <0x4c>; vcc-supply = <&vdd_3v3_sys>; + + interrupt-parent = <&gpio>; + interrupts = ; + #thermal-sensor-cells = <1>; }; From 10540724ff5319f4536f5b13141aeacf477ade82 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 18 Jun 2021 19:39:02 +0300 Subject: [PATCH 672/851] ARM: tegra: acer-a500: Add interrupt to temperature sensor node The TEMP_ALERT pin of LM90 temperature sensor is connected to Tegra SoC. Add interrupt property to the temperature sensor for completeness. Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra20-acer-a500-picasso.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts index 1976c383912aa..4897079680bd3 100644 --- a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts +++ b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts @@ -693,6 +693,10 @@ compatible = "onnn,nct1008"; reg = <0x4c>; vcc-supply = <&vdd_3v3_sys>; + + interrupt-parent = <&gpio>; + interrupts = ; + #thermal-sensor-cells = <1>; }; }; From d16b2bcb11a7296aef21b16f9e29ea36cf845914 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 19 Jun 2021 15:04:34 +0300 Subject: [PATCH 673/851] ARM: tegra: nyan: Correct interrupt trigger type of temperature sensor The LM90 temperature sensor should use edge-triggered interrupt because LM90 hardware doesn't deassert interrupt line until temperature is back to normal state, which results in interrupt storm. Correct the interrupt trigger type. 
Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra124-nyan.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra124-nyan.dtsi b/arch/arm/boot/dts/tegra124-nyan.dtsi index 5f71add38dfe6..63a81270300a6 100644 --- a/arch/arm/boot/dts/tegra124-nyan.dtsi +++ b/arch/arm/boot/dts/tegra124-nyan.dtsi @@ -91,7 +91,7 @@ compatible = "ti,tmp451"; reg = <0x4c>; interrupt-parent = <&gpio>; - interrupts = ; + interrupts = ; #thermal-sensor-cells = <1>; }; From 86fd43ce6e3536d1ed6b77281118641f26d7efd0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 19 Jun 2021 15:03:12 +0300 Subject: [PATCH 674/851] ARM: tegra: apalis: Correct interrupt trigger type of temperature sensor The LM90 temperature sensor should use edge-triggered interrupt because LM90 hardware doesn't deassert interrupt line until temperature is back to normal state, which results in interrupt storm. Correct the interrupt trigger type. Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra124-apalis-v1.2.dtsi | 2 +- arch/arm/boot/dts/tegra124-apalis.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/tegra124-apalis-v1.2.dtsi b/arch/arm/boot/dts/tegra124-apalis-v1.2.dtsi index 1e30fa405fa0e..cde9ae8fa04b7 100644 --- a/arch/arm/boot/dts/tegra124-apalis-v1.2.dtsi +++ b/arch/arm/boot/dts/tegra124-apalis-v1.2.dtsi @@ -1751,7 +1751,7 @@ compatible = "ti,tmp451"; reg = <0x4c>; interrupt-parent = <&gpio>; - interrupts = ; + interrupts = ; #thermal-sensor-cells = <1>; vcc-supply = <®_module_3v3>; }; diff --git a/arch/arm/boot/dts/tegra124-apalis.dtsi b/arch/arm/boot/dts/tegra124-apalis.dtsi index 608896f8dd52f..a46d9ba9bb7a4 100644 --- a/arch/arm/boot/dts/tegra124-apalis.dtsi +++ b/arch/arm/boot/dts/tegra124-apalis.dtsi @@ -1744,7 +1744,7 @@ compatible = "ti,tmp451"; reg = <0x4c>; interrupt-parent = <&gpio>; - interrupts = ; + interrupts = ; #thermal-sensor-cells = <1>; vcc-supply = <®_module_3v3>; }; From 
ad1b26d642433914beb91a113c0d146066004805 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 19 Jun 2021 14:52:31 +0300 Subject: [PATCH 675/851] ARM: tegra: cardhu: Correct interrupt trigger type of temperature sensor The LM90 temperature sensor should use edge-triggered interrupt because LM90 hardware doesn't deassert interrupt line until temperature is back to normal state, which results in interrupt storm. Correct the interrupt trigger type. Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra30-cardhu.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi index d9dd11569d4b2..448f1397e64a9 100644 --- a/arch/arm/boot/dts/tegra30-cardhu.dtsi +++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi @@ -348,7 +348,7 @@ reg = <0x4c>; vcc-supply = <&sys_3v3_reg>; interrupt-parent = <&gpio>; - interrupts = ; + interrupts = ; #thermal-sensor-cells = <1>; }; From cb0376cdbca0506bfd165d59efa24b342ee58cff Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 19 Jun 2021 14:55:27 +0300 Subject: [PATCH 676/851] ARM: tegra: dalmore: Correct interrupt trigger type of temperature sensor The LM90 temperature sensor should use edge-triggered interrupt because LM90 hardware doesn't deassert interrupt line until temperature is back to normal state, which results in interrupt storm. Correct the interrupt trigger type. 
Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra114-dalmore.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra114-dalmore.dts b/arch/arm/boot/dts/tegra114-dalmore.dts index c04162ddec3c2..7fd901f8d39ab 100644 --- a/arch/arm/boot/dts/tegra114-dalmore.dts +++ b/arch/arm/boot/dts/tegra114-dalmore.dts @@ -788,7 +788,7 @@ reg = <0x4c>; vcc-supply = <&palmas_ldo6_reg>; interrupt-parent = <&gpio>; - interrupts = ; + interrupts = ; }; }; From 7070284adaa04482cf35d75f22092fde25b125f1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 19 Jun 2021 15:04:20 +0300 Subject: [PATCH 677/851] ARM: tegra: jetson-tk1: Correct interrupt trigger type of temperature sensor The LM90 temperature sensor should use edge-triggered interrupt because LM90 hardware doesn't deassert interrupt line until temperature is back to normal state, which results in interrupt storm. Correct the interrupt trigger type. Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra124-jetson-tk1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 414cd1cafa7f1..35ab296408e10 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -1427,7 +1427,7 @@ compatible = "ti,tmp451"; reg = <0x4c>; interrupt-parent = <&gpio>; - interrupts = ; + interrupts = ; }; eeprom@56 { From e0f8efe0620118f837e42e7fba51094cefb45507 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 27 Jun 2021 23:33:13 +0300 Subject: [PATCH 678/851] ARM: tegra: acer-a500: Remove bogus USB VBUS regulators The configuration of USB VBUS regulators was borrowed from downstream kernel, which is incorrect because the corresponding GPIOs are connected to PROX_EN (A501 3G model) and LED_EN pins in accordance to the board schematics. USB works fine with both GPIOs being disabled, so remove the bogus USB VBUS regulators. 
The USB VBUS of USB3 is supplied from the fixed 5v system regulator and device-mode USB1 doesn't have VBUS switches. Signed-off-by: Dmitry Osipenko --- .../boot/dts/tegra20-acer-a500-picasso.dts | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts index 4897079680bd3..c385b13d4faa2 100644 --- a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts +++ b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts @@ -723,7 +723,6 @@ nvidia,xcvr-setup-use-fuses; nvidia,xcvr-lsfslew = <2>; nvidia,xcvr-lsrslew = <2>; - vbus-supply = <&vdd_vbus1>; }; usb@c5008000 { @@ -735,7 +734,7 @@ nvidia,xcvr-setup-use-fuses; nvidia,xcvr-lsfslew = <2>; nvidia,xcvr-lsrslew = <2>; - vbus-supply = <&vdd_vbus3>; + vbus-supply = <&vdd_5v0_sys>; }; brcm_wifi_pwrseq: wifi-pwrseq { @@ -995,28 +994,6 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_vbus1: regulator@4 { - compatible = "regulator-fixed"; - regulator-name = "vdd_usb1_vbus"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - regulator-always-on; - gpio = <&gpio TEGRA_GPIO(D, 0) GPIO_ACTIVE_HIGH>; - enable-active-high; - vin-supply = <&vdd_5v0_sys>; - }; - - vdd_vbus3: regulator@5 { - compatible = "regulator-fixed"; - regulator-name = "vdd_usb3_vbus"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - regulator-always-on; - gpio = <&gpio TEGRA_GPIO(D, 3) GPIO_ACTIVE_HIGH>; - enable-active-high; - vin-supply = <&vdd_5v0_sys>; - }; - sound { compatible = "nvidia,tegra-audio-wm8903-picasso", "nvidia,tegra-audio-wm8903"; From a93ba125c147226f66694293f875a946be2cc069 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 6 Jul 2021 05:21:22 +0300 Subject: [PATCH 679/851] ARM: tegra: acer-a500: Improve thermal zones Use skin temperature for maintaining temperature that is suitable specifically for A500. Add CPU thermal zone that protects silicon. 
All these changes don't make a significant difference, but it is a more correct definition of thermal zones. Signed-off-by: Dmitry Osipenko --- .../boot/dts/tegra20-acer-a500-picasso.dts | 55 ++++++++++++++++--- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts index c385b13d4faa2..75ce986df958f 100644 --- a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts +++ b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts @@ -1025,11 +1025,48 @@ }; thermal-zones { + /* + * NCT1008 has two sensors: + * + * 0: internal that monitors ambient/skin temperature + * 1: external that is connected to the CPU's diode + * + * Ideally we should use userspace thermal governor, + * but it's a much more complex solution. The "skin" + * zone is a simpler solution which prevents A500 from + * getting too hot from a user's tactile perspective. + * The CPU zone is intended to protect silicon from damage. + */ + skin-thermal { polling-delay-passive = <1000>; /* milliseconds */ - polling-delay = <0>; /* milliseconds */ + polling-delay = <5000>; /* milliseconds */ thermal-sensors = <&nct1008 0>; + + trips { + trip0: skin-alert { + /* start throttling at 60C */ + temperature = <60000>; + hysteresis = <200>; + type = "passive"; + }; + + trip1: skin-crit { + /* shut down at 70C */ + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&trip0>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; + }; }; cpu-thermal { @@ -1039,24 +1076,24 @@ thermal-sensors = <&nct1008 1>; trips { - trip0: cpu-alert0 { - /* start throttling at 60C */ - temperature = <60000>; + trip2: cpu-alert { + /* throttle at 85C until temperature drops to 84.8C */ + temperature = <85000>; hysteresis = <200>; type = "passive"; }; - trip1: cpu-crit { - /* shut down at 70C */ - temperature = <70000>; + trip3: 
cpu-crit { + /* shut down at 90C */ + temperature = <90000>; hysteresis = <2000>; type = "critical"; }; }; cooling-maps { - map0 { - trip = <&trip0>; + map1 { + trip = <&trip2>; cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; From ba66abf087417c97894dba9a69f48d813f098d94 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 6 Jul 2021 05:21:56 +0300 Subject: [PATCH 680/851] ARM: tegra: nexus7: Improve thermal zones Use skin temperature for maintaining temperature that is suitable specifically for Nexus 7. Add CPU thermal zone that protects silicon. All these changes don't make a significant difference, but it is a more correct definition of thermal zones. Signed-off-by: Dmitry Osipenko --- .../tegra30-asus-nexus7-grouper-common.dtsi | 56 +++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi index 33985fca956fb..9732cd6f20b7d 100644 --- a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi @@ -1255,21 +1255,34 @@ }; thermal-zones { - cpu-thermal { + /* + * NCT72 has two sensors: + * + * 0: internal that monitors ambient/skin temperature + * 1: external that is connected to the CPU's diode + * + * Ideally we should use userspace thermal governor, + * but it's a much more complex solution. The "skin" + * zone is a simpler solution which prevents Nexus 7 + * from getting too hot from a user's tactile perspective. + * The CPU zone is intended to protect silicon from damage. 
+ */ + + skin-thermal { polling-delay-passive = <1000>; /* milliseconds */ polling-delay = <5000>; /* milliseconds */ - thermal-sensors = <&nct72 1>; + thermal-sensors = <&nct72 0>; trips { - trip0: cpu-alert0 { + trip0: skin-alert { /* throttle at 57C until temperature drops to 56.8C */ temperature = <57000>; hysteresis = <200>; type = "passive"; }; - trip1: cpu-crit { + trip1: skin-crit { /* shut down at 65C */ temperature = <65000>; hysteresis = <2000>; @@ -1289,5 +1302,40 @@ }; }; }; + + cpu-thermal { + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <5000>; /* milliseconds */ + + thermal-sensors = <&nct72 1>; + + trips { + trip2: cpu-alert { + /* throttle at 85C until temperature drops to 84.8C */ + temperature = <85000>; + hysteresis = <200>; + type = "passive"; + }; + + trip3: cpu-crit { + /* shut down at 90C */ + temperature = <90000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map1 { + trip = <&trip2>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&actmon THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + }; + }; }; }; From 8c8d0be7c91ea11182d878f3f11c0c156aec69e7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 19 Jun 2021 15:04:59 +0300 Subject: [PATCH 681/851] arm64: tegra194: p2888: Correct interrupt trigger type of temperature sensor The LM90 temperature sensor should use edge-triggered interrupt because LM90 hardware doesn't deassert interrupt line until temperature is back to normal state, which results in interrupt storm. Correct the interrupt trigger type. 
Signed-off-by: Dmitry Osipenko --- arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi index 7e7b0eb90c802..c4058ee36fecb 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi @@ -309,7 +309,7 @@ interrupt-parent = <&gpio>; interrupts = ; + IRQ_TYPE_EDGE_FALLING>; vcc-supply = <&vdd_1v8ls>; #thermal-sensor-cells = <1>; From 424f01431087fd71abd835ba9a7d90ffe6b556be Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 14 Apr 2021 03:59:53 +0300 Subject: [PATCH 682/851] ARM: tegra_defconfig: Enable CONFIG_TEGRA30_TSENSOR Enable NVIDIA Tegra30 SoC thermal sensor driver in tegra_defconfig. Signed-off-by: Dmitry Osipenko --- arch/arm/configs/tegra_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 3d8d8af9524de..63e7f3261e6d6 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -169,6 +169,7 @@ CONFIG_THERMAL_STATISTICS=y CONFIG_CPU_THERMAL=y CONFIG_DEVFREQ_THERMAL=y CONFIG_TEGRA_SOCTHERM=m +CONFIG_TEGRA30_TSENSOR=m CONFIG_WATCHDOG=y CONFIG_MAX77620_WATCHDOG=y CONFIG_TEGRA_WATCHDOG=y From 6dbb46f2b3c1db2bf27bb0c50fa73712f87c2a61 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 16:36:15 +0300 Subject: [PATCH 683/851] ARM: tegra_defconfig: Enable CONFIG_FB Previously CONFIG_FB was auto-selected, now it's not. We completely lost framebuffer in tegra_defconfig. Select the CONFIG_FB to fix it. 
Fixes: f611b1e7624c ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Dmitry Osipenko --- arch/arm/configs/tegra_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 63e7f3261e6d6..ced01b114cd30 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -209,6 +209,7 @@ CONFIG_DRM_LVDS_CODEC=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_PWM=y +CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y From ad9f94a1daad4916795b72918d518b67bd44e968 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 16:38:38 +0300 Subject: [PATCH 684/851] ARM: tegra_defconfig: Enable Acer A500 drivers Enable EC controller drivers of the Acer A500 tablet. Signed-off-by: Dmitry Osipenko --- arch/arm/configs/tegra_defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index ced01b114cd30..52db1c4f9de44 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -159,6 +159,7 @@ CONFIG_POWER_RESET_AS3722=y CONFIG_POWER_RESET_GPIO=y CONFIG_BATTERY_SBS=y CONFIG_BATTERY_BQ27XXX=y +CONFIG_BATTERY_ACER_A500=y CONFIG_CHARGER_GPIO=y CONFIG_CHARGER_SMB347=y CONFIG_CHARGER_TPS65090=y @@ -182,6 +183,7 @@ CONFIG_MFD_PALMAS=y CONFIG_MFD_TPS65090=y CONFIG_MFD_TPS6586X=y CONFIG_MFD_TPS65910=y +CONFIG_MFD_ACER_A500_EC=y CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_AS3722=y @@ -266,6 +268,7 @@ CONFIG_LEDS_TRIGGER_GPIO=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_LEDS_TRIGGER_TRANSIENT=y CONFIG_LEDS_TRIGGER_CAMERA=y +CONFIG_LEDS_ACER_A500=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_AS3722=y CONFIG_RTC_DRV_DS1307=y From ee55e74ae1e5202824158385df00fa84180c7456 Mon Sep 17 00:00:00 2001 From: Maxim Schwalm Date: Tue, 13 Jul 2021 19:07:39 +0200 Subject: [PATCH 685/851] ARM: 
tegra_defconfig: Enable CONFIG_CROS_EC Updating the tegra_defconfig was omitted, when the ChromeOS EC config options were renamed by a previous commit. Fix it. Fixes: 67c70aa86f8b ("arm/arm64: defconfig: Update configs to use the new CROS_EC options") Signed-off-by: Maxim Schwalm --- arch/arm/configs/tegra_defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 52db1c4f9de44..0bb0846265b5e 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -175,7 +175,6 @@ CONFIG_WATCHDOG=y CONFIG_MAX77620_WATCHDOG=y CONFIG_TEGRA_WATCHDOG=y CONFIG_MFD_AS3722=y -CONFIG_MFD_CROS_EC_DEV=y CONFIG_MFD_MAX77620=y CONFIG_MFD_MAX8907=y CONFIG_MFD_STMPE=y @@ -289,6 +288,8 @@ CONFIG_NVEC_POWER=y CONFIG_NVEC_PAZ00=y CONFIG_STAGING_MEDIA=y CONFIG_TEGRA_VDE=y +CONFIG_CHROME_PLATFORMS=y +CONFIG_CROS_EC=y CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_ARCH_TEGRA_2x_SOC=y From 25071d2faac333d58ab89168d43ac28fbfba333b Mon Sep 17 00:00:00 2001 From: Maxim Schwalm Date: Tue, 13 Jul 2021 19:08:34 +0200 Subject: [PATCH 686/851] ARM: tegra_defconfig: Rebuild defconfig Run "make tegra_defconfig; make savedefconfig" to rebuild tegra_defconfig. 
Signed-off-by: Maxim Schwalm --- arch/arm/configs/tegra_defconfig | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 0bb0846265b5e..0965ed0501dcc 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -20,9 +20,6 @@ CONFIG_SLAB=y CONFIG_ARCH_TEGRA=y CONFIG_SMP=y CONFIG_HIGHMEM=y -CONFIG_SECCOMP=y -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_KEXEC=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y @@ -49,8 +46,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_INET_ESP=y -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_OPTIMISTIC_DAD=y @@ -86,7 +81,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEGRA_GMI=y CONFIG_MTD=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_AD525X_DPOT=y @@ -116,8 +110,8 @@ CONFIG_INPUT_JOYDEV=y CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_GPIO=y CONFIG_KEYBOARD_TEGRA=y -CONFIG_KEYBOARD_CAP11XX=y CONFIG_KEYBOARD_CROS_EC=y +CONFIG_KEYBOARD_CAP11XX=y CONFIG_MOUSE_PS2_ELANTECH=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_ATMEL_MXT=y @@ -159,10 +153,10 @@ CONFIG_POWER_RESET_AS3722=y CONFIG_POWER_RESET_GPIO=y CONFIG_BATTERY_SBS=y CONFIG_BATTERY_BQ27XXX=y -CONFIG_BATTERY_ACER_A500=y CONFIG_CHARGER_GPIO=y CONFIG_CHARGER_SMB347=y CONFIG_CHARGER_TPS65090=y +CONFIG_BATTERY_ACER_A500=y CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM95245=y CONFIG_THERMAL=y @@ -196,7 +190,6 @@ CONFIG_REGULATOR_TPS65090=y CONFIG_REGULATOR_TPS6586X=y CONFIG_REGULATOR_TPS65910=y CONFIG_MEDIA_SUPPORT=y -CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_MEDIA_USB_SUPPORT=y CONFIG_USB_VIDEO_CLASS=y CONFIG_USB_GSPCA=y @@ -207,10 +200,9 @@ CONFIG_DRM_TEGRA_STAGING=y CONFIG_DRM_PANEL_LVDS=y CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_LVDS_CODEC=y -# CONFIG_LCD_CLASS_DEVICE is not set 
+CONFIG_FB=y CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_PWM=y -CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y @@ -248,18 +240,15 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y -CONFIG_USB_CHIPIDEA_TEGRA=y CONFIG_USB_GADGET=y CONFIG_MMC=y CONFIG_MMC_BLOCK_MINORS=16 CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_TEGRA=y -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y CONFIG_LEDS_PWM=y -CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_ACER_A500=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_ONESHOT=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y @@ -267,7 +256,6 @@ CONFIG_LEDS_TRIGGER_GPIO=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_LEDS_TRIGGER_TRANSIENT=y CONFIG_LEDS_TRIGGER_CAMERA=y -CONFIG_LEDS_ACER_A500=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_AS3722=y CONFIG_RTC_DRV_DS1307=y @@ -296,9 +284,7 @@ CONFIG_ARCH_TEGRA_2x_SOC=y CONFIG_ARCH_TEGRA_3x_SOC=y CONFIG_ARCH_TEGRA_114_SOC=y CONFIG_ARCH_TEGRA_124_SOC=y -CONFIG_PM_DEVFREQ=y CONFIG_ARM_TEGRA_DEVFREQ=y -CONFIG_ARM_TEGRA20_DEVFREQ=y CONFIG_MEMORY=y CONFIG_IIO=y CONFIG_KXCJK1013=y @@ -339,7 +325,6 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO_TWOFISH=y -CONFIG_CRC_CCITT=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_PRINTK_TIME=y From 9dc785ae371db252fdecdd767a83b4871dbf38fa Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 16:39:13 +0300 Subject: [PATCH 687/851] ARM: multi_v7_defconfig: Enable Acer A500 drivers Enable EC controller drivers of the Acer A500 tablet. 
Signed-off-by: Dmitry Osipenko --- arch/arm/configs/multi_v7_defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 3ddb8c5b0ddbc..37dde809181fa 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -494,6 +494,7 @@ CONFIG_BATTERY_ACT8945A=y CONFIG_BATTERY_CPCAP=m CONFIG_BATTERY_SBS=y CONFIG_BATTERY_BQ27XXX=m +CONFIG_BATTERY_ACER_A500=m CONFIG_AXP20X_POWER=m CONFIG_BATTERY_MAX17040=m CONFIG_BATTERY_MAX17042=m @@ -552,6 +553,7 @@ CONFIG_BCM7038_WDT=m CONFIG_BCMA_HOST_SOC=y CONFIG_BCMA_DRIVER_GMAC_CMN=y CONFIG_BCMA_DRIVER_GPIO=y +CONFIG_MFD_ACER_A500_EC=m CONFIG_MFD_ACT8945A=y CONFIG_MFD_AS3711=y CONFIG_MFD_AS3722=y @@ -897,6 +899,7 @@ CONFIG_LEDS_GPIO=y CONFIG_LEDS_PWM=y CONFIG_LEDS_MAX77693=m CONFIG_LEDS_MAX8997=m +CONFIG_LEDS_ACER_A500=m CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_ONESHOT=y From bb57c7206bdd0d1602502aa1f27b74f6aa0a98bc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 14 Apr 2021 04:01:18 +0300 Subject: [PATCH 688/851] ARM: multi_v7_defconfig: Enable CONFIG_TEGRA30_TSENSOR Enable NVIDIA Tegra30 SoC thermal sensor driver in multi_v7_defconfig. 
Signed-off-by: Dmitry Osipenko --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 37dde809181fa..d4634b7eb7402 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -526,6 +526,7 @@ CONFIG_BRCMSTB_THERMAL=m CONFIG_GENERIC_ADC_THERMAL=m CONFIG_ST_THERMAL_MEMMAP=y CONFIG_TEGRA_SOCTHERM=m +CONFIG_TEGRA30_TSENSOR=m CONFIG_UNIPHIER_THERMAL=y CONFIG_DA9063_WATCHDOG=m CONFIG_XILINX_WATCHDOG=y From 8bb9e3d155aaa861dcb339c3f11f8b32ed82996f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 18 Jul 2021 23:54:00 +0300 Subject: [PATCH 689/851] soc/tegra: pmc: Prevent racing with cpuidle driver Both PMC and cpuidle drivers are probed at the same init level and cpuidle depends on the PMC suspend mode. Add new default suspend mode that indicates whether PMC driver has been probed and reset the mode in case of a deferred probe of the PMC driver. 
Signed-off-by: Dmitry Osipenko --- arch/arm/mach-tegra/pm.c | 2 +- arch/arm/mach-tegra/pm.h | 6 ------ arch/arm/mach-tegra/tegra.c | 2 -- drivers/soc/tegra/pmc.c | 14 +++++++++++++- include/soc/tegra/pm.h | 6 ++++++ 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c index 6452ebf68d40e..b21f51b8e19e1 100644 --- a/arch/arm/mach-tegra/pm.c +++ b/arch/arm/mach-tegra/pm.c @@ -403,7 +403,7 @@ static const struct platform_suspend_ops tegra_suspend_ops = { .enter = tegra_suspend_enter, }; -void __init tegra_init_suspend(void) +void tegra_pm_init_suspend(void) { enum tegra_suspend_mode mode = tegra_pmc_get_suspend_mode(); diff --git a/arch/arm/mach-tegra/pm.h b/arch/arm/mach-tegra/pm.h index 81525f5f4a441..e63f96de28256 100644 --- a/arch/arm/mach-tegra/pm.h +++ b/arch/arm/mach-tegra/pm.h @@ -25,10 +25,4 @@ void tegra30_sleep_core_init(void); extern void (*tegra_tear_down_cpu)(void); -#ifdef CONFIG_PM_SLEEP -void tegra_init_suspend(void); -#else -static inline void tegra_init_suspend(void) {} -#endif - #endif /* _MACH_TEGRA_PM_H_ */ diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index c011359bcdb44..ab5008f35803c 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -84,8 +84,6 @@ static void __init tegra_dt_init(void) static void __init tegra_dt_init_late(void) { - tegra_init_suspend(); - if (IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC) && of_machine_is_compatible("compal,paz00")) tegra_paz00_wifikill_init(); diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index ea62f84d1c8bd..50091c4ec9481 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -436,7 +436,7 @@ struct tegra_pmc { static struct tegra_pmc *pmc = &(struct tegra_pmc) { .base = NULL, - .suspend_mode = TEGRA_SUSPEND_NONE, + .suspend_mode = TEGRA_SUSPEND_NOT_READY, }; static inline struct tegra_powergate * @@ -1812,6 +1812,7 @@ static int tegra_pmc_parse_dt(struct tegra_pmc *pmc, 
struct device_node *np) u32 value, values[2]; if (of_property_read_u32(np, "nvidia,suspend-mode", &value)) { + pmc->suspend_mode = TEGRA_SUSPEND_NONE; } else { switch (value) { case 0: @@ -2785,6 +2786,11 @@ static int tegra_pmc_regmap_init(struct tegra_pmc *pmc) return 0; } +static void tegra_pmc_reset_suspend_mode(void *data) +{ + pmc->suspend_mode = TEGRA_SUSPEND_NOT_READY; +} + static int tegra_pmc_probe(struct platform_device *pdev) { void __iomem *base; @@ -2803,6 +2809,11 @@ static int tegra_pmc_probe(struct platform_device *pdev) if (err < 0) return err; + err = devm_add_action_or_reset(&pdev->dev, tegra_pmc_reset_suspend_mode, + NULL); + if (err) + return err; + /* take over the memory region from the early initialization */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); base = devm_ioremap_resource(&pdev->dev, res); @@ -2909,6 +2920,7 @@ static int tegra_pmc_probe(struct platform_device *pdev) tegra_pmc_clock_register(pmc, pdev->dev.of_node); platform_set_drvdata(pdev, pmc); + tegra_pm_init_suspend(); return 0; diff --git a/include/soc/tegra/pm.h b/include/soc/tegra/pm.h index 08477d7bfab93..4338789270263 100644 --- a/include/soc/tegra/pm.h +++ b/include/soc/tegra/pm.h @@ -14,6 +14,7 @@ enum tegra_suspend_mode { TEGRA_SUSPEND_LP1, /* CPU voltage off, DRAM self-refresh */ TEGRA_SUSPEND_LP0, /* CPU + core voltage off, DRAM self-refresh */ TEGRA_MAX_SUSPEND_MODE, + TEGRA_SUSPEND_NOT_READY, }; #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_ARM) @@ -28,6 +29,7 @@ void tegra_pm_clear_cpu_in_lp2(void); void tegra_pm_set_cpu_in_lp2(void); int tegra_pm_enter_lp2(void); int tegra_pm_park_secondary_cpu(unsigned long cpu); +void tegra_pm_init_suspend(void); #else static inline enum tegra_suspend_mode tegra_pm_validate_suspend_mode(enum tegra_suspend_mode mode) @@ -61,6 +63,10 @@ static inline int tegra_pm_park_secondary_cpu(unsigned long cpu) { return -ENOTSUPP; } + +static inline void tegra_pm_init_suspend(void) +{ +} #endif /* CONFIG_PM_SLEEP */ #endif /* 
__SOC_TEGRA_PM_H__ */ From 08bc2b48d18ae1404ecfd20ac4e84e7b4af98303 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 18 Jul 2021 23:57:30 +0300 Subject: [PATCH 690/851] cpuidle: tegra: Check whether PMC is ready Check whether PMC is ready before proceeding with the cpuidle registration. This fixes racing with the PMC driver probe order, which results in a disabled deepest CC6 idling state if cpuidle driver is probed before the PMC. Signed-off-by: Dmitry Osipenko --- drivers/cpuidle/cpuidle-tegra.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c index 508bd9f237929..9845629aeb6d4 100644 --- a/drivers/cpuidle/cpuidle-tegra.c +++ b/drivers/cpuidle/cpuidle-tegra.c @@ -337,6 +337,9 @@ static void tegra_cpuidle_setup_tegra114_c7_state(void) static int tegra_cpuidle_probe(struct platform_device *pdev) { + if (tegra_pmc_get_suspend_mode() == TEGRA_SUSPEND_NOT_READY) + return -EPROBE_DEFER; + /* LP2 could be disabled in device-tree */ if (tegra_pmc_get_suspend_mode() < TEGRA_SUSPEND_LP2) tegra_cpuidle_disable_state(TEGRA_CC6); From abdea4fbe31af4274e71fe805623166160dc88dd Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 22:28:43 +0300 Subject: [PATCH 691/851] soc/tegra: pmc: Temporarily disable PMC state syncing Disable PMC state syncing in order to ensure that we won't break older kernels once device-trees will be updated with the addition of the power domains. Previously this was unnecessary because the plan was to make clk device that will attach to the domain for each clock, but the plan changed and now we're going to make a better GENPD implementation that will require updating each device driver with the runtime PM and OPP support before we could safely enable the state syncing. 
Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/pmc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 50091c4ec9481..fb8faf7b226ab 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -360,6 +360,7 @@ struct tegra_pmc_soc { unsigned int num_pmc_clks; bool has_blink_output; bool has_usb_sleepwalk; + bool supports_core_domain; }; /** @@ -3041,6 +3042,7 @@ static void tegra20_pmc_setup_irq_polarity(struct tegra_pmc *pmc, } static const struct tegra_pmc_soc tegra20_pmc_soc = { + .supports_core_domain = false, .num_powergates = ARRAY_SIZE(tegra20_powergates), .powergates = tegra20_powergates, .num_cpu_powergates = 0, @@ -3101,6 +3103,7 @@ static const char * const tegra30_reset_sources[] = { }; static const struct tegra_pmc_soc tegra30_pmc_soc = { + .supports_core_domain = false, .num_powergates = ARRAY_SIZE(tegra30_powergates), .powergates = tegra30_powergates, .num_cpu_powergates = ARRAY_SIZE(tegra30_cpu_powergates), @@ -3157,6 +3160,7 @@ static const u8 tegra114_cpu_powergates[] = { }; static const struct tegra_pmc_soc tegra114_pmc_soc = { + .supports_core_domain = false, .num_powergates = ARRAY_SIZE(tegra114_powergates), .powergates = tegra114_powergates, .num_cpu_powergates = ARRAY_SIZE(tegra114_cpu_powergates), @@ -3273,6 +3277,7 @@ static const struct pinctrl_pin_desc tegra124_pin_descs[] = { }; static const struct tegra_pmc_soc tegra124_pmc_soc = { + .supports_core_domain = false, .num_powergates = ARRAY_SIZE(tegra124_powergates), .powergates = tegra124_powergates, .num_cpu_powergates = ARRAY_SIZE(tegra124_cpu_powergates), @@ -3398,6 +3403,7 @@ static const struct tegra_wake_event tegra210_wake_events[] = { }; static const struct tegra_pmc_soc tegra210_pmc_soc = { + .supports_core_domain = false, .num_powergates = ARRAY_SIZE(tegra210_powergates), .powergates = tegra210_powergates, .num_cpu_powergates = ARRAY_SIZE(tegra210_cpu_powergates), @@ -3555,6 +3561,7 @@ 
static const struct tegra_wake_event tegra186_wake_events[] = { }; static const struct tegra_pmc_soc tegra186_pmc_soc = { + .supports_core_domain = false, .num_powergates = 0, .powergates = NULL, .num_cpu_powergates = 0, @@ -3689,6 +3696,7 @@ static const struct tegra_wake_event tegra194_wake_events[] = { }; static const struct tegra_pmc_soc tegra194_pmc_soc = { + .supports_core_domain = false, .num_powergates = 0, .powergates = NULL, .num_cpu_powergates = 0, @@ -3757,6 +3765,7 @@ static const char * const tegra234_reset_sources[] = { }; static const struct tegra_pmc_soc tegra234_pmc_soc = { + .supports_core_domain = false, .num_powergates = 0, .powergates = NULL, .num_cpu_powergates = 0, @@ -3803,6 +3812,14 @@ static void tegra_pmc_sync_state(struct device *dev) { int err; + /* + * Newer device-trees have power domains, but we need to prepare all + * device drivers with runtime PM and OPP support first, otherwise + * state syncing is unsafe. + */ + if (!pmc->soc->supports_core_domain) + return; + /* * Older device-trees don't have core PD, and thus, there are * no dependencies that will block the state syncing. We shouldn't From 95e687bfeb538f6918f0139c00997106cb9abae9 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 22:50:44 +0300 Subject: [PATCH 692/851] soc/tegra: pmc: Implement attach_dev() of power domain drivers Implement attach_dev() callback of power domain drivers that initializes the domain's performance state. GENPD core will apply the performance state on the first runtime PM resume of the attached device. 
Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/pmc.c | 147 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index fb8faf7b226ab..c3032d0d44a4d 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -506,6 +506,151 @@ static void tegra_pmc_scratch_writel(struct tegra_pmc *pmc, u32 value, writel(value, pmc->scratch + offset); } +static const char * const tegra_emc_compats[] = { + "nvidia,tegra20-emc", + "nvidia,tegra30-emc", + NULL, +}; + +/* + * This GENPD callback is used by both powergate and core domains. + * + * We retrieve clock rate of the attached device and initialize domain's + * performance state in accordance to the clock rate. + */ +static int tegra_pmc_pd_attach_dev(struct generic_pm_domain *genpd, + struct device *dev) +{ + struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev); + struct opp_table *opp_table, *pd_opp_table; + struct generic_pm_domain *core_genpd; + struct dev_pm_opp *opp, *pd_opp; + unsigned long rate, state; + struct gpd_link *link; + struct clk *clk; + u32 hw_version; + int ret; + + /* + * Tegra114+ SocS don't support OPP yet. But if they will get OPP + * support, then we want to skip OPP for older kernels to preserve + * compatibility of newer DTBs with older kernels. + */ + if (!pmc->soc->supports_core_domain) + return 0; + + /* + * The EMC devices are a special case because we have a protection + * from non-EMC drivers getting clock handle before EMC driver is + * fully initialized. The goal of the protection is to prevent + * devfreq driver from getting failures if it will try to change + * EMC clock rate until clock is fully initialized. The EMC drivers + * will initialize the performance state by themselves. 
+ */ + if (of_device_compatible_match(dev->of_node, tegra_emc_compats)) + return 0; + + clk = clk_get(dev, NULL); + if (IS_ERR(clk)) { + dev_err(&genpd->dev, "failed to get clk of %s: %pe\n", + dev_name(dev), clk); + return PTR_ERR(clk); + } + + rate = clk_get_rate(clk); + if (!rate) { + dev_err(&genpd->dev, "failed to get clk rate of %s\n", + dev_name(dev)); + ret = -EINVAL; + goto put_clk; + } + + if (of_machine_is_compatible("nvidia,tegra20")) + hw_version = BIT(tegra_sku_info.soc_process_id); + else + hw_version = BIT(tegra_sku_info.soc_speedo_id); + + opp_table = dev_pm_opp_set_supported_hw(dev, &hw_version, 1); + if (IS_ERR(opp_table)) { + dev_err(&genpd->dev, "failed to set OPP supported HW for %s: %pe\n", + dev_name(dev), opp_table); + ret = PTR_ERR(opp_table); + goto put_clk; + } + + ret = dev_pm_opp_of_add_table(dev); + if (ret) { + /* older DTBs that don't have OPPs will get -ENODEV here */ + if (ret != -ENODEV) + dev_err(&genpd->dev, "failed to get OPP table of %s: %d\n", + dev_name(dev), ret); + else + ret = 0; + + goto put_supported_hw; + } + + /* find suitable OPP for the rate */ + opp = dev_pm_opp_find_freq_ceil(dev, &rate); + + if (opp == ERR_PTR(-ERANGE)) + opp = dev_pm_opp_find_freq_floor(dev, &rate); + + if (IS_ERR(opp)) { + dev_err(&genpd->dev, "failed to find OPP for %luHz of %s: %pe\n", + rate, dev_name(dev), opp); + ret = PTR_ERR(opp); + goto remove_dev_table; + } + + if (!list_empty(&genpd->child_links)) { + link = list_first_entry(&genpd->child_links, struct gpd_link, + child_node); + core_genpd = link->parent; + } else { + core_genpd = genpd; + } + + pd_opp_table = dev_pm_opp_get_opp_table(&core_genpd->dev); + if (IS_ERR(pd_opp_table)) { + dev_err(&genpd->dev, "failed to get OPP table of %s: %pe\n", + dev_name(&core_genpd->dev), pd_opp_table); + ret = PTR_ERR(pd_opp_table); + goto put_dev_opp; + } + + pd_opp = dev_pm_opp_xlate_required_opp(opp_table, pd_opp_table, opp); + if (IS_ERR(pd_opp)) { + dev_err(&genpd->dev, + "failed to xlate 
required OPP for %luHz of %s: %pe\n", + rate, dev_name(dev), pd_opp); + ret = PTR_ERR(pd_opp); + goto put_pd_opp_table; + } + + /* + * The initialized state will be applied by GENPD core on the first + * RPM-resume of the device. This means that drivers don't need to + * explicitly initialize performance state. + */ + state = pm_genpd_opp_to_performance_state(&core_genpd->dev, pd_opp); + gpd_data->rpm_pstate = state; + dev_pm_opp_put(pd_opp); + +put_pd_opp_table: + dev_pm_opp_put_opp_table(pd_opp_table); +put_dev_opp: + dev_pm_opp_put(opp); +remove_dev_table: + dev_pm_opp_of_remove_table(dev); +put_supported_hw: + dev_pm_opp_put_supported_hw(opp_table); +put_clk: + clk_put(clk); + + return ret; +} + /* * TODO Figure out a way to call this with the struct tegra_pmc * passed in. * This currently doesn't work because readx_poll_timeout() can only operate @@ -1238,6 +1383,7 @@ static int tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np) pg->id = id; pg->genpd.name = np->name; + pg->genpd.attach_dev = tegra_pmc_pd_attach_dev; pg->genpd.power_off = tegra_genpd_power_off; pg->genpd.power_on = tegra_genpd_power_on; pg->pmc = pmc; @@ -1354,6 +1500,7 @@ static int tegra_pmc_core_pd_add(struct tegra_pmc *pmc, struct device_node *np) return -ENOMEM; genpd->name = np->name; + genpd->attach_dev = tegra_pmc_pd_attach_dev; genpd->set_performance_state = tegra_pmc_core_pd_set_performance_state; genpd->opp_to_performance_state = tegra_pmc_core_pd_opp_to_performance_state; From 02774e20db5e4c6f129b760c7814a7ee10ec8009 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 1 Jul 2021 00:45:14 +0300 Subject: [PATCH 693/851] soc/tegra: Don't print error message when OPPs not available Previously we assumed that devm_tegra_core_dev_init_opp_table() will be used only by drivers that will always have device with OPP table, but this is not true anymore. For example now Tegra30 will have OPP table for PWM, but Tegra20 not and both use the same driver. 
Hence let's not print the error message about missing OPP table in the common helper; we can print it somewhere else. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c index cd33e99249c3a..a42d4f98c0783 100644 --- a/drivers/soc/tegra/common.c +++ b/drivers/soc/tegra/common.c @@ -111,9 +111,7 @@ int devm_tegra_core_dev_init_opp_table(struct device *dev, */ err = devm_pm_opp_of_add_table(dev); if (err) { - if (err == -ENODEV) - dev_err_once(dev, "OPP table not found, please update device-tree\n"); - else + if (err != -ENODEV) dev_err(dev, "failed to add OPP table: %d\n", err); return err; From 284a9139d694ad3515e443a2306d2dce8852d029 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 1 Jul 2021 00:56:24 +0300 Subject: [PATCH 694/851] soc/tegra: Add devm_tegra_core_dev_init_opp_table_simple() Only a couple of drivers need to get the -ENODEV error code and explicitly initialize the performance state. Add a new helper that avoids the extra boilerplate code in the majority of drivers. 
Signed-off-by: Dmitry Osipenko --- include/soc/tegra/common.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/soc/tegra/common.h b/include/soc/tegra/common.h index af41ad80ec216..265ad90e45a27 100644 --- a/include/soc/tegra/common.h +++ b/include/soc/tegra/common.h @@ -39,4 +39,17 @@ devm_tegra_core_dev_init_opp_table(struct device *dev, } #endif +static inline int +devm_tegra_core_dev_init_opp_table_simple(struct device *dev) +{ + struct tegra_core_opp_params params = {}; + int err; + + err = devm_tegra_core_dev_init_opp_table(dev, &params); + if (err != -ENODEV) + return err; + + return 0; +} + #endif /* __SOC_TEGRA_COMMON_H__ */ From ba66db6cf88a6e75413838fd2f17e742bd914685 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 25 Dec 2020 16:34:51 +0300 Subject: [PATCH 695/851] dt-bindings: clock: tegra-car: Document new tegra-clocks sub-node Document tegra-clocks sub-node which describes Tegra SoC clocks that require a higher voltage of the core power domain in order to operate properly at higher clock rates. Each node contains a phandle to OPP table and power domain. The root PLLs and system clocks don't have any specific device dedicated to them, clock controller is in charge of managing power for them. 
Signed-off-by: Dmitry Osipenko --- .../bindings/clock/nvidia,tegra20-car.yaml | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml index 459d2a5253935..7f5cd27e4ce02 100644 --- a/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml +++ b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml @@ -42,6 +42,48 @@ properties: "#reset-cells": const: 1 + tegra-clocks: + description: child nodes are the output clocks from the CAR + type: object + + patternProperties: + "^[a-z]+[0-9]+$": + type: object + properties: + compatible: + allOf: + - items: + - enum: + - nvidia,tegra20-sclk + - nvidia,tegra30-sclk + - nvidia,tegra30-pllc + - nvidia,tegra30-plle + - nvidia,tegra30-pllm + - const: nvidia,tegra-clock + + operating-points-v2: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to OPP table that contains frequencies, voltages and + opp-supported-hw property, which is a bitfield indicating + SoC process or speedo ID mask. 
+ + clocks: + items: + - description: node's clock + + power-domains: + maxItems: 1 + description: phandle to the core SoC power domain + + required: + - compatible + - operating-points-v2 + - clocks + - power-domains + + additionalProperties: false + required: - compatible - reg @@ -59,6 +101,15 @@ examples: reg = <0x60006000 0x1000>; #clock-cells = <1>; #reset-cells = <1>; + + tegra-clocks { + sclk { + compatible = "nvidia,tegra20-sclk", "nvidia,tegra-clock"; + operating-points-v2 = <&opp_table>; + clocks = <&tegra_car TEGRA20_CLK_SCLK>; + power-domains = <&domain>; + }; + }; }; usb-controller@c5004000 { From 3dcf73ca5e4023e614564c5ab6eb730484a2641c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 22 Nov 2020 22:36:01 +0300 Subject: [PATCH 696/851] clk: tegra: Support runtime PM and power domain The Clock-and-Reset controller resides in a core power domain on NVIDIA Tegra SoCs. In order to support voltage scaling of the core power domain, we hook up DVFS-capable clocks to the core GENPD for managing of the GENPD's performance state based on the clock changes. Some clocks don't have any specific physical hardware unit that backs them, like root PLLs and system clock and they have their own voltage requirements. This patch adds new clk-device driver that backs the clocks and provides runtime PM functionality for them. A virtual clk-device is created for each such DVFS-capable clock at the clock's registration time by the new tegra_clk_register() helper. Driver changes clock's device GENPD performance state based on clk-rate notifications. As a result we have this sequence of events: 1. Clock driver creates virtual device for selective clocks, enables runtime PM for the created device and registers the clock. 2. Clk-device driver starts to listen to clock rate changes. 3. Something changes clk rate or enables/disables clk. 4. CCF core propagates the change through the clk tree. 5. 
Clk-device driver gets clock rate-change notification or GENPD core handles prepare/unprepare of the clock. 6. Clk-device driver changes GENPD performance state on clock rate change. 7. GENPD driver changes voltage regulator state change. 8. The regulator state is committed to hardware via I2C. We rely on fact that DVFS is not needed for Tegra I2C. Hence I2C subsystem stays independent from the clk power management and there are no deadlock spots in the sequence. Currently all clocks are registered very early during kernel boot when the device driver core isn't available yet. The clk-device can't be created at that time. This patch splits the registration of the clocks in two phases: 1. Register all essential clocks which don't use RPM and are needed during early boot. 2. Register at a later boot time the rest of clocks. This patch adds power management support for Tegra20 and Tegra30 clocks. Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/clk/tegra/Makefile | 1 + drivers/clk/tegra/clk-device.c | 222 ++++++++++++++++++++++++++++++++ drivers/clk/tegra/clk-pll.c | 2 +- drivers/clk/tegra/clk-super.c | 2 +- drivers/clk/tegra/clk-tegra20.c | 39 ++++-- drivers/clk/tegra/clk-tegra30.c | 70 ++++++---- drivers/clk/tegra/clk.c | 66 ++++++++++ drivers/clk/tegra/clk.h | 2 + 8 files changed, 368 insertions(+), 36 deletions(-) create mode 100644 drivers/clk/tegra/clk-device.c diff --git a/drivers/clk/tegra/Makefile b/drivers/clk/tegra/Makefile index 7b1816856eb59..a0715cdfc1a4b 100644 --- a/drivers/clk/tegra/Makefile +++ b/drivers/clk/tegra/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += clk.o obj-y += clk-audio-sync.o +obj-y += clk-device.o obj-y += clk-dfll.o obj-y += clk-divider.o obj-y += clk-periph.o diff --git a/drivers/clk/tegra/clk-device.c b/drivers/clk/tegra/clk-device.c new file mode 100644 index 
0000000000000..1399eaba1c915 --- /dev/null +++ b/drivers/clk/tegra/clk-device.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "clk.h" + +/* + * This driver manages performance state of the core power domain for the + * independent PLLs and system clocks. We created a virtual clock device + * for such clocks, see tegra_clk_register(). + */ + +struct tegra_clk_device { + struct notifier_block clk_nb; + struct device *dev; + struct clk_hw *hw; + struct mutex lock; +}; + +static int tegra_clock_set_pd_state(struct tegra_clk_device *clk_dev, + unsigned long rate) +{ + struct device *dev = clk_dev->dev; + struct dev_pm_opp *opp; + unsigned int pstate; + + opp = dev_pm_opp_find_freq_ceil(dev, &rate); + if (opp == ERR_PTR(-ERANGE)) { + dev_dbg(dev, "failed to find ceil OPP for %luHz\n", rate); + opp = dev_pm_opp_find_freq_floor(dev, &rate); + } + + if (IS_ERR(opp)) { + dev_err(dev, "failed to find OPP for %luHz: %pe\n", rate, opp); + return PTR_ERR(opp); + } + + pstate = dev_pm_opp_get_required_pstate(opp, 0); + dev_pm_opp_put(opp); + + return dev_pm_genpd_set_performance_state(dev, pstate); +} + +static int tegra_clock_change_notify(struct notifier_block *nb, + unsigned long msg, void *data) +{ + struct clk_notifier_data *cnd = data; + struct tegra_clk_device *clk_dev; + int err = 0; + + clk_dev = container_of(nb, struct tegra_clk_device, clk_nb); + + mutex_lock(&clk_dev->lock); + switch (msg) { + case PRE_RATE_CHANGE: + if (cnd->new_rate > cnd->old_rate) + err = tegra_clock_set_pd_state(clk_dev, cnd->new_rate); + break; + + case ABORT_RATE_CHANGE: + err = tegra_clock_set_pd_state(clk_dev, cnd->old_rate); + break; + + case POST_RATE_CHANGE: + if (cnd->new_rate < cnd->old_rate) + err = tegra_clock_set_pd_state(clk_dev, cnd->new_rate); + break; + + default: + break; + } + mutex_unlock(&clk_dev->lock); + + return notifier_from_errno(err); +} + 
+static int tegra_clock_sync_pd_state(struct tegra_clk_device *clk_dev) +{ + unsigned long rate; + int ret = 0; + + mutex_lock(&clk_dev->lock); + + if (!pm_runtime_status_suspended(clk_dev->dev)) { + rate = clk_hw_get_rate(clk_dev->hw); + ret = tegra_clock_set_pd_state(clk_dev, rate); + } + + mutex_unlock(&clk_dev->lock); + + return ret; +} + +static int tegra_clock_probe(struct platform_device *pdev) +{ + struct tegra_clk_device *clk_dev; + struct device *dev = &pdev->dev; + struct clk *clk; + int err; + + if (!dev->pm_domain) + return -EINVAL; + + clk_dev = devm_kzalloc(dev, sizeof(*clk_dev), GFP_KERNEL); + if (!clk_dev) + return -ENOMEM; + + clk = devm_clk_get(dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + clk_dev->dev = dev; + clk_dev->hw = __clk_get_hw(clk); + clk_dev->clk_nb.notifier_call = tegra_clock_change_notify; + mutex_init(&clk_dev->lock); + + platform_set_drvdata(pdev, clk_dev); + + err = devm_tegra_core_dev_init_opp_table_simple(dev); + if (err) + return err; + + err = clk_notifier_register(clk, &clk_dev->clk_nb); + if (err) { + dev_err(dev, "failed to register clk notifier: %d\n", err); + return err; + } + + /* + * The driver is attaching to a potentially active/resumed clock, hence + * we need to sync the power domain performance state in a accordance to + * the clock rate if clock is resumed. + */ + err = tegra_clock_sync_pd_state(clk_dev); + if (err) + goto unreg_clk; + + return 0; + +unreg_clk: + clk_notifier_unregister(clk, &clk_dev->clk_nb); + + return err; +} + +static __maybe_unused int tegra_clock_pm_suspend(struct device *dev) +{ + struct tegra_clk_device *clk_dev = dev_get_drvdata(dev); + + /* + * Power management of the clock is entangled with the Tegra PMC + * GENPD because PMC driver enables/disables clocks for toggling + * of the PD's on/off state. + * + * The PMC GENPD is resumed in NOIRQ phase, before RPM of the clocks + * becomes available, hence PMC can't use clocks at the early resume + * phase if RPM is involved. 
For example when 3d clock is enabled, + * it may enable the parent PLL clock that needs to be RPM-resumed. + * + * Secondly, the PLL clocks may be enabled by the low level suspend + * code, so we need to assume that PLL is in enabled state during + * suspend. + * + * We will keep PLLs and system clock resumed during suspend time. + * All PLLs on all SoCs are low power and system clock is always-on, + * so practically not much is changed here. + */ + + return clk_prepare(clk_dev->hw->clk); +} + +static __maybe_unused int tegra_clock_pm_resume(struct device *dev) +{ + struct tegra_clk_device *clk_dev = dev_get_drvdata(dev); + + clk_unprepare(clk_dev->hw->clk); + + return 0; +} + +static void tegra_clock_shutdown(struct platform_device *pdev) +{ + struct tegra_clk_device *clk_dev = platform_get_drvdata(pdev); + + clk_prepare(clk_dev->hw->clk); +} + +static const struct dev_pm_ops tegra_clock_pm = { + SET_SYSTEM_SLEEP_PM_OPS(tegra_clock_pm_suspend, + tegra_clock_pm_resume) +}; + +static const struct of_device_id tegra_clock_match[] = { + { .compatible = "nvidia,tegra20-sclk" }, + { .compatible = "nvidia,tegra30-sclk" }, + { .compatible = "nvidia,tegra30-pllc" }, + { .compatible = "nvidia,tegra30-plle" }, + { .compatible = "nvidia,tegra30-pllm" }, + { } +}; + +static struct platform_driver tegra_clock_driver = { + .driver = { + .name = "tegra-clock", + .of_match_table = tegra_clock_match, + .pm = &tegra_clock_pm, + .suppress_bind_attrs = true, + }, + .probe = tegra_clock_probe, + .shutdown = tegra_clock_shutdown, +}; +builtin_platform_driver(tegra_clock_driver); diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index eaa079c177c33..131efc53659d8 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -1914,7 +1914,7 @@ static struct clk *_tegra_clk_register_pll(struct tegra_clk_pll *pll, /* Data in .init is copied by clk_register(), so stack variable OK */ pll->hw.init = &init; - return clk_register(NULL, &pll->hw); + return 
tegra_clk_register(&pll->hw); } struct clk *tegra_clk_register_pll(const char *name, const char *parent_name, diff --git a/drivers/clk/tegra/clk-super.c b/drivers/clk/tegra/clk-super.c index 6099c6e9acd45..d1af4817051eb 100644 --- a/drivers/clk/tegra/clk-super.c +++ b/drivers/clk/tegra/clk-super.c @@ -226,7 +226,7 @@ struct clk *tegra_clk_register_super_mux(const char *name, /* Data in .init is copied by clk_register(), so stack variable OK */ super->hw.init = &init; - clk = clk_register(NULL, &super->hw); + clk = tegra_clk_register(&super->hw); if (IS_ERR(clk)) kfree(super); diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 3664593a5ba4e..cf92c0f4db61b 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -710,13 +710,6 @@ static void tegra20_super_clk_init(void) NULL); clks[TEGRA20_CLK_CCLK] = clk; - /* SCLK */ - clk = tegra_clk_register_super_mux("sclk", sclk_parents, - ARRAY_SIZE(sclk_parents), - CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, - clk_base + SCLK_BURST_POLICY, 0, 4, 0, 0, NULL); - clks[TEGRA20_CLK_SCLK] = clk; - /* twd */ clk = clk_register_fixed_factor(NULL, "twd", "cclk", 0, 1, 4); clks[TEGRA20_CLK_TWD] = clk; @@ -1146,13 +1139,39 @@ static void __init tegra20_clock_init(struct device_node *np) tegra20_periph_clk_init(); tegra20_audio_clk_init(); - tegra_init_dup_clks(tegra_clk_duplicates, clks, TEGRA20_CLK_CLK_MAX); - tegra_add_of_provider(np, tegra20_clk_src_onecell_get); - tegra_register_devclks(devclks, ARRAY_SIZE(devclks)); tegra_clk_apply_init_table = tegra20_clock_apply_init_table; tegra_cpu_car_ops = &tegra20_cpu_car_ops; } CLK_OF_DECLARE(tegra20, "nvidia,tegra20-car", tegra20_clock_init); + +/* + * Clocks that use runtime PM can't be created at the CLK_OF_DECLARE + * stage because drivers base isn't initialized yet, and thus platform + * devices can't be created for the clocks. Hence we need to split the + * registration of the clocks into two phases. 
The first phase registers + * essential clocks which don't require RPM and are actually used during + * early boot. The second phase registers clocks which use RPM and this + * is done when device drivers core API is ready. + */ +static int __init tegra20_init_runtime_pm_clocks(void) +{ + struct clk *clk; + + if (!of_machine_is_compatible("nvidia,tegra20")) + return 0; + + clk = tegra_clk_register_super_mux("sclk", sclk_parents, + ARRAY_SIZE(sclk_parents), + CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, + clk_base + SCLK_BURST_POLICY, 0, 4, 0, 0, NULL); + clks[TEGRA20_CLK_SCLK] = clk; + + tegra_init_dup_clks(tegra_clk_duplicates, clks, TEGRA20_CLK_CLK_MAX); + tegra_register_devclks(devclks, ARRAY_SIZE(devclks)); + + return 0; +} +postcore_initcall_sync(tegra20_init_runtime_pm_clocks); diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 64121bc66d85a..a8ca313f3c6fa 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -812,11 +812,6 @@ static void __init tegra30_pll_init(void) { struct clk *clk; - /* PLLC */ - clk = tegra_clk_register_pll("pll_c", "pll_ref", clk_base, pmc_base, 0, - &pll_c_params, NULL); - clks[TEGRA30_CLK_PLL_C] = clk; - /* PLLC_OUT1 */ clk = tegra_clk_register_divider("pll_c_out1_div", "pll_c", clk_base + PLLC_OUT, 0, TEGRA_DIVIDER_ROUND_UP, @@ -826,11 +821,6 @@ static void __init tegra30_pll_init(void) 0, NULL); clks[TEGRA30_CLK_PLL_C_OUT1] = clk; - /* PLLM */ - clk = tegra_clk_register_pll("pll_m", "pll_ref", clk_base, pmc_base, - CLK_SET_RATE_GATE, &pll_m_params, NULL); - clks[TEGRA30_CLK_PLL_M] = clk; - /* PLLM_OUT1 */ clk = tegra_clk_register_divider("pll_m_out1_div", "pll_m", clk_base + PLLM_OUT, 0, TEGRA_DIVIDER_ROUND_UP, @@ -880,9 +870,6 @@ static void __init tegra30_pll_init(void) ARRAY_SIZE(pll_e_parents), CLK_SET_RATE_NO_REPARENT, clk_base + PLLE_AUX, 2, 1, 0, NULL); - clk = tegra_clk_register_plle("pll_e", "pll_e_mux", clk_base, pmc_base, - CLK_GET_RATE_NOCACHE, &pll_e_params, 
NULL); - clks[TEGRA30_CLK_PLL_E] = clk; } static const char *cclk_g_parents[] = { "clk_m", "pll_c", "clk_32k", "pll_m", @@ -971,14 +958,6 @@ static void __init tegra30_super_clk_init(void) NULL); clks[TEGRA30_CLK_CCLK_LP] = clk; - /* SCLK */ - clk = tegra_clk_register_super_mux("sclk", sclk_parents, - ARRAY_SIZE(sclk_parents), - CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, - clk_base + SCLK_BURST_POLICY, - 0, 4, 0, 0, NULL); - clks[TEGRA30_CLK_SCLK] = clk; - /* twd */ clk = clk_register_fixed_factor(NULL, "twd", "cclk_g", CLK_SET_RATE_PARENT, 1, 2); @@ -1354,13 +1333,56 @@ static void __init tegra30_clock_init(struct device_node *np) tegra30_audio_plls, ARRAY_SIZE(tegra30_audio_plls), 24000000); - tegra_init_dup_clks(tegra_clk_duplicates, clks, TEGRA30_CLK_CLK_MAX); - tegra_add_of_provider(np, tegra30_clk_src_onecell_get); - tegra_register_devclks(devclks, ARRAY_SIZE(devclks)); tegra_clk_apply_init_table = tegra30_clock_apply_init_table; tegra_cpu_car_ops = &tegra30_cpu_car_ops; } CLK_OF_DECLARE(tegra30, "nvidia,tegra30-car", tegra30_clock_init); + +/* + * Clocks that use runtime PM can't be created at the CLK_OF_DECLARE + * stage because drivers base isn't initialized yet, and thus platform + * devices can't be created for the clocks. Hence we need to split the + * registration of the clocks into two phases. The first phase registers + * essential clocks which don't require RPM and are actually used during + * early boot. The second phase registers clocks which use RPM and this + * is done when device drivers core API is ready. 
+ */ +static int __init tegra30_init_runtime_pm_clocks(void) +{ + struct clk *clk; + + if (!of_machine_is_compatible("nvidia,tegra30")) + return 0; + + /* PLLC */ + clk = tegra_clk_register_pll("pll_c", "pll_ref", clk_base, pmc_base, 0, + &pll_c_params, NULL); + clks[TEGRA30_CLK_PLL_C] = clk; + + /* PLLE */ + clk = tegra_clk_register_plle("pll_e", "pll_e_mux", clk_base, pmc_base, + CLK_GET_RATE_NOCACHE, &pll_e_params, NULL); + clks[TEGRA30_CLK_PLL_E] = clk; + + /* PLLM */ + clk = tegra_clk_register_pll("pll_m", "pll_ref", clk_base, pmc_base, + CLK_SET_RATE_GATE, &pll_m_params, NULL); + clks[TEGRA30_CLK_PLL_M] = clk; + + /* SCLK */ + clk = tegra_clk_register_super_mux("sclk", sclk_parents, + ARRAY_SIZE(sclk_parents), + CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, + clk_base + SCLK_BURST_POLICY, + 0, 4, 0, 0, NULL); + clks[TEGRA30_CLK_SCLK] = clk; + + tegra_init_dup_clks(tegra_clk_duplicates, clks, TEGRA30_CLK_CLK_MAX); + tegra_register_devclks(devclks, ARRAY_SIZE(devclks)); + + return 0; +} +postcore_initcall_sync(tegra30_init_runtime_pm_clocks); diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c index f6cdce441cf7a..dda27dc149933 100644 --- a/drivers/clk/tegra/clk.c +++ b/drivers/clk/tegra/clk.c @@ -9,14 +9,19 @@ #include #include #include +#include #include +#include +#include #include +#include #include #include "clk.h" /* Global data of Tegra CPU CAR ops */ +static struct device_node *tegra_car_np; static struct tegra_cpu_car_ops dummy_car_ops; struct tegra_cpu_car_ops *tegra_cpu_car_ops = &dummy_car_ops; @@ -320,6 +325,8 @@ void __init tegra_add_of_provider(struct device_node *np, { int i; + tegra_car_np = np; + for (i = 0; i < clk_num; i++) { if (IS_ERR(clks[i])) { pr_err @@ -372,6 +379,65 @@ struct clk ** __init tegra_lookup_dt_id(int clk_id, return NULL; } +static struct device_node *tegra_clk_get_of_node(struct clk_hw *hw) +{ + struct device_node *np, *root; + + if (!tegra_car_np) + return NULL; + + root = of_get_child_by_name(tegra_car_np, 
"tegra-clocks"); + if (!root) + return NULL; + + for_each_child_of_node(root, np) { + if (strcmp(np->name, hw->init->name)) + continue; + + if (!of_device_is_compatible(np, "nvidia,tegra-clock")) + continue; + + return np; + } + + of_node_put(root); + + return NULL; +} + +struct clk *tegra_clk_register(struct clk_hw *hw) +{ + struct platform_device *pdev; + struct device *dev = NULL; + struct device_node *np; + const char *dev_name; + + np = tegra_clk_get_of_node(hw); + + if (!of_device_is_available(np)) + goto reg_clk; + + dev_name = kasprintf(GFP_KERNEL, "tegra_clk_%s", hw->init->name); + if (!dev_name) { + of_node_put(np); + goto reg_clk; + } + + pdev = of_platform_device_create(np, dev_name, NULL); + if (!pdev) { + pr_err("%s: failed to create device for %pOF\n", __func__, np); + kfree(dev_name); + of_node_put(np); + goto reg_clk; + } + + dev = &pdev->dev; + pm_runtime_enable(dev); + +reg_clk: + return clk_register(dev, hw); +} + tegra_clk_apply_init_table_func tegra_clk_apply_init_table; static int __init tegra_clocks_apply_init_table(void) diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h index 0c3ba0ccce1ac..4fab4aa9fa314 100644 --- a/drivers/clk/tegra/clk.h +++ b/drivers/clk/tegra/clk.h @@ -927,4 +927,6 @@ struct clk *tegra20_clk_register_emc(void __iomem *ioaddr, bool low_jitter); struct clk *tegra210_clk_register_emc(struct device_node *np, void __iomem *regs); +struct clk *tegra_clk_register(struct clk_hw *hw); + #endif /* TEGRA_CLK_H */ From f178767625b56d5822074ed7e19517b754d03801 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 04:52:14 +0300 Subject: [PATCH 697/851] dt-bindings: host1x: Document OPP and power domain properties Document new DVFS OPP table and power domain properties of the Host1x bus and devices sitting on the bus. 
Reviewed-by: Rob Herring Signed-off-by: Dmitry Osipenko --- .../display/tegra/nvidia,tegra20-host1x.txt | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt index 8a6d3e1ee306a..62861a8fb5c68 100644 --- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt @@ -20,6 +20,18 @@ Required properties: - reset-names: Must include the following entries: - host1x +Optional properties: +- operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to HEG or core power domain. + +For each opp entry in 'operating-points-v2' table of host1x and its modules: +- opp-supported-hw: One bitfield indicating: + On Tegra20: SoC process ID mask + On Tegra30+: SoC speedo ID mask + + A bitwise AND is performed against the value and if any bit + matches, the OPP gets enabled. + Each host1x client module having to perform DMA through the Memory Controller should have the interconnect endpoints set to the Memory Client and External Memory respectively. @@ -45,6 +57,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to MPE power domain. - vi: video input @@ -128,6 +142,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. 
+ - power-domains: Phandle to VENC power domain. - epp: encoder pre-processor @@ -147,6 +163,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to HEG or core power domain. - isp: image signal processor @@ -166,6 +184,7 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - power-domains: Phandle to VENC or core power domain. - gr2d: 2D graphics engine @@ -185,6 +204,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to HEG or core power domain. - gr3d: 3D graphics engine @@ -209,6 +230,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandles to 3D or core power domain. - dc: display controller @@ -241,6 +264,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. 
+ - power-domains: Phandle to core power domain. - hdmi: High Definition Multimedia Interface @@ -267,6 +292,7 @@ of the following host1x client modules: - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection - nvidia,edid: supplies a binary EDID blob - nvidia,panel: phandle of a display panel + - operating-points-v2: See ../bindings/opp/opp.txt for details. - tvo: TV encoder output @@ -277,6 +303,10 @@ of the following host1x client modules: - clocks: Must contain one entry, for the module clock. See ../clocks/clock-bindings.txt for details. + Optional properties: + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to core power domain. + - dsi: display serial interface Required properties: @@ -305,6 +335,7 @@ of the following host1x client modules: - nvidia,panel: phandle of a display panel - nvidia,ganged-mode: contains a phandle to a second DSI controller to gang up with in order to support up to 8 data lanes + - operating-points-v2: See ../bindings/opp/opp.txt for details. 
- sor: serial output resource @@ -408,6 +439,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_HOST1X>; resets = <&tegra_car 28>; reset-names = "host1x"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; #address-cells = <1>; #size-cells = <1>; @@ -421,6 +454,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_MPE>; resets = <&tegra_car 60>; reset-names = "mpe"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; vi@54080000 { @@ -429,6 +464,7 @@ Example: interrupts = ; assigned-clocks = <&tegra_car TEGRA210_CLK_VI>; assigned-clock-parents = <&tegra_car TEGRA210_CLK_PLL_C4_OUT0>; + operating-points-v2 = <&dvfs_opp_table>; clocks = <&tegra_car TEGRA210_CLK_VI>; power-domains = <&pd_venc>; @@ -510,6 +546,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_EPP>; resets = <&tegra_car 19>; reset-names = "epp"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; isp { @@ -528,6 +566,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_GR2D>; resets = <&tegra_car 21>; reset-names = "2d"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; gr3d { @@ -536,6 +576,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_GR3D>; resets = <&tegra_car 24>; reset-names = "3d"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; dc@54200000 { @@ -547,6 +589,8 @@ Example: clock-names = "dc", "parent"; resets = <&tegra_car 27>; reset-names = "dc"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; interconnects = <&mc TEGRA20_MC_DISPLAY0A &emc>, <&mc TEGRA20_MC_DISPLAY0B &emc>, @@ -571,6 +615,8 @@ Example: clock-names = "dc", "parent"; resets = <&tegra_car 26>; reset-names = "dc"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; interconnects = <&mc TEGRA20_MC_DISPLAY0AB &emc>, <&mc TEGRA20_MC_DISPLAY0BB &emc>, @@ -596,6 +642,7 @@ Example: resets = <&tegra_car 51>; reset-names = "hdmi"; status = "disabled"; + operating-points-v2 = <&dvfs_opp_table>; }; tvo 
{ @@ -604,6 +651,7 @@ Example: interrupts = <0 76 0x04>; clocks = <&tegra_car TEGRA20_CLK_TVO>; status = "disabled"; + operating-points-v2 = <&dvfs_opp_table>; }; dsi { @@ -615,6 +663,7 @@ Example: resets = <&tegra_car 48>; reset-names = "dsi"; status = "disabled"; + operating-points-v2 = <&dvfs_opp_table>; }; }; From 8d61ed4f224a8ecdadd0b5da2de67da69eccb2a0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 21 Dec 2020 02:07:47 +0300 Subject: [PATCH 698/851] dt-bindings: host1x: Document Memory Client resets of Host1x, GR2D and GR3D Memory Client should be blocked before hardware reset is asserted in order to prevent memory corruption and hanging of memory controller. Document Memory Client resets of Host1x, GR2D and GR3D hardware units. Signed-off-by: Dmitry Osipenko --- .../bindings/display/tegra/nvidia,tegra20-host1x.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt index 62861a8fb5c68..07a08653798b2 100644 --- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt @@ -18,6 +18,7 @@ Required properties: - resets: Must contain an entry for each entry in reset-names. See ../reset/reset.txt for details. - reset-names: Must include the following entries: + - mc - host1x Optional properties: @@ -197,6 +198,7 @@ of the following host1x client modules: - resets: Must contain an entry for each entry in reset-names. See ../reset/reset.txt for details. - reset-names: Must include the following entries: + - mc - 2d Optional properties: @@ -222,6 +224,8 @@ of the following host1x client modules: - resets: Must contain an entry for each entry in reset-names. See ../reset/reset.txt for details. 
- reset-names: Must include the following entries: + - mc + - mc2 (Only required on SoCs with two 3D clocks) - 3d - 3d2 (Only required on SoCs with two 3D clocks) From 4838606ec72d3e320260e5d1a3de42df065e5305 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 3 Dec 2020 00:24:27 +0300 Subject: [PATCH 699/851] gpu: host1x: Add host1x_channel_stop() Add host1x_channel_stop() which waits till channel becomes idle and then stops the channel hardware. This is needed for supporting suspend/resume by host1x drivers since the hardware state is lost after power-gating, thus the channel needs to be stopped before client enters into suspend. Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/host1x/channel.c | 8 ++++++++ include/linux/host1x.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c index 4cd212bb570d5..2a9a3a8d5931a 100644 --- a/drivers/gpu/host1x/channel.c +++ b/drivers/gpu/host1x/channel.c @@ -75,6 +75,14 @@ struct host1x_channel *host1x_channel_get_index(struct host1x *host, return ch; } +void host1x_channel_stop(struct host1x_channel *channel) +{ + struct host1x *host = dev_get_drvdata(channel->dev->parent); + + host1x_hw_cdma_stop(host, &channel->cdma); +} +EXPORT_SYMBOL(host1x_channel_stop); + static void release_channel(struct kref *kref) { struct host1x_channel *channel = diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9b6784708f2e6..fd5fe637edbb4 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -182,6 +182,7 @@ struct host1x_job; struct host1x_channel *host1x_channel_request(struct host1x_client *client); struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); +void host1x_channel_stop(struct host1x_channel *channel); void host1x_channel_put(struct host1x_channel *channel); int 
host1x_job_submit(struct host1x_job *job); From 75f312f09b48d3b066fb62324260ee4a76058f19 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 06:10:40 +0300 Subject: [PATCH 700/851] gpu: host1x: Add runtime PM support Add runtime PM support to the Host1x driver. This is required for enabling system-wide DVFS and supporting dynamic power management using a generic power domain. For starters, we will keep host1x always-on because dynamic power management requires a major refactoring of the driver code since lots of code paths will need the RPM handling and we're going to remove some of these paths in the future. Host1x doesn't consume much power so it is good enough, we at least need to resume Host1x in order to initialize the power state. Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/host1x/debug.c | 15 +++ drivers/gpu/host1x/dev.c | 145 +++++++++++++++++++++++------ drivers/gpu/host1x/dev.h | 3 +- drivers/gpu/host1x/hw/channel_hw.c | 44 ++++----- drivers/gpu/host1x/intr.c | 3 - drivers/gpu/host1x/syncpt.c | 5 +- 6 files changed, 159 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index 8a14880c61bbd..18d9c8d206e3f 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -7,6 +7,7 @@ */ #include +#include #include #include @@ -52,6 +53,11 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) { struct host1x *m = dev_get_drvdata(ch->dev->parent); struct output *o = data; + int err; + + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return err; mutex_lock(&ch->cdma.lock); mutex_lock(&debug_lock); @@ -64,6 +70,8 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) mutex_unlock(&debug_lock); mutex_unlock(&ch->cdma.lock); + pm_runtime_put(m->dev); + return 0; } @@
-71,9 +79,14 @@ static void show_syncpts(struct host1x *m, struct output *o) { struct list_head *pos; unsigned int i; + int err; host1x_debug_output(o, "---- syncpts ----\n"); + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return; + for (i = 0; i < host1x_syncpt_nb_pts(m); i++) { u32 max = host1x_syncpt_read_max(m->syncpt + i); u32 min = host1x_syncpt_load(m->syncpt + i); @@ -101,6 +114,8 @@ static void show_syncpts(struct host1x *m, struct output *o) base_val); } + pm_runtime_put(m->dev); + host1x_debug_output(o, "\n"); } diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index fbb6447b8659e..0945df6039af3 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -6,12 +6,15 @@ */ #include +#include #include #include #include #include #include #include +#include +#include #include #define CREATE_TRACE_POINTS @@ -190,6 +193,9 @@ static void host1x_setup_sid_table(struct host1x *host) const struct host1x_info *info = host->info; unsigned int i; + if (!info->has_hypervisor) + return; + for (i = 0; i < info->num_sid_entries; i++) { const struct host1x_sid_entry *entry = &info->sid_table[i]; @@ -347,6 +353,27 @@ static void host1x_iommu_exit(struct host1x *host) } } +static int host1x_get_resets(struct host1x *host) +{ + int err; + + host->resets[0].id = "mc"; + host->resets[1].id = "host1x"; + host->nresets = ARRAY_SIZE(host->resets); + + err = devm_reset_control_bulk_get_optional_exclusive_released( + host->dev, host->nresets, host->resets); + if (err) { + dev_err(host->dev, "failed to get reset: %d\n", err); + return err; + } + + if (WARN_ON(!host->resets[1].rstc)) + return -EINVAL; + + return 0; +} + static int host1x_probe(struct platform_device *pdev) { struct host1x *host; @@ -423,12 +450,9 @@ static int host1x_probe(struct platform_device *pdev) return err; } - host->rst = devm_reset_control_get(&pdev->dev, "host1x"); - if (IS_ERR(host->rst)) { - err = PTR_ERR(host->rst); - dev_err(&pdev->dev, "failed to get reset: 
%d\n", err); + err = host1x_get_resets(host); + if (err) return err; - } err = host1x_iommu_init(host); if (err < 0) { @@ -443,22 +467,10 @@ static int host1x_probe(struct platform_device *pdev) goto iommu_exit; } - err = clk_prepare_enable(host->clk); - if (err < 0) { - dev_err(&pdev->dev, "failed to enable clock\n"); - goto free_channels; - } - - err = reset_control_deassert(host->rst); - if (err < 0) { - dev_err(&pdev->dev, "failed to deassert reset: %d\n", err); - goto unprepare_disable; - } - err = host1x_syncpt_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize syncpts\n"); - goto reset_assert; + goto free_channels; } err = host1x_intr_init(host, syncpt_irq); @@ -467,10 +479,14 @@ static int host1x_probe(struct platform_device *pdev) goto deinit_syncpt; } - host1x_debug_init(host); + pm_runtime_enable(&pdev->dev); + + /* the driver's code isn't ready yet for the dynamic RPM */ + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + goto deinit_intr; - if (host->info->has_hypervisor) - host1x_setup_sid_table(host); + host1x_debug_init(host); err = host1x_register(host); if (err < 0) @@ -486,13 +502,13 @@ static int host1x_probe(struct platform_device *pdev) host1x_unregister(host); deinit_debugfs: host1x_debug_deinit(host); + + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); +deinit_intr: host1x_intr_deinit(host); deinit_syncpt: host1x_syncpt_deinit(host); -reset_assert: - reset_control_assert(host->rst); -unprepare_disable: - clk_disable_unprepare(host->clk); free_channels: host1x_channel_list_free(&host->channel_list); iommu_exit: @@ -507,19 +523,94 @@ static int host1x_remove(struct platform_device *pdev) host1x_unregister(host); host1x_debug_deinit(host); + + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + host1x_intr_deinit(host); host1x_syncpt_deinit(host); - reset_control_assert(host->rst); - clk_disable_unprepare(host->clk); host1x_iommu_exit(host); return 0; } +static int __maybe_unused 
host1x_runtime_suspend(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + host1x_intr_stop(host); + host1x_syncpt_save(host); + + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } + + usleep_range(1000, 2000); + + clk_disable_unprepare(host->clk); + reset_control_bulk_release(host->nresets, host->resets); + + return 0; + +resume_host1x: + host1x_setup_sid_table(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return err; +} + +static int __maybe_unused host1x_runtime_resume(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(host->clk); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + err = reset_control_bulk_deassert(host->nresets, host->resets); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + host1x_setup_sid_table(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return 0; + +disable_clk: + clk_disable_unprepare(host->clk); +release_reset: + reset_control_bulk_release(host->nresets, host->resets); + + return err; +} + +static const struct dev_pm_ops host1x_pm = { + SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume, + NULL) + /* TODO: add system suspend-resume once driver will be ready for that */ +}; + static struct platform_driver tegra_host1x_driver = { .driver = { .name = "tegra-host1x", .of_match_table = host1x_of_match, + .pm = &host1x_pm, }, .probe = host1x_probe, .remove = host1x_remove, diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index fa6d4bc46e981..41a7a63514c40 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -118,7 
+118,8 @@ struct host1x { struct host1x_syncpt_base *bases; struct device *dev; struct clk *clk; - struct reset_control *rst; + struct reset_control_bulk_data resets[2]; + unsigned int nresets; struct iommu_group *group; struct iommu_domain *domain; diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 1999780a7203a..6b40e9af1e886 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -159,6 +159,27 @@ static void host1x_channel_set_streamid(struct host1x_channel *channel) #endif } +static void host1x_enable_gather_filter(struct host1x_channel *ch) +{ +#if HOST1X_HW >= 6 + struct host1x *host = dev_get_drvdata(ch->dev->parent); + u32 val; + + if (!host->hv_regs) + return; + + val = host1x_hypervisor_readl( + host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); + val |= BIT(ch->id % 32); + host1x_hypervisor_writel( + host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); +#elif HOST1X_HW >= 4 + host1x_ch_writel(ch, + HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), + HOST1X_CHANNEL_CHANNELCTRL); +#endif +} + static int channel_submit(struct host1x_job *job) { struct host1x_channel *ch = job->channel; @@ -190,6 +211,7 @@ static int channel_submit(struct host1x_job *job) } host1x_channel_set_streamid(ch); + host1x_enable_gather_filter(ch); /* begin a CDMA submit */ err = host1x_cdma_begin(&ch->cdma, job); @@ -249,27 +271,6 @@ static int channel_submit(struct host1x_job *job) return err; } -static void enable_gather_filter(struct host1x *host, - struct host1x_channel *ch) -{ -#if HOST1X_HW >= 6 - u32 val; - - if (!host->hv_regs) - return; - - val = host1x_hypervisor_readl( - host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); - val |= BIT(ch->id % 32); - host1x_hypervisor_writel( - host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); -#elif HOST1X_HW >= 4 - host1x_ch_writel(ch, - HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), - HOST1X_CHANNEL_CHANNELCTRL); -#endif 
-} - static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, unsigned int index) { @@ -278,7 +279,6 @@ static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, #else ch->regs = dev->regs + index * 0x100; #endif - enable_gather_filter(dev, ch); return 0; } diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 45b6be927ec4d..965ba21818b15 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -297,14 +297,11 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync) "host1x_sp_%02u", id); } - host1x_intr_start(host); - return 0; } void host1x_intr_deinit(struct host1x *host) { - host1x_intr_stop(host); } void host1x_intr_start(struct host1x *host) diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index d198a10848c6b..e08e331e46aea 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -143,6 +143,8 @@ void host1x_syncpt_restore(struct host1x *host) for (i = 0; i < host1x_syncpt_nb_bases(host); i++) host1x_hw_syncpt_restore_wait_base(host, sp_base + i); + host1x_hw_syncpt_enable_protection(host); + wmb(); } @@ -366,9 +368,6 @@ int host1x_syncpt_init(struct host1x *host) host->syncpt = syncpt; host->bases = bases; - host1x_syncpt_restore(host); - host1x_hw_syncpt_enable_protection(host); - /* Allocate sync point to use for clearing waits for expired fences */ host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop"); if (!host->nop_sp) From 0cd2eae494ccb00cf54609a760d4183969837d5e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 23 Apr 2021 01:45:08 +0300 Subject: [PATCH 701/851] gpu: host1x: Add stub driver for MPE, VI, EPP and ISP We never had drivers for MPE, VI, EPP and ISP hardware units on Tegra20 and Tegra30 and they are specified in the device-trees. Thus, a device is getting created for them by host1x bus, but driver is never getting bound. 
After adding support for generic power domains, we now have a situation where the state of the PMC driver is never synced, because the consumer devices never become ready due to the missing drivers. The PMC state needs to be synced in order to allow scaling of the SoC core voltage. Add a stub driver in order to resolve the problem. Signed-off-by: Dmitry Osipenko --- drivers/gpu/host1x/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 0945df6039af3..400a3b9d88575 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -616,7 +616,46 @@ static struct platform_driver tegra_host1x_driver = { .remove = host1x_remove, }; +/* + * We never had drivers for MPE, VI, EPP and ISP hardware units on Tegra20 + * and Tegra30, but they are specified in the device-trees, and thus, + * device entity is getting created for them by host1x bus a, but driver + * is never getting bound. After adding support for generic power domains + * on Tegra20/30, we now have a situation where the state of PMC driver is + * never synced because consumer device never becomes ready due to the + * missing drivers. The PMC state needs to be synced in order to allow + * scaling of the SoC core voltage. In order to solve this problem, + * we will create and bind a dummy driver to the offending devices until + * we will have a real driver for them. 
+ */ +static const struct of_device_id host1x_stub_of_matches[] = { + { .compatible = "nvidia,tegra20-mpe", }, + { .compatible = "nvidia,tegra30-mpe", }, + { .compatible = "nvidia,tegra20-epp", }, + { .compatible = "nvidia,tegra30-epp", }, + { .compatible = "nvidia,tegra20-vi", }, + { .compatible = "nvidia,tegra30-vi", }, + { .compatible = "nvidia,tegra20-isp", }, + { .compatible = "nvidia,tegra30-isp", }, + { /* sentinel */ } +}; + +static int host1x_stub_probe(struct platform_device *pdev) +{ + pm_runtime_enable(&pdev->dev); + return 0; +} + +static struct platform_driver tegra_host1x_stub_driver = { + .driver = { + .name = "tegra-host1x-stub", + .of_match_table = host1x_stub_of_matches, + }, + .probe = host1x_stub_probe, +}; + static struct platform_driver * const drivers[] = { + &tegra_host1x_stub_driver, &tegra_host1x_driver, &tegra_mipi_driver, }; From abe121f32d3e0550be09164caf6c045a519fe1d0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 02:48:03 +0300 Subject: [PATCH 702/851] drm/tegra: dc: Support OPP and SoC core voltage scaling Add OPP and SoC core voltage scaling support to the display controller driver. This is required for enabling system-wide DVFS on pre-Tegra186 SoCs. 
Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/dc.c | 69 ++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/tegra/dc.h | 2 ++ 2 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 51bbbc42a144f..fa9cbeb7a9585 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -10,9 +10,12 @@ #include #include #include +#include +#include #include #include +#include #include #include @@ -1727,6 +1730,47 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc, return 0; } +static void tegra_dc_update_voltage_state(struct tegra_dc *dc, + struct tegra_dc_state *state) +{ + unsigned long rate, pstate; + struct dev_pm_opp *opp; + int err; + + if (!dc->has_opp_table) + return; + + /* calculate actual pixel clock rate which depends on internal divider */ + rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2); + + /* find suitable OPP for the rate */ + opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate); + + if (opp == ERR_PTR(-ERANGE)) + opp = dev_pm_opp_find_freq_floor(dc->dev, &rate); + + if (IS_ERR(opp)) { + dev_err(dc->dev, "failed to find OPP for %luHz: %pe\n", + rate, opp); + return; + } + + pstate = dev_pm_opp_get_required_pstate(opp, 0); + dev_pm_opp_put(opp); + + /* + * The minimum core voltage depends on the pixel clock rate (which + * depends on internal clock divider of the CRTC) and not on the + * rate of the display controller clock. This is why we're not using + * dev_pm_opp_set_rate() API and instead controlling the power domain + * directly. 
+ */ + err = dev_pm_genpd_set_performance_state(dc->dev, pstate); + if (err) + dev_err(dc->dev, "failed to set power domain state to %lu: %d\n", + pstate, err); +} + static void tegra_dc_commit_state(struct tegra_dc *dc, struct tegra_dc_state *state) { @@ -1766,6 +1810,8 @@ static void tegra_dc_commit_state(struct tegra_dc *dc, value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1; tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL); } + + tegra_dc_update_voltage_state(dc, state); } static void tegra_dc_stop(struct tegra_dc *dc) @@ -1859,6 +1905,8 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, err = host1x_client_suspend(&dc->client); if (err < 0) dev_err(dc->dev, "failed to suspend: %d\n", err); + + dev_pm_genpd_set_performance_state(dc->dev, 0); } static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, @@ -2622,6 +2670,23 @@ static int tegra_dc_couple(struct tegra_dc *dc) return 0; } +static int tegra_dc_init_opp_table(struct tegra_dc *dc) +{ + struct tegra_core_opp_params opp_params = {}; + int err; + + err = devm_tegra_core_dev_init_opp_table(dc->dev, &opp_params); + if (err && err != -ENODEV) + return err; + + if (err) + dc->has_opp_table = false; + else + dc->has_opp_table = true; + + return 0; +} + static int tegra_dc_probe(struct platform_device *pdev) { u64 dma_mask = dma_get_mask(pdev->dev.parent); @@ -2687,6 +2752,10 @@ static int tegra_dc_probe(struct platform_device *pdev) tegra_powergate_power_off(dc->powergate); } + err = tegra_dc_init_opp_table(dc); + if (err < 0) + return err; + dc->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(dc->regs)) return PTR_ERR(dc->regs); diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 5e13f1cfd749a..88fc9c634c1d5 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -91,6 +91,8 @@ struct tegra_dc { struct drm_info_list *debugfs_files; const struct tegra_dc_soc_info *soc; + + bool has_opp_table; }; static inline struct tegra_dc * 
From d6a7307f60ab5ab4186f16df9d3d14f47e76a89f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 22:14:02 +0300 Subject: [PATCH 703/851] drm/tegra: hdmi: Add OPP support The HDMI on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now HDMI driver must use OPP API for driving the controller's clock rate because OPP API takes care of reconfiguring the domain's performance state in accordance to the clock rate. Add OPP support to the HDMI driver. Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/hdmi.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index e5d2a40260288..5b5d174c5a2b6 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -11,10 +11,13 @@ #include #include #include +#include #include #include #include +#include + #include #include #include @@ -1195,7 +1198,7 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) h_back_porch = mode->htotal - mode->hsync_end; h_front_porch = mode->hsync_start - mode->hdisplay; - err = clk_set_rate(hdmi->clk, hdmi->pixel_clock); + err = dev_pm_opp_set_rate(hdmi->dev, hdmi->pixel_clock); if (err < 0) { dev_err(hdmi->dev, "failed to set HDMI clock frequency: %d\n", err); @@ -1708,6 +1711,10 @@ static int tegra_hdmi_probe(struct platform_device *pdev) hdmi->output.dev = &pdev->dev; + err = devm_tegra_core_dev_init_opp_table_simple(&pdev->dev); + if (err) + return err; + err = tegra_output_probe(&hdmi->output); if (err < 0) return err; From 0168356ead8bb50869217cc9103571a762b15973 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 02:54:13 +0300 Subject: [PATCH 704/851] drm/tegra: gr2d: Support OPP and power management Add OPP and PM support to the GR2D driver. This is required for enabling system-wide DVFS and supporting dynamic power management using a generic power domain. 
Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/gr2d.c | 156 +++++++++++++++++++++++++++++++++-- 1 file changed, 148 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index de288cba39055..099fd7e2a462a 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -7,11 +7,22 @@ #include #include #include +#include +#include +#include + +#include #include "drm.h" #include "gem.h" #include "gr2d.h" +enum { + RST_GR2D_MC, + RST_GR2D, + RST_GR2D_MAX, +}; + struct gr2d_soc { unsigned int version; }; @@ -21,6 +32,9 @@ struct gr2d { struct host1x_channel *channel; struct clk *clk; + struct reset_control_bulk_data resets[RST_GR2D_MAX]; + unsigned int nresets; + const struct gr2d_soc *soc; DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS); @@ -101,16 +115,24 @@ static int gr2d_open_channel(struct tegra_drm_client *client, struct tegra_drm_context *context) { struct gr2d *gr2d = to_gr2d(client); + int err; context->channel = host1x_channel_get(gr2d->channel); if (!context->channel) return -ENOMEM; + err = pm_runtime_resume_and_get(client->base.dev); + if (err) { + host1x_channel_put(context->channel); + return err; + } + return 0; } static void gr2d_close_channel(struct tegra_drm_context *context) { + pm_runtime_put_sync(context->client->base.dev); host1x_channel_put(context->channel); } @@ -190,6 +212,27 @@ static const u32 gr2d_addr_regs[] = { GR2D_VA_BASE_ADDR_SB, }; +static int gr2d_get_resets(struct device *dev, struct gr2d *gr2d) +{ + int err; + + gr2d->resets[RST_GR2D_MC].id = "mc"; + gr2d->resets[RST_GR2D].id = "2d"; + gr2d->nresets = RST_GR2D_MAX; + + err = devm_reset_control_bulk_get_optional_exclusive_released( + dev, gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to get reset: %d\n", err); + return err; + } + + if 
(WARN_ON(!gr2d->resets[RST_GR2D].rstc)) + return -EINVAL; + + return 0; +} + static int gr2d_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -202,6 +245,8 @@ static int gr2d_probe(struct platform_device *pdev) if (!gr2d) return -ENOMEM; + platform_set_drvdata(pdev, gr2d); + gr2d->soc = of_device_get_match_data(dev); syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL); @@ -214,11 +259,13 @@ static int gr2d_probe(struct platform_device *pdev) return PTR_ERR(gr2d->clk); } - err = clk_prepare_enable(gr2d->clk); - if (err) { - dev_err(dev, "cannot turn on clock\n"); + err = devm_tegra_core_dev_init_opp_table_simple(dev); + if (err) + return err; + + err = gr2d_get_resets(dev, gr2d); + if (err) return err; - } INIT_LIST_HEAD(&gr2d->client.base.list); gr2d->client.base.ops = &gr2d_client_ops; @@ -231,20 +278,27 @@ static int gr2d_probe(struct platform_device *pdev) gr2d->client.version = gr2d->soc->version; gr2d->client.ops = &gr2d_ops; + pm_runtime_enable(dev); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 200); + err = host1x_client_register(&gr2d->client.base); if (err < 0) { dev_err(dev, "failed to register host1x client: %d\n", err); - clk_disable_unprepare(gr2d->clk); - return err; + goto disable_rpm; } /* initialize address register map */ for (i = 0; i < ARRAY_SIZE(gr2d_addr_regs); i++) set_bit(gr2d_addr_regs[i], gr2d->addr_regs); - platform_set_drvdata(pdev, gr2d); - return 0; + +disable_rpm: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); + + return err; } static int gr2d_remove(struct platform_device *pdev) @@ -259,15 +313,101 @@ static int gr2d_remove(struct platform_device *pdev) return err; } + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static int __maybe_unused gr2d_runtime_suspend(struct device *dev) +{ + struct gr2d *gr2d = dev_get_drvdata(dev); + int err; + + host1x_channel_stop(gr2d->channel); + 
reset_control_bulk_release(gr2d->nresets, gr2d->resets); + + /* + * GR2D module shouldn't be reset while hardware is idling, otherwise + * host1x's cmdproc will stuck on trying to access any G2 register + * after reset. GR2D module could be either hot-reset or reset after + * power-gating of the HEG partition. Hence we will put in reset only + * the memory client part of the module, the HEG GENPD will take care + * of resetting GR2D module across power-gating. + * + * On Tegra20 there is no HEG partition, but it's okay to have + * undetermined h/w state since userspace is expected to reprogram + * the state on each job submission anyways. + */ + err = reset_control_acquire(gr2d->resets[RST_GR2D_MC].rstc); + if (err) { + dev_err(dev, "failed to acquire MC reset: %d\n", err); + goto acquire_reset; + } + + err = reset_control_assert(gr2d->resets[RST_GR2D_MC].rstc); + reset_control_release(gr2d->resets[RST_GR2D_MC].rstc); + if (err) { + dev_err(dev, "failed to assert MC reset: %d\n", err); + goto acquire_reset; + } + clk_disable_unprepare(gr2d->clk); return 0; + +acquire_reset: + reset_control_bulk_acquire(gr2d->nresets, gr2d->resets); + reset_control_bulk_deassert(gr2d->nresets, gr2d->resets); + + return err; +} + +static int __maybe_unused gr2d_runtime_resume(struct device *dev) +{ + struct gr2d *gr2d = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(gr2d->clk); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + /* this is a reset array which deasserts both 2D MC and 2D itself */ + err = reset_control_bulk_deassert(gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + return 0; + +disable_clk: + clk_disable_unprepare(gr2d->clk); +release_reset: + reset_control_bulk_release(gr2d->nresets, 
gr2d->resets); + + return err; } +static const struct dev_pm_ops tegra_gr2d_pm = { + SET_RUNTIME_PM_OPS(gr2d_runtime_suspend, gr2d_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + struct platform_driver tegra_gr2d_driver = { .driver = { .name = "tegra-gr2d", .of_match_table = gr2d_match, + .pm = &tegra_gr2d_pm, }, .probe = gr2d_probe, .remove = gr2d_remove, From 4c2be35e8fe6fc02e7abf237774b489c2bf3ce72 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 02:54:31 +0300 Subject: [PATCH 705/851] drm/tegra: gr3d: Support OPP and power management Add OPP and add PM support to the GR3D driver. This is required for enabling system-wide DVFS and supporting dynamic power management using a generic power domain. Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/gr3d.c | 401 ++++++++++++++++++++++++++++++----- 1 file changed, 347 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 24442ade0da35..b450f1299c3a9 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -5,32 +5,48 @@ */ #include +#include #include #include #include #include #include +#include +#include +#include #include +#include #include #include "drm.h" #include "gem.h" #include "gr3d.h" +enum { + RST_GR3D_MC, + RST_GR3D, + RST_GR3D2_MC, + RST_GR3D2, + RST_GR3D_MAX, +}; + struct gr3d_soc { unsigned int version; + unsigned int num_clocks; + unsigned int num_resets; }; struct gr3d { struct tegra_drm_client client; struct host1x_channel *channel; - struct clk *clk_secondary; - struct clk *clk; - struct reset_control *rst_secondary; - struct reset_control *rst; const struct gr3d_soc *soc; + struct clk_bulk_data *clocks; + unsigned int nclocks; + struct reset_control_bulk_data resets[RST_GR3D_MAX]; + 
unsigned int nresets; + bool legacy_pd; DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS); }; @@ -109,16 +125,24 @@ static int gr3d_open_channel(struct tegra_drm_client *client, struct tegra_drm_context *context) { struct gr3d *gr3d = to_gr3d(client); + int err; context->channel = host1x_channel_get(gr3d->channel); if (!context->channel) return -ENOMEM; + err = pm_runtime_resume_and_get(client->base.dev); + if (err) { + host1x_channel_put(context->channel); + return err; + } + return 0; } static void gr3d_close_channel(struct tegra_drm_context *context) { + pm_runtime_put_sync(context->client->base.dev); host1x_channel_put(context->channel); } @@ -155,14 +179,20 @@ static const struct tegra_drm_client_ops gr3d_ops = { static const struct gr3d_soc tegra20_gr3d_soc = { .version = 0x20, + .num_clocks = 1, + .num_resets = 2, }; static const struct gr3d_soc tegra30_gr3d_soc = { .version = 0x30, + .num_clocks = 2, + .num_resets = 4, }; static const struct gr3d_soc tegra114_gr3d_soc = { .version = 0x35, + .num_clocks = 1, + .num_resets = 2, }; static const struct of_device_id tegra_gr3d_match[] = { @@ -278,9 +308,138 @@ static const u32 gr3d_addr_regs[] = { GR3D_GLOBAL_SAMP23SURFADDR(15), }; +static int gr3d_link_power_domain(struct device *dev, struct device *pd_dev) +{ + const u32 link_flags = DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME; + struct device_link *link; + int err; + + link = device_link_add(dev, pd_dev, link_flags); + if (!link) { + dev_err(dev, "failed to link to %s\n", dev_name(pd_dev)); + return -EINVAL; + } + + err = devm_add_action_or_reset(dev, (void *)device_link_del, link); + if (err) + return err; + + return 0; +} + +static int gr3d_init_power(struct device *dev, struct gr3d *gr3d) +{ + const char *opp_genpd_names[] = { "3d0", "3d1", NULL }; + struct device **opp_virt_dev; + struct device *pd_dev; + unsigned int i; + int err; + + err = of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells"); + if (err < 0) { + if (err != -ENOENT) + 
return err; + + /* + * Older device-trees don't use GENPD. In this case we should + * toggle power domain manually. + */ + gr3d->legacy_pd = true; + return 0; + } + + /* + * The PM domain core automatically attaches a single power domain, + * otherwise it skips attaching completely. We have a single domain + * on Tegra20 and two domains on Tegra30+. + */ + if (dev->pm_domain) + return 0; + + err = devm_pm_opp_attach_genpd(dev, opp_genpd_names, &opp_virt_dev); + if (err) + return err; + + for (i = 0; opp_genpd_names[i]; i++) { + pd_dev = opp_virt_dev[i]; + if (!pd_dev) { + dev_err(dev, "failed to get %s power domain\n", + opp_genpd_names[i]); + return -EINVAL; + } + + err = gr3d_link_power_domain(dev, pd_dev); + if (err) + return err; + } + + return 0; +} + +static int gr3d_set_opp(struct dev_pm_set_opp_data *data) +{ + struct gr3d *gr3d = dev_get_drvdata(data->dev); + unsigned int i; + int err; + + for (i = 0; i < gr3d->nclocks; i++) { + err = clk_set_rate(gr3d->clocks[i].clk, data->new_opp.rate); + if (err) { + dev_err(data->dev, "failed to set %s rate to %lu: %d\n", + gr3d->clocks[i].id, data->new_opp.rate, err); + return err; + } + } + + return 0; +} + +static int gr3d_get_clocks(struct device *dev, struct gr3d *gr3d) +{ + int err; + + err = devm_clk_bulk_get_all(dev, &gr3d->clocks); + if (err < 0) { + dev_err(dev, "failed to get clock: %d\n", err); + return err; + } + gr3d->nclocks = err; + + if (gr3d->nclocks != gr3d->soc->num_clocks) { + dev_err(dev, "invalid number of clocks: %u\n", gr3d->nclocks); + return -EINVAL; + } + + return 0; +} + +static int gr3d_get_resets(struct device *dev, struct gr3d *gr3d) +{ + int err; + + gr3d->nresets = gr3d->soc->num_resets; + gr3d->resets[RST_GR3D2_MC].id = "mc2"; + gr3d->resets[RST_GR3D_MC].id = "mc"; + gr3d->resets[RST_GR3D2].id = "3d2"; + gr3d->resets[RST_GR3D].id = "3d"; + + err = devm_reset_control_bulk_get_optional_exclusive_released( + dev, gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to get 
reset: %d\n", err); + return err; + } + + if (WARN_ON(!gr3d->resets[RST_GR3D].rstc) || + WARN_ON(!gr3d->resets[RST_GR3D2].rstc && gr3d->nresets == 4)) + return -EINVAL; + + return 0; +} + static int gr3d_probe(struct platform_device *pdev) { - struct device_node *np = pdev->dev.of_node; struct host1x_syncpt **syncpts; struct gr3d *gr3d; unsigned int i; @@ -290,56 +449,33 @@ static int gr3d_probe(struct platform_device *pdev) if (!gr3d) return -ENOMEM; + platform_set_drvdata(pdev, gr3d); + gr3d->soc = of_device_get_match_data(&pdev->dev); syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL); if (!syncpts) return -ENOMEM; - gr3d->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(gr3d->clk)) { - dev_err(&pdev->dev, "cannot get clock\n"); - return PTR_ERR(gr3d->clk); - } - - gr3d->rst = devm_reset_control_get(&pdev->dev, "3d"); - if (IS_ERR(gr3d->rst)) { - dev_err(&pdev->dev, "cannot get reset\n"); - return PTR_ERR(gr3d->rst); - } + err = gr3d_get_clocks(&pdev->dev, gr3d); + if (err) + return err; - if (of_device_is_compatible(np, "nvidia,tegra30-gr3d")) { - gr3d->clk_secondary = devm_clk_get(&pdev->dev, "3d2"); - if (IS_ERR(gr3d->clk_secondary)) { - dev_err(&pdev->dev, "cannot get secondary clock\n"); - return PTR_ERR(gr3d->clk_secondary); - } + err = gr3d_get_resets(&pdev->dev, gr3d); + if (err) + return err; - gr3d->rst_secondary = devm_reset_control_get(&pdev->dev, - "3d2"); - if (IS_ERR(gr3d->rst_secondary)) { - dev_err(&pdev->dev, "cannot get secondary reset\n"); - return PTR_ERR(gr3d->rst_secondary); - } - } + err = gr3d_init_power(&pdev->dev, gr3d); + if (err) + return err; - err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D, gr3d->clk, - gr3d->rst); - if (err < 0) { - dev_err(&pdev->dev, "failed to power up 3D unit\n"); + err = devm_pm_opp_register_set_opp_helper(&pdev->dev, gr3d_set_opp); + if (err) return err; - } - if (gr3d->clk_secondary) { - err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D1, - gr3d->clk_secondary, - 
gr3d->rst_secondary); - if (err < 0) { - dev_err(&pdev->dev, - "failed to power up secondary 3D unit\n"); - return err; - } - } + err = devm_tegra_core_dev_init_opp_table_simple(&pdev->dev); + if (err) + return err; INIT_LIST_HEAD(&gr3d->client.base.list); gr3d->client.base.ops = &gr3d_client_ops; @@ -352,20 +488,28 @@ static int gr3d_probe(struct platform_device *pdev) gr3d->client.version = gr3d->soc->version; gr3d->client.ops = &gr3d_ops; + pm_runtime_enable(&pdev->dev); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, 200); + err = host1x_client_register(&gr3d->client.base); if (err < 0) { dev_err(&pdev->dev, "failed to register host1x client: %d\n", err); - return err; + goto disable_rpm; } /* initialize address register map */ for (i = 0; i < ARRAY_SIZE(gr3d_addr_regs); i++) set_bit(gr3d_addr_regs[i], gr3d->addr_regs); - platform_set_drvdata(pdev, gr3d); - return 0; + +disable_rpm: + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return err; } static int gr3d_remove(struct platform_device *pdev) @@ -380,23 +524,172 @@ static int gr3d_remove(struct platform_device *pdev) return err; } - if (gr3d->clk_secondary) { - reset_control_assert(gr3d->rst_secondary); - tegra_powergate_power_off(TEGRA_POWERGATE_3D1); - clk_disable_unprepare(gr3d->clk_secondary); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static int gr3d_legacy_domain_power_up(struct device *dev, const char *name, + unsigned int id) +{ + struct gr3d *gr3d = dev_get_drvdata(dev); + struct reset_control *reset; + struct clk *clk; + unsigned int i; + int err; + + /* + * Tegra20 device-tree doesn't specify 3d clock name and there is only + * one clock for Tegra20. Tegra30+ device-trees always specified names + * for the clocks. 
+ */ + if (gr3d->nclocks == 1) { + if (id == TEGRA_POWERGATE_3D1) + return 0; + + clk = gr3d->clocks[0].clk; + } else { + for (i = 0; i < gr3d->nclocks; i++) { + if (WARN_ON_ONCE(!gr3d->clocks[i].id)) + continue; + + if (!strcmp(gr3d->clocks[i].id, name)) { + clk = gr3d->clocks[i].clk; + break; + } + } + + if (WARN_ON_ONCE(i == gr3d->nclocks)) + return -EINVAL; + } + + /* + * We use array of resets, which includes MC resets, and MC + * reset shouldn't be asserted while hardware is gated because + * MC flushing will fail for gated hardware. Hence for legacy + * PD we request the individual reset separately. + */ + reset = reset_control_get_exclusive_released(dev, name); + if (IS_ERR(reset)) + return PTR_ERR(reset); + + err = reset_control_acquire(reset); + if (err) { + dev_err(dev, "failed to acquire %s reset: %d\n", name, err); + } else { + err = tegra_powergate_sequence_power_up(id, clk, reset); + reset_control_release(reset); } - reset_control_assert(gr3d->rst); - tegra_powergate_power_off(TEGRA_POWERGATE_3D); - clk_disable_unprepare(gr3d->clk); + reset_control_put(reset); + if (err) + return err; + + /* + * tegra_powergate_sequence_power_up() leaves clocks enabled + * while GENPD not, hence keep clock-enable balanced. 
+ */ + clk_disable_unprepare(clk); return 0; } +static int gr3d_legacy_power_up(struct device *dev) +{ + struct gr3d *gr3d = dev_get_drvdata(dev); + int err; + + if (gr3d->legacy_pd) { + err = gr3d_legacy_domain_power_up(dev, "3d", + TEGRA_POWERGATE_3D); + if (err) + return err; + + err = gr3d_legacy_domain_power_up(dev, "3d2", + TEGRA_POWERGATE_3D1); + if (err) + return err; + } + + return 0; +} + +static int __maybe_unused gr3d_runtime_suspend(struct device *dev) +{ + struct gr3d *gr3d = dev_get_drvdata(dev); + int err; + + host1x_channel_stop(gr3d->channel); + + err = reset_control_bulk_assert(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + + usleep_range(10, 20); + + /* + * Older device-trees don't specify MC resets and power-gating can't + * be done safely in that case. Hence we will keep the power ungated + * for older DTBs. For newer DTBs, GENPD will perform the power-gating. + */ + + clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks); + reset_control_bulk_release(gr3d->nresets, gr3d->resets); + + return 0; +} + +static int __maybe_unused gr3d_runtime_resume(struct device *dev) +{ + struct gr3d *gr3d = dev_get_drvdata(dev); + int err; + + err = gr3d_legacy_power_up(dev); + if (err) + return err; + + err = reset_control_bulk_acquire(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_bulk_prepare_enable(gr3d->nclocks, gr3d->clocks); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + err = reset_control_bulk_deassert(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + return 0; + +disable_clk: + clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks); +release_reset: + reset_control_bulk_release(gr3d->nresets, gr3d->resets); + + return err; +} + +static const struct dev_pm_ops tegra_gr3d_pm = { + 
SET_RUNTIME_PM_OPS(gr3d_runtime_suspend, gr3d_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + struct platform_driver tegra_gr3d_driver = { .driver = { .name = "tegra-gr3d", .of_match_table = tegra_gr3d_match, + .pm = &tegra_gr3d_pm, }, .probe = gr3d_probe, .remove = gr3d_remove, From f210a67fd15da90bd5e7ed2aafa96038fe37cfa0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 3 Dec 2020 02:51:31 +0300 Subject: [PATCH 706/851] drm/tegra: vic: Stop channel before suspending Host1x channel should be idling before hardware is turned off, hence stop the channel in the suspend callback. Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/vic.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index c02010ff2b7f2..9e4c014819bf7 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -515,8 +515,23 @@ static int vic_remove(struct platform_device *pdev) return 0; } +static __maybe_unused int vic_suspend(struct device *dev) +{ + struct vic *vic = dev_get_drvdata(dev); + int err; + + host1x_channel_stop(vic->channel); + + err = pm_runtime_force_suspend(dev); + if (err < 0) + return err; + + return 0; +} + static const struct dev_pm_ops vic_pm_ops = { SET_RUNTIME_PM_OPS(vic_runtime_suspend, vic_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(vic_suspend, pm_runtime_force_resume) }; struct platform_driver tegra_vic_driver = { From 3f0c1671143d4c9570c24728b0004f3a383fc98b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 20:56:50 +0300 Subject: [PATCH 707/851] usb: chipidea: tegra: Add runtime PM support The Tegra USB controller belongs to the core power domain and we're going to enable GENPD support for the core domain. 
Now USB controller must be resumed using runtime PM API in order to initialize the USB power state. We already support runtime PM for the CI device, but CI's PM is separated from the RPM managed by tegra-usb driver. Add runtime PM support to the main USB driver. Signed-off-by: Dmitry Osipenko --- drivers/usb/chipidea/ci_hdrc_tegra.c | 50 +++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_tegra.c b/drivers/usb/chipidea/ci_hdrc_tegra.c index 60361141ac04f..456628d72706a 100644 --- a/drivers/usb/chipidea/ci_hdrc_tegra.c +++ b/drivers/usb/chipidea/ci_hdrc_tegra.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -278,6 +279,8 @@ static int tegra_usb_probe(struct platform_device *pdev) if (!usb) return -ENOMEM; + platform_set_drvdata(pdev, usb); + soc = of_device_get_match_data(&pdev->dev); if (!soc) { dev_err(&pdev->dev, "failed to match OF data\n"); @@ -296,11 +299,10 @@ static int tegra_usb_probe(struct platform_device *pdev) return err; } - err = clk_prepare_enable(usb->clk); - if (err < 0) { - dev_err(&pdev->dev, "failed to enable clock: %d\n", err); - return err; - } + pm_runtime_enable(&pdev->dev); + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + goto disable_pm; if (device_property_present(&pdev->dev, "nvidia,needs-double-reset")) usb->needs_double_reset = true; @@ -320,8 +322,6 @@ static int tegra_usb_probe(struct platform_device *pdev) if (err) goto fail_power_off; - platform_set_drvdata(pdev, usb); - /* setup and register ChipIdea HDRC device */ usb->soc = soc; usb->data.name = "tegra-usb"; @@ -350,7 +350,10 @@ static int tegra_usb_probe(struct platform_device *pdev) phy_shutdown: usb_phy_shutdown(usb->phy); fail_power_off: - clk_disable_unprepare(usb->clk); + pm_runtime_put(&pdev->dev); +disable_pm: + pm_runtime_disable(&pdev->dev); + return err; } @@ -360,15 +363,46 @@ static int tegra_usb_remove(struct platform_device *pdev) 
ci_hdrc_remove_device(usb->dev); usb_phy_shutdown(usb->phy); + + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static int __maybe_unused tegra_usb_runtime_resume(struct device *dev) +{ + struct tegra_usb *usb = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(usb->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + return err; + } + + return 0; +} + +static int __maybe_unused tegra_usb_runtime_suspend(struct device *dev) +{ + struct tegra_usb *usb = dev_get_drvdata(dev); + clk_disable_unprepare(usb->clk); return 0; } +static const struct dev_pm_ops tegra_usb_pm = { + SET_RUNTIME_PM_OPS(tegra_usb_runtime_suspend, tegra_usb_runtime_resume, + NULL) +}; + static struct platform_driver tegra_usb_driver = { .driver = { .name = "tegra-usb", .of_match_table = tegra_usb_of_match, + .pm = &tegra_usb_pm, }, .probe = tegra_usb_probe, .remove = tegra_usb_remove, From 604c4922127edb5efbe551ee098d967ed17663ba Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:00:06 +0300 Subject: [PATCH 708/851] bus: tegra-gmi: Add runtime PM support The GMI bus on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now GMI must be resumed using runtime PM API in order to initialize the GMI power state. Add runtime PM support to the GMI driver. 
Signed-off-by: Dmitry Osipenko --- drivers/bus/tegra-gmi.c | 44 +++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/bus/tegra-gmi.c b/drivers/bus/tegra-gmi.c index a6570789f7afe..c3ff84e05de9f 100644 --- a/drivers/bus/tegra-gmi.c +++ b/drivers/bus/tegra-gmi.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #define TEGRA_GMI_CONFIG 0x00 @@ -54,9 +55,10 @@ static int tegra_gmi_enable(struct tegra_gmi *gmi) { int err; - err = clk_prepare_enable(gmi->clk); - if (err < 0) { - dev_err(gmi->dev, "failed to enable clock: %d\n", err); + pm_runtime_enable(gmi->dev); + err = pm_runtime_resume_and_get(gmi->dev); + if (err) { + pm_runtime_disable(gmi->dev); return err; } @@ -83,7 +85,9 @@ static void tegra_gmi_disable(struct tegra_gmi *gmi) writel(config, gmi->base + TEGRA_GMI_CONFIG); reset_control_assert(gmi->rst); - clk_disable_unprepare(gmi->clk); + + pm_runtime_put(gmi->dev); + pm_runtime_disable(gmi->dev); } static int tegra_gmi_parse_dt(struct tegra_gmi *gmi) @@ -213,6 +217,7 @@ static int tegra_gmi_probe(struct platform_device *pdev) if (!gmi) return -ENOMEM; + platform_set_drvdata(pdev, gmi); gmi->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -247,8 +252,6 @@ static int tegra_gmi_probe(struct platform_device *pdev) return err; } - platform_set_drvdata(pdev, gmi); - return 0; } @@ -262,6 +265,34 @@ static int tegra_gmi_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused tegra_gmi_runtime_resume(struct device *dev) +{ + struct tegra_gmi *gmi = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(gmi->clk); + if (err < 0) { + dev_err(gmi->dev, "failed to enable clock: %d\n", err); + return err; + } + + return 0; +} + +static int __maybe_unused tegra_gmi_runtime_suspend(struct device *dev) +{ + struct tegra_gmi *gmi = dev_get_drvdata(dev); + + clk_disable_unprepare(gmi->clk); + + return 0; +} + +static const struct dev_pm_ops tegra_gmi_pm = { + 
SET_RUNTIME_PM_OPS(tegra_gmi_runtime_suspend, tegra_gmi_runtime_resume, + NULL) +}; + static const struct of_device_id tegra_gmi_id_table[] = { { .compatible = "nvidia,tegra20-gmi", }, { .compatible = "nvidia,tegra30-gmi", }, @@ -275,6 +306,7 @@ static struct platform_driver tegra_gmi_driver = { .driver = { .name = "tegra-gmi", .of_match_table = tegra_gmi_id_table, + .pm = &tegra_gmi_pm, }, }; module_platform_driver(tegra_gmi_driver); From 7373c4b45f0fa905b360d7a3d3b3baf1d3768684 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:08:35 +0300 Subject: [PATCH 709/851] pwm: tegra: Add runtime PM and OPP support The PWM on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now PWM must be resumed using runtime PM API in order to initialize the PWM power state. The PWM clock rate must be changed using OPP API that will reconfigure the power domain performance state in accordance to the rate. Add runtime PM and OPP support to the PWM driver. Signed-off-by: Dmitry Osipenko --- drivers/pwm/pwm-tegra.c | 104 ++++++++++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 19 deletions(-) diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index 11a10b575ace9..ea124d40f2729 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -42,12 +42,16 @@ #include #include #include +#include #include #include #include +#include #include #include +#include + #define PWM_ENABLE (1 << 31) #define PWM_DUTY_WIDTH 8 #define PWM_DUTY_SHIFT 16 @@ -145,12 +149,25 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, required_clk_rate = (NSEC_PER_SEC / period_ns) << PWM_DUTY_WIDTH; - err = clk_set_rate(pc->clk, required_clk_rate); - if (err < 0) + /* + * GENPD performance state should be changed only for + * the resumed device because GENPD core tracks performance + * state and drops/restores the state on RPM suspend/resume. 
+ */ + err = pm_runtime_resume_and_get(pc->dev); + if (err) + return err; + + err = dev_pm_opp_set_rate(pc->dev, required_clk_rate); + if (err < 0) { + pm_runtime_put(pc->dev); return -EINVAL; + } /* Store the new rate for further references */ pc->clk_rate = clk_get_rate(pc->clk); + + pm_runtime_put(pc->dev); } rate = pc->clk_rate >> PWM_DUTY_WIDTH; @@ -181,8 +198,8 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * before writing the register. Otherwise, keep it enabled. */ if (!pwm_is_enabled(pwm)) { - err = clk_prepare_enable(pc->clk); - if (err < 0) + err = pm_runtime_resume_and_get(pc->dev); + if (err) return err; } else val |= PWM_ENABLE; @@ -193,7 +210,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * If the PWM is not enabled, turn the clock off again to save power. */ if (!pwm_is_enabled(pwm)) - clk_disable_unprepare(pc->clk); + pm_runtime_put(pc->dev); return 0; } @@ -204,8 +221,8 @@ static int tegra_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) int rc = 0; u32 val; - rc = clk_prepare_enable(pc->clk); - if (rc < 0) + rc = pm_runtime_resume_and_get(pc->dev); + if (rc) return rc; val = pwm_readl(pc, pwm->hwpwm); @@ -224,7 +241,7 @@ static void tegra_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) val &= ~PWM_ENABLE; pwm_writel(pc, pwm->hwpwm, val); - clk_disable_unprepare(pc->clk); + pm_runtime_put(pc->dev); } static const struct pwm_ops tegra_pwm_ops = { @@ -256,11 +273,21 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->clk)) return PTR_ERR(pwm->clk); + ret = devm_tegra_core_dev_init_opp_table_simple(&pdev->dev); + if (ret) + return ret; + + pm_runtime_enable(&pdev->dev); + + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret) + goto disable_pm; + /* Set maximum frequency of the IP */ - ret = clk_set_rate(pwm->clk, pwm->soc->max_frequency); + ret = dev_pm_opp_set_rate(pwm->dev, pwm->soc->max_frequency); if (ret < 0) { dev_err(&pdev->dev, 
"Failed to set max frequency: %d\n", ret); - return ret; + goto put_pm; } /* @@ -278,7 +305,7 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->rst)) { ret = PTR_ERR(pwm->rst); dev_err(&pdev->dev, "Reset control is not found: %d\n", ret); - return ret; + goto put_pm; } reset_control_deassert(pwm->rst); @@ -291,34 +318,70 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); reset_control_assert(pwm->rst); - return ret; + goto put_pm; } + pm_runtime_put(&pdev->dev); + return 0; +put_pm: + pm_runtime_put(&pdev->dev); +disable_pm: + pm_runtime_disable(&pdev->dev); + return ret; } static int tegra_pwm_remove(struct platform_device *pdev) { struct tegra_pwm_chip *pc = platform_get_drvdata(pdev); + int err; pwmchip_remove(&pc->chip); + err = pm_runtime_resume_and_get(pc->dev); + if (err) + return err; + reset_control_assert(pc->rst); + pm_runtime_put(pc->dev); + pm_runtime_disable(pc->dev); return 0; } -#ifdef CONFIG_PM_SLEEP -static int tegra_pwm_suspend(struct device *dev) +static int __maybe_unused tegra_pwm_runtime_suspend(struct device *dev) { - return pinctrl_pm_select_sleep_state(dev); + struct tegra_pwm_chip *pc = dev_get_drvdata(dev); + int err; + + clk_disable_unprepare(pc->clk); + + err = pinctrl_pm_select_sleep_state(dev); + if (err) { + clk_prepare_enable(pc->clk); + return err; + } + + return 0; } -static int tegra_pwm_resume(struct device *dev) +static int __maybe_unused tegra_pwm_runtime_resume(struct device *dev) { - return pinctrl_pm_select_default_state(dev); + struct tegra_pwm_chip *pc = dev_get_drvdata(dev); + int err; + + err = pinctrl_pm_select_default_state(dev); + if (err) + return err; + + err = clk_prepare_enable(pc->clk); + if (err) { + pinctrl_pm_select_sleep_state(dev); + return err; + } + + return 0; } -#endif static const struct tegra_pwm_soc tegra20_pwm_soc = { .num_channels = 4, @@ -344,7 +407,10 @@ static const struct of_device_id 
tegra_pwm_of_match[] = { MODULE_DEVICE_TABLE(of, tegra_pwm_of_match); static const struct dev_pm_ops tegra_pwm_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(tegra_pwm_suspend, tegra_pwm_resume) + SET_RUNTIME_PM_OPS(tegra_pwm_runtime_suspend, tegra_pwm_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver tegra_pwm_driver = { From 46009be437b820c9eb73919c684c4eeff427b584 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:18:09 +0300 Subject: [PATCH 710/851] mmc: sdhci-tegra: Add runtime PM and OPP support The SDHCI on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now SDHCI must be resumed using runtime PM API in order to initialize the SDHCI power state. The SDHCI clock rate must be changed using OPP API that will reconfigure the power domain performance state in accordance to the rate. Add runtime PM and OPP support to the SDHCI driver. Signed-off-by: Dmitry Osipenko --- drivers/mmc/host/sdhci-tegra.c | 136 +++++++++++++++++++++++---------- 1 file changed, 95 insertions(+), 41 deletions(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 387ce9cdbd7cc..e0747bebfcde4 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -24,6 +26,8 @@ #include #include +#include + #include "sdhci-pltfm.h" #include "cqhci.h" @@ -123,6 +127,12 @@ SDHCI_TRNS_BLK_CNT_EN | \ SDHCI_TRNS_DMA) +enum { + TEGRA_CLK_BULK_SDHCI, + TEGRA_CLK_BULK_TMCLK, + TEGRA_CLK_BULK_NUM, +}; + struct sdhci_tegra_soc_data { const struct sdhci_pltfm_data *pdata; u64 dma_mask; @@ -171,6 +181,8 @@ struct sdhci_tegra { bool enable_hwcq; unsigned long curr_clk_rate; u8 tuned_tap_delay; + + struct clk_bulk_data clocks[TEGRA_CLK_BULK_NUM]; }; static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg) @@ -758,10 
+770,15 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host); + struct device *dev = mmc_dev(host->mmc); unsigned long host_clk; + int err; - if (!clock) - return sdhci_set_clock(host, clock); + if (!clock) { + sdhci_set_clock(host, clock); + dev_pm_opp_set_rate(dev, clock); + return; + } /* * In DDR50/52 modes the Tegra SDHCI controllers require the SDHCI @@ -776,7 +793,12 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) * from clk_get_rate() is used. */ host_clk = tegra_host->ddr_signaling ? clock * 2 : clock; - clk_set_rate(pltfm_host->clk, host_clk); + + err = dev_pm_opp_set_rate(dev, host_clk); + if (err) + dev_err(dev, "failed to set clk rate to %luHz: %d\n", + host_clk, err); + tegra_host->curr_clk_rate = host_clk; if (tegra_host->ddr_signaling) host->max_clk = host_clk; @@ -1653,6 +1675,16 @@ static int sdhci_tegra_probe(struct platform_device *pdev) goto err_power_req; } + tegra_host->clocks[TEGRA_CLK_BULK_SDHCI].id = NULL; + tegra_host->clocks[TEGRA_CLK_BULK_TMCLK].id = "tmclk"; + + rc = devm_clk_bulk_get_optional(&pdev->dev, TEGRA_CLK_BULK_NUM, + tegra_host->clocks); + if (rc) { + dev_err_probe(&pdev->dev, rc, "failed to get clock\n"); + goto err_power_req; + } + /* * Tegra210 has a separate SDMMC_LEGACY_TM clock used for host * timeout clock and SW can choose TMCLK or SDCLK for hardware @@ -1669,34 +1701,19 @@ static int sdhci_tegra_probe(struct platform_device *pdev) */ if (soc_data->nvquirks & NVQUIRK_HAS_TMCLK) { - clk = devm_clk_get(&pdev->dev, "tmclk"); - if (IS_ERR(clk)) { - rc = PTR_ERR(clk); - if (rc == -EPROBE_DEFER) - goto err_power_req; - - dev_warn(&pdev->dev, "failed to get tmclk: %d\n", rc); - clk = NULL; - } + clk = tegra_host->clocks[TEGRA_CLK_BULK_TMCLK].clk; + if (!clk) + dev_warn(&pdev->dev, "failed to get tmclk"); clk_set_rate(clk, 12000000); - rc = 
clk_prepare_enable(clk); - if (rc) { - dev_err(&pdev->dev, - "failed to enable tmclk: %d\n", rc); - goto err_power_req; - } - - tegra_host->tmclk = clk; } - clk = devm_clk_get(mmc_dev(host->mmc), NULL); - if (IS_ERR(clk)) { - rc = dev_err_probe(&pdev->dev, PTR_ERR(clk), - "failed to get clock\n"); - goto err_clk_get; + clk = tegra_host->clocks[TEGRA_CLK_BULK_SDHCI].clk; + if (!clk) { + dev_err(&pdev->dev, "failed to get sdhci clock\n"); + rc = -ENOENT; + goto err_power_req; } - clk_prepare_enable(clk); pltfm_host->clk = clk; tegra_host->rst = devm_reset_control_get_exclusive(&pdev->dev, @@ -1704,9 +1721,18 @@ static int sdhci_tegra_probe(struct platform_device *pdev) if (IS_ERR(tegra_host->rst)) { rc = PTR_ERR(tegra_host->rst); dev_err(&pdev->dev, "failed to get reset control: %d\n", rc); - goto err_rst_get; + goto err_power_req; } + rc = devm_tegra_core_dev_init_opp_table_simple(&pdev->dev); + if (rc) + goto err_power_req; + + pm_runtime_enable(&pdev->dev); + rc = pm_runtime_resume_and_get(&pdev->dev); + if (rc) + goto err_pm_get; + rc = reset_control_assert(tegra_host->rst); if (rc) goto err_rst_get; @@ -1728,9 +1754,9 @@ static int sdhci_tegra_probe(struct platform_device *pdev) err_add_host: reset_control_assert(tegra_host->rst); err_rst_get: - clk_disable_unprepare(pltfm_host->clk); -err_clk_get: - clk_disable_unprepare(tegra_host->tmclk); + pm_runtime_put(&pdev->dev); +err_pm_get: + pm_runtime_disable(&pdev->dev); err_power_req: err_parse_dt: sdhci_pltfm_free(pdev); @@ -1747,19 +1773,39 @@ static int sdhci_tegra_remove(struct platform_device *pdev) reset_control_assert(tegra_host->rst); usleep_range(2000, 4000); - clk_disable_unprepare(pltfm_host->clk); - clk_disable_unprepare(tegra_host->tmclk); + + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); sdhci_pltfm_free(pdev); return 0; } -#ifdef CONFIG_PM_SLEEP -static int __maybe_unused sdhci_tegra_suspend(struct device *dev) +static int __maybe_unused sdhci_tegra_runtime_suspend(struct device *dev) 
{ struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host); + + clk_bulk_disable_unprepare(TEGRA_CLK_BULK_NUM, tegra_host->clocks); + + return 0; +} + +static int __maybe_unused sdhci_tegra_runtime_resume(struct device *dev) +{ + struct sdhci_host *host = dev_get_drvdata(dev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host); + + return clk_bulk_prepare_enable(TEGRA_CLK_BULK_NUM, tegra_host->clocks); +} + +#ifdef CONFIG_PM_SLEEP +static int sdhci_tegra_suspend(struct device *dev) +{ + struct sdhci_host *host = dev_get_drvdata(dev); int ret; if (host->mmc->caps2 & MMC_CAP2_CQE) { @@ -1774,17 +1820,22 @@ static int __maybe_unused sdhci_tegra_suspend(struct device *dev) return ret; } - clk_disable_unprepare(pltfm_host->clk); + ret = pm_runtime_force_suspend(dev); + if (ret) { + sdhci_resume_host(host); + cqhci_resume(host->mmc); + return ret; + } + return 0; } -static int __maybe_unused sdhci_tegra_resume(struct device *dev) +static int sdhci_tegra_resume(struct device *dev) { struct sdhci_host *host = dev_get_drvdata(dev); - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); int ret; - ret = clk_prepare_enable(pltfm_host->clk); + ret = pm_runtime_force_resume(dev); if (ret) return ret; @@ -1803,13 +1854,16 @@ static int __maybe_unused sdhci_tegra_resume(struct device *dev) suspend_host: sdhci_suspend_host(host); disable_clk: - clk_disable_unprepare(pltfm_host->clk); + pm_runtime_force_suspend(dev); return ret; } #endif -static SIMPLE_DEV_PM_OPS(sdhci_tegra_dev_pm_ops, sdhci_tegra_suspend, - sdhci_tegra_resume); +static const struct dev_pm_ops sdhci_tegra_dev_pm_ops = { + SET_RUNTIME_PM_OPS(sdhci_tegra_runtime_suspend, sdhci_tegra_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(sdhci_tegra_suspend, sdhci_tegra_resume) +}; static struct platform_driver sdhci_tegra_driver = { 
.driver = { From 313409e45158995482b7e929725d05f8ed492595 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:35:11 +0300 Subject: [PATCH 711/851] mtd: rawnand: tegra: Add runtime PM support The NAND on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now NAND must be resumed using runtime PM API in order to initialize the NAND power state. Add runtime PM support to the NAND driver. Signed-off-by: Dmitry Osipenko --- drivers/mtd/nand/raw/tegra_nand.c | 44 ++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c index fbf67722a049c..044c7143af7af 100644 --- a/drivers/mtd/nand/raw/tegra_nand.c +++ b/drivers/mtd/nand/raw/tegra_nand.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #define COMMAND 0x00 @@ -1152,6 +1153,7 @@ static int tegra_nand_probe(struct platform_device *pdev) return -ENOMEM; ctrl->dev = &pdev->dev; + platform_set_drvdata(pdev, ctrl); nand_controller_init(&ctrl->controller); ctrl->controller.ops = &tegra_nand_controller_ops; @@ -1168,14 +1170,15 @@ static int tegra_nand_probe(struct platform_device *pdev) if (IS_ERR(ctrl->clk)) return PTR_ERR(ctrl->clk); - err = clk_prepare_enable(ctrl->clk); + pm_runtime_enable(ctrl->dev); + err = pm_runtime_resume_and_get(ctrl->dev); if (err) - return err; + goto err_disable_pm; err = reset_control_reset(rst); if (err) { dev_err(ctrl->dev, "Failed to reset HW: %d\n", err); - goto err_disable_clk; + goto err_put_pm; } writel_relaxed(HWSTATUS_CMD_DEFAULT, ctrl->regs + HWSTATUS_CMD); @@ -1190,21 +1193,21 @@ static int tegra_nand_probe(struct platform_device *pdev) dev_name(&pdev->dev), ctrl); if (err) { dev_err(ctrl->dev, "Failed to get IRQ: %d\n", err); - goto err_disable_clk; + goto err_put_pm; } writel_relaxed(DMA_MST_CTRL_IS_DONE, ctrl->regs + DMA_MST_CTRL); err = tegra_nand_chips_init(ctrl->dev, ctrl); if (err) - goto 
err_disable_clk; - - platform_set_drvdata(pdev, ctrl); + goto err_put_pm; return 0; -err_disable_clk: - clk_disable_unprepare(ctrl->clk); +err_put_pm: + pm_runtime_put(ctrl->dev); +err_disable_pm: + pm_runtime_disable(ctrl->dev); return err; } @@ -1221,11 +1224,33 @@ static int tegra_nand_remove(struct platform_device *pdev) nand_cleanup(chip); + pm_runtime_put(ctrl->dev); + pm_runtime_disable(ctrl->dev); + + return 0; +} + +static int __maybe_unused tegra_nand_runtime_resume(struct device *dev) +{ + struct tegra_nand_controller *ctrl = dev_get_drvdata(dev); + + return clk_prepare_enable(ctrl->clk); +} + +static int __maybe_unused tegra_nand_runtime_suspend(struct device *dev) +{ + struct tegra_nand_controller *ctrl = dev_get_drvdata(dev); + clk_disable_unprepare(ctrl->clk); return 0; } +static const struct dev_pm_ops tegra_nand_pm = { + SET_RUNTIME_PM_OPS(tegra_nand_runtime_suspend, tegra_nand_runtime_resume, + NULL) +}; + static const struct of_device_id tegra_nand_of_match[] = { { .compatible = "nvidia,tegra20-nand" }, { /* sentinel */ } @@ -1236,6 +1261,7 @@ static struct platform_driver tegra_nand_driver = { .driver = { .name = "tegra-nand", .of_match_table = tegra_nand_of_match, + .pm = &tegra_nand_pm, }, .probe = tegra_nand_probe, .remove = tegra_nand_remove, From 03acba1e1ed0da2a259aef0ec13d2102ca24aec5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:50:43 +0300 Subject: [PATCH 712/851] soc/tegra: fuse: Clear fuse->clk on driver probe failure The fuse->clk must be cleared if FUSE driver fails to probe, otherwise tegra_fuse_readl() will crash. 
Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/fuse/fuse-tegra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index 3d9da3d359dae..2434c570b53c9 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -246,6 +246,7 @@ static int tegra_fuse_probe(struct platform_device *pdev) return 0; restore: + fuse->clk = NULL; fuse->base = base; return err; } From 4d4753fc2c88952fab53e7611cce5df68e8c0678 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:53:03 +0300 Subject: [PATCH 713/851] soc/tegra: fuse: Add runtime PM support The Tegra FUSE belongs to the core power domain and we're going to enable GENPD support for the core domain. Now FUSE device must be resumed using runtime PM API in order to initialize the FUSE power state. Add runtime PM support to the FUSE driver. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/fuse/fuse-tegra.c | 30 +++++++++++++++++++++++++++ drivers/soc/tegra/fuse/fuse-tegra20.c | 10 +++++---- drivers/soc/tegra/fuse/fuse-tegra30.c | 9 ++++---- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index 2434c570b53c9..747237865aff6 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -210,6 +211,8 @@ static int tegra_fuse_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fuse); fuse->dev = &pdev->dev; + pm_runtime_enable(&pdev->dev); + if (fuse->soc->probe) { err = fuse->soc->probe(fuse); if (err < 0) @@ -248,13 +251,40 @@ static int tegra_fuse_probe(struct platform_device *pdev) restore: fuse->clk = NULL; fuse->base = base; + pm_runtime_disable(&pdev->dev); return err; } +static int __maybe_unused tegra_fuse_runtime_resume(struct device *dev) +{ + int err; + + err = 
clk_prepare_enable(fuse->clk); + if (err < 0) { + dev_err(dev, "failed to enable FUSE clock: %d\n", err); + return err; + } + + return 0; +} + +static int __maybe_unused tegra_fuse_runtime_suspend(struct device *dev) +{ + clk_disable_unprepare(fuse->clk); + + return 0; +} + +static const struct dev_pm_ops tegra_fuse_pm = { + SET_RUNTIME_PM_OPS(tegra_fuse_runtime_suspend, tegra_fuse_runtime_resume, + NULL) +}; + static struct platform_driver tegra_fuse_driver = { .driver = { .name = "tegra-fuse", .of_match_table = tegra_fuse_match, + .pm = &tegra_fuse_pm, .suppress_bind_attrs = true, }, .probe = tegra_fuse_probe, diff --git a/drivers/soc/tegra/fuse/fuse-tegra20.c b/drivers/soc/tegra/fuse/fuse-tegra20.c index 16aaa28573ac0..cd6a273707fe0 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra20.c +++ b/drivers/soc/tegra/fuse/fuse-tegra20.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,10 @@ static u32 tegra20_fuse_read(struct tegra_fuse *fuse, unsigned int offset) u32 value = 0; int err; + err = pm_runtime_resume_and_get(fuse->dev); + if (err) + return err; + mutex_lock(&fuse->apbdma.lock); fuse->apbdma.config.src_addr = fuse->phys + FUSE_BEGIN + offset; @@ -66,8 +71,6 @@ static u32 tegra20_fuse_read(struct tegra_fuse *fuse, unsigned int offset) reinit_completion(&fuse->apbdma.wait); - clk_prepare_enable(fuse->clk); - dmaengine_submit(dma_desc); dma_async_issue_pending(fuse->apbdma.chan); time_left = wait_for_completion_timeout(&fuse->apbdma.wait, @@ -78,10 +81,9 @@ static u32 tegra20_fuse_read(struct tegra_fuse *fuse, unsigned int offset) else value = *fuse->apbdma.virt; - clk_disable_unprepare(fuse->clk); - out: mutex_unlock(&fuse->apbdma.lock); + pm_runtime_put(fuse->dev); return value; } diff --git a/drivers/soc/tegra/fuse/fuse-tegra30.c b/drivers/soc/tegra/fuse/fuse-tegra30.c index c1aa7815bd6ec..dd03565a39a4c 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra30.c +++ b/drivers/soc/tegra/fuse/fuse-tegra30.c @@ -12,6 +12,7 @@ #include 
#include #include +#include #include #include @@ -52,15 +53,13 @@ static u32 tegra30_fuse_read(struct tegra_fuse *fuse, unsigned int offset) u32 value; int err; - err = clk_prepare_enable(fuse->clk); - if (err < 0) { - dev_err(fuse->dev, "failed to enable FUSE clock: %d\n", err); + err = pm_runtime_resume_and_get(fuse->dev); + if (err) return 0; - } value = readl_relaxed(fuse->base + FUSE_BEGIN + offset); - clk_disable_unprepare(fuse->clk); + pm_runtime_put(fuse->dev); return value; } From 1c245ddaa7a0b7406adf48b236fd890434351362 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 1 Jul 2021 02:46:58 +0300 Subject: [PATCH 714/851] soc/tegra: fuse: Enable fuse clock on suspend The FUSE clock should be enabled during suspend on Tegra124. Currently clk driver enables it on all SoCs, but FUSE may require a higher core voltage on Tegra30 while enabled. Move the quirk into the FUSE driver and make it specific to Tegra124. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/fuse/fuse-tegra.c | 29 +++++++++++++++++++++++++++ drivers/soc/tegra/fuse/fuse-tegra30.c | 1 + drivers/soc/tegra/fuse/fuse.h | 2 ++ 3 files changed, 32 insertions(+) diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index 747237865aff6..f2151815db585 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -275,9 +275,38 @@ static int __maybe_unused tegra_fuse_runtime_suspend(struct device *dev) return 0; } +static int __maybe_unused tegra_fuse_suspend(struct device *dev) +{ + int ret; + + /* + * Critical for RAM re-repair operation, which must occur on resume + * from LP1 system suspend and as part of CCPLEX cluster switching. 
+ */ + if (fuse->soc->clk_suspend_on) + ret = pm_runtime_resume_and_get(dev); + else + ret = pm_runtime_force_suspend(dev); + + return ret; +} + +static int __maybe_unused tegra_fuse_resume(struct device *dev) +{ + int ret = 0; + + if (fuse->soc->clk_suspend_on) + pm_runtime_put(dev); + else + ret = pm_runtime_force_resume(dev); + + return ret; +} + static const struct dev_pm_ops tegra_fuse_pm = { SET_RUNTIME_PM_OPS(tegra_fuse_runtime_suspend, tegra_fuse_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(tegra_fuse_suspend, tegra_fuse_resume) }; static struct platform_driver tegra_fuse_driver = { diff --git a/drivers/soc/tegra/fuse/fuse-tegra30.c b/drivers/soc/tegra/fuse/fuse-tegra30.c index dd03565a39a4c..e1f1db3b0526d 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra30.c +++ b/drivers/soc/tegra/fuse/fuse-tegra30.c @@ -208,6 +208,7 @@ const struct tegra_fuse_soc tegra124_fuse_soc = { .lookups = tegra124_fuse_lookups, .num_lookups = ARRAY_SIZE(tegra124_fuse_lookups), .soc_attr_group = &tegra_soc_attr_group, + .clk_suspend_on = true, }; #endif diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h index e057a58e20603..de58feba04350 100644 --- a/drivers/soc/tegra/fuse/fuse.h +++ b/drivers/soc/tegra/fuse/fuse.h @@ -34,6 +34,8 @@ struct tegra_fuse_soc { unsigned int num_lookups; const struct attribute_group *soc_attr_group; + + bool clk_suspend_on; }; struct tegra_fuse { From 4775761559d1c62421038a4f5022f8c966a4a247 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 1 Jul 2021 02:51:15 +0300 Subject: [PATCH 715/851] clk: tegra: Remove CLK_IS_CRITICAL flag from fuse clock FUSE driver now takes care of keeping the clock enabled when necessary. Remove the CLK_IS_CRITICAL flag from the clock. 
Signed-off-by: Dmitry Osipenko --- drivers/clk/tegra/clk-tegra-periph.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 292d6269daf15..4dcf7f7cb8a09 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -777,11 +777,7 @@ static struct tegra_periph_init_data gate_clks[] = { GATE("ahbdma", "hclk", 33, 0, tegra_clk_ahbdma, 0), GATE("apbdma", "pclk", 34, 0, tegra_clk_apbdma, 0), GATE("kbc", "clk_32k", 36, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_kbc, 0), - /* - * Critical for RAM re-repair operation, which must occur on resume - * from LP1 system suspend and as part of CCPLEX cluster switching. - */ - GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, CLK_IS_CRITICAL), + GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, 0), GATE("fuse_burn", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse_burn, 0), GATE("kfuse", "clk_m", 40, TEGRA_PERIPH_ON_APB, tegra_clk_kfuse, 0), GATE("apbif", "clk_m", 107, TEGRA_PERIPH_ON_APB, tegra_clk_apbif, 0), From d8556720202ef2e46e1a02352221c46fe5118d41 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 22:03:44 +0300 Subject: [PATCH 716/851] spi: tegra20-slink: Improve runtime PM usage The Tegra SPI driver supports runtime PM, which controls the clock enable state, but the clk is also enabled separately from the RPM at the driver probe time, and thus, stays always on. Fix it. Runtime PM now is always available on Tegra, hence there is no need to check the RPM presence in the driver anymore. Remove these checks. 
Signed-off-by: Dmitry Osipenko --- drivers/spi/spi-tegra20-slink.c | 71 +++++++++++---------------------- 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 6a726c95ac7a8..5fd6680732b15 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1061,33 +1061,12 @@ static int tegra_slink_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Can not get clock %d\n", ret); goto exit_free_master; } - ret = clk_prepare(tspi->clk); - if (ret < 0) { - dev_err(&pdev->dev, "Clock prepare failed %d\n", ret); - goto exit_free_master; - } - ret = clk_enable(tspi->clk); - if (ret < 0) { - dev_err(&pdev->dev, "Clock enable failed %d\n", ret); - goto exit_clk_unprepare; - } - - spi_irq = platform_get_irq(pdev, 0); - tspi->irq = spi_irq; - ret = request_threaded_irq(tspi->irq, tegra_slink_isr, - tegra_slink_isr_thread, IRQF_ONESHOT, - dev_name(&pdev->dev), tspi); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n", - tspi->irq); - goto exit_clk_disable; - } tspi->rst = devm_reset_control_get_exclusive(&pdev->dev, "spi"); if (IS_ERR(tspi->rst)) { dev_err(&pdev->dev, "can not get reset\n"); ret = PTR_ERR(tspi->rst); - goto exit_free_irq; + goto exit_free_master; } tspi->max_buf_size = SLINK_FIFO_DEPTH << 2; @@ -1095,7 +1074,7 @@ static int tegra_slink_probe(struct platform_device *pdev) ret = tegra_slink_init_dma_param(tspi, true); if (ret < 0) - goto exit_free_irq; + goto exit_free_master; ret = tegra_slink_init_dma_param(tspi, false); if (ret < 0) goto exit_rx_dma_free; @@ -1106,16 +1085,9 @@ static int tegra_slink_probe(struct platform_device *pdev) init_completion(&tspi->xfer_completion); pm_runtime_enable(&pdev->dev); - if (!pm_runtime_enabled(&pdev->dev)) { - ret = tegra_slink_runtime_resume(&pdev->dev); - if (ret) - goto exit_pm_disable; - } - - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { + ret = 
pm_runtime_resume_and_get(&pdev->dev); + if (ret) { dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret); - pm_runtime_put_noidle(&pdev->dev); goto exit_pm_disable; } @@ -1123,33 +1095,43 @@ static int tegra_slink_probe(struct platform_device *pdev) udelay(2); reset_control_deassert(tspi->rst); + spi_irq = platform_get_irq(pdev, 0); + tspi->irq = spi_irq; + ret = request_threaded_irq(tspi->irq, tegra_slink_isr, + tegra_slink_isr_thread, IRQF_ONESHOT, + dev_name(&pdev->dev), tspi); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n", + tspi->irq); + goto exit_pm_put; + } + tspi->def_command_reg = SLINK_M_S; tspi->def_command2_reg = SLINK_CS_ACTIVE_BETWEEN; tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND); tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2); - pm_runtime_put(&pdev->dev); master->dev.of_node = pdev->dev.of_node; ret = devm_spi_register_master(&pdev->dev, master); if (ret < 0) { dev_err(&pdev->dev, "can not register to master err %d\n", ret); - goto exit_pm_disable; + goto exit_free_irq; } + + pm_runtime_put(&pdev->dev); + return ret; +exit_free_irq: + free_irq(spi_irq, tspi); +exit_pm_put: + pm_runtime_put(&pdev->dev); exit_pm_disable: pm_runtime_disable(&pdev->dev); - if (!pm_runtime_status_suspended(&pdev->dev)) - tegra_slink_runtime_suspend(&pdev->dev); + tegra_slink_deinit_dma_param(tspi, false); exit_rx_dma_free: tegra_slink_deinit_dma_param(tspi, true); -exit_free_irq: - free_irq(spi_irq, tspi); -exit_clk_disable: - clk_disable(tspi->clk); -exit_clk_unprepare: - clk_unprepare(tspi->clk); exit_free_master: spi_master_put(master); return ret; @@ -1162,9 +1144,6 @@ static int tegra_slink_remove(struct platform_device *pdev) free_irq(tspi->irq, tspi); - clk_disable(tspi->clk); - clk_unprepare(tspi->clk); - if (tspi->tx_dma_chan) tegra_slink_deinit_dma_param(tspi, false); @@ -1172,8 +1151,6 @@ static int tegra_slink_remove(struct platform_device *pdev) tegra_slink_deinit_dma_param(tspi, 
true); pm_runtime_disable(&pdev->dev); - if (!pm_runtime_status_suspended(&pdev->dev)) - tegra_slink_runtime_suspend(&pdev->dev); return 0; } From de64d9def94dc4cc1a2572768459f1c71e2823a0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 22:09:56 +0300 Subject: [PATCH 717/851] spi: tegra20-slink: Add OPP support The SPI on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now SPI driver must use OPP API for driving the controller's clock rate because OPP API takes care of reconfiguring the domain's performance state in accordance with the rate. Add OPP support to the driver. Signed-off-by: Dmitry Osipenko --- drivers/spi/spi-tegra20-slink.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 5fd6680732b15..383d332ac0261 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -18,12 +18,15 @@ #include #include #include +#include #include #include #include #include #include +#include + #define SLINK_COMMAND 0x000 #define SLINK_BIT_LENGTH(x) (((x) & 0x1f) << 0) #define SLINK_WORD_SIZE(x) (((x) & 0x1f) << 5) @@ -683,7 +686,7 @@ static int tegra_slink_start_transfer_one(struct spi_device *spi, bits_per_word = t->bits_per_word; speed = t->speed_hz; if (speed != tspi->cur_speed) { - clk_set_rate(tspi->clk, speed * 4); + dev_pm_opp_set_rate(tspi->dev, speed * 4); tspi->cur_speed = speed; } @@ -1054,6 +1057,10 @@ static int tegra_slink_probe(struct platform_device *pdev) goto exit_free_master; } + ret = devm_tegra_core_dev_init_opp_table_simple(&pdev->dev); + if (ret) + goto exit_free_master; + /* disabled clock may cause interrupt storm upon request */ tspi->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(tspi->clk)) { From 29f95c63cebcc2e9748bbe81e13ffc760104860f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 1 Jul 2021 00:34:22 +0300 Subject: [PATCH 718/851] memory: tegra20-emc:
Add minimal runtime PM support EMC is always enabled, mark it as enabled. This makes the OPP and GENPD API usage more consistent since otherwise we're changing performance state of a power domain for the device that doesn't support power management. Signed-off-by: Dmitry Osipenko --- drivers/memory/tegra/tegra20-emc.c | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c index c3462dbc8c22b..22087009fa0c8 100644 --- a/drivers/memory/tegra/tegra20-emc.c +++ b/drivers/memory/tegra/tegra20-emc.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -1028,6 +1029,32 @@ static int tegra_emc_devfreq_init(struct tegra_emc *emc) return 0; } +static void devm_tegra_emc_disable_runtime_pm(void *dev) +{ + pm_runtime_put(dev); + pm_runtime_disable(dev); +} + +static int tegra_emc_init_pm(struct tegra_emc *emc) +{ + int err; + + pm_runtime_enable(emc->dev); + err = pm_runtime_resume_and_get(emc->dev); + if (err) { + pm_runtime_disable(emc->dev); + return err; + } + + err = devm_add_action_or_reset(emc->dev, + devm_tegra_emc_disable_runtime_pm, + emc->dev); + if (err) + return err; + + return 0; +} + static int tegra_emc_probe(struct platform_device *pdev) { struct tegra_core_opp_params opp_params = {}; @@ -1076,6 +1103,10 @@ static int tegra_emc_probe(struct platform_device *pdev) if (err) return err; + err = tegra_emc_init_pm(emc); + if (err) + return err; + opp_params.init_state = true; err = devm_tegra_core_dev_init_opp_table(&pdev->dev, &opp_params); From 32faeca946806e9a1176fd720f3e69e5cf658994 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 1 Jul 2021 00:34:33 +0300 Subject: [PATCH 719/851] memory: tegra30-emc: Add minimal runtime PM support EMC is always enabled, mark it as enabled. 
This makes the OPP and GENPD API usage more consistent since otherwise we're changing performance state of a power domain for the device that doesn't support power management. Signed-off-by: Dmitry Osipenko --- drivers/memory/tegra/tegra30-emc.c | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/memory/tegra/tegra30-emc.c b/drivers/memory/tegra/tegra30-emc.c index 7e21a852f2e1f..5a311feb5c20a 100644 --- a/drivers/memory/tegra/tegra30-emc.c +++ b/drivers/memory/tegra/tegra30-emc.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1519,6 +1520,32 @@ static int tegra_emc_init_clk(struct tegra_emc *emc) return 0; } +static void devm_tegra_emc_disable_runtime_pm(void *dev) +{ + pm_runtime_put(dev); + pm_runtime_disable(dev); +} + +static int tegra_emc_init_pm(struct tegra_emc *emc) +{ + int err; + + pm_runtime_enable(emc->dev); + err = pm_runtime_resume_and_get(emc->dev); + if (err) { + pm_runtime_disable(emc->dev); + return err; + } + + err = devm_add_action_or_reset(emc->dev, + devm_tegra_emc_disable_runtime_pm, + emc->dev); + if (err) + return err; + + return 0; +} + static int tegra_emc_probe(struct platform_device *pdev) { struct tegra_core_opp_params opp_params = {}; @@ -1571,6 +1598,10 @@ static int tegra_emc_probe(struct platform_device *pdev) if (err) return err; + err = tegra_emc_init_pm(emc); + if (err) + return err; + opp_params.init_state = true; err = devm_tegra_core_dev_init_opp_table(&pdev->dev, &opp_params); From f640c89995e66662de356ae6be0a04a14ccfafcb Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 05:10:49 +0300 Subject: [PATCH 720/851] media: dt: bindings: tegra-vde: Convert to schema Convert NVIDIA Tegra video decoder binding to schema. 
Signed-off-by: Dmitry Osipenko --- .../bindings/media/nvidia,tegra-vde.txt | 64 ----------- .../bindings/media/nvidia,tegra-vde.yaml | 107 ++++++++++++++++++ 2 files changed, 107 insertions(+), 64 deletions(-) delete mode 100644 Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt create mode 100644 Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt deleted file mode 100644 index 602169b8aa198..0000000000000 --- a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt +++ /dev/null @@ -1,64 +0,0 @@ -NVIDIA Tegra Video Decoder Engine - -Required properties: -- compatible : Must contain one of the following values: - - "nvidia,tegra20-vde" - - "nvidia,tegra30-vde" - - "nvidia,tegra114-vde" - - "nvidia,tegra124-vde" - - "nvidia,tegra132-vde" -- reg : Must contain an entry for each entry in reg-names. -- reg-names : Must include the following entries: - - sxe - - bsev - - mbe - - ppe - - mce - - tfe - - ppb - - vdma - - frameid -- iram : Must contain phandle to the mmio-sram device node that represents - IRAM region used by VDE. -- interrupts : Must contain an entry for each entry in interrupt-names. -- interrupt-names : Must include the following entries: - - sync-token - - bsev - - sxe -- clocks : Must include the following entries: - - vde -- resets : Must contain an entry for each entry in reset-names. -- reset-names : Should include the following entries: - - vde - -Optional properties: -- resets : Must contain an entry for each entry in reset-names. -- reset-names : Must include the following entries: - - mc -- iommus: Must contain phandle to the IOMMU device node. 
- -Example: - -video-codec@6001a000 { - compatible = "nvidia,tegra20-vde"; - reg = <0x6001a000 0x1000 /* Syntax Engine */ - 0x6001b000 0x1000 /* Video Bitstream Engine */ - 0x6001c000 0x100 /* Macroblock Engine */ - 0x6001c200 0x100 /* Post-processing Engine */ - 0x6001c400 0x100 /* Motion Compensation Engine */ - 0x6001c600 0x100 /* Transform Engine */ - 0x6001c800 0x100 /* Pixel prediction block */ - 0x6001ca00 0x100 /* Video DMA */ - 0x6001d800 0x300 /* Video frame controls */>; - reg-names = "sxe", "bsev", "mbe", "ppe", "mce", - "tfe", "ppb", "vdma", "frameid"; - iram = <&vde_pool>; /* IRAM region */ - interrupts = , /* Sync token interrupt */ - , /* BSE-V interrupt */ - ; /* SXE interrupt */ - interrupt-names = "sync-token", "bsev", "sxe"; - clocks = <&tegra_car TEGRA20_CLK_VDE>; - reset-names = "vde", "mc"; - resets = <&tegra_car 61>, <&mc TEGRA20_MC_RESET_VDE>; - iommus = <&mc TEGRA_SWGROUP_VDE>; -}; diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml new file mode 100644 index 0000000000000..3b6c1f031e049 --- /dev/null +++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/nvidia,tegra-vde.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVIDIA Tegra Video Decoder Engine + +maintainers: + - Dmitry Osipenko + - Jon Hunter + - Thierry Reding + +properties: + compatible: + oneOf: + - items: + - enum: + - nvidia,tegra132-vde + - nvidia,tegra124-vde + - nvidia,tegra114-vde + - nvidia,tegra30-vde + - enum: + - nvidia,tegra20-vde + - items: + - const: nvidia,tegra20-vde + + reg: + maxItems: 9 + + reg-names: + items: + - const: sxe + - const: bsev + - const: mbe + - const: ppe + - const: mce + - const: tfe + - const: ppb + - const: vdma + - const: frameid + + clocks: + maxItems: 1 + + resets: + maxItems: 2 + + 
reset-names: + items: + - const: vde + - const: mc + + interrupts: + maxItems: 3 + + interrupt-names: + items: + - const: sync-token + - const: bsev + - const: sxe + + iommus: + maxItems: 1 + + iram: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle of the SRAM MMIO node. + +required: + - compatible + - reg + - reg-names + - clocks + - resets + - reset-names + - interrupts + - interrupt-names + +additionalProperties: false + +examples: + - | + video-codec@6001a000 { + compatible = "nvidia,tegra20-vde"; + reg = <0x6001a000 0x1000>, /* Syntax Engine */ + <0x6001b000 0x1000>, /* Video Bitstream Engine */ + <0x6001c000 0x100>, /* Macroblock Engine */ + <0x6001c200 0x100>, /* Post-processing Engine */ + <0x6001c400 0x100>, /* Motion Compensation Engine */ + <0x6001c600 0x100>, /* Transform Engine */ + <0x6001c800 0x100>, /* Pixel prediction block */ + <0x6001ca00 0x100>, /* Video DMA */ + <0x6001d800 0x300>; /* Video frame controls */ + reg-names = "sxe", "bsev", "mbe", "ppe", "mce", + "tfe", "ppb", "vdma", "frameid"; + iram = <&iram>; /* IRAM MMIO region */ + interrupts = <0 9 4>, /* Sync token */ + <0 10 4>, /* BSE-V */ + <0 12 4>; /* SXE */ + interrupt-names = "sync-token", "bsev", "sxe"; + clocks = <&clk 61>; + reset-names = "vde", "mc"; + resets = <&rst 61>, <&mem 13>; + iommus = <&mem 15>; + }; From 01660bb75fdbe2112807b58ce0cb549368dd67a4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 6 Apr 2021 00:06:25 +0300 Subject: [PATCH 721/851] media: dt: bindings: tegra-vde: Document OPP and power domain Document new OPP table and power domain properties of the video decoder hardware. 
Signed-off-by: Dmitry Osipenko --- .../devicetree/bindings/media/nvidia,tegra-vde.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml index 3b6c1f031e049..0b7d4d815707b 100644 --- a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml +++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml @@ -68,6 +68,16 @@ properties: description: Phandle of the SRAM MMIO node. + operating-points-v2: + description: + Should contain freqs and voltages and opp-supported-hw property, + which is a bitfield indicating SoC speedo or process ID mask. + + power-domains: + maxItems: 1 + description: + Phandle to the SoC core power domain. + required: - compatible - reg @@ -104,4 +114,6 @@ examples: reset-names = "vde", "mc"; resets = <&rst 61>, <&mem 13>; iommus = <&mem 15>; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; From b1907d574e98f766c0fbda5a8e9bf7f7b5f9fd1e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 03:03:57 +0300 Subject: [PATCH 722/851] media: staging: tegra-vde: Support generic power domain Currently driver supports legacy power domain API, this patch adds generic power domain support. This allows us to utilize a modern GENPD API for newer device-trees and support DVFS of the video decoder hardware. 
Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/staging/media/tegra-vde/vde.c | 59 ++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/drivers/staging/media/tegra-vde/vde.c b/drivers/staging/media/tegra-vde/vde.c index ed4c1250b3038..fc7a32ed1821b 100644 --- a/drivers/staging/media/tegra-vde/vde.c +++ b/drivers/staging/media/tegra-vde/vde.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -920,13 +922,17 @@ static __maybe_unused int tegra_vde_runtime_suspend(struct device *dev) struct tegra_vde *vde = dev_get_drvdata(dev); int err; - err = tegra_powergate_power_off(TEGRA_POWERGATE_VDEC); - if (err) { - dev_err(dev, "Failed to power down HW: %d\n", err); - return err; + if (!dev->pm_domain) { + err = tegra_powergate_power_off(TEGRA_POWERGATE_VDEC); + if (err) { + dev_err(dev, "Failed to power down HW: %d\n", err); + return err; + } } clk_disable_unprepare(vde->clk); + reset_control_release(vde->rst); + reset_control_release(vde->rst_mc); return 0; } @@ -936,14 +942,41 @@ static __maybe_unused int tegra_vde_runtime_resume(struct device *dev) struct tegra_vde *vde = dev_get_drvdata(dev); int err; - err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_VDEC, - vde->clk, vde->rst); + err = reset_control_acquire(vde->rst_mc); if (err) { - dev_err(dev, "Failed to power up HW : %d\n", err); + dev_err(dev, "Failed to acquire mc reset: %d\n", err); return err; } + err = reset_control_acquire(vde->rst); + if (err) { + dev_err(dev, "Failed to acquire reset: %d\n", err); + goto release_mc_reset; + } + + if (!dev->pm_domain) { + err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_VDEC, + vde->clk, vde->rst); + if (err) { + dev_err(dev, "Failed to power up HW : %d\n", err); + goto release_reset; + } + } + + err = clk_prepare_enable(vde->clk); 
+ if (err) { + dev_err(dev, "Failed to enable clock: %d\n", err); + goto release_reset; + } + return 0; + +release_reset: + reset_control_release(vde->rst); +release_mc_reset: + reset_control_release(vde->rst_mc); + + return err; } static int tegra_vde_probe(struct platform_device *pdev) @@ -1001,14 +1034,14 @@ static int tegra_vde_probe(struct platform_device *pdev) return err; } - vde->rst = devm_reset_control_get(dev, NULL); + vde->rst = devm_reset_control_get_exclusive_released(dev, NULL); if (IS_ERR(vde->rst)) { err = PTR_ERR(vde->rst); dev_err(dev, "Could not get VDE reset %d\n", err); return err; } - vde->rst_mc = devm_reset_control_get_optional(dev, "mc"); + vde->rst_mc = devm_reset_control_get_optional_exclusive_released(dev, "mc"); if (IS_ERR(vde->rst_mc)) { err = PTR_ERR(vde->rst_mc); dev_err(dev, "Could not get MC reset %d\n", err); @@ -1133,8 +1166,12 @@ static void tegra_vde_shutdown(struct platform_device *pdev) * On some devices bootloader isn't ready to a power-gated VDE on * a warm-reboot, machine will hang in that case. */ - if (pm_runtime_status_suspended(&pdev->dev)) - tegra_vde_runtime_resume(&pdev->dev); + if (pm_runtime_status_suspended(&pdev->dev)) { + if (pdev->dev.pm_domain) + dev_pm_genpd_resume(&pdev->dev); + else + tegra_vde_runtime_resume(&pdev->dev); + } } static __maybe_unused int tegra_vde_pm_suspend(struct device *dev) From 0a6bbcdf88246945914b9ec18bc485ec67d11866 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 24 Nov 2020 00:34:42 +0300 Subject: [PATCH 723/851] ARM: tegra: Add OPP tables and power domains to Tegra20 device-trees Add OPP tables and power domains to all peripheral devices which support power management on Tegra20 SoC. 
Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 Signed-off-by: Dmitry Osipenko --- .../boot/dts/tegra20-acer-a500-picasso.dts | 1 + arch/arm/boot/dts/tegra20-colibri.dtsi | 3 +- arch/arm/boot/dts/tegra20-harmony.dts | 3 +- arch/arm/boot/dts/tegra20-paz00.dts | 1 + .../arm/boot/dts/tegra20-peripherals-opp.dtsi | 941 ++++++++++++++++++ arch/arm/boot/dts/tegra20-seaboard.dts | 3 +- arch/arm/boot/dts/tegra20-tamonten.dtsi | 3 +- arch/arm/boot/dts/tegra20-trimslice.dts | 9 + arch/arm/boot/dts/tegra20-ventana.dts | 1 + arch/arm/boot/dts/tegra20.dtsi | 103 ++ 10 files changed, 1064 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts index 75ce986df958f..d97b69b38f0c3 100644 --- a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts +++ b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts @@ -709,6 +709,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <458>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; usb@c5000000 { diff --git a/arch/arm/boot/dts/tegra20-colibri.dtsi b/arch/arm/boot/dts/tegra20-colibri.dtsi index 585a5b441cf64..4ec403aa5f2e1 100644 --- a/arch/arm/boot/dts/tegra20-colibri.dtsi +++ b/arch/arm/boot/dts/tegra20-colibri.dtsi @@ -495,7 +495,7 @@ regulator-always-on; }; - sm0 { + vdd_core: sm0 { regulator-name = "VDD_CORE_1.2V"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -601,6 +601,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; /* Set SLEEP MODE bit in SUPPLYENE register of TPS658643 PMIC */ i2c-thermtrip { diff --git a/arch/arm/boot/dts/tegra20-harmony.dts b/arch/arm/boot/dts/tegra20-harmony.dts index ae4312eedcbd5..b21bab437ebd9 100644 --- a/arch/arm/boot/dts/tegra20-harmony.dts +++ b/arch/arm/boot/dts/tegra20-harmony.dts @@ -339,7 +339,7 @@ regulator-always-on; }; - sm0 { + 
vdd_core: sm0 { regulator-name = "vdd_sm0,vdd_core"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -565,6 +565,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; pcie@80003000 { diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index acc816bfd2333..c8e4439d3dfb1 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -519,6 +519,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <0>; nvidia,sys-clock-req-active-high; + core-supply = <&core_vdd_reg>; }; usb@c5000000 { diff --git a/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi b/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi index ef3ad2e5f2701..ff8c3dcba8e96 100644 --- a/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi +++ b/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi @@ -1,6 +1,46 @@ // SPDX-License-Identifier: GPL-2.0 / { + core_opp_table: core-power-domain-opp-table { + compatible = "operating-points-v2"; + opp-shared; + + core_opp_950: opp@950000 { + opp-microvolt = <950000 950000 1300000>; + opp-level = <950000>; + }; + + core_opp_1000: opp@1000000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-level = <1000000>; + }; + + core_opp_1100: opp@1100000 { + opp-microvolt = <1100000 1100000 1300000>; + opp-level = <1100000>; + }; + + core_opp_1200: opp@1200000 { + opp-microvolt = <1200000 1200000 1300000>; + opp-level = <1200000>; + }; + + core_opp_1225: opp@1225000 { + opp-microvolt = <1225000 1225000 1300000>; + opp-level = <1225000>; + }; + + core_opp_1275: opp@1275000 { + opp-microvolt = <1275000 1275000 1300000>; + opp-level = <1275000>; + }; + + core_opp_1300: opp@1300000 { + opp-microvolt = <1300000 1300000 1300000>; + opp-level = <1300000>; + }; + }; + emc_icc_dvfs_opp_table: emc-dvfs-opp-table { compatible = "operating-points-v2"; @@ -8,66 +48,77 @@ opp-microvolt = <950000 950000 
1300000>; opp-hz = /bits/ 64 <36000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp@47500000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <47500000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp@50000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <50000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp@54000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp@57000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <57000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp@100000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <100000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp@108000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp@126666000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <126666000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp@150000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <150000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp@190000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <190000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp@216000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <216000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; opp-suspend; }; @@ -75,36 +126,926 @@ opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <300000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp@333000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <333000000>; opp-supported-hw = <0x000F>; + required-opps = 
<&core_opp_1000>; }; opp@380000000 { opp-microvolt = <1100000 1100000 1300000>; opp-hz = /bits/ 64 <380000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; }; opp@600000000 { opp-microvolt = <1200000 1200000 1300000>; opp-hz = /bits/ 64 <600000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; }; opp@666000000 { opp-microvolt = <1200000 1200000 1300000>; opp-hz = /bits/ 64 <666000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; }; opp@760000000 { opp-microvolt = <1300000 1300000 1300000>; opp-hz = /bits/ 64 <760000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1300>; + }; + }; + + gr2d_dvfs_opp_table: gr2d-opp-table { + compatible = "operating-points-v2"; + + opp@133000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@171000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <171000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@247000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp@300000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + gr3d_dvfs_opp_table: gr3d-opp-table { + compatible = "operating-points-v2"; + + opp@114000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <114000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_950>; + }; + + opp@161500000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <161500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp@161500000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <161500000>; + opp-supported-hw = 
<0x0002>; + required-opps = <&core_opp_950>; + }; + + opp@209000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <209000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1000>; + }; + + opp@218500000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <218500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_950>; + }; + + opp@247000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp@247000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_950>; + }; + + opp@256500000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <256500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp@285000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1100>; + }; + + opp@285000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1000>; + }; + + opp@304000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; + }; + + opp@323000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <323000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp@333500000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <333500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1275>; + }; + + opp@333500000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <333500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1200>; + }; + + 
opp@351500000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <351500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1100>; + }; + + opp@361000000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1275>; + }; + + opp@380000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp@400000000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <400000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1275>; + }; + + opp@400000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <400000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1200>; + }; + }; + + disp1_dvfs_opp_table: disp1-opp-table { + compatible = "operating-points-v2"; + + opp@158000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <158000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@190000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + disp2_dvfs_opp_table: disp2-opp-table { + compatible = "operating-points-v2"; + + opp@158000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <158000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@190000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + dsi_dvfs_opp_table: dsi-opp-table { + compatible = "operating-points-v2"; + + opp@100000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + 
}; + + opp@500000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <500000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + epp_dvfs_opp_table: epp-opp-table { + compatible = "operating-points-v2"; + + opp@133000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@171000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <171000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@247000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp@300000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + hdmi_dvfs_opp_table: hdmi-opp-table { + compatible = "operating-points-v2"; + + opp@148500000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <148500000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + host1x_dvfs_opp_table: host1x-opp-table { + compatible = "operating-points-v2"; + + opp@104500000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <104500000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@133000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@166000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <166000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + ide_dvfs_opp_table: ide-opp-table { + compatible = "operating-points-v2"; + + opp@100000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = 
/bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + mipi_dvfs_opp_table: mipi-opp-table { + compatible = "operating-points-v2"; + + opp@40000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <40000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@60000000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1275>; + }; + }; + + mpe_dvfs_opp_table: mpe-opp-table { + compatible = "operating-points-v2"; + + opp@104500000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <104500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_950>; + }; + + opp@142500000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <142500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_950>; + }; + + opp@152000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp@190000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1000>; + }; + + opp@190000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_950>; + }; + + opp@228000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <228000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp@228000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <228000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_950>; + }; + + opp@237500000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <237500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + 
}; + + opp@266000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <266000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1000>; + }; + + opp@275500000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <275500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1100>; + }; + + opp@300000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp@300000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1100>; + }; + }; + + ndflash_dvfs_opp_table: ndflash-opp-table { + compatible = "operating-points-v2"; + + opp@130000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <130000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@150000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <150000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@158000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <158000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp@164000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <164000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + nor_dvfs_opp_table: nor-opp-table { + compatible = "operating-points-v2"; + + opp@92000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <92000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + pcie_dvfs_opp_table: pcie-opp-table { + compatible = "operating-points-v2"; + + opp@250000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <250000000>; + opp-supported-hw = <0x000F>; + required-opps = 
<&core_opp_1200>; + }; + }; + + sdmmc1_dvfs_opp_table: sdmmc1-opp-table { + compatible = "operating-points-v2"; + + opp@44000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <44000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sdmmc2_dvfs_opp_table: sdmmc2-opp-table { + compatible = "operating-points-v2"; + + opp@44000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <44000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sdmmc3_dvfs_opp_table: sdmmc3-opp-table { + compatible = "operating-points-v2"; + + opp@44000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <44000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sdmmc4_dvfs_opp_table: sdmmc4-opp-table { + compatible = "operating-points-v2"; + + opp@44000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <44000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sclk_dvfs_opp_table: sclk-opp-table { + compatible = "operating-points-v2"; + + opp@95000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <95000000>; + opp-supported-hw = <0x0001>; + required-opps = 
<&core_opp_950>; + }; + + opp@123500000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <123500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_950>; + }; + + opp@133000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp@152000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_950>; + }; + + opp@159500000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <159500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1000>; + }; + + opp@171000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <171000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_950>; + }; + + opp@180500000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <180500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp@190000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp@207000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <207000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1100>; + }; + + opp@218500000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <218500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1000>; + }; + + opp@222500000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <222500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; + }; + + opp@229500000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <229500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp@240000000,1225 { + opp-microvolt = <1225000 
1225000 1300000>; + opp-hz = /bits/ 64 <240000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1225>; + }; + + opp@240000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <240000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1200>; + }; + + opp@247000000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1275>; + }; + + opp@256500000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <256500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1100>; + }; + + opp@260000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <260000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp@262000000,1300 { + opp-microvolt = <1300000 1300000 1300000>; + opp-hz = /bits/ 64 <262000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1300>; + }; + + opp@264000000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <264000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1275>; + }; + + opp@277500000,1300 { + opp-microvolt = <1300000 1300000 1300000>; + opp-hz = /bits/ 64 <277500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1300>; + }; + + opp@285000000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1275>; + }; + + opp@292500000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <292500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1200>; + }; + + opp@300000000,1300 { + opp-microvolt = <1300000 1300000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp@300000000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <300000000>; + 
opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1275>; + }; + }; + + tvo_dvfs_opp_table: tvo-opp-table { + compatible = "operating-points-v2"; + + opp@250000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <250000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + usbd_dvfs_opp_table: usbd-opp-table { + compatible = "operating-points-v2"; + + opp@480000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + usb2_dvfs_opp_table: usb2-opp-table { + compatible = "operating-points-v2"; + + opp@480000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + usb3_dvfs_opp_table: usb3-opp-table { + compatible = "operating-points-v2"; + + opp@480000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + vde_dvfs_opp_table: vde-opp-table { + compatible = "operating-points-v2"; + + opp@95000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <95000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_950>; + }; + + opp@123500000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <123500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp@123500000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <123500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_950>; + }; + + opp@152000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1000>; + }; + + opp@152000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <152000000>; + 
opp-supported-hw = <0x0004>; + required-opps = <&core_opp_950>; + }; + + opp@171000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <171000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_950>; + }; + + opp@209000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <209000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp@209000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <209000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp@218500000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <218500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1000>; + }; + + opp@237500000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <237500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1100>; + }; + + opp@275500000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <275500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; + }; + + opp@285000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp@300000000,1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1275>; + }; + + opp@300000000,1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; + }; + + opp@300000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1100>; + }; + }; + + vi_dvfs_opp_table: vi-opp-table { + compatible = "operating-points-v2"; + + opp@85000000,950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 
<85000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@100000000,1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@150000000,1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <150000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; }; }; }; diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts index 92d494b8c3d25..5aeb7bb6c4151 100644 --- a/arch/arm/boot/dts/tegra20-seaboard.dts +++ b/arch/arm/boot/dts/tegra20-seaboard.dts @@ -444,7 +444,7 @@ regulator-always-on; }; - sm0 { + vdd_core: sm0 { regulator-name = "vdd_sm0,vdd_core"; regulator-min-microvolt = <1300000>; regulator-max-microvolt = <1300000>; @@ -689,6 +689,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; memory-controller@7000f400 { diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index 95e6bccdb4f6e..ff2fb335ad10a 100644 --- a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -357,7 +357,7 @@ regulator-always-on; }; - sm0 { + vdd_core: sm0 { regulator-name = "vdd_sys_sm0,vdd_core"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -477,6 +477,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; pcie@80003000 { diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts index 4bc87bc0c2a45..582dc7910ff8c 100644 --- a/arch/arm/boot/dts/tegra20-trimslice.dts +++ b/arch/arm/boot/dts/tegra20-trimslice.dts @@ -321,6 +321,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + 
core-supply = <&vdd_core>; }; pcie@80003000 { @@ -444,6 +445,14 @@ regulator-always-on; }; + vdd_core: regulator@5 { + compatible = "regulator-fixed"; + regulator-name = "vdd_core"; + regulator-min-microvolt = <1300000>; + regulator-max-microvolt = <1300000>; + regulator-always-on; + }; + sound { compatible = "nvidia,tegra-audio-trimslice"; nvidia,i2s-controller = <&tegra_i2s1>; diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts index 5a2578b3707f4..e41ce1b1ec3ff 100644 --- a/arch/arm/boot/dts/tegra20-ventana.dts +++ b/arch/arm/boot/dts/tegra20-ventana.dts @@ -544,6 +544,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <458>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; usb@c5000000 { diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 6ce4981781054..5c74cc76b5e3d 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -42,6 +42,8 @@ clock-names = "host1x"; resets = <&tegra_car 28>; reset-names = "host1x"; + operating-points-v2 = <&host1x_dvfs_opp_table>; + power-domains = <&pd_core>; #address-cells = <1>; #size-cells = <1>; @@ -55,6 +57,8 @@ clocks = <&tegra_car TEGRA20_CLK_MPE>; resets = <&tegra_car 60>; reset-names = "mpe"; + operating-points-v2 = <&mpe_dvfs_opp_table>; + power-domains = <&pd_mpe>; }; vi@54080000 { @@ -64,6 +68,8 @@ clocks = <&tegra_car TEGRA20_CLK_VI>; resets = <&tegra_car 20>; reset-names = "vi"; + operating-points-v2 = <&vi_dvfs_opp_table>; + power-domains = <&pd_venc>; }; epp@540c0000 { @@ -73,6 +79,8 @@ clocks = <&tegra_car TEGRA20_CLK_EPP>; resets = <&tegra_car 19>; reset-names = "epp"; + operating-points-v2 = <&epp_dvfs_opp_table>; + power-domains = <&pd_core>; }; isp@54100000 { @@ -82,6 +90,7 @@ clocks = <&tegra_car TEGRA20_CLK_ISP>; resets = <&tegra_car 23>; reset-names = "isp"; + power-domains = <&pd_venc>; }; gr2d@54140000 { @@ -91,6 +100,8 @@ clocks = <&tegra_car TEGRA20_CLK_GR2D>; resets = 
<&tegra_car 21>; reset-names = "2d"; + operating-points-v2 = <&gr2d_dvfs_opp_table>; + power-domains = <&pd_core>; }; gr3d@54180000 { @@ -99,6 +110,8 @@ clocks = <&tegra_car TEGRA20_CLK_GR3D>; resets = <&tegra_car 24>; reset-names = "3d"; + operating-points-v2 = <&gr3d_dvfs_opp_table>; + power-domains = <&pd_3d>; }; dc@54200000 { @@ -110,6 +123,8 @@ clock-names = "dc", "parent"; resets = <&tegra_car 27>; reset-names = "dc"; + operating-points-v2 = <&disp1_dvfs_opp_table>; + power-domains = <&pd_core>; nvidia,head = <0>; @@ -138,6 +153,8 @@ clock-names = "dc", "parent"; resets = <&tegra_car 26>; reset-names = "dc"; + operating-points-v2 = <&disp2_dvfs_opp_table>; + power-domains = <&pd_core>; nvidia,head = <1>; @@ -166,6 +183,8 @@ clock-names = "hdmi", "parent"; resets = <&tegra_car 51>; reset-names = "hdmi"; + operating-points-v2 = <&hdmi_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -174,6 +193,8 @@ reg = <0x542c0000 0x00040000>; interrupts = ; clocks = <&tegra_car TEGRA20_CLK_TVO>; + operating-points-v2 = <&tvo_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -185,6 +206,8 @@ clock-names = "dsi", "parent"; resets = <&tegra_car 48>; reset-names = "dsi"; + operating-points-v2 = <&dsi_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; }; @@ -242,6 +265,16 @@ reg = <0x60006000 0x1000>; #clock-cells = <1>; #reset-cells = <1>; + + tegra-clocks { + sclk { + compatible = "nvidia,tegra20-sclk", + "nvidia,tegra-clock"; + operating-points-v2 = <&sclk_dvfs_opp_table>; + clocks = <&tegra_car TEGRA20_CLK_SCLK>; + power-domains = <&pd_core>; + }; + }; }; flow-controller@60007000 { @@ -319,6 +352,8 @@ clocks = <&tegra_car TEGRA20_CLK_VDE>; reset-names = "vde", "mc"; resets = <&tegra_car 61>, <&mc TEGRA20_MC_RESET_VDE>; + operating-points-v2 = <&vde_dvfs_opp_table>; + power-domains = <&pd_vde>; }; apbmisc@70000800 { @@ -460,6 +495,8 @@ reset-names = "nand"; assigned-clocks = <&tegra_car TEGRA20_CLK_NDFLASH>; 
assigned-clock-rates = <150000000>; + operating-points-v2 = <&ndflash_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -473,6 +510,8 @@ clock-names = "gmi"; resets = <&tegra_car 42>; reset-names = "gmi"; + operating-points-v2 = <&nor_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -643,6 +682,52 @@ clocks = <&tegra_car TEGRA20_CLK_PCLK>, <&clk32k_in>; clock-names = "pclk", "clk32k_in"; #clock-cells = <1>; + + pd_core: core-domain { + operating-points-v2 = <&core_opp_table>; + #power-domain-cells = <0>; + }; + + powergates { + pd_3d: 3d { + clocks = <&tegra_car TEGRA20_CLK_GR3D>; + resets = <&mc TEGRA20_MC_RESET_3D>, + <&tegra_car TEGRA20_CLK_GR3D>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_venc: venc { + clocks = <&tegra_car TEGRA20_CLK_ISP>, + <&tegra_car TEGRA20_CLK_VI>, + <&tegra_car TEGRA20_CLK_CSI>; + resets = <&mc TEGRA20_MC_RESET_ISP>, + <&mc TEGRA20_MC_RESET_VI>, + <&tegra_car TEGRA20_CLK_ISP>, + <&tegra_car 20 /* VI */>, + <&tegra_car TEGRA20_CLK_CSI>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_vde: vdec { + clocks = <&tegra_car TEGRA20_CLK_VDE>; + resets = <&mc TEGRA20_MC_RESET_VDE>, + <&tegra_car TEGRA20_CLK_VDE>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_mpe: mpe { + clocks = <&tegra_car TEGRA20_CLK_MPE>; + resets = <&mc TEGRA20_MC_RESET_MPEA>, + <&mc TEGRA20_MC_RESET_MPEB>, + <&mc TEGRA20_MC_RESET_MPEC>, + <&tegra_car TEGRA20_CLK_MPE>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + }; }; mc: memory-controller@7000f000 { @@ -662,6 +747,7 @@ reg = <0x7000f400 0x400>; interrupts = ; clocks = <&tegra_car TEGRA20_CLK_EMC>; + power-domains = <&pd_core>; #address-cells = <1>; #size-cells = <0>; #interconnect-cells = <0>; @@ -712,6 +798,9 @@ <&tegra_car 72>, <&tegra_car 74>; reset-names = "pex", "afi", "pcie_x"; + operating-points-v2 = <&pcie_dvfs_opp_table>; + power-domains = <&pd_core>; + status = 
"disabled"; pci@1,0 { @@ -754,6 +843,8 @@ reset-names = "usb"; nvidia,needs-double-reset; nvidia,phy = <&phy1>; + operating-points-v2 = <&usbd_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -791,6 +882,8 @@ resets = <&tegra_car 58>; reset-names = "usb"; nvidia,phy = <&phy2>; + operating-points-v2 = <&usb2_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -817,6 +910,8 @@ resets = <&tegra_car 59>; reset-names = "usb"; nvidia,phy = <&phy3>; + operating-points-v2 = <&usb3_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -851,6 +946,8 @@ clock-names = "sdhci"; resets = <&tegra_car 14>; reset-names = "sdhci"; + operating-points-v2 = <&sdmmc1_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -862,6 +959,8 @@ clock-names = "sdhci"; resets = <&tegra_car 9>; reset-names = "sdhci"; + operating-points-v2 = <&sdmmc2_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -873,6 +972,8 @@ clock-names = "sdhci"; resets = <&tegra_car 69>; reset-names = "sdhci"; + operating-points-v2 = <&sdmmc3_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -884,6 +985,8 @@ clock-names = "sdhci"; resets = <&tegra_car 15>; reset-names = "sdhci"; + operating-points-v2 = <&sdmmc4_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; From cea7c1c08c8c5d97c3e66cc9bac3a77f9f1744f5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 24 Nov 2020 00:52:00 +0300 Subject: [PATCH 724/851] ARM: tegra: Add OPP tables and power domains to Tegra30 device-trees Add OPP tables and power domains to all peripheral devices which support power management on Tegra30 SoC. 
Tested-by: Peter Geis # Ouya T30 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- .../tegra30-asus-nexus7-grouper-common.dtsi | 1 + arch/arm/boot/dts/tegra30-beaver.dts | 1 + arch/arm/boot/dts/tegra30-cardhu.dtsi | 1 + arch/arm/boot/dts/tegra30-colibri.dtsi | 17 +- arch/arm/boot/dts/tegra30-ouya.dts | 1 + .../arm/boot/dts/tegra30-peripherals-opp.dtsi | 1412 +++++++++++++++++ arch/arm/boot/dts/tegra30.dtsi | 156 ++ 7 files changed, 1586 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi index 9732cd6f20b7d..4f116c26f6ce2 100644 --- a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi @@ -957,6 +957,7 @@ nvidia,core-pwr-off-time = <0>; nvidia,core-power-req-active-high; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; ahub@70080000 { diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts index e159feeedef72..b54cbb24c4d33 100644 --- a/arch/arm/boot/dts/tegra30-beaver.dts +++ b/arch/arm/boot/dts/tegra30-beaver.dts @@ -1915,6 +1915,7 @@ nvidia,core-pwr-off-time = <0>; nvidia,core-power-req-active-high; nvidia,sys-clock-req-active-high; + core-supply = <&core_vdd_reg>; }; ahub@70080000 { diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi index 448f1397e64a9..b2bba923eb93b 100644 --- a/arch/arm/boot/dts/tegra30-cardhu.dtsi +++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi @@ -391,6 +391,7 @@ nvidia,core-pwr-off-time = <0>; nvidia,core-power-req-active-high; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; ahub@70080000 { diff --git a/arch/arm/boot/dts/tegra30-colibri.dtsi b/arch/arm/boot/dts/tegra30-colibri.dtsi index 413e35215804b..0627b64f044d1 100644 --- a/arch/arm/boot/dts/tegra30-colibri.dtsi +++ b/arch/arm/boot/dts/tegra30-colibri.dtsi @@ -765,9 +765,14 @@ 
vddctrl_reg: vddctrl { regulator-name = "+V1.0_VDD_CPU"; - regulator-min-microvolt = <1150000>; - regulator-max-microvolt = <1150000>; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1250000>; + regulator-coupled-with = <&vdd_core>; + regulator-coupled-max-spread = <300000>; + regulator-max-step-microvolt = <100000>; regulator-always-on; + + nvidia,tegra-cpu-regulator; }; reg_1v8_vio: vio { @@ -890,18 +895,23 @@ }; /* SW: +V1.2_VDD_CORE */ - regulator@60 { + vdd_core: regulator@60 { compatible = "ti,tps62362"; reg = <0x60>; regulator-name = "tps62362-vout"; regulator-min-microvolt = <900000>; regulator-max-microvolt = <1400000>; + regulator-coupled-with = <&vddctrl_reg>; + regulator-coupled-max-spread = <300000>; + regulator-max-step-microvolt = <100000>; regulator-boot-on; regulator-always-on; ti,vsel0-state-low; /* VSEL1: EN_CORE_DVFS_N low for DVFS */ ti,vsel1-state-low; + + nvidia,tegra-core-regulator; }; }; @@ -914,6 +924,7 @@ nvidia,core-pwr-off-time = <0>; nvidia,core-power-req-active-high; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; /* Set DEV_OFF bit in DCDC control register of TPS65911 PMIC */ i2c-thermtrip { diff --git a/arch/arm/boot/dts/tegra30-ouya.dts b/arch/arm/boot/dts/tegra30-ouya.dts index 90db5ff72537b..2a79cd4662b1b 100644 --- a/arch/arm/boot/dts/tegra30-ouya.dts +++ b/arch/arm/boot/dts/tegra30-ouya.dts @@ -274,6 +274,7 @@ nvidia,core-pwr-off-time = <458>; nvidia,core-power-req-active-high; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; mc_timings: memory-controller@7000f000 { diff --git a/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi b/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi index 2c97803197257..788dcac2079ee 100644 --- a/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi +++ b/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi @@ -1,6 +1,56 @@ // SPDX-License-Identifier: GPL-2.0 / { + core_opp_table: core-power-domain-opp-table { + compatible = "operating-points-v2"; + opp-shared; + + 
core_opp_950: opp@950000 { + opp-microvolt = <950000 950000 1350000>; + opp-level = <950000>; + }; + + core_opp_1000: opp@1000000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-level = <1000000>; + }; + + core_opp_1050: opp@1050000 { + opp-microvolt = <1050000 1050000 1350000>; + opp-level = <1050000>; + }; + + core_opp_1100: opp@1100000 { + opp-microvolt = <1100000 1100000 1350000>; + opp-level = <1100000>; + }; + + core_opp_1150: opp@1150000 { + opp-microvolt = <1150000 1150000 1350000>; + opp-level = <1150000>; + }; + + core_opp_1200: opp@1200000 { + opp-microvolt = <1200000 1200000 1350000>; + opp-level = <1200000>; + }; + + core_opp_1250: opp@1250000 { + opp-microvolt = <1250000 1250000 1350000>; + opp-level = <1250000>; + }; + + core_opp_1300: opp@1300000 { + opp-microvolt = <1300000 1300000 1350000>; + opp-level = <1300000>; + }; + + core_opp_1350: opp@1350000 { + opp-microvolt = <1350000 1350000 1350000>; + opp-level = <1350000>; + }; + }; + emc_icc_dvfs_opp_table: emc-dvfs-opp-table { compatible = "operating-points-v2"; @@ -8,126 +58,147 @@ opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp@12750000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp@12750000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@25500000,950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp@25500000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp@25500000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0008>; + required-opps = 
<&core_opp_1250>; }; opp@27000000,950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp@27000000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp@27000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@51000000,950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp@51000000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp@51000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@54000000,950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp@54000000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp@54000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@102000000,950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp@102000000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp@102000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@108000000,1000 { opp-microvolt = 
<1000000 1000000 1350000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; }; opp@108000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@204000000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; opp-suspend; }; @@ -135,6 +206,7 @@ opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; opp-suspend; }; @@ -142,126 +214,147 @@ opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <333500000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; }; opp@333500000,1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <333500000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; }; opp@333500000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <333500000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@375000000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; }; opp@375000000,1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; }; opp@375000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@400000000,1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; }; opp@400000000,1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; }; opp@400000000,1250 { opp-microvolt = 
<1250000 1250000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@416000000,1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <416000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; }; opp@416000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <416000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@450000000,1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <450000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; }; opp@450000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <450000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@533000000,1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <533000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; }; opp@533000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <533000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@625000000,1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <625000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; }; opp@625000000,1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <625000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp@667000000,1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <667000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; }; opp@750000000,1300 { opp-microvolt = <1300000 1300000 1350000>; opp-hz = /bits/ 64 <750000000>; opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; }; opp@800000000,1300 { opp-microvolt = <1300000 1300000 1350000>; opp-hz = /bits/ 64 <800000000>; opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; }; opp@900000000,1350 { opp-microvolt = <1350000 1350000 1350000>; 
opp-hz = /bits/ 64 <900000000>; opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; }; }; @@ -383,4 +476,1323 @@ opp-peak-kBps = <7200000>; }; }; + + gr2d_dvfs_opp_table: gr2d-opp-table { + compatible = "operating-points-v2"; + + opp@267000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp@285000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>; + }; + + opp@304000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp@332000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp@361000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp@380000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp@408000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp@416000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp@446000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp@484000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>; + }; 
+ + opp@520000000,1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp@600000000,1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + gr3d_dvfs_opp_table: gr3d-opp-table { + compatible = "operating-points-v2"; + + opp@234000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <234000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1000>, <&core_opp_1000>; + }; + + opp@247000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>, <&core_opp_1000>; + }; + + opp@285000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>, <&core_opp_1050>; + }; + + opp@304000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>, <&core_opp_1050>; + }; + + opp@332000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>, <&core_opp_1100>; + }; + + opp@361000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>, <&core_opp_1100>; + }; + + opp@380000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>, <&core_opp_1150>; + }; + + opp@408000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>, <&core_opp_1150>; + }; + + 
opp@416000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>, <&core_opp_1200>; + }; + + opp@446000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>, <&core_opp_1200>; + }; + + opp@484000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>, <&core_opp_1250>; + }; + + opp@520000000,1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>, <&core_opp_1300>; + }; + + opp@600000000,1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>, <&core_opp_1350>; + }; + }; + + afi_dvfs_opp_table: afi-opp-table { + compatible = "operating-points-v2"; + + opp@250000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <250000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + cve_dvfs_opp_table: cve-opp-table { + compatible = "operating-points-v2"; + + opp@297000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <297000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + }; + + disp1_dvfs_opp_table: disp1-opp-table { + compatible = "operating-points-v2"; + + opp@120000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <120000000>; + opp-supported-hw = <0x0009>; + required-opps = <&core_opp_1000>; + }; + + opp@155000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <155000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; + }; + + opp@190000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + 
opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0009>; + required-opps = <&core_opp_1200>; + }; + + opp@268000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <268000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1050>; + }; + }; + + disp2_dvfs_opp_table: disp2-opp-table { + compatible = "operating-points-v2"; + + opp@120000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <120000000>; + opp-supported-hw = <0x0009>; + required-opps = <&core_opp_1000>; + }; + + opp@155000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <155000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; + }; + + opp@190000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0009>; + required-opps = <&core_opp_1200>; + }; + + opp@268000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <268000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1050>; + }; + }; + + dsia_dvfs_opp_table: dsia-opp-table { + compatible = "operating-points-v2"; + + opp@275000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <275000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + dsib_dvfs_opp_table: dsib-opp-table { + compatible = "operating-points-v2"; + + opp@275000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <275000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + epp_dvfs_opp_table: epp-opp-table { + compatible = "operating-points-v2"; + + opp@267000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp@285000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + 
required-opps = <&core_opp_1050>; + }; + + opp@304000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp@332000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp@361000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp@380000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp@408000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp@416000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp@446000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp@484000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>; + }; + + opp@520000000,1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp@600000000,1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + fuse_burn_dvfs_opp_table: fuse_burn-opp-table { + compatible = "operating-points-v2"; + + opp@26000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <26000000>; + 
opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1150>; + }; + }; + + hdmi_dvfs_opp_table: hdmi-opp-table { + compatible = "operating-points-v2"; + + opp@148500000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <148500000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + host1x_dvfs_opp_table: host1x-opp-table { + compatible = "operating-points-v2"; + + opp@152000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp@188000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <188000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1050>; + }; + + opp@222000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <222000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1100>; + }; + + opp@242000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <242000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + + opp@254000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <254000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1150>; + }; + + opp@267000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; + }; + + opp@300000000,1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + mipi_dvfs_opp_table: mipi-opp-table { + compatible = "operating-points-v2"; + + opp@60000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; + }; + }; + + mpe_dvfs_opp_table: mpe-opp-table { + compatible = "operating-points-v2"; + + 
opp@234000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <234000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1000>; + }; + + opp@247000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp@285000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>; + }; + + opp@304000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp@332000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp@361000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp@380000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp@408000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp@416000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp@446000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp@484000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>; + }; + + opp@520000000,1300 { + opp-microvolt = <1300000 1300000 
1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp@600000000,1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + sclk_dvfs_opp_table: sclk-opp-table { + compatible = "operating-points-v2"; + + opp@51000000,950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <51000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; + }; + + opp@136000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <136000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp@164000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <164000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1050>; + }; + + opp@191000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <191000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp@205000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <205000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; + }; + + opp@216000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <216000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1150>; + }; + + opp@227000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <227000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1100>; + }; + + opp@267000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; + }; + + opp@334000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <334000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1250>; + }; + + opp@378000000,1250 { + opp-microvolt = 
<1250000 1250000 1350000>; + opp-hz = /bits/ 64 <378000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + }; + + se_dvfs_opp_table: se-opp-table { + compatible = "operating-points-v2"; + + opp@267000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp@285000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>; + }; + + opp@304000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp@332000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp@361000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp@380000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp@408000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp@416000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp@446000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp@484000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1250>; + }; + + opp@520000000,1300 { 
+ opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp@600000000,1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + + opp@625000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <625000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + }; + + ndflash_dvfs_opp_table: ndflash-opp-table { + compatible = "operating-points-v2"; + + opp@120000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <120000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@200000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <200000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1150>; + }; + }; + + nor_dvfs_opp_table: nor-opp-table { + compatible = "operating-points-v2"; + + opp@108000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <108000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + + opp@115000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <115000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp@130000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <130000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1050>; + }; + + opp@133000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1150>; + }; + }; + + pcie_dvfs_opp_table: pcie-opp-table { + compatible = "operating-points-v2"; + + opp@250000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <250000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; 
+ + pll_c_dvfs_opp_table: pll_c-opp-table { + compatible = "operating-points-v2"; + + opp@533000000,950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <533000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@667000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <667000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@800000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <800000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp@1066000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <1066000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + + opp@1200000000,1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <1200000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1350>; + }; + }; + + pll_e_dvfs_opp_table: pll_e-opp-table { + compatible = "operating-points-v2"; + + opp@100000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + pll_m_dvfs_opp_table: pll_m-opp-table { + compatible = "operating-points-v2"; + + opp@533000000,950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <533000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@667000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <667000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@800000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <800000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp@1066000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <1066000000>; + opp-supported-hw = <0x000F>; + required-opps = 
<&core_opp_1200>; + }; + }; + + pwm_dvfs_opp_table: pwm-opp-table { + compatible = "operating-points-v2"; + + opp@408000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sata_dvfs_opp_table: sata-opp-table { + compatible = "operating-points-v2"; + + opp@216000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <216000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sata_oob_dvfs_opp_table: sata_oob-opp-table { + compatible = "operating-points-v2"; + + opp@216000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <216000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sbc1_dvfs_opp_table: sbc1-opp-table { + compatible = "operating-points-v2"; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@60000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp@100000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sbc2_dvfs_opp_table: sbc2-opp-table { + compatible = "operating-points-v2"; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@60000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp@100000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + 
required-opps = <&core_opp_1200>; + }; + }; + + sbc3_dvfs_opp_table: sbc3-opp-table { + compatible = "operating-points-v2"; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@60000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp@100000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sbc4_dvfs_opp_table: sbc4-opp-table { + compatible = "operating-points-v2"; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@60000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp@100000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sbc5_dvfs_opp_table: sbc5-opp-table { + compatible = "operating-points-v2"; + + opp@52000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@60000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp@100000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sbc6_dvfs_opp_table: sbc6-opp-table { + compatible = "operating-points-v2"; + + opp@52000000,1000 { + opp-microvolt = 
<1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp@60000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp@100000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sdmmc1_dvfs_opp_table: sdmmc1-opp-table { + compatible = "operating-points-v2"; + + opp@104000000,950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <104000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@208000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <208000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sdmmc3_dvfs_opp_table: sdmmc3-opp-table { + compatible = "operating-points-v2"; + + opp@104000000,950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <104000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp@208000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <208000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + spdif_out_dvfs_opp_table: spdif-out-opp-table { + compatible = "operating-points-v2"; + + opp@26000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <26000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + tvdac_dvfs_opp_table: tvdac-opp-table { + compatible = "operating-points-v2"; + + opp@220000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <220000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + tvo_dvfs_opp_table: tvo-opp-table { + compatible = "operating-points-v2"; + + opp@297000000,1050 { + 
opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <297000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + }; + + usbd_dvfs_opp_table: usbd-opp-table { + compatible = "operating-points-v2"; + + opp@480000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + usb2_dvfs_opp_table: usb2-opp-table { + compatible = "operating-points-v2"; + + opp@480000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + usb3_dvfs_opp_table: usb3-opp-table { + compatible = "operating-points-v2"; + + opp@480000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + vde_dvfs_opp_table: vde-opp-table { + compatible = "operating-points-v2"; + + opp@228000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <228000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1000>; + }; + + opp@247000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp@275000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <275000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>; + }; + + opp@304000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp@332000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp@352000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <352000000>; 
+ opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp@380000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp@400000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <400000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp@416000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp@437000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <437000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp@484000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>; + }; + + opp@520000000,1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp@600000000,1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + vi_dvfs_opp_table: vi-opp-table { + compatible = "operating-points-v2"; + + opp@216000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <216000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1000>; + }; + + opp@219000000,1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <219000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp@267000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1050>; + }; + + opp@285000000,1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = 
/bits/ 64 <285000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1050>; + }; + + opp@300000000,1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1100>; + }; + + opp@371000000,1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <371000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1150>; + }; + + opp@409000000,1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <409000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; + }; + + opp@425000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <425000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1250>; + }; + + opp@470000000,1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <470000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + }; }; diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index eaf4951d9ff8e..81bce6ce6729e 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -55,6 +55,8 @@ <&tegra_car 72>, <&tegra_car 74>; reset-names = "pex", "afi", "pcie_x"; + operating-points-v2 = <&pcie_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; pci@1,0 { @@ -124,6 +126,8 @@ resets = <&tegra_car 28>; reset-names = "host1x"; iommus = <&mc TEGRA_SWGROUP_HC>; + operating-points-v2 = <&host1x_dvfs_opp_table>; + power-domains = <&pd_heg>; #address-cells = <1>; #size-cells = <1>; @@ -137,6 +141,7 @@ clocks = <&tegra_car TEGRA30_CLK_MPE>; resets = <&tegra_car 60>; reset-names = "mpe"; + power-domains = <&pd_mpe>; iommus = <&mc TEGRA_SWGROUP_MPE>; }; @@ -148,6 +153,7 @@ clocks = <&tegra_car TEGRA30_CLK_VI>; resets = <&tegra_car 20>; reset-names = "vi"; + power-domains = <&pd_venc>; iommus = <&mc TEGRA_SWGROUP_VI>; }; @@ -159,6 +165,7 @@ clocks = <&tegra_car TEGRA30_CLK_EPP>; 
resets = <&tegra_car 19>; reset-names = "epp"; + power-domains = <&pd_heg>; iommus = <&mc TEGRA_SWGROUP_EPP>; }; @@ -170,6 +177,7 @@ clocks = <&tegra_car TEGRA30_CLK_ISP>; resets = <&tegra_car 23>; reset-names = "isp"; + power-domains = <&pd_venc>; iommus = <&mc TEGRA_SWGROUP_ISP>; }; @@ -181,6 +189,8 @@ clocks = <&tegra_car TEGRA30_CLK_GR2D>; resets = <&tegra_car 21>; reset-names = "2d"; + operating-points-v2 = <&gr2d_dvfs_opp_table>; + power-domains = <&pd_heg>; iommus = <&mc TEGRA_SWGROUP_G2>; }; @@ -194,6 +204,9 @@ resets = <&tegra_car 24>, <&tegra_car 98>; reset-names = "3d", "3d2"; + operating-points-v2 = <&gr3d_dvfs_opp_table>; + power-domains = <&pd_3d0>, <&pd_3d1>; + power-domain-names = "3d0", "3d1"; iommus = <&mc TEGRA_SWGROUP_NV>, <&mc TEGRA_SWGROUP_NV2>; @@ -208,6 +221,8 @@ clock-names = "dc", "parent"; resets = <&tegra_car 27>; reset-names = "dc"; + operating-points-v2 = <&disp1_dvfs_opp_table>; + power-domains = <&pd_core>; iommus = <&mc TEGRA_SWGROUP_DC>; @@ -238,6 +253,8 @@ clock-names = "dc", "parent"; resets = <&tegra_car 26>; reset-names = "dc"; + operating-points-v2 = <&disp2_dvfs_opp_table>; + power-domains = <&pd_core>; iommus = <&mc TEGRA_SWGROUP_DCB>; @@ -268,6 +285,8 @@ clock-names = "hdmi", "parent"; resets = <&tegra_car 51>; reset-names = "hdmi"; + operating-points-v2 = <&hdmi_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -276,6 +295,8 @@ reg = <0x542c0000 0x00040000>; interrupts = ; clocks = <&tegra_car TEGRA30_CLK_TVO>; + operating-points-v2 = <&tvo_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -287,6 +308,8 @@ clock-names = "dsi", "parent"; resets = <&tegra_car 48>; reset-names = "dsi"; + operating-points-v2 = <&dsia_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -298,6 +321,8 @@ clock-names = "dsi", "parent"; resets = <&tegra_car 84>; reset-names = "dsi"; + operating-points-v2 = <&dsib_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; 
}; }; @@ -358,6 +383,40 @@ reg = <0x60006000 0x1000>; #clock-cells = <1>; #reset-cells = <1>; + + tegra-clocks { + sclk { + compatible = "nvidia,tegra30-sclk", + "nvidia,tegra-clock"; + operating-points-v2 = <&sclk_dvfs_opp_table>; + clocks = <&tegra_car TEGRA30_CLK_SCLK>; + power-domains = <&pd_core>; + }; + + pll_c { + compatible = "nvidia,tegra30-pllc", + "nvidia,tegra-clock"; + operating-points-v2 = <&pll_c_dvfs_opp_table>; + clocks = <&tegra_car TEGRA30_CLK_PLL_C>; + power-domains = <&pd_core>; + }; + + pll_e { + compatible = "nvidia,tegra30-plle", + "nvidia,tegra-clock"; + operating-points-v2 = <&pll_e_dvfs_opp_table>; + clocks = <&tegra_car TEGRA30_CLK_PLL_E>; + power-domains = <&pd_core>; + }; + + pll_m { + compatible = "nvidia,tegra30-pllm", + "nvidia,tegra-clock"; + operating-points-v2 = <&pll_m_dvfs_opp_table>; + clocks = <&tegra_car TEGRA30_CLK_PLL_M>; + power-domains = <&pd_core>; + }; + }; }; flow-controller@60007000 { @@ -468,6 +527,8 @@ reset-names = "vde", "mc"; resets = <&tegra_car 61>, <&mc TEGRA30_MC_RESET_VDE>; iommus = <&mc TEGRA_SWGROUP_VDE>; + operating-points-v2 = <&vde_dvfs_opp_table>; + power-domains = <&pd_vde>; }; apbmisc@70000800 { @@ -565,6 +626,8 @@ clock-names = "gmi"; resets = <&tegra_car 42>; reset-names = "gmi"; + operating-points-v2 = <&nor_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -575,6 +638,8 @@ clocks = <&tegra_car TEGRA30_CLK_PWM>; resets = <&tegra_car 17>; reset-names = "pwm"; + operating-points-v2 = <&pwm_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -676,6 +741,8 @@ reset-names = "spi"; dmas = <&apbdma 15>, <&apbdma 15>; dma-names = "rx", "tx"; + operating-points-v2 = <&sbc1_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -690,6 +757,8 @@ reset-names = "spi"; dmas = <&apbdma 16>, <&apbdma 16>; dma-names = "rx", "tx"; + operating-points-v2 = <&sbc2_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -704,6 +773,8 @@ 
reset-names = "spi"; dmas = <&apbdma 17>, <&apbdma 17>; dma-names = "rx", "tx"; + operating-points-v2 = <&sbc3_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -718,6 +789,8 @@ reset-names = "spi"; dmas = <&apbdma 18>, <&apbdma 18>; dma-names = "rx", "tx"; + operating-points-v2 = <&sbc4_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -732,6 +805,8 @@ reset-names = "spi"; dmas = <&apbdma 27>, <&apbdma 27>; dma-names = "rx", "tx"; + operating-points-v2 = <&sbc5_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -746,6 +821,8 @@ reset-names = "spi"; dmas = <&apbdma 28>, <&apbdma 28>; dma-names = "rx", "tx"; + operating-points-v2 = <&sbc6_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -765,6 +842,72 @@ clocks = <&tegra_car TEGRA30_CLK_PCLK>, <&clk32k_in>; clock-names = "pclk", "clk32k_in"; #clock-cells = <1>; + + pd_core: core-domain { + operating-points-v2 = <&core_opp_table>; + #power-domain-cells = <0>; + }; + + powergates { + pd_3d0: 3d0 { + clocks = <&tegra_car TEGRA30_CLK_GR3D>; + resets = <&mc TEGRA30_MC_RESET_3D>, + <&tegra_car TEGRA30_CLK_GR3D>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_3d1: 3d1 { + clocks = <&tegra_car TEGRA30_CLK_GR3D2>; + resets = <&mc TEGRA30_MC_RESET_3D2>, + <&tegra_car TEGRA30_CLK_GR3D2>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_venc: venc { + clocks = <&tegra_car TEGRA30_CLK_ISP>, + <&tegra_car TEGRA30_CLK_VI>, + <&tegra_car TEGRA30_CLK_CSI>; + resets = <&mc TEGRA30_MC_RESET_ISP>, + <&mc TEGRA30_MC_RESET_VI>, + <&tegra_car TEGRA30_CLK_ISP>, + <&tegra_car 20 /* VI */>, + <&tegra_car TEGRA30_CLK_CSI>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_vde: vdec { + clocks = <&tegra_car TEGRA30_CLK_VDE>; + resets = <&mc TEGRA30_MC_RESET_VDE>, + <&tegra_car TEGRA30_CLK_VDE>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_mpe: mpe { + clocks 
= <&tegra_car TEGRA30_CLK_MPE>; + resets = <&mc TEGRA30_MC_RESET_MPE>, + <&tegra_car TEGRA30_CLK_MPE>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_heg: heg { + clocks = <&tegra_car TEGRA30_CLK_GR2D>, + <&tegra_car TEGRA30_CLK_EPP>, + <&tegra_car TEGRA30_CLK_HOST1X>; + resets = <&mc TEGRA30_MC_RESET_2D>, + <&mc TEGRA30_MC_RESET_EPP>, + <&mc TEGRA30_MC_RESET_HC>, + <&tegra_car TEGRA30_CLK_GR2D>, + <&tegra_car TEGRA30_CLK_EPP>, + <&tegra_car TEGRA30_CLK_HOST1X>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + }; }; mc: memory-controller@7000f000 { @@ -785,6 +928,7 @@ reg = <0x7000f400 0x400>; interrupts = ; clocks = <&tegra_car TEGRA30_CLK_EMC>; + power-domains = <&pd_core>; nvidia,memory-controller = <&mc>; operating-points-v2 = <&emc_icc_dvfs_opp_table>; @@ -799,6 +943,8 @@ clock-names = "fuse"; resets = <&tegra_car 39>; reset-names = "fuse"; + operating-points-v2 = <&fuse_burn_dvfs_opp_table>; + power-domains = <&pd_core>; }; tsensor: tsensor@70014000 { @@ -921,6 +1067,8 @@ clock-names = "sdhci"; resets = <&tegra_car 14>; reset-names = "sdhci"; + operating-points-v2 = <&sdmmc1_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -943,6 +1091,8 @@ clock-names = "sdhci"; resets = <&tegra_car 69>; reset-names = "sdhci"; + operating-points-v2 = <&sdmmc3_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -967,6 +1117,8 @@ reset-names = "usb"; nvidia,needs-double-reset; nvidia,phy = <&phy1>; + operating-points-v2 = <&usbd_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -1006,6 +1158,8 @@ resets = <&tegra_car 58>; reset-names = "usb"; nvidia,phy = <&phy2>; + operating-points-v2 = <&usb2_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; @@ -1044,6 +1198,8 @@ resets = <&tegra_car 59>; reset-names = "usb"; nvidia,phy = <&phy3>; + operating-points-v2 = <&usb3_dvfs_opp_table>; + power-domains = <&pd_core>; status = "disabled"; }; From 
6801ed71b703fb90ff62c976f816faa6997c4751 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 20 Feb 2018 17:07:24 +0300 Subject: [PATCH 725/851] ARM: tegra: Add Memory Client resets to Tegra20 GR2D, GR3D and Host1x Memory access must be blocked before hardware reset is asserted and before power is gated, otherwise a serious hardware fault is inevitable. Add reset for memory clients to the GR2D, GR3D and Host1x nodes. Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra20.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 5c74cc76b5e3d..2cb31bdd9eeab 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -40,8 +40,8 @@ interrupt-names = "syncpt", "host1x"; clocks = <&tegra_car TEGRA20_CLK_HOST1X>; clock-names = "host1x"; - resets = <&tegra_car 28>; - reset-names = "host1x"; + resets = <&tegra_car 28>, <&mc TEGRA20_MC_RESET_HC>; + reset-names = "host1x", "mc"; operating-points-v2 = <&host1x_dvfs_opp_table>; power-domains = <&pd_core>; @@ -98,8 +98,8 @@ reg = <0x54140000 0x00040000>; interrupts = ; clocks = <&tegra_car TEGRA20_CLK_GR2D>; - resets = <&tegra_car 21>; - reset-names = "2d"; + resets = <&tegra_car 21>, <&mc TEGRA20_MC_RESET_2D>; + reset-names = "2d", "mc"; operating-points-v2 = <&gr2d_dvfs_opp_table>; power-domains = <&pd_core>; }; @@ -108,8 +108,8 @@ compatible = "nvidia,tegra20-gr3d"; reg = <0x54180000 0x00040000>; clocks = <&tegra_car TEGRA20_CLK_GR3D>; - resets = <&tegra_car 24>; - reset-names = "3d"; + resets = <&tegra_car 24>, <&mc TEGRA20_MC_RESET_3D>; + reset-names = "3d", "mc"; operating-points-v2 = <&gr3d_dvfs_opp_table>; power-domains = <&pd_3d>; }; From 39e6993df521148c77107acdad169646f8b1f981 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 20 Feb 2018 17:10:52 +0300 Subject: [PATCH 726/851] ARM: tegra: Add Memory 
Client resets to Tegra30 GR2D, GR3D and Host1x Memory access must be blocked before hardware reset is asserted and before power is gated, otherwise a serious hardware fault is inevitable. Add reset for memory clients to the GR2D, GR3D and Host1x nodes. Tested-by: Peter Geis # Ouya T30 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra30.dtsi | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index 81bce6ce6729e..35ed517efdcf3 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -123,8 +123,8 @@ interrupt-names = "syncpt", "host1x"; clocks = <&tegra_car TEGRA30_CLK_HOST1X>; clock-names = "host1x"; - resets = <&tegra_car 28>; - reset-names = "host1x"; + resets = <&tegra_car 28>, <&mc TEGRA30_MC_RESET_HC>; + reset-names = "host1x", "mc"; iommus = <&mc TEGRA_SWGROUP_HC>; operating-points-v2 = <&host1x_dvfs_opp_table>; power-domains = <&pd_heg>; @@ -187,8 +187,8 @@ reg = <0x54140000 0x00040000>; interrupts = ; clocks = <&tegra_car TEGRA30_CLK_GR2D>; - resets = <&tegra_car 21>; - reset-names = "2d"; + resets = <&tegra_car 21>, <&mc TEGRA30_MC_RESET_2D>; + reset-names = "2d", "mc"; operating-points-v2 = <&gr2d_dvfs_opp_table>; power-domains = <&pd_heg>; @@ -202,8 +202,10 @@ <&tegra_car TEGRA30_CLK_GR3D2>; clock-names = "3d", "3d2"; resets = <&tegra_car 24>, - <&tegra_car 98>; - reset-names = "3d", "3d2"; + <&tegra_car 98>, + <&mc TEGRA30_MC_RESET_3D>, + <&mc TEGRA30_MC_RESET_3D2>; + reset-names = "3d", "3d2", "mc", "mc2"; operating-points-v2 = <&gr3d_dvfs_opp_table>; power-domains = <&pd_3d0>, <&pd_3d1>; power-domain-names = "3d0", "3d1"; From 88f82f01faa07c7ec3b57c669d620e42ae46efaa Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 23:46:59 +0300 Subject: [PATCH 727/851] soc/tegra: pmc: Enable core domain support on Tegra20 and Tegra30 All device drivers got runtime PM and OPP support. 
Flip the core domain support status for Tegra20 and Tegra30 SoCs. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/pmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index c3032d0d44a4d..59ae7277fddaa 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -3189,7 +3189,7 @@ static void tegra20_pmc_setup_irq_polarity(struct tegra_pmc *pmc, } static const struct tegra_pmc_soc tegra20_pmc_soc = { - .supports_core_domain = false, + .supports_core_domain = true, .num_powergates = ARRAY_SIZE(tegra20_powergates), .powergates = tegra20_powergates, .num_cpu_powergates = 0, @@ -3250,7 +3250,7 @@ static const char * const tegra30_reset_sources[] = { }; static const struct tegra_pmc_soc tegra30_pmc_soc = { - .supports_core_domain = false, + .supports_core_domain = true, .num_powergates = ARRAY_SIZE(tegra30_powergates), .powergates = tegra30_powergates, .num_cpu_powergates = ARRAY_SIZE(tegra30_cpu_powergates), From db8ec5b6049530da2039d1923d814c295599c6a1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 6 Apr 2021 23:02:50 +0300 Subject: [PATCH 728/851] iommu/tegra-smmu: Change sysfs directory name Change sysfs directory name to "smmu", which is a much more obvious name than the generic name of the memory controller device-tree node.
Signed-off-by: Dmitry Osipenko --- drivers/iommu/tegra-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 0a281833f6117..093c270b9245c 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -1141,7 +1141,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, tegra_smmu_ahb_enable(); - err = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, dev_name(dev)); + err = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, "smmu"); if (err) return ERR_PTR(err); From 67f2f8c623eb0cfffd2c096ba511f05698b96bcc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 26 Mar 2021 21:04:35 +0300 Subject: [PATCH 729/851] iommu/tegra-smmu: Defer attachment of display clients All consumer-grade Android and Chromebook devices show a splash screen on boot and then display is left enabled when kernel is booted. This behaviour is unacceptable in the case of implicit IOMMU domains to which devices are attached during kernel boot since devices, like display controller, may perform DMA at that time. We can work around this problem by deferring the enabling of SMMU translation for specific devices, like a display controller, until the first IOMMU mapping is created, which works well enough in practice because by that time h/w is already stopped.
Signed-off-by: Dmitry Osipenko --- drivers/iommu/tegra-smmu.c | 71 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 093c270b9245c..e5f2f836effa3 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -60,6 +60,8 @@ struct tegra_smmu_as { dma_addr_t pd_dma; unsigned id; u32 attr; + bool display_attached[2]; + bool attached_devices_need_sync; }; static struct tegra_smmu_as *to_smmu_as(struct iommu_domain *dom) @@ -78,6 +80,10 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) return readl(smmu->regs + offset); } +/* all Tegra SoCs use the same group IDs for displays */ +#define SMMU_SWGROUP_DC 1 +#define SMMU_SWGROUP_DCB 2 + #define SMMU_CONFIG 0x010 #define SMMU_CONFIG_ENABLE (1 << 0) @@ -253,6 +259,20 @@ static inline void smmu_flush(struct tegra_smmu *smmu) smmu_readl(smmu, SMMU_PTB_ASID); } +static int smmu_swgroup_to_display_id(unsigned int swgroup) +{ + switch (swgroup) { + case SMMU_SWGROUP_DC: + return 0; + + case SMMU_SWGROUP_DCB: + return 1; + + default: + return -1; + } +} + static int tegra_smmu_alloc_asid(struct tegra_smmu *smmu, unsigned int *idp) { unsigned long id; @@ -318,6 +338,9 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) as->domain.geometry.aperture_end = 0xffffffff; as->domain.geometry.force_aperture = true; + /* work around implicit attachment of devices with active DMA */ + as->attached_devices_need_sync = true; + return &as->domain; } @@ -410,6 +433,31 @@ static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup, } } +static void tegra_smmu_attach_deferred_devices(struct iommu_domain *domain) +{ + struct tegra_smmu_as *as = to_smmu_as(domain); + + if (!as->attached_devices_need_sync) + return; + + if (as->display_attached[0] || as->display_attached[1]) { + struct tegra_smmu *smmu = as->smmu; + unsigned int i; + + for (i = 0; i < smmu->soc->num_clients; 
i++) { + const struct tegra_mc_client *client = &smmu->soc->clients[i]; + const int disp_id = smmu_swgroup_to_display_id(client->swgroup); + + if (disp_id < 0 || !as->display_attached[disp_id]) + continue; + + tegra_smmu_enable(smmu, client->swgroup, as->id); + } + } + + as->attached_devices_need_sync = false; +} + static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, struct tegra_smmu_as *as) { @@ -495,10 +543,26 @@ static int tegra_smmu_attach_dev(struct iommu_domain *domain, return -ENOENT; for (index = 0; index < fwspec->num_ids; index++) { + const unsigned int swgroup = fwspec->ids[index]; + const int disp_id = smmu_swgroup_to_display_id(swgroup); + err = tegra_smmu_as_prepare(smmu, as); if (err) goto disable; + if (disp_id >= 0) { + as->display_attached[disp_id] = true; + + /* + * In most cases display is performing DMA before + * driver is initialized by showing a splash screen + * and in this case we should defer the h/w attachment + * until the first mapping is created by display driver. 
+ */ + if (as->attached_devices_need_sync) + continue; + } + tegra_smmu_enable(smmu, fwspec->ids[index], as->id); } @@ -527,6 +591,12 @@ static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *de return; for (index = 0; index < fwspec->num_ids; index++) { + const unsigned int swgroup = fwspec->ids[index]; + const int disp_id = smmu_swgroup_to_display_id(swgroup); + + if (disp_id >= 0) + as->display_attached[disp_id] = false; + tegra_smmu_disable(smmu, fwspec->ids[index], as->id); tegra_smmu_as_unprepare(smmu, as); } @@ -762,6 +832,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, int ret; spin_lock_irqsave(&as->lock, flags); + tegra_smmu_attach_deferred_devices(domain); ret = __tegra_smmu_map(domain, iova, paddr, size, prot, gfp, &flags); spin_unlock_irqrestore(&as->lock, flags); From 5eef675dc644f82610a236dfdc3a8cd24f71fa54 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 26 Mar 2021 21:19:00 +0300 Subject: [PATCH 730/851] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook The previous commit fixes problem where display client was attaching too early to IOMMU during kernel boot in a multi-platform kernel configuration which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore, revert it. 
Signed-off-by: Dmitry Osipenko --- drivers/iommu/tegra-smmu.c | 71 +------------------------------------- 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index e5f2f836effa3..9fd9180f31bd1 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -869,69 +869,10 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova); } -static struct tegra_smmu *tegra_smmu_find(struct device_node *np) -{ - struct platform_device *pdev; - struct tegra_mc *mc; - - pdev = of_find_device_by_node(np); - if (!pdev) - return NULL; - - mc = platform_get_drvdata(pdev); - if (!mc) - return NULL; - - return mc->smmu; -} - -static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, - struct of_phandle_args *args) -{ - const struct iommu_ops *ops = smmu->iommu.ops; - int err; - - err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); - if (err < 0) { - dev_err(dev, "failed to initialize fwspec: %d\n", err); - return err; - } - - err = ops->of_xlate(dev, args); - if (err < 0) { - dev_err(dev, "failed to parse SW group ID: %d\n", err); - iommu_fwspec_free(dev); - return err; - } - - return 0; -} - static struct iommu_device *tegra_smmu_probe_device(struct device *dev) { - struct device_node *np = dev->of_node; - struct tegra_smmu *smmu = NULL; - struct of_phandle_args args; - unsigned int index = 0; - int err; - - while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index, - &args) == 0) { - smmu = tegra_smmu_find(args.np); - if (smmu) { - err = tegra_smmu_configure(smmu, dev, &args); - - if (err < 0) { - of_node_put(args.np); - return ERR_PTR(err); - } - } - - of_node_put(args.np); - index++; - } + struct tegra_smmu *smmu = dev_iommu_priv_get(dev); - smmu = dev_iommu_priv_get(dev); if (!smmu) return ERR_PTR(-ENODEV); @@ -1158,16 +1099,6 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, if (!smmu) 
return ERR_PTR(-ENOMEM); - /* - * This is a bit of a hack. Ideally we'd want to simply return this - * value. However the IOMMU registration process will attempt to add - * all devices to the IOMMU when bus_set_iommu() is called. In order - * not to rely on global variables to track the IOMMU instance, we - * set it here so that it can be looked up from the .probe_device() - * callback via the IOMMU device's .drvdata field. - */ - mc->smmu = smmu; - size = BITS_TO_LONGS(soc->num_asids) * sizeof(long); smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL); From 621d65c22987059db6ca0aec1d508b25a40686b3 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 16 Jun 2021 22:04:14 +0300 Subject: [PATCH 731/851] thermal/drivers/tegra: Add driver for Tegra30 thermal sensor All NVIDIA Tegra30 SoCs have a two-channel on-chip sensor unit which monitors temperature and voltage of the SoC. Sensors control CPU frequency throttling, which is activated by hardware once preprogrammed temperature level is breached, they also send signal to Power Management controller to perform emergency shutdown on a critical overheat of the SoC die. Add driver for the Tegra30 TSENSOR module, exposing it as a thermal sensor. 
Tested-by: Andreas Westman Dorcsak # Asus TF700T Tested-by: Maxim Schwalm # Asus TF700T Tested-by: Svyatoslav Ryhel # Asus TF201T Tested-by: Ihor Didenko # Asus TF300T Tested-by: Ion Agorria # Asus TF201T Tested-by: Matt Merhar # Ouya Tested-by: Peter Geis # Ouya Acked-by: Thierry Reding Signed-off-by: Dmitry Osipenko Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20210616190417.32214-4-digetx@gmail.com --- drivers/thermal/tegra/Kconfig | 7 + drivers/thermal/tegra/Makefile | 1 + drivers/thermal/tegra/tegra30-tsensor.c | 673 ++++++++++++++++++++++++ 3 files changed, 681 insertions(+) create mode 100644 drivers/thermal/tegra/tegra30-tsensor.c diff --git a/drivers/thermal/tegra/Kconfig b/drivers/thermal/tegra/Kconfig index 46c2215867cd6..019e3a2eb69e4 100644 --- a/drivers/thermal/tegra/Kconfig +++ b/drivers/thermal/tegra/Kconfig @@ -18,4 +18,11 @@ config TEGRA_BPMP_THERMAL Enable this option for support for sensing system temperature of NVIDIA Tegra systems-on-chip with the BPMP coprocessor (Tegra186). +config TEGRA30_TSENSOR + tristate "Tegra30 Thermal Sensor" + depends on ARCH_TEGRA_3x_SOC || COMPILE_TEST + help + Enable this option to support thermal management of NVIDIA Tegra30 + system-on-chip. 
+ endmenu diff --git a/drivers/thermal/tegra/Makefile b/drivers/thermal/tegra/Makefile index 0f2b66edf0d27..eb27d194c5835 100644 --- a/drivers/thermal/tegra/Makefile +++ b/drivers/thermal/tegra/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_TEGRA_SOCTHERM) += tegra-soctherm.o obj-$(CONFIG_TEGRA_BPMP_THERMAL) += tegra-bpmp-thermal.o +obj-$(CONFIG_TEGRA30_TSENSOR) += tegra30-tsensor.o tegra-soctherm-y := soctherm.o soctherm-fuse.o tegra-soctherm-$(CONFIG_ARCH_TEGRA_124_SOC) += tegra124-soctherm.o diff --git a/drivers/thermal/tegra/tegra30-tsensor.c b/drivers/thermal/tegra/tegra30-tsensor.c new file mode 100644 index 0000000000000..9b6b693cbcf85 --- /dev/null +++ b/drivers/thermal/tegra/tegra30-tsensor.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tegra30 SoC Thermal Sensor driver + * + * Based on downstream HWMON driver from NVIDIA. + * Copyright (C) 2011 NVIDIA Corporation + * + * Author: Dmitry Osipenko + * Copyright (C) 2021 GRATE-DRIVER project + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../thermal_core.h" +#include "../thermal_hwmon.h" + +#define TSENSOR_SENSOR0_CONFIG0 0x0 +#define TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP BIT(0) +#define TSENSOR_SENSOR0_CONFIG0_HW_FREQ_DIV_EN BIT(1) +#define TSENSOR_SENSOR0_CONFIG0_THERMAL_RST_EN BIT(2) +#define TSENSOR_SENSOR0_CONFIG0_DVFS_EN BIT(3) +#define TSENSOR_SENSOR0_CONFIG0_INTR_OVERFLOW_EN BIT(4) +#define TSENSOR_SENSOR0_CONFIG0_INTR_HW_FREQ_DIV_EN BIT(5) +#define TSENSOR_SENSOR0_CONFIG0_INTR_THERMAL_RST_EN BIT(6) +#define TSENSOR_SENSOR0_CONFIG0_M GENMASK(23, 8) +#define TSENSOR_SENSOR0_CONFIG0_N GENMASK(31, 24) + +#define TSENSOR_SENSOR0_CONFIG1 0x8 +#define TSENSOR_SENSOR0_CONFIG1_TH1 GENMASK(15, 0) +#define TSENSOR_SENSOR0_CONFIG1_TH2 GENMASK(31, 16) + +#define TSENSOR_SENSOR0_CONFIG2 0xc +#define TSENSOR_SENSOR0_CONFIG2_TH3 
GENMASK(15, 0) + +#define TSENSOR_SENSOR0_STATUS0 0x18 +#define TSENSOR_SENSOR0_STATUS0_STATE GENMASK(2, 0) +#define TSENSOR_SENSOR0_STATUS0_INTR BIT(8) +#define TSENSOR_SENSOR0_STATUS0_CURRENT_VALID BIT(9) + +#define TSENSOR_SENSOR0_TS_STATUS1 0x1c +#define TSENSOR_SENSOR0_TS_STATUS1_CURRENT_COUNT GENMASK(31, 16) + +#define TEGRA30_FUSE_TEST_PROG_VER 0x28 + +#define TEGRA30_FUSE_TSENSOR_CALIB 0x98 +#define TEGRA30_FUSE_TSENSOR_CALIB_LOW GENMASK(15, 0) +#define TEGRA30_FUSE_TSENSOR_CALIB_HIGH GENMASK(31, 16) + +#define TEGRA30_FUSE_SPARE_BIT 0x144 + +struct tegra_tsensor; + +struct tegra_tsensor_calibration_data { + int a, b, m, n, p, r; +}; + +struct tegra_tsensor_channel { + void __iomem *regs; + unsigned int id; + struct tegra_tsensor *ts; + struct thermal_zone_device *tzd; +}; + +struct tegra_tsensor { + void __iomem *regs; + bool swap_channels; + struct clk *clk; + struct device *dev; + struct reset_control *rst; + struct tegra_tsensor_channel ch[2]; + struct tegra_tsensor_calibration_data calib; +}; + +static int tegra_tsensor_hw_enable(const struct tegra_tsensor *ts) +{ + u32 val; + int err; + + err = reset_control_assert(ts->rst); + if (err) { + dev_err(ts->dev, "failed to assert hardware reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(ts->clk); + if (err) { + dev_err(ts->dev, "failed to enable clock: %d\n", err); + return err; + } + + fsleep(1000); + + err = reset_control_deassert(ts->rst); + if (err) { + dev_err(ts->dev, "failed to deassert hardware reset: %d\n", err); + goto disable_clk; + } + + /* + * Sensors are enabled after reset by default, but not gauging + * until clock counter is programmed. 
+ * + * M: number of reference clock pulses after which every + * temperature / voltage measurement is made + * + * N: number of reference clock counts for which the counter runs + */ + val = FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_M, 12500); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_N, 255); + + /* apply the same configuration to both channels */ + writel_relaxed(val, ts->regs + 0x40 + TSENSOR_SENSOR0_CONFIG0); + writel_relaxed(val, ts->regs + 0x80 + TSENSOR_SENSOR0_CONFIG0); + + return 0; + +disable_clk: + clk_disable_unprepare(ts->clk); + + return err; +} + +static int tegra_tsensor_hw_disable(const struct tegra_tsensor *ts) +{ + int err; + + err = reset_control_assert(ts->rst); + if (err) { + dev_err(ts->dev, "failed to assert hardware reset: %d\n", err); + return err; + } + + clk_disable_unprepare(ts->clk); + + return 0; +} + +static void devm_tegra_tsensor_hw_disable(void *data) +{ + const struct tegra_tsensor *ts = data; + + tegra_tsensor_hw_disable(ts); +} + +static int tegra_tsensor_get_temp(void *data, int *temp) +{ + const struct tegra_tsensor_channel *tsc = data; + const struct tegra_tsensor *ts = tsc->ts; + int err, c1, c2, c3, c4, counter; + u32 val; + + /* + * Counter will be invalid if hardware is misprogrammed or not enough + * time passed since the time when sensor was enabled. + */ + err = readl_relaxed_poll_timeout(tsc->regs + TSENSOR_SENSOR0_STATUS0, val, + val & TSENSOR_SENSOR0_STATUS0_CURRENT_VALID, + 21 * USEC_PER_MSEC, + 21 * USEC_PER_MSEC * 50); + if (err) { + dev_err_once(ts->dev, "ch%u: counter invalid\n", tsc->id); + return err; + } + + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_TS_STATUS1); + counter = FIELD_GET(TSENSOR_SENSOR0_TS_STATUS1_CURRENT_COUNT, val); + + /* + * This shouldn't happen with a valid counter status, nevertheless + * lets verify the value since it's in a separate (from status) + * register. 
+ */ + if (counter == 0xffff) { + dev_err_once(ts->dev, "ch%u: counter overflow\n", tsc->id); + return -EINVAL; + } + + /* + * temperature = a * counter + b + * temperature = m * (temperature ^ 2) + n * temperature + p + */ + c1 = DIV_ROUND_CLOSEST(ts->calib.a * counter + ts->calib.b, 1000000); + c1 = c1 ?: 1; + c2 = DIV_ROUND_CLOSEST(ts->calib.p, c1); + c3 = c1 * ts->calib.m; + c4 = ts->calib.n; + + *temp = DIV_ROUND_CLOSEST(c1 * (c2 + c3 + c4), 1000); + + return 0; +} + +static int tegra_tsensor_temp_to_counter(const struct tegra_tsensor *ts, int temp) +{ + int c1, c2; + + c1 = DIV_ROUND_CLOSEST(ts->calib.p - temp * 1000, ts->calib.m); + c2 = -ts->calib.r - int_sqrt(ts->calib.r * ts->calib.r - c1); + + return DIV_ROUND_CLOSEST(c2 * 1000000 - ts->calib.b, ts->calib.a); +} + +static int tegra_tsensor_set_trips(void *data, int low, int high) +{ + const struct tegra_tsensor_channel *tsc = data; + const struct tegra_tsensor *ts = tsc->ts; + u32 val; + + /* + * TSENSOR doesn't trigger interrupt on the "low" temperature breach, + * hence bail out if high temperature is unspecified. 
+ */ + if (high == INT_MAX) + return 0; + + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG1); + val &= ~TSENSOR_SENSOR0_CONFIG1_TH1; + + high = tegra_tsensor_temp_to_counter(ts, high); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG1_TH1, high); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG1); + + return 0; +} + +static const struct thermal_zone_of_device_ops ops = { + .get_temp = tegra_tsensor_get_temp, + .set_trips = tegra_tsensor_set_trips, +}; + +static bool +tegra_tsensor_handle_channel_interrupt(const struct tegra_tsensor *ts, + unsigned int id) +{ + const struct tegra_tsensor_channel *tsc = &ts->ch[id]; + u32 val; + + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_STATUS0); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_STATUS0); + + if (FIELD_GET(TSENSOR_SENSOR0_STATUS0_STATE, val) == 5) + dev_err_ratelimited(ts->dev, "ch%u: counter overflowed\n", id); + + if (!FIELD_GET(TSENSOR_SENSOR0_STATUS0_INTR, val)) + return false; + + thermal_zone_device_update(tsc->tzd, THERMAL_EVENT_UNSPECIFIED); + + return true; +} + +static irqreturn_t tegra_tsensor_isr(int irq, void *data) +{ + const struct tegra_tsensor *ts = data; + bool handled = false; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) + handled |= tegra_tsensor_handle_channel_interrupt(ts, i); + + return handled ? 
IRQ_HANDLED : IRQ_NONE; +} + +static int tegra_tsensor_disable_hw_channel(const struct tegra_tsensor *ts, + unsigned int id) +{ + const struct tegra_tsensor_channel *tsc = &ts->ch[id]; + struct thermal_zone_device *tzd = tsc->tzd; + u32 val; + int err; + + if (!tzd) + goto stop_channel; + + err = thermal_zone_device_disable(tzd); + if (err) { + dev_err(ts->dev, "ch%u: failed to disable zone: %d\n", id, err); + return err; + } + +stop_channel: + /* stop channel gracefully */ + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP, 1); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0); + + return 0; +} + +static void tegra_tsensor_get_hw_channel_trips(struct thermal_zone_device *tzd, + int *hot_trip, int *crit_trip) +{ + unsigned int i; + + /* + * 90C is the maximal critical temperature of all Tegra30 SoC variants, + * use it for the default trip if unspecified in a device-tree. + */ + *hot_trip = 85000; + *crit_trip = 90000; + + for (i = 0; i < tzd->trips; i++) { + enum thermal_trip_type type; + int trip_temp; + + tzd->ops->get_trip_temp(tzd, i, &trip_temp); + tzd->ops->get_trip_type(tzd, i, &type); + + if (type == THERMAL_TRIP_HOT) + *hot_trip = trip_temp; + + if (type == THERMAL_TRIP_CRITICAL) + *crit_trip = trip_temp; + } + + /* clamp hardware trips to the calibration limits */ + *hot_trip = clamp(*hot_trip, 25000, 90000); + + /* + * Kernel will perform a normal system shut down if it will + * see that critical temperature is breached, hence set the + * hardware limit by 5C higher in order to allow system to + * shut down gracefully before sending signal to the Power + * Management controller. 
+ */ + *crit_trip = clamp(*crit_trip + 5000, 25000, 90000); +} + +static int tegra_tsensor_enable_hw_channel(const struct tegra_tsensor *ts, + unsigned int id) +{ + const struct tegra_tsensor_channel *tsc = &ts->ch[id]; + struct thermal_zone_device *tzd = tsc->tzd; + int err, hot_trip = 0, crit_trip = 0; + u32 val; + + if (!tzd) { + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0); + val &= ~TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP; + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0); + + return 0; + } + + tegra_tsensor_get_hw_channel_trips(tzd, &hot_trip, &crit_trip); + + /* prevent potential racing with tegra_tsensor_set_trips() */ + mutex_lock(&tzd->lock); + + dev_info_once(ts->dev, "ch%u: PMC emergency shutdown trip set to %dC\n", + id, DIV_ROUND_CLOSEST(crit_trip, 1000)); + + hot_trip = tegra_tsensor_temp_to_counter(ts, hot_trip); + crit_trip = tegra_tsensor_temp_to_counter(ts, crit_trip); + + /* program LEVEL2 counter threshold */ + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG1); + val &= ~TSENSOR_SENSOR0_CONFIG1_TH2; + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG1_TH2, hot_trip); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG1); + + /* program LEVEL3 counter threshold */ + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG2); + val &= ~TSENSOR_SENSOR0_CONFIG2_TH3; + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG2_TH3, crit_trip); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG2); + + /* + * Enable sensor, emergency shutdown, interrupts for level 1/2/3 + * breaches and counter overflow condition. + * + * Disable DIV2 throttle for now since we need to figure out how + * to integrate it properly with the thermal framework. 
+ * + * Thermal levels supported by hardware: + * + * Level 0 = cold + * Level 1 = passive cooling (cpufreq DVFS) + * Level 2 = passive cooling assisted by hardware (DIV2) + * Level 3 = emergency shutdown assisted by hardware (PMC) + */ + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0); + val &= ~TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP; + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_DVFS_EN, 1); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_HW_FREQ_DIV_EN, 0); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_THERMAL_RST_EN, 1); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_OVERFLOW_EN, 1); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_HW_FREQ_DIV_EN, 1); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_THERMAL_RST_EN, 1); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0); + + mutex_unlock(&tzd->lock); + + err = thermal_zone_device_enable(tzd); + if (err) { + dev_err(ts->dev, "ch%u: failed to enable zone: %d\n", id, err); + return err; + } + + return 0; +} + +static bool tegra_tsensor_fuse_read_spare(unsigned int spare) +{ + u32 val = 0; + + tegra_fuse_readl(TEGRA30_FUSE_SPARE_BIT + spare * 4, &val); + + return !!val; +} + +static int tegra_tsensor_nvmem_setup(struct tegra_tsensor *ts) +{ + u32 i, ate_ver = 0, cal = 0, t1_25C = 0, t2_90C = 0; + int err, c1_25C, c2_90C; + + err = tegra_fuse_readl(TEGRA30_FUSE_TEST_PROG_VER, &ate_ver); + if (err) { + dev_err_probe(ts->dev, err, "failed to get ATE version\n"); + return err; + } + + if (ate_ver < 8) { + dev_info(ts->dev, "unsupported ATE version: %u\n", ate_ver); + return -ENODEV; + } + + /* + * We have two TSENSOR channels in a two different spots on SoC. + * Second channel provides more accurate data on older SoC versions, + * use it as a primary channel. 
+ */ + if (ate_ver <= 21) { + dev_info_once(ts->dev, + "older ATE version detected, channels remapped\n"); + ts->swap_channels = true; + } + + err = tegra_fuse_readl(TEGRA30_FUSE_TSENSOR_CALIB, &cal); + if (err) { + dev_err(ts->dev, "failed to get calibration data: %d\n", err); + return err; + } + + /* get calibrated counter values for 25C/90C thresholds */ + c1_25C = FIELD_GET(TEGRA30_FUSE_TSENSOR_CALIB_LOW, cal); + c2_90C = FIELD_GET(TEGRA30_FUSE_TSENSOR_CALIB_HIGH, cal); + + /* and calibrated temperatures corresponding to the counter values */ + for (i = 0; i < 7; i++) { + t1_25C |= tegra_tsensor_fuse_read_spare(14 + i) << i; + t1_25C |= tegra_tsensor_fuse_read_spare(21 + i) << i; + + t2_90C |= tegra_tsensor_fuse_read_spare(0 + i) << i; + t2_90C |= tegra_tsensor_fuse_read_spare(7 + i) << i; + } + + if (c2_90C - c1_25C <= t2_90C - t1_25C) { + dev_err(ts->dev, "invalid calibration data: %d %d %u %u\n", + c2_90C, c1_25C, t2_90C, t1_25C); + return -EINVAL; + } + + /* all calibration coefficients are premultiplied by 1000000 */ + + ts->calib.a = DIV_ROUND_CLOSEST((t2_90C - t1_25C) * 1000000, + (c2_90C - c1_25C)); + + ts->calib.b = t1_25C * 1000000 - ts->calib.a * c1_25C; + + if (tegra_sku_info.revision == TEGRA_REVISION_A01) { + ts->calib.m = -2775; + ts->calib.n = 1338811; + ts->calib.p = -7300000; + } else { + ts->calib.m = -3512; + ts->calib.n = 1528943; + ts->calib.p = -11100000; + } + + /* except the coefficient of a reduced quadratic equation */ + ts->calib.r = DIV_ROUND_CLOSEST(ts->calib.n, ts->calib.m * 2); + + dev_info_once(ts->dev, + "calibration: %d %d %u %u ATE ver: %u SoC rev: %u\n", + c2_90C, c1_25C, t2_90C, t1_25C, ate_ver, + tegra_sku_info.revision); + + return 0; +} + +static int tegra_tsensor_register_channel(struct tegra_tsensor *ts, + unsigned int id) +{ + struct tegra_tsensor_channel *tsc = &ts->ch[id]; + unsigned int hw_id = ts->swap_channels ? 
!id : id; + + tsc->ts = ts; + tsc->id = id; + tsc->regs = ts->regs + 0x40 * (hw_id + 1); + + tsc->tzd = devm_thermal_zone_of_sensor_register(ts->dev, id, tsc, &ops); + if (IS_ERR(tsc->tzd)) { + if (PTR_ERR(tsc->tzd) != -ENODEV) + return dev_err_probe(ts->dev, PTR_ERR(tsc->tzd), + "failed to register thermal zone\n"); + + /* + * It's okay if sensor isn't assigned to any thermal zone + * in a device-tree. + */ + tsc->tzd = NULL; + return 0; + } + + if (devm_thermal_add_hwmon_sysfs(tsc->tzd)) + dev_warn(ts->dev, "failed to add hwmon sysfs attributes\n"); + + return 0; +} + +static int tegra_tsensor_probe(struct platform_device *pdev) +{ + struct tegra_tsensor *ts; + unsigned int i; + int err, irq; + + ts = devm_kzalloc(&pdev->dev, sizeof(*ts), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ts->dev = &pdev->dev; + platform_set_drvdata(pdev, ts); + + ts->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ts->regs)) + return PTR_ERR(ts->regs); + + ts->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(ts->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(ts->clk), + "failed to get clock\n"); + + ts->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL); + if (IS_ERR(ts->rst)) + return dev_err_probe(&pdev->dev, PTR_ERR(ts->rst), + "failed to get reset control\n"); + + err = tegra_tsensor_nvmem_setup(ts); + if (err) + return err; + + err = tegra_tsensor_hw_enable(ts); + if (err) + return err; + + err = devm_add_action_or_reset(&pdev->dev, + devm_tegra_tsensor_hw_disable, + ts); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) { + err = tegra_tsensor_register_channel(ts, i); + if (err) + return err; + } + + err = devm_request_threaded_irq(&pdev->dev, irq, NULL, + tegra_tsensor_isr, IRQF_ONESHOT, + "tegra_tsensor", ts); + if (err) + return dev_err_probe(&pdev->dev, err, + "failed to request interrupt\n"); + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) { + err = 
tegra_tsensor_enable_hw_channel(ts, i); + if (err) + return err; + } + + return 0; +} + +static int __maybe_unused tegra_tsensor_suspend(struct device *dev) +{ + struct tegra_tsensor *ts = dev_get_drvdata(dev); + unsigned int i; + int err; + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) { + err = tegra_tsensor_disable_hw_channel(ts, i); + if (err) + goto enable_channel; + } + + err = tegra_tsensor_hw_disable(ts); + if (err) + goto enable_channel; + + return 0; + +enable_channel: + while (i--) + tegra_tsensor_enable_hw_channel(ts, i); + + return err; +} + +static int __maybe_unused tegra_tsensor_resume(struct device *dev) +{ + struct tegra_tsensor *ts = dev_get_drvdata(dev); + unsigned int i; + int err; + + err = tegra_tsensor_hw_enable(ts); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) { + err = tegra_tsensor_enable_hw_channel(ts, i); + if (err) + return err; + } + + return 0; +} + +static const struct dev_pm_ops tegra_tsensor_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_tsensor_suspend, + tegra_tsensor_resume) +}; + +static const struct of_device_id tegra_tsensor_of_match[] = { + { .compatible = "nvidia,tegra30-tsensor", }, + {}, +}; +MODULE_DEVICE_TABLE(of, tegra_tsensor_of_match); + +static struct platform_driver tegra_tsensor_driver = { + .probe = tegra_tsensor_probe, + .driver = { + .name = "tegra30-tsensor", + .of_match_table = tegra_tsensor_of_match, + .pm = &tegra_tsensor_pm_ops, + }, +}; +module_platform_driver(tegra_tsensor_driver); + +MODULE_DESCRIPTION("NVIDIA Tegra30 Thermal Sensor driver"); +MODULE_AUTHOR("Dmitry Osipenko "); +MODULE_LICENSE("GPL"); From eebb4b44e102abe59d9265eb0432031635bc1216 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 28 Mar 2021 18:51:17 +0300 Subject: [PATCH 732/851] gpu: host1x: Add back arm_iommu_detach_device() The case of CONFIG_ARM_DMA_USE_IOMMU=y was found to be broken for host1x driver. Add back the workaround using arm_iommu_detach_device() as a temporary solution. 
Cc: stable@vger.kernel.org Fixes: af1cbfb9bf0f ("gpu: host1x: Support DMA mapping of buffers") Signed-off-by: Dmitry Osipenko --- drivers/gpu/host1x/dev.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 400a3b9d88575..4a5088b53df61 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -21,6 +21,10 @@ #include #undef CREATE_TRACE_POINTS +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include +#endif + #include "bus.h" #include "channel.h" #include "debug.h" @@ -244,6 +248,17 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); int err; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (host->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(host->dev); + arm_iommu_detach_device(host->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(host->dev); + } +#endif + /* * We may not always want to enable IOMMU support (for example if the * host1x firewall is already enabled and we don't support addressing From 18c6cc79bd9040bf1de21f21d3b382da16df4dd7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 28 Mar 2021 19:00:47 +0300 Subject: [PATCH 733/851] drm/tegra: Add back arm_iommu_detach_device() The case of CONFIG_ARM_DMA_USE_IOMMU=y was found to be broken for DRM driver. Add back the workaround using arm_iommu_detach_device() as a temporary solution.
Cc: stable@vger.kernel.org Fixes: fa6661b7aa0b ("drm/tegra: Optionally attach clients to the IOMMU") Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/drm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index a1aa9518a61c2..203c55811c814 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -21,6 +21,10 @@ #include #include +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include +#endif + #include "uapi.h" #include "drm.h" #include "gem.h" @@ -922,6 +926,17 @@ int host1x_client_iommu_attach(struct host1x_client *client) struct iommu_group *group = NULL; int err; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (client->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(client->dev); + arm_iommu_detach_device(client->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(client->dev); + } +#endif + /* * If the host1x client is already attached to an IOMMU domain that is * not the shared IOMMU domain, don't try to attach it to a different From 22fa6f53f14a19590a0d9bab8125e1f55dd14cc1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 2 May 2019 00:37:11 +0300 Subject: [PATCH 734/851] drm/tegra: dc: Support memory bandwidth management Display controller (DC) performs isochronous memory transfers, and thus, has a requirement for a minimum memory bandwidth that shall be fulfilled, otherwise framebuffer data can't be fetched fast enough and this results in a DC's data-FIFO underflow that follows by a visual corruption. The Memory Controller drivers provide facility for memory bandwidth management via interconnect API. Let's wire up the interconnect API support to the DC driver in order to fix the distorted display output on T30 Ouya, T124 TK1 and other Tegra devices. 
Tested-by: Peter Geis # Ouya T30 Tested-by: Matt Merhar # Ouya T30 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/Kconfig | 1 + drivers/gpu/drm/tegra/dc.c | 348 +++++++++++++++++++++++++++++++++- drivers/gpu/drm/tegra/dc.h | 12 ++ drivers/gpu/drm/tegra/drm.c | 16 +- drivers/gpu/drm/tegra/plane.c | 117 ++++++++++++ drivers/gpu/drm/tegra/plane.h | 16 ++ 6 files changed, 507 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 5043dcaf1cf95..1650a448eabd6 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -9,6 +9,7 @@ config DRM_TEGRA select DRM_MIPI_DSI select DRM_PANEL select TEGRA_HOST1X + select INTERCONNECT select IOMMU_IOVA select CEC_CORE if CEC_NOTIFIER help diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index fa9cbeb7a9585..8e3ac02d27daa 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -621,9 +622,14 @@ static int tegra_plane_atomic_check(struct drm_plane *plane, struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc); int err; + plane_state->peak_memory_bandwidth = 0; + plane_state->avg_memory_bandwidth = 0; + /* no need for further checks if the plane is being disabled */ - if (!new_plane_state->crtc) + if (!new_plane_state->crtc) { + plane_state->total_peak_memory_bandwidth = 0; return 0; + } err = tegra_plane_format(new_plane_state->fb->format->format, &plane_state->format, @@ -811,6 +817,12 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm, formats = dc->soc->primary_formats; modifiers = dc->soc->modifiers; + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, &tegra_plane_funcs, formats, num_formats, modifiers, type, NULL); @@ -848,12 
+860,18 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane, { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state); struct tegra_plane *tegra = to_tegra_plane(plane); int err; + plane_state->peak_memory_bandwidth = 0; + plane_state->avg_memory_bandwidth = 0; + /* no need for further checks if the plane is being disabled */ - if (!new_plane_state->crtc) + if (!new_plane_state->crtc) { + plane_state->total_peak_memory_bandwidth = 0; return 0; + } /* scaling not supported for cursor */ if ((new_plane_state->src_w >> 16 != new_plane_state->crtc_w) || @@ -1038,6 +1056,12 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm, formats = tegra_cursor_plane_formats; } + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, &tegra_plane_funcs, formats, num_formats, linear_modifiers, @@ -1152,6 +1176,12 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm, num_formats = dc->soc->num_overlay_formats; formats = dc->soc->overlay_formats; + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + if (!cursor) type = DRM_PLANE_TYPE_OVERLAY; else @@ -1850,6 +1880,106 @@ static int tegra_dc_wait_idle(struct tegra_dc *dc, unsigned long timeout) return -ETIMEDOUT; } +static void +tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc, + struct drm_atomic_state *state, + bool prepare_bandwidth_transition) +{ + const struct tegra_plane_state *old_tegra_state, *new_tegra_state; + const struct tegra_dc_state *old_dc_state, *new_dc_state; + u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw; + const struct drm_plane_state *old_plane_state; + const struct drm_crtc_state *old_crtc_state; + struct tegra_dc_window window, old_window; + struct tegra_dc 
*dc = to_tegra_dc(crtc); + struct tegra_plane *tegra; + struct drm_plane *plane; + + if (dc->soc->has_nvdisplay) + return; + + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); + old_dc_state = to_const_dc_state(old_crtc_state); + new_dc_state = to_const_dc_state(crtc->state); + + if (!crtc->state->active) { + if (!old_crtc_state->active) + return; + + /* + * When CRTC is disabled on DPMS, the state of attached planes + * is kept unchanged. Hence we need to enforce removal of the + * bandwidths from the ICC paths. + */ + drm_atomic_crtc_for_each_plane(plane, crtc) { + tegra = to_tegra_plane(plane); + + icc_set_bw(tegra->icc_mem, 0, 0); + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); + } + + return; + } + + for_each_old_plane_in_state(old_crtc_state->state, plane, + old_plane_state, i) { + old_tegra_state = to_const_tegra_plane_state(old_plane_state); + new_tegra_state = to_const_tegra_plane_state(plane->state); + tegra = to_tegra_plane(plane); + + /* + * We're iterating over the global atomic state and it contains + * planes from another CRTC, hence we need to filter out the + * planes unrelated to this CRTC. + */ + if (tegra->dc != dc) + continue; + + new_avg_bw = new_tegra_state->avg_memory_bandwidth; + old_avg_bw = old_tegra_state->avg_memory_bandwidth; + + new_peak_bw = new_tegra_state->total_peak_memory_bandwidth; + old_peak_bw = old_tegra_state->total_peak_memory_bandwidth; + + /* + * See the comment related to !crtc->state->active above, + * which explains why bandwidths need to be updated when + * CRTC is turning ON. 
+ */ + if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw && + old_crtc_state->active) + continue; + + window.src.h = drm_rect_height(&plane->state->src) >> 16; + window.dst.h = drm_rect_height(&plane->state->dst); + + old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16; + old_window.dst.h = drm_rect_height(&old_plane_state->dst); + + /* + * During the preparation phase (atomic_begin), the memory + * freq should go high before the DC changes are committed + * if bandwidth requirement goes up, otherwise memory freq + * should to stay high if BW requirement goes down. The + * opposite applies to the completion phase (post_commit). + */ + if (prepare_bandwidth_transition) { + new_avg_bw = max(old_avg_bw, new_avg_bw); + new_peak_bw = max(old_peak_bw, new_peak_bw); + + if (tegra_plane_use_vertical_filtering(tegra, &old_window)) + window = old_window; + } + + icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw); + + if (tegra_plane_use_vertical_filtering(tegra, &window)) + icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw); + else + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); + } +} + static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { @@ -2033,6 +2163,8 @@ static void tegra_crtc_atomic_begin(struct drm_crtc *crtc, { unsigned long flags; + tegra_crtc_update_memory_bandwidth(crtc, state, true); + if (crtc->state->event) { spin_lock_irqsave(&crtc->dev->event_lock, flags); @@ -2065,7 +2197,207 @@ static void tegra_crtc_atomic_flush(struct drm_crtc *crtc, value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL); } +static bool tegra_plane_is_cursor(const struct drm_plane_state *state) +{ + const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc; + const struct drm_format_info *fmt = state->fb->format; + unsigned int src_w = drm_rect_width(&state->src) >> 16; + unsigned int dst_w = drm_rect_width(&state->dst); + + if (state->plane->type != DRM_PLANE_TYPE_CURSOR) + return false; + + if 
(soc->supports_cursor) + return true; + + if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256) + return false; + + return true; +} + +static unsigned long +tegra_plane_overlap_mask(struct drm_crtc_state *state, + const struct drm_plane_state *plane_state) +{ + const struct drm_plane_state *other_state; + const struct tegra_plane *tegra; + unsigned long overlap_mask = 0; + struct drm_plane *plane; + struct drm_rect rect; + + if (!plane_state->visible || !plane_state->fb) + return 0; + + /* + * Data-prefetch FIFO will easily help to overcome temporal memory + * pressure if other plane overlaps with the cursor plane. + */ + if (tegra_plane_is_cursor(plane_state)) + return 0; + + drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) { + rect = plane_state->dst; + + tegra = to_tegra_plane(other_state->plane); + + if (!other_state->visible || !other_state->fb) + continue; + + /* + * Ignore cursor plane overlaps because it's not practical to + * assume that it contributes to the bandwidth in overlapping + * area if window width is small. + */ + if (tegra_plane_is_cursor(other_state)) + continue; + + if (drm_rect_intersect(&rect, &other_state->dst)) + overlap_mask |= BIT(tegra->index); + } + + return overlap_mask; +} + +static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask; + u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {}; + bool all_planes_overlap_simultaneously = true; + const struct tegra_plane_state *tegra_state; + const struct drm_plane_state *plane_state; + struct tegra_dc *dc = to_tegra_dc(crtc); + const struct drm_crtc_state *old_state; + struct drm_crtc_state *new_state; + struct tegra_plane *tegra; + struct drm_plane *plane; + + /* + * The nv-display uses shared planes. The algorithm below assumes + * maximum 3 planes per-CRTC, this assumption isn't applicable to + * the nv-display. 
Note that T124 support has additional windows, + * but currently they aren't supported by the driver. + */ + if (dc->soc->has_nvdisplay) + return 0; + + new_state = drm_atomic_get_new_crtc_state(state, crtc); + old_state = drm_atomic_get_old_crtc_state(state, crtc); + + /* + * For overlapping planes pixel's data is fetched for each plane at + * the same time, hence bandwidths are accumulated in this case. + * This needs to be taken into account for calculating total bandwidth + * consumed by all planes. + * + * Here we get the overlapping state of each plane, which is a + * bitmask of plane indices telling with what planes there is an + * overlap. Note that bitmask[plane] includes BIT(plane) in order + * to make further code nicer and simpler. + */ + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) { + tegra_state = to_const_tegra_plane_state(plane_state); + tegra = to_tegra_plane(plane); + + if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM)) + return -EINVAL; + + plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth; + mask = tegra_plane_overlap_mask(new_state, plane_state); + overlap_mask[tegra->index] = mask; + + if (hweight_long(mask) != 3) + all_planes_overlap_simultaneously = false; + } + + /* + * Then we calculate maximum bandwidth of each plane state. + * The bandwidth includes the plane BW + BW of the "simultaneously" + * overlapping planes, where "simultaneously" means areas where DC + * fetches from the planes simultaneously during of scan-out process. + * + * For example, if plane A overlaps with planes B and C, but B and C + * don't overlap, then the peak bandwidth will be either in area where + * A-and-B or A-and-C planes overlap. + * + * The plane_peak_bw[] contains peak memory bandwidth values of + * each plane, this information is needed by interconnect provider + * in order to set up latency allowance based on the peak BW, see + * tegra_crtc_update_memory_bandwidth(). 
+ */ + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) { + u32 i, old_peak_bw, new_peak_bw, overlap_bw = 0; + + /* + * Note that plane's atomic check doesn't touch the + * total_peak_memory_bandwidth of enabled plane, hence the + * current state contains the old bandwidth state from the + * previous CRTC commit. + */ + tegra_state = to_const_tegra_plane_state(plane_state); + tegra = to_tegra_plane(plane); + + for_each_set_bit(i, &overlap_mask[tegra->index], 3) { + if (i == tegra->index) + continue; + + if (all_planes_overlap_simultaneously) + overlap_bw += plane_peak_bw[i]; + else + overlap_bw = max(overlap_bw, plane_peak_bw[i]); + } + + new_peak_bw = plane_peak_bw[tegra->index] + overlap_bw; + old_peak_bw = tegra_state->total_peak_memory_bandwidth; + + /* + * If plane's peak bandwidth changed (for example plane isn't + * overlapped anymore) and plane isn't in the atomic state, + * then add plane to the state in order to have the bandwidth + * updated. + */ + if (old_peak_bw != new_peak_bw) { + struct tegra_plane_state *new_tegra_state; + struct drm_plane_state *new_plane_state; + + new_plane_state = drm_atomic_get_plane_state(state, plane); + if (IS_ERR(new_plane_state)) + return PTR_ERR(new_plane_state); + + new_tegra_state = to_tegra_plane_state(new_plane_state); + new_tegra_state->total_peak_memory_bandwidth = new_peak_bw; + } + } + + return 0; +} + +static int tegra_crtc_atomic_check(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + int err; + + err = tegra_crtc_calculate_memory_bandwidth(crtc, state); + if (err) + return err; + + return 0; +} + +void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + /* + * Display bandwidth is allowed to go down only once hardware state + * is known to be armed, i.e. state was committed and VBLANK event + * received. 
+ */ + tegra_crtc_update_memory_bandwidth(crtc, state, false); +} + static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = { + .atomic_check = tegra_crtc_atomic_check, .atomic_begin = tegra_crtc_atomic_begin, .atomic_flush = tegra_crtc_atomic_flush, .atomic_enable = tegra_crtc_atomic_enable, @@ -2391,7 +2723,9 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = { .overlay_formats = tegra20_overlay_formats, .modifiers = tegra20_modifiers, .has_win_a_without_filters = true, + .has_win_b_vfilter_mem_client = true, .has_win_c_without_vert_filter = true, + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct tegra_dc_soc_info tegra30_dc_soc_info = { @@ -2411,7 +2745,9 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = { .overlay_formats = tegra20_overlay_formats, .modifiers = tegra20_modifiers, .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = true, .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = true, }; static const struct tegra_dc_soc_info tegra114_dc_soc_info = { @@ -2431,7 +2767,9 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = { .overlay_formats = tegra114_overlay_formats, .modifiers = tegra20_modifiers, .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = true, }; static const struct tegra_dc_soc_info tegra124_dc_soc_info = { @@ -2451,7 +2789,9 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .overlay_formats = tegra124_overlay_formats, .modifiers = tegra124_modifiers, .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct tegra_dc_soc_info tegra210_dc_soc_info = { @@ -2471,7 +2811,9 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = { .overlay_formats = tegra114_overlay_formats, .modifiers = 
tegra124_modifiers, .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = { @@ -2521,6 +2863,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = { .has_nvdisplay = true, .wgrps = tegra186_dc_wgrps, .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps), + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = { @@ -2570,6 +2913,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = { .has_nvdisplay = true, .wgrps = tegra194_dc_wgrps, .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps), + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct of_device_id tegra_dc_of_match[] = { diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 88fc9c634c1d5..81e2cef0f8ace 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -15,6 +15,8 @@ struct tegra_output; +#define TEGRA_DC_LEGACY_PLANES_NUM 7 + struct tegra_dc_state { struct drm_crtc_state base; @@ -33,6 +35,12 @@ static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state) return NULL; } +static inline const struct tegra_dc_state * +to_const_dc_state(const struct drm_crtc_state *state) +{ + return to_dc_state((struct drm_crtc_state *)state); +} + struct tegra_dc_stats { unsigned long frames; unsigned long vblank; @@ -66,7 +74,9 @@ struct tegra_dc_soc_info { unsigned int num_overlay_formats; const u64 *modifiers; bool has_win_a_without_filters; + bool has_win_b_vfilter_mem_client; bool has_win_c_without_vert_filter; + bool plane_tiled_memory_bandwidth_x2; }; struct tegra_dc { @@ -154,6 +164,8 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc, struct drm_crtc_state *crtc_state, struct clk *clk, unsigned long pclk, unsigned int div); +void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc, + struct drm_atomic_state 
*state); /* from rgb.c */ int tegra_dc_rgb_probe(struct tegra_dc *dc); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 203c55811c814..01745436b1752 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -25,9 +25,10 @@ #include #endif -#include "uapi.h" +#include "dc.h" #include "drm.h" #include "gem.h" +#include "uapi.h" #define DRIVER_NAME "tegra" #define DRIVER_DESC "NVIDIA Tegra graphics" @@ -60,6 +61,17 @@ static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = { .atomic_commit = drm_atomic_helper_commit, }; +static void tegra_atomic_post_commit(struct drm_device *drm, + struct drm_atomic_state *old_state) +{ + struct drm_crtc_state *old_crtc_state __maybe_unused; + struct drm_crtc *crtc; + unsigned int i; + + for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i) + tegra_crtc_atomic_post_commit(crtc, old_state); +} + static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state) { struct drm_device *drm = old_state->dev; @@ -79,6 +91,8 @@ static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state) } else { drm_atomic_helper_commit_tail_rpm(old_state); } + + tegra_atomic_post_commit(drm, old_state); } static const struct drm_mode_config_helper_funcs diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 2e65b4075ce6c..e00ec3f40ec84 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -4,6 +4,7 @@ */ #include +#include #include #include @@ -64,6 +65,9 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane) copy->reflect_x = state->reflect_x; copy->reflect_y = state->reflect_y; copy->opaque = state->opaque; + copy->total_peak_memory_bandwidth = state->total_peak_memory_bandwidth; + copy->peak_memory_bandwidth = state->peak_memory_bandwidth; + copy->avg_memory_bandwidth = state->avg_memory_bandwidth; for (i = 0; i < 2; i++) copy->blending[i] = state->blending[i]; @@ -244,6 +248,78 @@ void 
tegra_plane_cleanup_fb(struct drm_plane *plane, tegra_dc_unpin(dc, to_tegra_plane_state(state)); } +static int tegra_plane_calculate_memory_bandwidth(struct drm_plane_state *state) +{ + struct tegra_plane_state *tegra_state = to_tegra_plane_state(state); + unsigned int i, bpp, dst_w, dst_h, src_w, src_h, mul; + const struct tegra_dc_soc_info *soc; + const struct drm_format_info *fmt; + struct drm_crtc_state *crtc_state; + u64 avg_bandwidth, peak_bandwidth; + + if (!state->visible) + return 0; + + crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc); + if (!crtc_state) + return -EINVAL; + + src_w = drm_rect_width(&state->src) >> 16; + src_h = drm_rect_height(&state->src) >> 16; + dst_w = drm_rect_width(&state->dst); + dst_h = drm_rect_height(&state->dst); + + fmt = state->fb->format; + soc = to_tegra_dc(state->crtc)->soc; + + /* + * Note that real memory bandwidth vary depending on format and + * memory layout, we are not taking that into account because small + * estimation error isn't important since bandwidth is rounded up + * anyway. + */ + for (i = 0, bpp = 0; i < fmt->num_planes; i++) { + unsigned int bpp_plane = fmt->cpp[i] * 8; + + /* + * Sub-sampling is relevant for chroma planes only and vertical + * readouts are not cached, hence only horizontal sub-sampling + * matters. + */ + if (i > 0) + bpp_plane /= fmt->hsub; + + bpp += bpp_plane; + } + + /* average bandwidth in kbytes/sec */ + avg_bandwidth = min(src_w, dst_w) * min(src_h, dst_h); + avg_bandwidth *= drm_mode_vrefresh(&crtc_state->adjusted_mode); + avg_bandwidth = DIV_ROUND_UP(avg_bandwidth * bpp, 8) + 999; + do_div(avg_bandwidth, 1000); + + /* mode.clock in kHz, peak bandwidth in kbytes/sec */ + peak_bandwidth = DIV_ROUND_UP(crtc_state->adjusted_mode.clock * bpp, 8); + + /* + * Tegra30/114 Memory Controller can't interleave DC memory requests + * for the tiled windows because DC uses 16-bytes atom, while DDR3 + * uses 32-bytes atom. 
Hence there is x2 memory overfetch for tiled + * framebuffer and DDR3 on these SoCs. + */ + if (soc->plane_tiled_memory_bandwidth_x2 && + tegra_state->tiling.mode == TEGRA_BO_TILING_MODE_TILED) + mul = 2; + else + mul = 1; + + /* ICC bandwidth in kbytes/sec */ + tegra_state->peak_memory_bandwidth = kBps_to_icc(peak_bandwidth) * mul; + tegra_state->avg_memory_bandwidth = kBps_to_icc(avg_bandwidth) * mul; + + return 0; +} + int tegra_plane_state_add(struct tegra_plane *plane, struct drm_plane_state *state) { @@ -262,6 +338,10 @@ int tegra_plane_state_add(struct tegra_plane *plane, if (err < 0) return err; + err = tegra_plane_calculate_memory_bandwidth(state); + if (err < 0) + return err; + tegra = to_dc_state(crtc_state); tegra->planes |= WIN_A_ACT_REQ << plane->index; @@ -646,3 +726,40 @@ int tegra_plane_setup_legacy_state(struct tegra_plane *tegra, return 0; } + +static const char * const tegra_plane_icc_names[TEGRA_DC_LEGACY_PLANES_NUM] = { + "wina", "winb", "winc", NULL, NULL, NULL, "cursor", +}; + +int tegra_plane_interconnect_init(struct tegra_plane *plane) +{ + const char *icc_name = tegra_plane_icc_names[plane->index]; + struct device *dev = plane->dc->dev; + struct tegra_dc *dc = plane->dc; + int err; + + if (WARN_ON(plane->index >= TEGRA_DC_LEGACY_PLANES_NUM) || + WARN_ON(!tegra_plane_icc_names[plane->index])) + return -EINVAL; + + plane->icc_mem = devm_of_icc_get(dev, icc_name); + err = PTR_ERR_OR_ZERO(plane->icc_mem); + if (err) { + dev_err_probe(dev, err, "failed to get %s interconnect\n", + icc_name); + return err; + } + + /* plane B on T20/30 has a dedicated memory client for a 6-tap vertical filter */ + if (plane->index == 1 && dc->soc->has_win_b_vfilter_mem_client) { + plane->icc_mem_vfilter = devm_of_icc_get(dev, "winb-vfilter"); + err = PTR_ERR_OR_ZERO(plane->icc_mem_vfilter); + if (err) { + dev_err_probe(dev, err, "failed to get %s interconnect\n", + "winb-vfilter"); + return err; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/tegra/plane.h 
b/drivers/gpu/drm/tegra/plane.h index 1785c1559c0ce..d9470780c8039 100644 --- a/drivers/gpu/drm/tegra/plane.h +++ b/drivers/gpu/drm/tegra/plane.h @@ -8,6 +8,7 @@ #include +struct icc_path; struct tegra_bo; struct tegra_dc; @@ -16,6 +17,9 @@ struct tegra_plane { struct tegra_dc *dc; unsigned int offset; unsigned int index; + + struct icc_path *icc_mem; + struct icc_path *icc_mem_vfilter; }; struct tegra_cursor { @@ -52,6 +56,11 @@ struct tegra_plane_state { /* used for legacy blending support only */ struct tegra_plane_legacy_blending_state blending[2]; bool opaque; + + /* bandwidths are in ICC units, i.e. kbytes/sec */ + u32 total_peak_memory_bandwidth; + u32 peak_memory_bandwidth; + u32 avg_memory_bandwidth; }; static inline struct tegra_plane_state * @@ -63,6 +72,12 @@ to_tegra_plane_state(struct drm_plane_state *state) return NULL; } +static inline const struct tegra_plane_state * +to_const_tegra_plane_state(const struct drm_plane_state *state) +{ + return to_tegra_plane_state((struct drm_plane_state *)state); +} + extern const struct drm_plane_funcs tegra_plane_funcs; int tegra_plane_prepare_fb(struct drm_plane *plane, @@ -78,5 +93,6 @@ bool tegra_plane_format_is_indexed(unsigned int format); bool tegra_plane_format_is_yuv(unsigned int format, bool *planar, unsigned int *bpc); int tegra_plane_setup_legacy_state(struct tegra_plane *tegra, struct tegra_plane_state *state); +int tegra_plane_interconnect_init(struct tegra_plane *plane); #endif /* TEGRA_PLANE_H */ From 67988e920e986daa5b37303e63626e9fd0967c90 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 5 May 2019 16:28:21 +0300 Subject: [PATCH 735/851] drm/tegra: dc: Extend debug stats with total number of events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's useful to know the total number of underflow events and currently the debug stats are getting reset each time CRTC is being disabled. 
Let's also keep an overall count of events that does not get reset. Reviewed-by: Michał Mirosław Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/dc.c | 10 ++++++++++ drivers/gpu/drm/tegra/dc.h | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 8e3ac02d27daa..150c7b9c74097 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -1605,6 +1605,11 @@ static int tegra_dc_show_stats(struct seq_file *s, void *data) seq_printf(s, "underflow: %lu\n", dc->stats.underflow); seq_printf(s, "overflow: %lu\n", dc->stats.overflow); + seq_printf(s, "frames total: %lu\n", dc->stats.frames_total); + seq_printf(s, "vblank total: %lu\n", dc->stats.vblank_total); + seq_printf(s, "underflow total: %lu\n", dc->stats.underflow_total); + seq_printf(s, "overflow total: %lu\n", dc->stats.overflow_total); + return 0; } @@ -2416,6 +2421,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) /* dev_dbg(dc->dev, "%s(): frame end\n", __func__); */ + dc->stats.frames_total++; dc->stats.frames++; } @@ -2424,6 +2430,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) dev_dbg(dc->dev, "%s(): vertical blank\n", __func__); */ drm_crtc_handle_vblank(&dc->base); + dc->stats.vblank_total++; dc->stats.vblank++; } @@ -2431,6 +2438,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) /* dev_dbg(dc->dev, "%s(): underflow\n", __func__); */ + dc->stats.underflow_total++; dc->stats.underflow++; } @@ -2438,11 +2446,13 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) /* dev_dbg(dc->dev, "%s(): overflow\n", __func__); */ + dc->stats.overflow_total++; dc->stats.overflow++; } if (status & HEAD_UF_INT) { dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__); + dc->stats.underflow_total++; dc->stats.underflow++; } diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 81e2cef0f8ace..26ad1e448c443 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@
-46,6 +46,11 @@ struct tegra_dc_stats { unsigned long vblank; unsigned long underflow; unsigned long overflow; + + unsigned long frames_total; + unsigned long vblank_total; + unsigned long underflow_total; + unsigned long overflow_total; }; struct tegra_windowgroup_soc { From c3de14e7a72ef2b96a6f5d0d07e323a273f2301e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 7 Jun 2021 01:07:27 +0300 Subject: [PATCH 736/851] soc/tegra: fuse: Add stubs needed for compile testing Add stubs needed for compile-testing of tegra-cpuidle driver. Signed-off-by: Dmitry Osipenko --- include/soc/tegra/fuse.h | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/include/soc/tegra/fuse.h b/include/soc/tegra/fuse.h index 990701f788bce..67d2bc856fbc0 100644 --- a/include/soc/tegra/fuse.h +++ b/include/soc/tegra/fuse.h @@ -6,6 +6,8 @@ #ifndef __SOC_TEGRA_FUSE_H__ #define __SOC_TEGRA_FUSE_H__ +#include + #define TEGRA20 0x20 #define TEGRA30 0x30 #define TEGRA114 0x35 @@ -22,11 +24,6 @@ #ifndef __ASSEMBLY__ -u32 tegra_read_chipid(void); -u8 tegra_get_chip_id(void); -u8 tegra_get_platform(void); -bool tegra_is_silicon(void); - enum tegra_revision { TEGRA_REVISION_UNKNOWN = 0, TEGRA_REVISION_A01, @@ -57,6 +54,10 @@ extern struct tegra_sku_info tegra_sku_info; u32 tegra_read_straps(void); u32 tegra_read_ram_code(void); int tegra_fuse_readl(unsigned long offset, u32 *value); +u32 tegra_read_chipid(void); +u8 tegra_get_chip_id(void); +u8 tegra_get_platform(void); +bool tegra_is_silicon(void); #else static struct tegra_sku_info tegra_sku_info __maybe_unused; @@ -74,6 +75,26 @@ static inline int tegra_fuse_readl(unsigned long offset, u32 *value) { return -ENODEV; } + +static inline u32 tegra_read_chipid(void) +{ + return 0; +} + +static inline u8 tegra_get_chip_id(void) +{ + return 0; +} + +static inline u8 tegra_get_platform(void) +{ + return 0; +} + +static inline bool tegra_is_silicon(void) +{ + return false; +} #endif struct device 
*tegra_soc_device_register(void); From 29a3d41a569c21e607660f0f3b129a849838ebdd Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 7 Jun 2021 01:08:55 +0300 Subject: [PATCH 737/851] soc/tegra: irq: Add stubs needed for compile testing Add stubs needed for compile-testing of tegra-cpuidle driver. Signed-off-by: Dmitry Osipenko --- include/soc/tegra/irq.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/soc/tegra/irq.h b/include/soc/tegra/irq.h index 8eb11a7109e40..94539551c8c1f 100644 --- a/include/soc/tegra/irq.h +++ b/include/soc/tegra/irq.h @@ -6,8 +6,15 @@ #ifndef __SOC_TEGRA_IRQ_H #define __SOC_TEGRA_IRQ_H -#if defined(CONFIG_ARM) +#include + +#if defined(CONFIG_ARM) && defined(CONFIG_ARCH_TEGRA) bool tegra_pending_sgi(void); +#else +static inline bool tegra_pending_sgi(void) +{ + return false; +} #endif #endif /* __SOC_TEGRA_IRQ_H */ From e933a89369fabd5990df9328fb720b87d5cd24ea Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 7 Jun 2021 00:38:53 +0300 Subject: [PATCH 738/851] soc/tegra: pm: Make stubs usable for compile testing The PM stubs need to depend on ARCH_TEGRA in order to be usable for compile-testing of tegra-cpuidle driver. Add the dependency. 
Signed-off-by: Dmitry Osipenko --- include/soc/tegra/pm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/soc/tegra/pm.h b/include/soc/tegra/pm.h index 4338789270263..ce4d0b1bd0d63 100644 --- a/include/soc/tegra/pm.h +++ b/include/soc/tegra/pm.h @@ -17,7 +17,7 @@ enum tegra_suspend_mode { TEGRA_SUSPEND_NOT_READY, }; -#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_ARM) +#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_ARM) && defined(CONFIG_ARCH_TEGRA) enum tegra_suspend_mode tegra_pm_validate_suspend_mode(enum tegra_suspend_mode mode); From c3972c657539f46b4d99fd86db5516f8af886dc0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 7 Jun 2021 01:09:12 +0300 Subject: [PATCH 739/851] clk: tegra: Add stubs needed for compile testing Add stubs needed for compile-testing of tegra-cpuidle driver. Signed-off-by: Dmitry Osipenko --- include/linux/clk/tegra.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index d128ad1570aa3..9bd06d8a54361 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -42,7 +42,11 @@ struct tegra_cpu_car_ops { #endif }; +#ifdef CONFIG_ARCH_TEGRA extern struct tegra_cpu_car_ops *tegra_cpu_car_ops; +#else +static struct tegra_cpu_car_ops *tegra_cpu_car_ops __maybe_unused; +#endif static inline void tegra_wait_cpu_in_reset(u32 cpu) { From 5e8c7feb29a3c9dcea97345dab0395058e9b8bd4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 7 Jun 2021 00:42:17 +0300 Subject: [PATCH 740/851] cpuidle: tegra: Enable compile testing Enable compile testing of tegra-cpuidle driver. 
Signed-off-by: Dmitry Osipenko --- drivers/cpuidle/Kconfig.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index 334f83e56120c..599286fc0b08c 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -99,7 +99,7 @@ config ARM_MVEBU_V7_CPUIDLE config ARM_TEGRA_CPUIDLE bool "CPU Idle Driver for NVIDIA Tegra SoCs" - depends on ARCH_TEGRA && !ARM64 + depends on (ARCH_TEGRA || COMPILE_TEST) && !ARM64 && MMU select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP select ARM_CPU_SUSPEND help From 8f5503585ca75747f224be97f514209cf0dc9ec3 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 18 Aug 2018 16:55:12 +0300 Subject: [PATCH 741/851] drm/tegra: plane: Accept all format-modifiers Tiling modifier can't be applied to YV12 video overlay by userspace because all tiling modifiers are filtered out for multi-plane formats. AFAIK, all modifiers should work with all of formats, hence the checking is incorrect. Fixes: e90124cb46bdb ("drm/tegra: plane: Support format modifiers") Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index e00ec3f40ec84..b235fe21560c8 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -123,7 +123,7 @@ static bool tegra_plane_format_mod_supported(struct drm_plane *plane, if (info->num_planes == 1) return true; - return false; + return true; } const struct drm_plane_funcs tegra_plane_funcs = { From 618e799d1892688f077c3ff2af8c99180ed6cac1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 27 Jun 2019 01:54:33 +0300 Subject: [PATCH 742/851] PM / devfreq: tegra30: Use tracepoints for debugging Debug messages create too much CPU and memory activity by themselves, so it's difficult to debug lower rates and catch unwanted interrupts that happen rarely. 
Tracepoints are ideal in that regard because they do not contribute to the sampled data at all. This allowed me to catch a few problems which are fixed by the follow-up patches; without tracepoints it would have been much harder to do. Signed-off-by: Dmitry Osipenko --- drivers/devfreq/tegra30-devfreq.c | 17 ++++ include/trace/events/tegra30_devfreq.h | 105 +++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 include/trace/events/tegra30_devfreq.h diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 10661eb2aed87..9f0a54626af29 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -72,6 +72,9 @@ /* Assume that the bus is saturated if the utilization is 25% */ #define BUS_SATURATION_RATIO 25 +#define CREATE_TRACE_POINTS +#include + /** * struct tegra_devfreq_device_config - configuration specific to an ACTMON * device @@ -273,6 +276,10 @@ static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra, device_writel(dev, do_percent(val, dev->config->boost_down_threshold), ACTMON_DEV_LOWER_WMARK); + + trace_device_lower_upper(dev->config->offset, tegra->cur_freq, + do_percent(val, dev->config->boost_down_threshold), + do_percent(val, dev->config->boost_up_threshold)); } static void actmon_isr_device(struct tegra_devfreq *tegra, @@ -280,6 +287,9 @@ static void actmon_isr_device(struct tegra_devfreq *tegra, { u32 intr_status, dev_ctrl; + trace_device_isr_enter(tegra->regs, dev->config->offset, + dev->boost_freq, cpufreq_quick_get(0)); + dev->avg_count = device_readl(dev, ACTMON_DEV_AVG_COUNT); tegra_devfreq_update_avg_wmark(tegra, dev); @@ -319,6 +329,9 @@ static void actmon_isr_device(struct tegra_devfreq *tegra, device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL); device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS); + + trace_device_isr_exit(tegra->regs, dev->config->offset, + dev->boost_freq, cpufreq_quick_get(0)); } static unsigned long
actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra, @@ -715,6 +728,10 @@ static int tegra_governor_get_target(struct devfreq *devfreq, actmon_update_target(tegra, dev); target_freq = max(target_freq, dev->target_freq); + + trace_device_target_freq(dev->config->offset, dev->target_freq); + trace_device_target_update(tegra->regs, dev->config->offset, + dev->boost_freq, cpufreq_quick_get(0)); } /* diff --git a/include/trace/events/tegra30_devfreq.h b/include/trace/events/tegra30_devfreq.h new file mode 100644 index 0000000000000..8f264a489daf8 --- /dev/null +++ b/include/trace/events/tegra30_devfreq.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tegra30_devfreq + +#if !defined(_TRACE_TEGRA30_DEVFREQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TEGRA30_DEVFREQ_H + +#include +#include +#include + +DECLARE_EVENT_CLASS(device_state, + TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq), + TP_ARGS(base, offset, boost, cpufreq), + TP_STRUCT__entry( + __field(u32, offset) + __field(u32, intr_status) + __field(u32, ctrl) + __field(u32, avg_count) + __field(u32, avg_lower) + __field(u32, avg_upper) + __field(u32, count) + __field(u32, lower) + __field(u32, upper) + __field(u32, boost_freq) + __field(u32, cpu_freq) + ), + TP_fast_assign( + __entry->offset = offset; + __entry->intr_status = readl_relaxed(base + offset + 0x24); + __entry->ctrl = readl_relaxed(base + offset + 0x0); + __entry->avg_count = readl_relaxed(base + offset + 0x20); + __entry->avg_lower = readl_relaxed(base + offset + 0x14); + __entry->avg_upper = readl_relaxed(base + offset + 0x10); + __entry->count = readl_relaxed(base + offset + 0x1c); + __entry->lower = readl_relaxed(base + offset + 0x8); + __entry->upper = readl_relaxed(base + offset + 0x4); + __entry->boost_freq = boost; + __entry->cpu_freq = cpufreq; + ), + TP_printk("%03x: intr 0x%08x ctrl 0x%08x avg %010u %010u %010u cnt %010u %010u %010u boost %010u cpu %u", + 
__entry->offset, + __entry->intr_status, + __entry->ctrl, + __entry->avg_count, + __entry->avg_lower, + __entry->avg_upper, + __entry->count, + __entry->lower, + __entry->upper, + __entry->boost_freq, + __entry->cpu_freq) +); + +DEFINE_EVENT(device_state, device_isr_enter, + TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq), + TP_ARGS(base, offset, boost, cpufreq)); + +DEFINE_EVENT(device_state, device_isr_exit, + TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq), + TP_ARGS(base, offset, boost, cpufreq)); + +DEFINE_EVENT(device_state, device_target_update, + TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq), + TP_ARGS(base, offset, boost, cpufreq)); + +TRACE_EVENT(device_lower_upper, + TP_PROTO(u32 offset, u32 target, u32 lower, u32 upper), + TP_ARGS(offset, target, lower, upper), + TP_STRUCT__entry( + __field(u32, offset) + __field(u32, target) + __field(u32, lower) + __field(u32, upper) + ), + TP_fast_assign( + __entry->offset = offset; + __entry->target = target; + __entry->lower = lower; + __entry->upper = upper; + ), + TP_printk("%03x: freq %010u lower freq %010u upper freq %010u", + __entry->offset, + __entry->target, + __entry->lower, + __entry->upper) +); + +TRACE_EVENT(device_target_freq, + TP_PROTO(u32 offset, u32 target), + TP_ARGS(offset, target), + TP_STRUCT__entry( + __field(u32, offset) + __field(u32, target) + ), + TP_fast_assign( + __entry->offset = offset; + __entry->target = target; + ), + TP_printk("%03x: freq %010u", __entry->offset, __entry->target) +); +#endif /* _TRACE_TEGRA30_DEVFREQ_H */ + +/* This part must be outside protection */ +#include From c877ee6c05556b6d08583c268ad2a05ff739f87b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 8 May 2018 16:28:36 +0300 Subject: [PATCH 743/851] memory: tegra: Block DMA for clients HW on a faulty memory access Currently Memory Controller informs about erroneous memory accesses done by memory clients and that's it. 
Let's make it block the whole HW unit that corresponds to the misbehaving memory client, in order to try to avoid memory corruption and to stop deliberate manipulation attempts by a misbehaving client. Signed-off-by: Dmitry Osipenko --- drivers/memory/tegra/mc.c | 50 ++++++++++++++++++++++ drivers/memory/tegra/tegra114.c | 64 +++++++++++++++++++++++++++++ drivers/memory/tegra/tegra124.c | 66 +++++++++++++++++++++++++++++ drivers/memory/tegra/tegra20.c | 65 +++++++++++++++++++++++++++++ drivers/memory/tegra/tegra210.c | 73 +++++++++++++++++++++++++++++++++ drivers/memory/tegra/tegra30.c | 66 +++++++++++++++++++++++++++++ include/soc/tegra/mc.h | 6 +++ 7 files changed, 390 insertions(+) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 3c5aae7abf358..73eb83ea82c52 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -351,6 +351,45 @@ unsigned int tegra_mc_get_emem_device_count(struct tegra_mc *mc) } EXPORT_SYMBOL_GPL(tegra_mc_get_emem_device_count); +int tegra_mc_error_block_client_dma(struct tegra_mc *mc, + unsigned int client_idx) +{ + const struct tegra_mc_reset_ops *rst_ops; + const struct tegra_mc_reset *rst; + const char *client; + unsigned int id; + int err; + + id = mc->soc->clients[client_idx].reset_id; + if (id == TEGRA_MC_CLIENT_NO_RESET) + return 0; + + client = mc->soc->clients[client_idx].name; + + rst_ops = mc->soc->reset_ops; + if (!rst_ops) + return -ENODEV; + + if (!rst_ops->block_dma) + return 0; + + rst = tegra_mc_reset_find(mc, id); + if (!rst) + return -ENODEV; + + err = rst_ops->block_dma(mc, rst); + if (err) { + dev_err_ratelimited(mc->dev, "%s: failed to block DMA: %d\n", + client, err); + return err; + } + + dev_warn_ratelimited(mc->dev, "%s: DMA blocked\n", client); + + return 0; +} +EXPORT_SYMBOL_GPL(tegra_mc_error_block_client_dma); + #if defined(CONFIG_ARCH_TEGRA_3x_SOC) || \ defined(CONFIG_ARCH_TEGRA_114_SOC) || \ defined(CONFIG_ARCH_TEGRA_124_SOC) || \ @@ -524,6 +563,7 @@ static irqreturn_t
tegra30_mc_handle_irq(int irq, void *data) const char *direction, *secure; phys_addr_t addr = 0; unsigned int i; + bool block_dma; char perm[7]; u8 id, type; u32 value; @@ -537,6 +577,10 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data) addr <<= 32; } #endif + if (value & MC_ERR_STATUS_RW) + block_dma = true; + else + block_dma = false; if (value & MC_ERR_STATUS_RW) direction = "write"; @@ -593,6 +637,12 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data) value = mc_readl(mc, MC_ERR_ADR); addr |= value; + /* Read errors are quite common, hence lets skip them since + * not all drivers support recovering from a blocked DMA. + */ + if (block_dma) + tegra_mc_error_block_client_dma(mc, i); + dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s (%s%s)\n", client, secure, direction, &addr, error, desc, perm); diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c index 41350570c815c..d03a5d162dbdc 100644 --- a/drivers/memory/tegra/tegra114.c +++ b/drivers/memory/tegra/tegra114.c @@ -23,6 +23,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x01, .name = "display0a", @@ -39,6 +40,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x02, .name = "display0ab", @@ -55,6 +57,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x03, .name = "display0b", @@ -71,6 +74,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x04, .name = "display0bb", @@ -87,6 +91,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x05, .name = "display0c", @@ -103,6 +108,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = 
{ .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x06, .name = "display0cb", @@ -119,6 +125,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x09, .name = "eppup", @@ -135,6 +142,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x33, }, }, + .reset_id = TEGRA114_MC_RESET_EPP, }, { .id = 0x0a, .name = "g2pr", @@ -151,6 +159,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x09, }, }, + .reset_id = TEGRA114_MC_RESET_2D, }, { .id = 0x0b, .name = "g2sr", @@ -167,6 +176,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x09, }, }, + .reset_id = TEGRA114_MC_RESET_2D, }, { .id = 0x0f, .name = "avpcarm7r", @@ -183,6 +193,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x10, .name = "displayhc", @@ -199,6 +210,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x68, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x11, .name = "displayhcb", @@ -215,6 +227,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x68, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x12, .name = "fdcdrd", @@ -231,6 +244,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA114_MC_RESET_3D, }, { .id = 0x13, .name = "fdcdrd2", @@ -247,6 +261,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA114_MC_RESET_3D2, }, { .id = 0x14, .name = "g2dr", @@ -263,6 +278,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA114_MC_RESET_2D, }, { .id = 0x15, .name = "hdar", @@ -279,6 +295,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x16, 
.name = "host1xdmar", @@ -295,6 +312,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA114_MC_RESET_HC, }, { .id = 0x17, .name = "host1xr", @@ -311,6 +329,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA114_MC_RESET_HC, }, { .id = 0x18, .name = "idxsrd", @@ -327,6 +346,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0b, }, }, + .reset_id = TEGRA114_MC_RESET_3D, }, { .id = 0x1c, .name = "msencsrd", @@ -359,6 +379,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1e, .name = "ppcsahbslvr", @@ -375,6 +396,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xe8, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x20, .name = "texl2srd", @@ -391,6 +413,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA114_MC_RESET_3D2, }, { .id = 0x22, .name = "vdebsevr", @@ -407,6 +430,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x23, .name = "vdember", @@ -423,6 +447,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x24, .name = "vdemcer", @@ -439,6 +464,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xb8, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x25, .name = "vdetper", @@ -455,6 +481,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xee, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x26, .name = "mpcorelpr", @@ -467,6 +494,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x27, .name = "mpcorer", @@ -479,6 +507,7 @@ static const struct 
tegra_mc_client tegra114_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x28, .name = "eppu", @@ -495,6 +524,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x33, }, }, + .reset_id = TEGRA114_MC_RESET_EPP, }, { .id = 0x29, .name = "eppv", @@ -511,6 +541,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA114_MC_RESET_EPP, }, { .id = 0x2a, .name = "eppy", @@ -527,6 +558,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA114_MC_RESET_EPP, }, { .id = 0x2b, .name = "msencswr", @@ -543,6 +575,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA114_MC_RESET_MPE, }, { .id = 0x2c, .name = "viwsb", @@ -559,6 +592,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x47, }, }, + .reset_id = TEGRA114_MC_RESET_VI, }, { .id = 0x2d, .name = "viwu", @@ -575,6 +609,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VI, }, { .id = 0x2e, .name = "viwv", @@ -591,6 +626,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VI, }, { .id = 0x2f, .name = "viwy", @@ -607,6 +643,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x47, }, }, + .reset_id = TEGRA114_MC_RESET_VI, }, { .id = 0x30, .name = "g2dw", @@ -623,6 +660,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x9, }, }, + .reset_id = TEGRA114_MC_RESET_2D, }, { .id = 0x32, .name = "avpcarm7w", @@ -639,6 +677,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x33, .name = "fdcdwr", @@ -655,6 +694,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA114_MC_RESET_3D, }, { .id 
= 0x34, .name = "fdcdwr2", @@ -671,6 +711,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA114_MC_RESET_3D2, }, { .id = 0x35, .name = "hdaw", @@ -687,6 +728,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x36, .name = "host1xw", @@ -703,6 +745,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x25, }, }, + .reset_id = TEGRA114_MC_RESET_HC, }, { .id = 0x37, .name = "ispw", @@ -719,6 +762,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_ISP, }, { .id = 0x38, .name = "mpcorelpw", @@ -731,6 +775,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x39, .name = "mpcorew", @@ -743,6 +788,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3b, .name = "ppcsahbdmaw", @@ -759,6 +805,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3c, .name = "ppcsahbslvw", @@ -775,6 +822,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xe8, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3e, .name = "vdebsevw", @@ -791,6 +839,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x3f, .name = "vdedbgw", @@ -807,6 +856,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x40, .name = "vdembew", @@ -823,6 +873,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x89, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x41, .name = "vdetpmw", @@ -839,6 +890,7 @@ static const struct 
tegra_mc_client tegra114_mc_clients[] = { .def = 0x59, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x4a, .name = "xusb_hostr", @@ -855,6 +907,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4b, .name = "xusb_hostw", @@ -871,6 +924,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4c, .name = "xusb_devr", @@ -887,6 +941,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4d, .name = "xusb_devw", @@ -903,6 +958,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4e, .name = "fdcdwr3", @@ -919,6 +975,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4f, .name = "fdcdrd3", @@ -935,6 +992,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x50, .name = "fdcwr4", @@ -951,6 +1009,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x51, .name = "fdcrd4", @@ -967,6 +1026,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x52, .name = "emucifr", @@ -979,6 +1039,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x53, .name = "emucifw", @@ -991,6 +1052,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x54, .name = "tsecsrd", @@ -1007,6 +1069,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { 
.def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x55, .name = "tsecswr", @@ -1023,6 +1086,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, }; diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c index d780a84241fe0..b759d53d78c79 100644 --- a/drivers/memory/tegra/tegra124.c +++ b/drivers/memory/tegra/tegra124.c @@ -24,6 +24,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x0, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x01, .name = "display0a", @@ -40,6 +41,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0xc2, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x02, .name = "display0ab", @@ -56,6 +58,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0xc6, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x03, .name = "display0b", @@ -72,6 +75,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x04, .name = "display0bb", @@ -88,6 +92,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x05, .name = "display0c", @@ -104,6 +109,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x06, .name = "display0cb", @@ -120,6 +126,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x0e, .name = "afir", @@ -136,6 +143,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA124_MC_RESET_AFI, }, { .id = 0x0f, .name = "avpcarm7r", @@ -152,6 +160,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, 
}, { .id = 0x10, .name = "displayhc", @@ -168,6 +177,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x11, .name = "displayhcb", @@ -184,6 +194,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x15, .name = "hdar", @@ -200,6 +211,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x24, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x16, .name = "host1xdmar", @@ -216,6 +228,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA124_MC_RESET_HC, }, { .id = 0x17, .name = "host1xr", @@ -232,6 +245,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA124_MC_RESET_HC, }, { .id = 0x1c, .name = "msencsrd", @@ -248,6 +262,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x23, }, }, + .reset_id = TEGRA124_MC_RESET_MSENC, }, { .id = 0x1d, .name = "ppcsahbdmar", @@ -264,6 +279,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1e, .name = "ppcsahbslvr", @@ -280,6 +296,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1f, .name = "satar", @@ -296,6 +313,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x65, }, }, + .reset_id = TEGRA124_MC_RESET_SATA, }, { .id = 0x22, .name = "vdebsevr", @@ -312,6 +330,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x4f, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x23, .name = "vdember", @@ -328,6 +347,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x3d, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x24, .name = "vdemcer", @@ -344,6 +364,7 @@ 
static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x66, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x25, .name = "vdetper", @@ -360,6 +381,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x26, .name = "mpcorelpr", @@ -372,6 +394,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x27, .name = "mpcorer", @@ -384,6 +407,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x2b, .name = "msencswr", @@ -400,6 +424,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_MSENC, }, { .id = 0x31, .name = "afiw", @@ -416,6 +441,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_AFI, }, { .id = 0x32, .name = "avpcarm7w", @@ -432,6 +458,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x35, .name = "hdaw", @@ -448,6 +475,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x36, .name = "host1xw", @@ -476,6 +504,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x39, .name = "mpcorew", @@ -488,6 +517,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3b, .name = "ppcsahbdmaw", @@ -504,6 +534,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3c, .name = "ppcsahbslvw", @@ -520,6 +551,7 @@ static const struct tegra_mc_client 
tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3d, .name = "sataw", @@ -536,6 +568,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x65, }, }, + .reset_id = TEGRA124_MC_RESET_SATA, }, { .id = 0x3e, .name = "vdebsevw", @@ -552,6 +585,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x3f, .name = "vdedbgw", @@ -568,6 +602,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x40, .name = "vdembew", @@ -584,6 +619,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x41, .name = "vdetpmw", @@ -600,6 +636,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x44, .name = "ispra", @@ -616,6 +653,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x18, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2, }, { .id = 0x46, .name = "ispwa", @@ -632,6 +670,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2, }, { .id = 0x47, .name = "ispwb", @@ -648,6 +687,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2, }, { .id = 0x4a, .name = "xusb_hostr", @@ -664,6 +704,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x39, }, }, + .reset_id = TEGRA124_MC_RESET_XUSB_HOST, }, { .id = 0x4b, .name = "xusb_hostw", @@ -680,6 +721,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_XUSB_HOST, }, { .id = 0x4c, .name = "xusb_devr", @@ -696,6 +738,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x39, }, }, + .reset_id = 
TEGRA124_MC_RESET_XUSB_DEV, }, { .id = 0x4d, .name = "xusb_devw", @@ -712,6 +755,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_XUSB_DEV, }, { .id = 0x4e, .name = "isprab", @@ -728,6 +772,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x18, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2B, }, { .id = 0x50, .name = "ispwab", @@ -744,6 +789,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2B, }, { .id = 0x51, .name = "ispwbb", @@ -760,6 +806,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2B, }, { .id = 0x54, .name = "tsecsrd", @@ -776,6 +823,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x9b, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x55, .name = "tsecswr", @@ -792,6 +840,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x56, .name = "a9avpscr", @@ -808,6 +857,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x57, .name = "a9avpscw", @@ -824,6 +874,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x58, .name = "gpusrd", @@ -841,6 +892,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x59, .name = "gpuswr", @@ -858,6 +910,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x5a, .name = "displayt", @@ -874,6 +927,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x60, .name = 
"sdmmcra", @@ -890,6 +944,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC1, }, { .id = 0x61, .name = "sdmmcraa", @@ -906,6 +961,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC2, }, { .id = 0x62, .name = "sdmmcr", @@ -922,6 +978,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC3, }, { .id = 0x63, .swgroup = TEGRA_SWGROUP_SDMMC4A, @@ -938,6 +995,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC4, }, { .id = 0x64, .name = "sdmmcwa", @@ -954,6 +1012,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC1, }, { .id = 0x65, .name = "sdmmcwaa", @@ -970,6 +1029,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC2, }, { .id = 0x66, .name = "sdmmcw", @@ -986,6 +1046,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC3, }, { .id = 0x67, .name = "sdmmcwab", @@ -1002,6 +1063,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC4, }, { .id = 0x6c, .name = "vicsrd", @@ -1018,6 +1080,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA124_MC_RESET_VIC, }, { .id = 0x6d, .name = "vicswr", @@ -1034,6 +1097,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VIC, }, { .id = 0x72, .name = "viw", @@ -1050,6 +1114,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VI, }, { .id = 0x73, .name = "displayd", @@ -1066,6 +1131,7 @@ static 
const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, }; diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c index fcd7738fcb536..dc378356f4bad 100644 --- a/drivers/memory/tegra/tegra20.c +++ b/drivers/memory/tegra/tegra20.c @@ -91,159 +91,211 @@ static const struct tegra_mc_client tegra20_mc_clients[] = { { .id = 0x00, .name = "display0a", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x01, .name = "display0ab", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x02, .name = "display0b", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x03, .name = "display0bb", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x04, .name = "display0c", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x05, .name = "display0cb", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x06, .name = "display1b", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x07, .name = "display1bb", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x08, .name = "eppup", + .reset_id = TEGRA20_MC_RESET_EPP, }, { .id = 0x09, .name = "g2pr", + .reset_id = TEGRA20_MC_RESET_2D, }, { .id = 0x0a, .name = "g2sr", + .reset_id = TEGRA20_MC_RESET_2D, }, { .id = 0x0b, .name = "mpeunifbr", + .reset_id = TEGRA20_MC_RESET_MPEB, }, { .id = 0x0c, .name = "viruv", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x0d, .name = "avpcarm7r", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x0e, .name = "displayhc", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x0f, .name = "displayhcb", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x10, .name = "fdcdrd", + .reset_id = TEGRA20_MC_RESET_3D, }, { .id = 0x11, .name = "g2dr", + .reset_id = TEGRA20_MC_RESET_2D, }, { .id = 0x12, .name = "host1xdmar", + .reset_id = TEGRA20_MC_RESET_HC, }, { .id = 0x13, .name = "host1xr", + .reset_id = TEGRA20_MC_RESET_HC, }, { .id = 0x14, .name = "idxsrd", + .reset_id = TEGRA20_MC_RESET_3D, }, { .id = 0x15, .name = "mpcorer", + 
.reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x16, .name = "mpe_ipred", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x17, .name = "mpeamemrd", + .reset_id = TEGRA20_MC_RESET_MPEA, }, { .id = 0x18, .name = "mpecsrd", + .reset_id = TEGRA20_MC_RESET_MPEC, }, { .id = 0x19, .name = "ppcsahbdmar", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1a, .name = "ppcsahbslvr", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1b, .name = "texsrd", + .reset_id = TEGRA20_MC_RESET_3D, }, { .id = 0x1c, .name = "vdebsevr", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x1d, .name = "vdember", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x1e, .name = "vdemcer", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x1f, .name = "vdetper", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x20, .name = "eppu", + .reset_id = TEGRA20_MC_RESET_EPP, }, { .id = 0x21, .name = "eppv", + .reset_id = TEGRA20_MC_RESET_EPP, }, { .id = 0x22, .name = "eppy", + .reset_id = TEGRA20_MC_RESET_EPP, }, { .id = 0x23, .name = "mpeunifbw", + .reset_id = TEGRA20_MC_RESET_MPEB, }, { .id = 0x24, .name = "viwsb", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x25, .name = "viwu", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x26, .name = "viwv", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x27, .name = "viwy", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x28, .name = "g2dw", + .reset_id = TEGRA20_MC_RESET_2D, }, { .id = 0x29, .name = "avpcarm7w", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x2a, .name = "fdcdwr", + .reset_id = TEGRA20_MC_RESET_3D, }, { .id = 0x2b, .name = "host1xw", + .reset_id = TEGRA20_MC_RESET_HC, }, { .id = 0x2c, .name = "ispw", + .reset_id = TEGRA20_MC_RESET_ISP, }, { .id = 0x2d, .name = "mpcorew", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x2e, .name = "mpecswr", + .reset_id = TEGRA20_MC_RESET_MPEC, }, { .id = 0x2f, .name = "ppcsahbdmaw", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x30, .name = "ppcsahbslvw", + .reset_id = 
TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x31, .name = "vdebsevw", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x32, .name = "vdembew", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x33, .name = "vdetpmw", + .reset_id = TEGRA20_MC_RESET_VDE, }, }; @@ -728,6 +780,7 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) const char *error = tegra_mc_status_names[bit]; const char *direction = "read", *secure = ""; const char *client, *desc; + bool block_dma = true; phys_addr_t addr; u32 value, reg; u8 id, type; @@ -742,6 +795,8 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) if (value & BIT(31)) direction = "write"; + else + block_dma = false; break; case MC_INT_INVALID_GART_PAGE: @@ -753,6 +808,8 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) if (value & BIT(0)) direction = "write"; + else + block_dma = false; break; case MC_INT_SECURITY_VIOLATION: @@ -766,12 +823,20 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) if (value & BIT(31)) direction = "write"; + else + block_dma = false; break; default: continue; } + /* Read errors are quite common, hence lets skip them since + * not all drivers support recovering from a blocked DMA. 
+ */ + if (block_dma) + tegra_mc_error_block_client_dma(mc, id); + client = mc->soc->clients[id].name; addr = mc_readl(mc, reg + sizeof(u32)); diff --git a/drivers/memory/tegra/tegra210.c b/drivers/memory/tegra/tegra210.c index 8ab6498dbe7d2..18debe6ed7098 100644 --- a/drivers/memory/tegra/tegra210.c +++ b/drivers/memory/tegra/tegra210.c @@ -12,6 +12,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .id = 0x00, .name = "ptcr", .swgroup = TEGRA_SWGROUP_PTC, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x01, .name = "display0a", @@ -28,6 +29,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x02, .name = "display0ab", @@ -44,6 +46,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x03, .name = "display0b", @@ -60,6 +63,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x04, .name = "display0bb", @@ -76,6 +80,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x05, .name = "display0c", @@ -92,6 +97,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x06, .name = "display0cb", @@ -108,6 +114,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x0e, .name = "afir", @@ -124,6 +131,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x2e, }, }, + .reset_id = TEGRA210_MC_RESET_AFI, }, { .id = 0x0f, .name = "avpcarm7r", @@ -140,6 +148,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x10, .name = "displayhc", @@ -156,6 +165,7 @@ static 
const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x11, .name = "displayhcb", @@ -172,6 +182,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x15, .name = "hdar", @@ -188,6 +199,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x24, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x16, .name = "host1xdmar", @@ -204,6 +216,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA210_MC_RESET_HC, }, { .id = 0x17, .name = "host1xr", @@ -220,6 +233,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA210_MC_RESET_HC, }, { .id = 0x1c, .name = "nvencsrd", @@ -236,6 +250,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x23, }, }, + .reset_id = TEGRA210_MC_RESET_NVENC, }, { .id = 0x1d, .name = "ppcsahbdmar", @@ -252,6 +267,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1e, .name = "ppcsahbslvr", @@ -268,6 +284,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1f, .name = "satar", @@ -284,6 +301,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x65, }, }, + .reset_id = TEGRA210_MC_RESET_SATA, }, { .id = 0x27, .name = "mpcorer", @@ -296,6 +314,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x2b, .name = "nvencswr", @@ -312,6 +331,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_NVENC, }, { .id = 0x31, .name = "afiw", @@ -328,6 +348,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { 
.def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_AFI, }, { .id = 0x32, .name = "avpcarm7w", @@ -344,6 +365,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x35, .name = "hdaw", @@ -360,6 +382,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x36, .name = "host1xw", @@ -376,6 +399,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_HC, }, { .id = 0x39, .name = "mpcorew", @@ -388,6 +412,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3b, .name = "ppcsahbdmaw", @@ -404,6 +429,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3c, .name = "ppcsahbslvw", @@ -420,6 +446,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3d, .name = "sataw", @@ -436,6 +463,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SATA, }, { .id = 0x44, .name = "ispra", @@ -452,6 +480,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x18, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2, }, { .id = 0x46, .name = "ispwa", @@ -468,6 +497,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2, }, { .id = 0x47, .name = "ispwb", @@ -484,6 +514,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2, }, { .id = 0x4a, .name = "xusb_hostr", @@ -500,6 +531,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x7a, }, }, + .reset_id = TEGRA210_MC_RESET_XUSB_HOST, }, 
{ .id = 0x4b, .name = "xusb_hostw", @@ -516,6 +548,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_XUSB_HOST, }, { .id = 0x4c, .name = "xusb_devr", @@ -532,6 +565,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x39, }, }, + .reset_id = TEGRA210_MC_RESET_XUSB_DEV, }, { .id = 0x4d, .name = "xusb_devw", @@ -548,6 +582,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_XUSB_DEV, }, { .id = 0x4e, .name = "isprab", @@ -564,6 +599,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x18, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2B, }, { .id = 0x50, .name = "ispwab", @@ -580,6 +616,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2B, }, { .id = 0x51, .name = "ispwbb", @@ -596,6 +633,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2B, }, { .id = 0x54, .name = "tsecsrd", @@ -612,6 +650,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x9b, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x55, .name = "tsecswr", @@ -628,6 +667,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x56, .name = "a9avpscr", @@ -644,6 +684,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x57, .name = "a9avpscw", @@ -660,6 +701,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x58, .name = "gpusrd", @@ -677,6 +719,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x59, .name = "gpuswr", @@ -694,6 
+737,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x5a, .name = "displayt", @@ -710,6 +754,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x60, .name = "sdmmcra", @@ -726,6 +771,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC1, }, { .id = 0x61, .name = "sdmmcraa", @@ -742,6 +788,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x5a, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC2, }, { .id = 0x62, .name = "sdmmcr", @@ -758,6 +805,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC3, }, { .id = 0x63, .swgroup = TEGRA_SWGROUP_SDMMC4A, @@ -774,6 +822,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x5a, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC4, }, { .id = 0x64, .name = "sdmmcwa", @@ -790,6 +839,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC1, }, { .id = 0x65, .name = "sdmmcwaa", @@ -806,6 +856,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC2, }, { .id = 0x66, .name = "sdmmcw", @@ -822,6 +873,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC3, }, { .id = 0x67, .name = "sdmmcwab", @@ -838,6 +890,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC4, }, { .id = 0x6c, .name = "vicsrd", @@ -854,6 +907,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA210_MC_RESET_VIC, }, { .id = 0x6d, .name = "vicswr", @@ -870,6 +924,7 @@ static const struct 
tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_VIC, }, { .id = 0x72, .name = "viw", @@ -886,6 +941,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_VI, }, { .id = 0x73, .name = "displayd", @@ -902,6 +958,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA210_MC_RESET_DC, }, { .id = 0x78, .name = "nvdecsrd", @@ -918,6 +975,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x23, }, }, + .reset_id = TEGRA210_MC_RESET_NVDEC, }, { .id = 0x79, .name = "nvdecswr", @@ -934,6 +992,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_NVDEC, }, { .id = 0x7a, .name = "aper", @@ -950,6 +1009,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA210_MC_RESET_APE, }, { .id = 0x7b, .name = "apew", @@ -966,6 +1026,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_APE, }, { .id = 0x7e, .name = "nvjpgsrd", @@ -982,6 +1043,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x23, }, }, + .reset_id = TEGRA210_MC_RESET_NVJPG, }, { .id = 0x7f, .name = "nvjpgswr", @@ -998,6 +1060,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_NVJPG, }, { .id = 0x80, .name = "sesrd", @@ -1014,6 +1077,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x2e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x81, .name = "seswr", @@ -1030,6 +1094,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x82, .name = "axiapr", @@ -1046,6 +1111,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0xff, }, }, + .reset_id 
= TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x83, .name = "axiapw", @@ -1062,6 +1128,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x84, .name = "etrr", @@ -1078,6 +1145,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x85, .name = "etrw", @@ -1094,6 +1162,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x86, .name = "tsecsrdb", @@ -1110,6 +1179,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x9b, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x87, .name = "tsecswrb", @@ -1126,6 +1196,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x88, .name = "gpusrd2", @@ -1143,6 +1214,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x89, .name = "gpuswr2", @@ -1160,6 +1232,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, }; diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c index 84316357513db..781760b2c26e7 100644 --- a/drivers/memory/tegra/tegra30.c +++ b/drivers/memory/tegra/tegra30.c @@ -45,6 +45,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x01, @@ -62,6 +63,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 128, }, { .id = 0x02, @@ -79,6 +81,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 128, }, { .id = 
0x03, @@ -96,6 +99,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 64, }, { .id = 0x04, @@ -113,6 +117,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 64, }, { .id = 0x05, @@ -130,6 +135,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 128, }, { .id = 0x06, @@ -147,6 +153,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 128, }, { .id = 0x07, @@ -164,6 +171,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 64, }, { .id = 0x08, @@ -181,6 +189,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 64, }, { .id = 0x09, @@ -198,6 +207,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x17, }, }, + .reset_id = TEGRA30_MC_RESET_EPP, .fifo_size = 16 * 8, }, { .id = 0x0a, @@ -215,6 +225,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x09, }, }, + .reset_id = TEGRA30_MC_RESET_2D, .fifo_size = 16 * 64, }, { .id = 0x0b, @@ -232,6 +243,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x09, }, }, + .reset_id = TEGRA30_MC_RESET_2D, .fifo_size = 16 * 64, }, { .id = 0x0c, @@ -249,6 +261,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 8, }, { .id = 0x0d, @@ -266,6 +279,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x2c, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 8, }, { .id = 0x0e, @@ -283,6 +297,7 @@ static const struct 
tegra_mc_client tegra30_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA30_MC_RESET_AFI, .fifo_size = 16 * 32, }, { .id = 0x0f, @@ -300,6 +315,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x10, @@ -317,6 +333,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x11, @@ -334,6 +351,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x12, @@ -351,6 +369,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_3D, .fifo_size = 16 * 48, }, { .id = 0x13, @@ -368,6 +387,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_3D2, .fifo_size = 16 * 48, }, { .id = 0x14, @@ -385,6 +405,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_2D, .fifo_size = 16 * 48, }, { .id = 0x15, @@ -402,6 +423,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 16, }, { .id = 0x16, @@ -419,6 +441,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x05, }, }, + .reset_id = TEGRA30_MC_RESET_HC, .fifo_size = 16 * 16, }, { .id = 0x17, @@ -436,6 +459,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA30_MC_RESET_HC, .fifo_size = 16 * 8, }, { .id = 0x18, @@ -453,6 +477,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_3D, .fifo_size = 16 * 64, }, { .id = 0x19, @@ -470,6 +495,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + 
.reset_id = TEGRA30_MC_RESET_3D2, .fifo_size = 16 * 64, }, { .id = 0x1a, @@ -487,6 +513,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 2, }, { .id = 0x1b, @@ -504,6 +531,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x42, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 64, }, { .id = 0x1c, @@ -521,6 +549,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 8, }, { .id = 0x1d, @@ -538,6 +567,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x1e, @@ -555,6 +585,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x12, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 8, }, { .id = 0x1f, @@ -572,6 +603,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x33, }, }, + .reset_id = TEGRA30_MC_RESET_SATA, .fifo_size = 16 * 32, }, { .id = 0x20, @@ -589,6 +621,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_3D, .fifo_size = 16 * 64, }, { .id = 0x21, @@ -606,6 +639,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_3D2, .fifo_size = 16 * 64, }, { .id = 0x22, @@ -623,6 +657,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 8, }, { .id = 0x23, @@ -640,6 +675,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xd0, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 4, }, { .id = 0x24, @@ -657,6 +693,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x2a, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 16, }, { .id = 0x25, 
@@ -674,6 +711,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x74, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 16, }, { .id = 0x26, @@ -687,6 +725,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 14, }, { .id = 0x27, @@ -700,6 +739,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 14, }, { .id = 0x28, @@ -717,6 +757,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA30_MC_RESET_EPP, .fifo_size = 16 * 64, }, { .id = 0x29, @@ -734,6 +775,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA30_MC_RESET_EPP, .fifo_size = 16 * 64, }, { .id = 0x2a, @@ -751,6 +793,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA30_MC_RESET_EPP, .fifo_size = 16 * 64, }, { .id = 0x2b, @@ -768,6 +811,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 8, }, { .id = 0x2c, @@ -785,6 +829,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x12, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 64, }, { .id = 0x2d, @@ -802,6 +847,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xb2, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 64, }, { .id = 0x2e, @@ -819,6 +865,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xb2, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 64, }, { .id = 0x2f, @@ -836,6 +883,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x12, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 64, }, { .id = 0x30, @@ -853,6 +901,7 @@ static const struct tegra_mc_client 
tegra30_mc_clients[] = { .def = 0x9, }, }, + .reset_id = TEGRA30_MC_RESET_2D, .fifo_size = 16 * 128, }, { .id = 0x31, @@ -870,6 +919,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA30_MC_RESET_AFI, .fifo_size = 16 * 32, }, { .id = 0x32, @@ -887,6 +937,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x33, @@ -904,6 +955,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_3D, .fifo_size = 16 * 48, }, { .id = 0x34, @@ -921,6 +973,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_3D2, .fifo_size = 16 * 48, }, { .id = 0x35, @@ -938,6 +991,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 16, }, { .id = 0x36, @@ -955,6 +1009,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA30_MC_RESET_HC, .fifo_size = 16 * 32, }, { .id = 0x37, @@ -972,6 +1027,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_ISP, .fifo_size = 16 * 64, }, { .id = 0x38, @@ -985,6 +1041,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 24, }, { .id = 0x39, @@ -998,6 +1055,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 24, }, { .id = 0x3a, @@ -1015,6 +1073,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 8, }, { .id = 0x3b, @@ -1032,6 +1091,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x10, }, }, + .reset_id 
= TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x3c, @@ -1049,6 +1109,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x06, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 4, }, { .id = 0x3d, @@ -1066,6 +1127,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x33, }, }, + .reset_id = TEGRA30_MC_RESET_SATA, .fifo_size = 16 * 32, }, { .id = 0x3e, @@ -1083,6 +1145,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 4, }, { .id = 0x3f, @@ -1100,6 +1163,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 16, }, { .id = 0x40, @@ -1117,6 +1181,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x42, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 2, }, { .id = 0x41, @@ -1134,6 +1199,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x2a, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 16, }, }; diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 1066b1194a5a9..80881099e4a71 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -24,9 +24,12 @@ struct tegra_mc_timing { u32 *emem_data; }; +#define TEGRA_MC_CLIENT_NO_RESET UINT_MAX + struct tegra_mc_client { unsigned int id; const char *name; + unsigned int reset_id; /* * For Tegra210 and earlier, this is the SWGROUP ID used for IOVA translations in the * Tegra SMMU, whereas on Tegra186 and later this is the ID used to override the ARM SMMU @@ -235,6 +238,9 @@ struct tegra_mc { int tegra_mc_write_emem_configuration(struct tegra_mc *mc, unsigned long rate); unsigned int tegra_mc_get_emem_device_count(struct tegra_mc *mc); +int tegra_mc_error_block_client_dma(struct tegra_mc *mc, + unsigned int client_idx); + #ifdef CONFIG_TEGRA_MC struct tegra_mc 
*devm_tegra_memory_controller_get(struct device *dev); int tegra_mc_probe_device(struct tegra_mc *mc, struct device *dev); From 23a7d9ebf4f5b8fe604c955f71f294da39704efc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 6 May 2018 23:25:55 +0300 Subject: [PATCH 744/851] ARM: dts: tegra20: Add IOMMU nodes to Host1x clients Enable IOMMU support for Host1x clients. Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra20.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 2cb31bdd9eeab..83d933a977807 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -102,6 +102,8 @@ reset-names = "2d", "mc"; operating-points-v2 = <&gr2d_dvfs_opp_table>; power-domains = <&pd_core>; + + iommus = <&mc>; }; gr3d@54180000 { @@ -112,6 +114,8 @@ reset-names = "3d", "mc"; operating-points-v2 = <&gr3d_dvfs_opp_table>; power-domains = <&pd_3d>; + + iommus = <&mc>; }; dc@54200000 { @@ -139,6 +143,8 @@ "winc", "cursor"; + iommus = <&mc>; + rgb { status = "disabled"; }; @@ -169,6 +175,8 @@ "winc", "cursor"; + iommus = <&mc>; + rgb { status = "disabled"; }; From ba1c37df6112e1d750f9cdb52ba192005d929328 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 6 Oct 2015 19:27:48 +0300 Subject: [PATCH 745/851] xxx: mmc: core: Add quirk for NVIDIA Tegra20 EMMC This quirk is required in order to detect EMMC partitions on some Tegra20 devices. 
Signed-off-by: Dmitry Osipenko --- drivers/mmc/core/mmc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 838726b68ff3e..4eb6ea47bdfd6 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -405,8 +405,17 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) ext_csd[EXT_CSD_SEC_CNT + 3] << 24; /* Cards with density > 2GiB are sector addressed */ - if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512) + if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512) { + if (card->host->caps & MMC_CAP_NONREMOVABLE) { + /* + * Size is in 256K chunks, i.e. 512 sectors each. + * This algorithm is defined and used by NVIDIA, + * according to eMMC 4.41, size is in 128K chunks. + */ + card->ext_csd.sectors -= ext_csd[EXT_CSD_BOOT_MULT] * 512; + } mmc_card_set_blockaddr(card); + } } card->ext_csd.strobe_support = ext_csd[EXT_CSD_STROBE_SUPPORT]; From 3a87f6d89452fe6adbb2f8f8be1fc32cc74bf445 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 25 Nov 2019 00:03:21 +0300 Subject: [PATCH 746/851] Revert "xxx: mmc: core: Add quirk for NVIDIA Tegra20 EMMC" This reverts commit 828663c674099f5b2f831e1281913c762a6f61e8. It makes some T20 devices see eMMC partitions, but breaks others. --- drivers/mmc/core/mmc.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 4eb6ea47bdfd6..838726b68ff3e 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -405,17 +405,8 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) ext_csd[EXT_CSD_SEC_CNT + 3] << 24; /* Cards with density > 2GiB are sector addressed */ - if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512) { - if (card->host->caps & MMC_CAP_NONREMOVABLE) { - /* - * Size is in 256K chunks, i.e. 512 sectors each.
- * This algorithm is defined and used by NVIDIA, - * according to eMMC 4.41, size is in 128K chunks. - */ - card->ext_csd.sectors -= ext_csd[EXT_CSD_BOOT_MULT] * 512; - } + if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512) mmc_card_set_blockaddr(card); - } } card->ext_csd.strobe_support = ext_csd[EXT_CSD_STROBE_SUPPORT]; From bae147b76d80109e3ca8e7d940b781086b080ca0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 14 Dec 2019 23:20:27 +0300 Subject: [PATCH 747/851] xxx: iommu: tegra-gart: Expose as system-wide IOMMU This enables IOMMU for Tegra20 GPU. The patch will be removed after updating DRM WIP patch to support IOMMU in a different way. Signed-off-by: Dmitry Osipenko --- drivers/iommu/tegra-gart.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 6a358f92c7e5d..5e4eea31ac5e5 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -364,8 +364,14 @@ struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) goto unregister_iommu; } + err = bus_set_iommu(&platform_bus_type, &gart_iommu_ops); + if (err) + goto free_savedata; + return gart; +free_savedata: + vfree(gart->savedata); unregister_iommu: iommu_device_unregister(&gart->iommu); remove_sysfs: From 8bec22cb15cb6502c49176b9bd859caceb5f147f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 4 Oct 2018 16:46:38 +0300 Subject: [PATCH 748/851] xxx: staging: android: Add legacy ram-console Device-tree should contain the ram-console node, example: /memreserve/ 0xbeb00000 0x100000; ram-console { compatible = "android,ram-console"; reg = <0xbeb00000 0x100000>; }; --- drivers/staging/android/Kconfig | 2 + drivers/staging/android/Makefile | 2 + drivers/staging/android/ramconsole/Kconfig | 63 +++ drivers/staging/android/ramconsole/Makefile | 1 + .../staging/android/ramconsole/ram_console.c | 487 ++++++++++++++++++ .../staging/android/ramconsole/ram_console.h | 22 + 6 files changed, 577
insertions(+) create mode 100644 drivers/staging/android/ramconsole/Kconfig create mode 100644 drivers/staging/android/ramconsole/Makefile create mode 100644 drivers/staging/android/ramconsole/ram_console.c create mode 100644 drivers/staging/android/ramconsole/ram_console.h diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 70498adb15759..2517fbe3b8c7d 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -17,3 +17,5 @@ config ASHMEM endif # if ANDROID endmenu + +source "drivers/staging/android/ramconsole/Kconfig" diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index e9a55a5e65291..d780e07419d69 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -I$(src) # needed for trace events +obj-y += ramconsole/ + obj-$(CONFIG_ASHMEM) += ashmem.o diff --git a/drivers/staging/android/ramconsole/Kconfig b/drivers/staging/android/ramconsole/Kconfig new file mode 100644 index 0000000000000..db03dca04572d --- /dev/null +++ b/drivers/staging/android/ramconsole/Kconfig @@ -0,0 +1,63 @@ +config ANDROID_RAM_CONSOLE + bool "Android RAM buffer console" + default n + +config ANDROID_RAM_CONSOLE_ENABLE_VERBOSE + bool "Enable verbose console messages on Android RAM console" + default y + depends on ANDROID_RAM_CONSOLE + +menuconfig ANDROID_RAM_CONSOLE_ERROR_CORRECTION + bool "Android RAM Console Enable error correction" + default n + depends on ANDROID_RAM_CONSOLE + depends on !ANDROID_RAM_CONSOLE_EARLY_INIT + select REED_SOLOMON + select REED_SOLOMON_ENC8 + select REED_SOLOMON_DEC8 + +if ANDROID_RAM_CONSOLE_ERROR_CORRECTION + +config ANDROID_RAM_CONSOLE_ERROR_CORRECTION_DATA_SIZE + int "Android RAM Console Data data size" + default 128 + help + Must be a power of 2. 
+ +config ANDROID_RAM_CONSOLE_ERROR_CORRECTION_ECC_SIZE + int "Android RAM Console ECC size" + default 16 + +config ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE + int "Android RAM Console Symbol size" + default 8 + +config ANDROID_RAM_CONSOLE_ERROR_CORRECTION_POLYNOMIAL + hex "Android RAM Console Polynomial" + default 0x19 if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 4) + default 0x29 if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 5) + default 0x61 if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 6) + default 0x89 if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 7) + default 0x11d if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 8) + +endif # ANDROID_RAM_CONSOLE_ERROR_CORRECTION + +config ANDROID_RAM_CONSOLE_EARLY_INIT + bool "Start Android RAM console early" + default n + depends on ANDROID_RAM_CONSOLE + +config ANDROID_RAM_CONSOLE_EARLY_ADDR + hex "Android RAM console virtual address" + default 0 + depends on ANDROID_RAM_CONSOLE_EARLY_INIT + +config ANDROID_RAM_CONSOLE_EARLY_SIZE + hex "Android RAM console buffer size" + default 0 + depends on ANDROID_RAM_CONSOLE_EARLY_INIT + +config ANDROID_RAM_CONSOLE_DEBUG_CONSOLE_SUSPENDED + bool "Resume console in case of kernel panic" + default n + depends on ANDROID_RAM_CONSOLE diff --git a/drivers/staging/android/ramconsole/Makefile b/drivers/staging/android/ramconsole/Makefile new file mode 100644 index 0000000000000..1125854a955ba --- /dev/null +++ b/drivers/staging/android/ramconsole/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ANDROID_RAM_CONSOLE) += ram_console.o diff --git a/drivers/staging/android/ramconsole/ram_console.c b/drivers/staging/android/ramconsole/ram_console.c new file mode 100644 index 0000000000000..198e71633319b --- /dev/null +++ b/drivers/staging/android/ramconsole/ram_console.c @@ -0,0 +1,487 @@ +/* drivers/android/ram_console.c + * + * Copyright (C) 2007-2008 Google, Inc. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ram_console.h" + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION +#include +#endif + +struct ram_console_buffer { + __le32 sig; + __le32 start; + __le32 size; + uint8_t data[0]; +}; + +#define RAM_CONSOLE_SIG (0x43474244) /* DBGC */ + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT +static char __initdata + ram_console_old_log_init_buffer[CONFIG_ANDROID_RAM_CONSOLE_EARLY_SIZE]; +#endif +static char *ram_console_old_log; +static size_t ram_console_old_log_size; + +static struct ram_console_buffer *ram_console_buffer; +static size_t ram_console_buffer_size; +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION +static char *ram_console_par_buffer; +static struct rs_control *ram_console_rs_decoder; +static int ram_console_corrected_bytes; +static int ram_console_bad_blocks; +#define ECC_BLOCK_SIZE CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_DATA_SIZE +#define ECC_SIZE CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_ECC_SIZE +#define ECC_SYMSIZE CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE +#define ECC_POLY CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_POLYNOMIAL +#endif + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION +static void ram_console_encode_rs8(uint8_t *data, size_t len, uint8_t *ecc) +{ + int i; + uint16_t par[ECC_SIZE]; + /* Initialize the parity buffer */ + memset(par, 0, sizeof(par)); + encode_rs8(ram_console_rs_decoder, data, len, par, 0); + for 
(i = 0; i < ECC_SIZE; i++) + ecc[i] = par[i]; +} + +static int ram_console_decode_rs8(void *data, size_t len, uint8_t *ecc) +{ + int i; + uint16_t par[ECC_SIZE]; + for (i = 0; i < ECC_SIZE; i++) + par[i] = ecc[i]; + return decode_rs8(ram_console_rs_decoder, data, par, len, + NULL, 0, NULL, 0, NULL); +} +#endif + +static void ram_console_update(const char *s, unsigned int count) +{ + struct ram_console_buffer *buffer = ram_console_buffer; +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + uint8_t *buffer_end = buffer->data + ram_console_buffer_size; + uint8_t *block; + uint8_t *par; + int size = ECC_BLOCK_SIZE; +#endif + memcpy(buffer->data + le32_to_cpu(buffer->start), s, count); +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + block = buffer->data + (le32_to_cpu(buffer->start) & ~(ECC_BLOCK_SIZE - 1)); + par = ram_console_par_buffer + + (le32_to_cpu(buffer->start) / ECC_BLOCK_SIZE) * ECC_SIZE; + do { + if (block + ECC_BLOCK_SIZE > buffer_end) + size = buffer_end - block; + ram_console_encode_rs8(block, size, par); + block += ECC_BLOCK_SIZE; + par += ECC_SIZE; + } while (block < buffer->data + le32_to_cpu(buffer->start) + count); +#endif +} + +static void ram_console_update_header(void) +{ +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + struct ram_console_buffer *buffer = ram_console_buffer; + uint8_t *par; + par = ram_console_par_buffer + + DIV_ROUND_UP(ram_console_buffer_size, ECC_BLOCK_SIZE) * ECC_SIZE; + ram_console_encode_rs8((uint8_t *)buffer, sizeof(*buffer), par); +#endif +} + +static void +ram_console_write(struct console *console, const char *s, unsigned int count) +{ + int rem; + struct ram_console_buffer *buffer = ram_console_buffer; + + if (count > ram_console_buffer_size) { + s += count - ram_console_buffer_size; + count = ram_console_buffer_size; + } + rem = ram_console_buffer_size - le32_to_cpu(buffer->start); + if (rem < count) { + ram_console_update(s, rem); + s += rem; + count -= rem; + buffer->start = 0; + buffer->size = 
cpu_to_le32(ram_console_buffer_size); + } + ram_console_update(s, count); + + buffer->start = cpu_to_le32(le32_to_cpu(buffer->start) + count); + if (le32_to_cpu(buffer->size) < ram_console_buffer_size) + buffer->size = cpu_to_le32(le32_to_cpu(buffer->size) + count); + ram_console_update_header(); +} + +static struct console ram_console = { + .name = "ram", + .write = ram_console_write, + .flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME, + .index = -1, +}; + +void ram_console_enable_console(int enabled) +{ + if (enabled) + ram_console.flags |= CON_ENABLED; + else + ram_console.flags &= ~CON_ENABLED; +} + +static void __init +ram_console_save_old(struct ram_console_buffer *buffer, const char *bootinfo, + char *dest) +{ + size_t old_log_size = buffer->size; + size_t bootinfo_size = 0; + size_t total_size = old_log_size; + char *ptr; + const char *bootinfo_label = "Boot info:\n"; + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + uint8_t *block; + uint8_t *par; + char strbuf[80]; + int strbuf_len = 0; + + block = buffer->data; + par = ram_console_par_buffer; + while (block < buffer->data + buffer->size) { + int numerr; + int size = ECC_BLOCK_SIZE; + if (block + size > buffer->data + ram_console_buffer_size) + size = buffer->data + ram_console_buffer_size - block; + numerr = ram_console_decode_rs8(block, size, par); + if (numerr > 0) { +#if 0 + printk(KERN_INFO "ram_console: error in block %p, %d\n", + block, numerr); +#endif + ram_console_corrected_bytes += numerr; + } else if (numerr < 0) { +#if 0 + printk(KERN_INFO "ram_console: uncorrectable error in " + "block %p\n", block); +#endif + ram_console_bad_blocks++; + } + block += ECC_BLOCK_SIZE; + par += ECC_SIZE; + } + if (ram_console_corrected_bytes || ram_console_bad_blocks) + strbuf_len = snprintf(strbuf, sizeof(strbuf), + "\n%d Corrected bytes, %d unrecoverable blocks\n", + ram_console_corrected_bytes, ram_console_bad_blocks); + else + strbuf_len = snprintf(strbuf, sizeof(strbuf), + "\nNo errors 
detected\n"); + if (strbuf_len >= sizeof(strbuf)) + strbuf_len = sizeof(strbuf) - 1; + total_size += strbuf_len; +#endif + + if (bootinfo) + bootinfo_size = strlen(bootinfo) + strlen(bootinfo_label); + total_size += bootinfo_size; + + if (dest == NULL) { + dest = kmalloc(total_size, GFP_KERNEL); + if (dest == NULL) { + printk(KERN_ERR + "ram_console: failed to allocate buffer\n"); + return; + } + } + + ram_console_old_log = dest; + ram_console_old_log_size = total_size; + memcpy(ram_console_old_log, + &buffer->data[le32_to_cpu(buffer->start)], buffer->size - le32_to_cpu(buffer->start)); + memcpy(ram_console_old_log + buffer->size - le32_to_cpu(buffer->start), + &buffer->data[0], le32_to_cpu(buffer->start)); + ptr = ram_console_old_log + old_log_size; +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + memcpy(ptr, strbuf, strbuf_len); + ptr += strbuf_len; +#endif + if (bootinfo) { + memcpy(ptr, bootinfo_label, strlen(bootinfo_label)); + ptr += strlen(bootinfo_label); + memcpy(ptr, bootinfo, bootinfo_size); + ptr += bootinfo_size; + } +} + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_DEBUG_CONSOLE_SUSPENDED +static int ram_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + resume_console(); + return NOTIFY_DONE; +} + +static struct notifier_block ram_panic_blk = { + .notifier_call = ram_panic_event, +}; +#endif + +static int __init ram_console_init(struct ram_console_buffer *buffer, + size_t buffer_size, const char *bootinfo, + char *old_buf) +{ +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + int numerr; + uint8_t *par; +#endif + ram_console_buffer = buffer; + ram_console_buffer_size = + buffer_size - sizeof(struct ram_console_buffer); + + if (ram_console_buffer_size > buffer_size) { + pr_err("ram_console: buffer %p, invalid size %zu, " + "datasize %zu\n", buffer, buffer_size, + ram_console_buffer_size); + return 0; + } + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + ram_console_buffer_size -= (DIV_ROUND_UP(ram_console_buffer_size, 
+ ECC_BLOCK_SIZE) + 1) * ECC_SIZE; + + if (ram_console_buffer_size > buffer_size) { + pr_err("ram_console: buffer %p, invalid size %zu, " + "non-ecc datasize %zu\n", + buffer, buffer_size, ram_console_buffer_size); + return 0; + } + + ram_console_par_buffer = buffer->data + ram_console_buffer_size; + + + /* first consecutive root is 0 + * primitive element to generate roots = 1 + */ + ram_console_rs_decoder = init_rs(ECC_SYMSIZE, ECC_POLY, 0, 1, ECC_SIZE); + if (ram_console_rs_decoder == NULL) { + printk(KERN_INFO "ram_console: init_rs failed\n"); + return 0; + } + + ram_console_corrected_bytes = 0; + ram_console_bad_blocks = 0; + + par = ram_console_par_buffer + + DIV_ROUND_UP(ram_console_buffer_size, ECC_BLOCK_SIZE) * ECC_SIZE; + + numerr = ram_console_decode_rs8(buffer, sizeof(*buffer), par); + if (numerr > 0) { + printk(KERN_INFO "ram_console: error in header, %d\n", numerr); + ram_console_corrected_bytes += numerr; + } else if (numerr < 0) { + printk(KERN_INFO + "ram_console: uncorrectable error in header\n"); + ram_console_bad_blocks++; + } +#endif + + // ignore start corruption + buffer->start = 0; + + if (buffer->sig == RAM_CONSOLE_SIG) { + if (buffer->size > ram_console_buffer_size + || le32_to_cpu(buffer->start) > buffer->size) { + printk(KERN_INFO "ram_console: found existing invalid " + "buffer, size %d, start %d\n", + buffer->size, le32_to_cpu(buffer->start)); + memset(buffer, 0, buffer_size); + } + else { + printk(KERN_INFO "ram_console: found existing buffer, " + "size %d, start %d\n", + buffer->size, le32_to_cpu(buffer->start)); + ram_console_save_old(buffer, bootinfo, old_buf); + } + } else { + printk(KERN_INFO "ram_console: no valid data in buffer " + "(sig = 0x%08x)\n", buffer->sig); + memset(buffer, 0, buffer_size); + } + + buffer->sig = RAM_CONSOLE_SIG; + buffer->start = 0; + buffer->size = 0; + + register_console(&ram_console); +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ENABLE_VERBOSE + console_verbose(); +#endif +#ifdef 
CONFIG_ANDROID_RAM_CONSOLE_DEBUG_CONSOLE_SUSPENDED + atomic_notifier_chain_register(&panic_notifier_list, + &ram_panic_blk); +#endif + return 0; +} + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT +static int __init ram_console_early_init(void) +{ + return ram_console_init((struct ram_console_buffer *) + CONFIG_ANDROID_RAM_CONSOLE_EARLY_ADDR, + CONFIG_ANDROID_RAM_CONSOLE_EARLY_SIZE, + NULL, + ram_console_old_log_init_buffer); +} +#else +static int ram_console_driver_probe(struct platform_device *pdev) +{ + struct resource *res = pdev->resource; + size_t start; + size_t buffer_size; + void *buffer; + const char *bootinfo = NULL; + struct ram_console_platform_data *pdata = pdev->dev.platform_data; + + if (res == NULL || pdev->num_resources != 1 || + !(res->flags & IORESOURCE_MEM)) { + printk(KERN_ERR "ram_console: invalid resource, %p %d flags " + "%lx\n", res, pdev->num_resources, res ? res->flags : 0); + return -ENXIO; + } + buffer_size = resource_size(res); + start = res->start; + printk(KERN_INFO "ram_console: got buffer at %zx, size %zx\n", + start, buffer_size); + + memblock_remove(res->start, buffer_size); + + buffer = ioremap(res->start, buffer_size); + if (buffer == NULL) { + printk(KERN_ERR "ram_console: failed to map memory\n"); + return -ENOMEM; + } + + if (pdata) + bootinfo = pdata->bootinfo; + + return ram_console_init(buffer, buffer_size, bootinfo, NULL/* allocate */); +} + +static struct of_device_id ram_console_of_match[] = { + { .compatible = "android,ram-console", }, + { }, +}; +MODULE_DEVICE_TABLE(of, ram_console_of_match); + +static struct platform_driver ram_console_driver = { + .probe = ram_console_driver_probe, + .driver = { + .name = "ram_console", + .of_match_table = of_match_ptr(ram_console_of_match), + }, +}; + +static int __init ram_console_module_init(void) +{ + int err; + err = platform_driver_register(&ram_console_driver); + return err; +} +#endif + +#ifndef CONFIG_PRINTK +#define dmesg_restrict 0 +#endif + +static ssize_t 
ram_console_read_old(struct file *file, char __user *buf, + size_t len, loff_t *offset) +{ + loff_t pos = *offset; + ssize_t count; + + if (dmesg_restrict && !capable(CAP_SYSLOG)) + return -EPERM; + + if (pos >= ram_console_old_log_size) + return 0; + + count = min(len, (size_t)(ram_console_old_log_size - pos)); + if (copy_to_user(buf, ram_console_old_log + pos, count)) + return -EFAULT; + + *offset += count; + return count; +} + +static const struct proc_ops ram_console_file_ops = { + .proc_read = ram_console_read_old, +}; + +static int __init ram_console_late_init(void) +{ + struct proc_dir_entry *entry; + +// if (ram_console_old_log == NULL) +// return 0; +#ifdef CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT + ram_console_old_log = kmalloc(ram_console_old_log_size, GFP_KERNEL); + if (ram_console_old_log == NULL) { + printk(KERN_ERR + "ram_console: failed to allocate buffer for old log\n"); + ram_console_old_log_size = 0; + return 0; + } + memcpy(ram_console_old_log, + ram_console_old_log_init_buffer, ram_console_old_log_size); +#endif + entry = proc_create("last_kmsg", S_IFREG | S_IRUGO, NULL, + &ram_console_file_ops); + if (!entry) { + printk(KERN_ERR "ram_console: failed to create proc entry\n"); + kfree(ram_console_old_log); + ram_console_old_log = NULL; + return 0; + } + + proc_set_size(entry, ram_console_old_log_size); + return 0; +} + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT +console_initcall(ram_console_early_init); +#else +postcore_initcall(ram_console_module_init); +#endif +late_initcall(ram_console_late_init); + diff --git a/drivers/staging/android/ramconsole/ram_console.h b/drivers/staging/android/ramconsole/ram_console.h new file mode 100644 index 0000000000000..9f1125c110660 --- /dev/null +++ b/drivers/staging/android/ramconsole/ram_console.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2010 Google, Inc. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _INCLUDE_LINUX_PLATFORM_DATA_RAM_CONSOLE_H_ +#define _INCLUDE_LINUX_PLATFORM_DATA_RAM_CONSOLE_H_ + +struct ram_console_platform_data { + const char *bootinfo; +}; + +#endif /* _INCLUDE_LINUX_PLATFORM_DATA_RAM_CONSOLE_H_ */ From f6f35c502007855cdcb8fad7278dd30682f6e87e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 22 Feb 2020 23:25:39 +0300 Subject: [PATCH 749/851] mmc: core: Add raw_boot_mult field to mmc_ext_csd Bootloader of NVIDIA Tegra devices linearizes the boot0/boot1/main partitions into a single virtual space, and thus, all partition addresses are shifted by the size of boot0 + boot1 partitions. The offset needs to be known in order to find the EFI entry on internal EMMC storage of Tegra devices. 
Signed-off-by: Dmitry Osipenko --- drivers/mmc/core/mmc.c | 2 ++ include/linux/mmc/card.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 838726b68ff3e..29e58ffae3797 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -418,6 +418,8 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) ext_csd[EXT_CSD_ERASE_TIMEOUT_MULT]; card->ext_csd.raw_hc_erase_grp_size = ext_csd[EXT_CSD_HC_ERASE_GRP_SIZE]; + card->ext_csd.raw_boot_mult = + ext_csd[EXT_CSD_BOOT_MULT]; if (card->ext_csd.rev >= 3) { u8 sa_shift = ext_csd[EXT_CSD_S_A_TIMEOUT]; card->ext_csd.part_config = ext_csd[EXT_CSD_PART_CONFIG]; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 74e6c0624d277..37f9758751020 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -109,6 +109,7 @@ struct mmc_ext_csd { u8 raw_hc_erase_gap_size; /* 221 */ u8 raw_erase_timeout_mult; /* 223 */ u8 raw_hc_erase_grp_size; /* 224 */ + u8 raw_boot_mult; /* 226 */ u8 raw_sec_trim_mult; /* 229 */ u8 raw_sec_erase_mult; /* 230 */ u8 raw_sec_feature_support;/* 231 */ From 00b53b0e99b5d9aa1709434188c020c7a0c71adc Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 22 Feb 2020 23:43:07 +0300 Subject: [PATCH 750/851] mmc: block: Add mmc_bdev_to_card() helper Add mmc_bdev_to_card() helper which is needed for finding EFI entry on EMMC of NVIDIA Tegra devices. 
Signed-off-by: Dmitry Osipenko --- drivers/mmc/core/block.c | 15 +++++++++++++++ include/linux/mmc/blkdev.h | 13 +++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 include/linux/mmc/blkdev.h diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ce8aed5629295..93ca25e972594 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -313,6 +314,20 @@ static ssize_t force_ro_store(struct device *dev, struct device_attribute *attr, return ret; } +struct mmc_card *mmc_bdev_to_card(struct block_device *bdev) +{ + struct mmc_blk_data *md; + + if (bdev->bd_disk->major != MMC_BLOCK_MAJOR) + return NULL; + + md = mmc_blk_get(bdev->bd_disk); + if (!md) + return NULL; + + return md->queue.card; +} + static int mmc_blk_open(struct block_device *bdev, fmode_t mode) { struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk); diff --git a/include/linux/mmc/blkdev.h b/include/linux/mmc/blkdev.h new file mode 100644 index 0000000000000..67608c58de700 --- /dev/null +++ b/include/linux/mmc/blkdev.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/include/linux/mmc/blkdev.h + */ +#ifndef LINUX_MMC_BLOCK_DEVICE_H +#define LINUX_MMC_BLOCK_DEVICE_H + +struct block_device; +struct mmc_card; + +struct mmc_card *mmc_bdev_to_card(struct block_device *bdev); + +#endif /* LINUX_MMC_BLOCK_DEVICE_H */ From a03e8ad4183fd31e50422fc29ad728810c867867 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 27 Mar 2021 20:27:29 +0300 Subject: [PATCH 751/851] partitions/efi: Support NVIDIA Tegra devices NVIDIA Tegra consumer devices have EMMC storage that has GPT entry at a non-standard location. Support looking up GPT entry at a given sector. This patch enables access to internal EMMC storage for Acer A500 and Ouya devices that are well supported by the upstream kernel. 
Signed-off-by: Dmitry Osipenko --- block/partitions/Kconfig | 8 ++++ block/partitions/Makefile | 1 + block/partitions/check.h | 2 + block/partitions/core.c | 3 ++ block/partitions/efi.c | 18 ++++++++ block/partitions/tegra.c | 86 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 118 insertions(+) create mode 100644 block/partitions/tegra.c diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig index 6e2a649669e57..be086916c6a6f 100644 --- a/block/partitions/Kconfig +++ b/block/partitions/Kconfig @@ -268,3 +268,11 @@ config CMDLINE_PARTITION help Say Y here if you want to read the partition table from bootargs. The format for the command line is just like mtdparts. + +config TEGRA_PARTITION + bool "NVIDIA Tegra Partition support" if PARTITION_ADVANCED + default y if ARCH_TEGRA + depends on EFI_PARTITION && MMC_BLOCK && (ARCH_TEGRA || COMPILE_TEST) + help + Say Y here if you would like to be able to read the hard disk + partition table format used by NVIDIA Tegra machines. 
diff --git a/block/partitions/Makefile b/block/partitions/Makefile index a7f05cdb02a84..83cb70c6d08d3 100644 --- a/block/partitions/Makefile +++ b/block/partitions/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_IBM_PARTITION) += ibm.o obj-$(CONFIG_EFI_PARTITION) += efi.o obj-$(CONFIG_KARMA_PARTITION) += karma.o obj-$(CONFIG_SYSV68_PARTITION) += sysv68.o +obj-$(CONFIG_TEGRA_PARTITION) += tegra.o diff --git a/block/partitions/check.h b/block/partitions/check.h index c577e9ee67f05..5fcc850874658 100644 --- a/block/partitions/check.h +++ b/block/partitions/check.h @@ -22,6 +22,7 @@ struct parsed_partitions { int limit; bool access_beyond_eod; char *pp_buf; + sector_t force_gpt_sector; }; typedef struct { @@ -67,4 +68,5 @@ int osf_partition(struct parsed_partitions *state); int sgi_partition(struct parsed_partitions *state); int sun_partition(struct parsed_partitions *state); int sysv68_partition(struct parsed_partitions *state); +int tegra_partition_forced_gpt(struct parsed_partitions *state); int ultrix_partition(struct parsed_partitions *state); diff --git a/block/partitions/core.c b/block/partitions/core.c index 4230d4f718795..6e990e1543047 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -82,6 +82,9 @@ static int (*check_part[])(struct parsed_partitions *) = { #endif #ifdef CONFIG_SYSV68_PARTITION sysv68_partition, +#endif +#ifdef CONFIG_TEGRA_PARTITION + tegra_partition_forced_gpt, #endif NULL }; diff --git a/block/partitions/efi.c b/block/partitions/efi.c index e2716792ecc13..dced55c0d2e16 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -98,6 +98,15 @@ static int force_gpt; static int __init force_gpt_fn(char *str) { + /* + * This check allows to properly parse cmdline variants like + * "gpt gpt_sector=" and "gpt_sector= gpt" since + * "gpt" overlaps with the "gpt_sector=", see tegra_gpt_sector_fn(). + * The argument is absent for a boolean cmdline option. 
+ */ + if (strlen(str)) + return 0; + force_gpt = 1; return 1; } @@ -621,6 +630,15 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, if (!good_agpt && force_gpt) good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes); + /* + * The force_gpt_sector is used by NVIDIA Tegra partition parser in + * order to convey a non-standard location of the GPT entry for lookup. + * By default force_gpt_sector is set to 0 and has no effect. + */ + if (!good_agpt && force_gpt && state->force_gpt_sector) + good_agpt = is_gpt_valid(state, state->force_gpt_sector, + &agpt, &aptes); + /* The obviously unsuccessful case */ if (!good_pgpt && !good_agpt) goto fail; diff --git a/block/partitions/tegra.c b/block/partitions/tegra.c new file mode 100644 index 0000000000000..d8801a885a62b --- /dev/null +++ b/block/partitions/tegra.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "tegra-partition: " fmt + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "check.h" + +#define TEGRA_PT_ERR(_state, fmt, ...) 
\ + pr_debug("%s: " fmt, \ + (_state)->bdev->bd_disk->disk_name, ##__VA_ARGS__) + +static const struct of_device_id tegra_sdhci_match[] = { + { .compatible = "nvidia,tegra20-sdhci", }, + { .compatible = "nvidia,tegra30-sdhci", }, + { .compatible = "nvidia,tegra114-sdhci", }, + { .compatible = "nvidia,tegra124-sdhci", }, + {} +}; + +static int +tegra_partition_table_emmc_boot_offset(struct parsed_partitions *state) +{ + struct mmc_card *card = mmc_bdev_to_card(state->bdev); + + /* filter out unrelated and untested boot sources */ + if (!card || card->ext_csd.rev < 3 || + !mmc_card_is_blockaddr(card) || + mmc_card_is_removable(card->host) || + bdev_logical_block_size(state->bdev) != SZ_512 || + !of_match_node(tegra_sdhci_match, card->host->parent->of_node)) { + TEGRA_PT_ERR(state, "unexpected boot source\n"); + return -1; + } + + /* + * eMMC storage has two special boot partitions in addition to the + * main one. NVIDIA's bootloader linearizes eMMC boot0->boot1->main + * accesses, this means that the partition table addresses are shifted + * by the size of boot partitions. In accordance with the eMMC + * specification, the boot partition size is calculated as follows: + * + * boot partition size = 128K byte x BOOT_SIZE_MULT + * + * This function returns number of sectors occupied by the both boot + * partitions. + */ + return card->ext_csd.raw_boot_mult * SZ_128K / + SZ_512 * MMC_NUM_BOOT_PARTITION; +} + +int tegra_partition_forced_gpt(struct parsed_partitions *state) +{ + int ret, boot_offset; + + if (!soc_is_tegra()) + return 0; + + boot_offset = tegra_partition_table_emmc_boot_offset(state); + if (boot_offset < 0) + return 0; + + /* + * The fixed GPT entry address is calculated like this: + * + * gpt_sector = ext_csd.sectors_num - ext_csd.boot_sectors_num - 1 + * + * This algorithm is defined by NVIDIA and used by Android devices. 
+ */ + state->force_gpt_sector = get_capacity(state->bdev->bd_disk); + state->force_gpt_sector -= boot_offset + 1; + + ret = efi_partition(state); + state->force_gpt_sector = 0; + + return ret; +} From db5317f537c17cebf10caf3259a3aaa33636cf0d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 22 Jul 2021 21:19:59 +0300 Subject: [PATCH 752/851] partitions/efi: Support gpt_sector parameter needed by NVIDIA Tegra devices Add support for the gpt_sector cmdline parameter which will be used for finding EFI entry on internal EMMC storage of NVIDIA Tegra20+ devices. Signed-off-by: Dmitry Osipenko --- block/partitions/tegra.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/block/partitions/tegra.c b/block/partitions/tegra.c index d8801a885a62b..02dfc068a7a5f 100644 --- a/block/partitions/tegra.c +++ b/block/partitions/tegra.c @@ -3,6 +3,7 @@ #define pr_fmt(fmt) "tegra-partition: " fmt #include +#include #include #include #include @@ -58,6 +59,19 @@ tegra_partition_table_emmc_boot_offset(struct parsed_partitions *state) SZ_512 * MMC_NUM_BOOT_PARTITION; } +/* + * This allows a kernel command line option 'gpt_sector=<sector>' to + * enable GPT header lookup at a non-standard location. This option + * is provided to kernel by NVIDIA's proprietary bootloader. + */ +static sector_t tegra_gpt_sector; +static int __init tegra_gpt_sector_fn(char *str) +{ + WARN_ON(kstrtoull(str, 10, &tegra_gpt_sector) < 0); + return 1; +} +__setup("gpt_sector=", tegra_gpt_sector_fn); + int tegra_partition_forced_gpt(struct parsed_partitions *state) { int ret, boot_offset; @@ -70,14 +84,28 @@ int tegra_partition_forced_gpt(struct parsed_partitions *state) return 0; /* + * All NVIDIA Tegra devices use a proprietary partition table. + * This table may have GPT entry at any given sector.
Android + * devices may have GPT entry at a proper location, at a fixed + * sector that is calculated based on sectors number, or at any + * sector that is conveyed using a non-standard kernel cmdline + * argument. + * + * We support the variants with a fixed address and non-standard + * kernel cmdline. + * * The fixed GPT entry address is calculated like this: * * gpt_sector = ext_csd.sectors_num - ext_csd.boot_sectors_num - 1 * * This algorithm is defined by NVIDIA and used by Android devices. */ - state->force_gpt_sector = get_capacity(state->bdev->bd_disk); - state->force_gpt_sector -= boot_offset + 1; + if (tegra_gpt_sector) { + state->force_gpt_sector = tegra_gpt_sector; + } else { + state->force_gpt_sector = get_capacity(state->bdev->bd_disk); + state->force_gpt_sector -= boot_offset + 1; + } ret = efi_partition(state); state->force_gpt_sector = 0; From 15b222571ad6d6dafeddf465ed821d3fda72849c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 23 Feb 2020 00:25:30 +0300 Subject: [PATCH 753/851] partitions: Support NVIDIA Tegra Partition Table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All NVIDIA Tegra devices use a special partition table format for the internal storage partitioning. Most Tegra devices have a GPT partition in addition to TegraPT, but some older consumer-grade Android devices either lack it or have the GPT placed in a wrong sector, and thus TegraPT is needed in order for the upstream kernel to support these devices properly. This patch adds support for NVIDIA Tegra Partition Table format that is used at least by all NVIDIA Tegra20 and Tegra30 devices.
Tested-by: Nils Östlund Signed-off-by: Dmitry Osipenko --- arch/arm/mach-tegra/tegra.c | 54 ++++ block/partitions/check.h | 1 + block/partitions/core.c | 1 + block/partitions/tegra.c | 552 +++++++++++++++++++++++++++++++++- include/soc/tegra/bootdata.h | 46 +++ include/soc/tegra/partition.h | 84 ++++++ 6 files changed, 722 insertions(+), 16 deletions(-) create mode 100644 include/soc/tegra/bootdata.h create mode 100644 include/soc/tegra/partition.h diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index ab5008f35803c..06a516c7e19c3 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -28,7 +28,9 @@ #include +#include #include +#include #include #include @@ -62,9 +64,61 @@ u32 tegra_uart_config[3] = { 0, }; +static void __init tegra_boot_config_table_init(void) +{ + struct tegra30_boot_config_table __iomem *t30_bct; + struct tegra20_boot_config_table __iomem *t20_bct; + struct tegra20_boot_info_table __iomem *t20_bit; + u32 iram_end = TEGRA_IRAM_BASE + TEGRA_IRAM_SIZE; + u32 iram_start = TEGRA_IRAM_BASE; + u32 pt_addr, pt_size, bct_size; + + t20_bit = IO_ADDRESS(TEGRA_IRAM_BASE); + + if (of_machine_is_compatible("nvidia,tegra20")) { + bct_size = sizeof(*t20_bct); + + if (t20_bit->bct_size != bct_size || + t20_bit->bct_ptr < iram_start || + t20_bit->bct_ptr > iram_end - bct_size) + return; + + t20_bct = IO_ADDRESS(t20_bit->bct_ptr); + + if (t20_bct->boot_data_version != TEGRA_BOOTDATA_VERSION_T20) + return; + + pt_addr = t20_bct->partition_table_logical_sector_address; + pt_size = t20_bct->partition_table_num_logical_sectors; + + } else if (of_machine_is_compatible("nvidia,tegra30")) { + bct_size = sizeof(*t30_bct); + + if (t20_bit->bct_size != bct_size || + t20_bit->bct_ptr < iram_start || + t20_bit->bct_ptr > iram_end - bct_size) + return; + + t30_bct = IO_ADDRESS(t20_bit->bct_ptr); + + if (t30_bct->boot_data_version != TEGRA_BOOTDATA_VERSION_T30) + return; + + pt_addr = t30_bct->partition_table_logical_sector_address; 
+ pt_size = t30_bct->partition_table_num_logical_sectors; + } else { + return; + } + + pr_info("%s: BCT found in IRAM\n", __func__); + + tegra_partition_table_setup(pt_addr, pt_size); +} + static void __init tegra_init_early(void) { of_register_trusted_foundations(); + tegra_boot_config_table_init(); tegra_cpu_reset_handler_init(); call_firmware_op(l2x0_init); } diff --git a/block/partitions/check.h b/block/partitions/check.h index 5fcc850874658..1ce445d1c7f0b 100644 --- a/block/partitions/check.h +++ b/block/partitions/check.h @@ -69,4 +69,5 @@ int sgi_partition(struct parsed_partitions *state); int sun_partition(struct parsed_partitions *state); int sysv68_partition(struct parsed_partitions *state); int tegra_partition_forced_gpt(struct parsed_partitions *state); +int tegra_partition(struct parsed_partitions *state); int ultrix_partition(struct parsed_partitions *state); diff --git a/block/partitions/core.c b/block/partitions/core.c index 6e990e1543047..d6ac7bb8082be 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -85,6 +85,7 @@ static int (*check_part[])(struct parsed_partitions *) = { #endif #ifdef CONFIG_TEGRA_PARTITION tegra_partition_forced_gpt, + tegra_partition, #endif NULL }; diff --git a/block/partitions/tegra.c b/block/partitions/tegra.c index 02dfc068a7a5f..193928f09c0f3 100644 --- a/block/partitions/tegra.c +++ b/block/partitions/tegra.c @@ -1,4 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * NVIDIA Tegra Partition Table + * + * Copyright (C) 2020 GRATE-DRIVER project + * Copyright (C) 2020 Dmitry Osipenko + * + * Credits for the partition table format: + * + * Andrey Danin (Toshiba AC100 TegraPT format) + * Gilles Grandou (Toshiba AC100 TegraPT format) + * Ryan Grachek (Google TV "Molly" TegraPT format) + * Stephen Warren (Useful suggestions about eMMC/etc) + */ #define pr_fmt(fmt) "tegra-partition: " fmt @@ -6,43 +19,486 @@ #include #include #include +#include #include +#include +#include #include #include #include 
#include +#include #include "check.h" -#define TEGRA_PT_ERR(_state, fmt, ...) \ +#define TEGRA_PT_SECTOR_SIZE(ptp) ((ptp)->logical_sector_size / SZ_512) +#define TEGRA_PT_SECTOR(ptp, s) ((s) * TEGRA_PT_SECTOR_SIZE(ptp)) + +#define TEGRA_PT_HEADER_SIZE \ + (sizeof(struct tegra_partition_header_insecure) + \ + sizeof(struct tegra_partition_header_secure)) + +#define TEGRA_PT_MAX_PARTITIONS(ptp) \ + (((ptp)->logical_sector_size - TEGRA_PT_HEADER_SIZE) / \ + sizeof(struct tegra_partition)) + +#define TEGRA_PT_ERR(ptp, fmt, ...) \ pr_debug("%s: " fmt, \ - (_state)->bdev->bd_disk->disk_name, ##__VA_ARGS__) + (ptp)->state->bdev->bd_disk->disk_name, ##__VA_ARGS__) + +#define TEGRA_PT_PARSE_ERR(ptp, fmt, ...) \ + TEGRA_PT_ERR(ptp, "sector %llu: invalid " fmt, \ + (ptp)->sector, ##__VA_ARGS__) + +struct tegra_partition_table_parser { + struct tegra_partition_table *pt; + unsigned int logical_sector_size; + struct parsed_partitions *state; + bool pt_entry_checked; + sector_t sector; + int boot_offset; + u32 dev_instance; + u32 dev_id; +}; + +union tegra_partition_table_u { + struct tegra_partition_table pt; + u8 pt_parts[SZ_4K / SZ_512][SZ_512]; +}; + +struct tegra_partition_type { + unsigned int type; + char *name; +}; + +static sector_t tegra_pt_logical_sector_address; +static sector_t tegra_pt_logical_sectors_num; + +void tegra_partition_table_setup(unsigned int logical_sector_address, + unsigned int logical_sectors_num) +{ + tegra_pt_logical_sector_address = logical_sector_address; + tegra_pt_logical_sectors_num = logical_sectors_num; + + pr_info("initialized to logical sector = %llu sectors_num = %llu\n", + tegra_pt_logical_sector_address, tegra_pt_logical_sectors_num); +} + +/* + * Some partitions are very sensitive, changing data on them may brick device. 
+ * + * For more details about partitions see: + * + * "https://docs.nvidia.com/jetson/l4t/Tegra Linux Driver Package Development Guide/part_config.html" + */ +static const char * const partitions_blacklist[] = { + "BCT", "EBT", "EB2", "EKS", "GP1", "GPT", "MBR", "PT", +}; + +static bool tegra_partition_name_match(struct tegra_partition *p, + const char *name) +{ + return !strncmp(p->partition_name, name, TEGRA_PT_NAME_SIZE); +} + +static bool tegra_partition_skip(struct tegra_partition *p, + struct tegra_partition_table_parser *ptp, + sector_t sector) +{ + unsigned int i; + + /* skip eMMC boot partitions */ + if (sector < ptp->boot_offset) + return true; + + for (i = 0; i < ARRAY_SIZE(partitions_blacklist); i++) { + if (tegra_partition_name_match(p, partitions_blacklist[i])) + return true; + } + + return false; +} + +static const struct tegra_partition_type tegra_partition_expected_types[] = { + { .type = TEGRA_PT_PART_TYPE_BCT, .name = "BCT", }, + { .type = TEGRA_PT_PART_TYPE_EBT, .name = "EBT", }, + { .type = TEGRA_PT_PART_TYPE_EBT, .name = "EB2", }, + { .type = TEGRA_PT_PART_TYPE_PT, .name = "PT", }, + { .type = TEGRA_PT_PART_TYPE_GP1, .name = "GP1", }, + { .type = TEGRA_PT_PART_TYPE_GPT, .name = "GPT", }, + { .type = TEGRA_PT_PART_TYPE_GENERIC, .name = NULL, }, +}; + +static int tegra_partition_type_valid(struct tegra_partition_table_parser *ptp, + struct tegra_partition *p) +{ + const struct tegra_partition_type *ptype; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(tegra_partition_expected_types); i++) { + ptype = &tegra_partition_expected_types[i]; + + if (ptype->name && !tegra_partition_name_match(p, ptype->name)) + continue; + + if (p->part_info.partition_type == ptype->type) + return 0; + + /* + * Unsure about all possible types, let's emit error and + * allow to continue for now. 
+ */ + if (!ptype->name) + return 1; + } + + return -1; +} + +static bool tegra_partition_valid(struct tegra_partition_table_parser *ptp, + struct tegra_partition *p, + struct tegra_partition *prev, + sector_t sector, + sector_t size) +{ + struct tegra_partition_info *prev_pi = &prev->part_info; + sector_t sect_end = TEGRA_PT_SECTOR(ptp, + prev_pi->logical_sector_address + + prev_pi->logical_sectors_num); + char *type, name[2][TEGRA_PT_NAME_SIZE + 1]; + int err; + + strscpy(name[0], p->partition_name, sizeof(name[0])); + strscpy(name[1], prev->partition_name, sizeof(name[1])); + + /* validate expected partition name/type */ + err = tegra_partition_type_valid(ptp, p); + if (err) { + TEGRA_PT_PARSE_ERR(ptp, "partition_type: [%s] partition_type=%u\n", + name[0], p->part_info.partition_type); + if (err < 0) + return false; + + TEGRA_PT_ERR(ptp, "continuing, please update list of expected types\n"); + } + + /* validate partition table BCT addresses */ + if (tegra_partition_name_match(p, "PT")) { + if (sector != TEGRA_PT_SECTOR(ptp, tegra_pt_logical_sector_address) && + size != TEGRA_PT_SECTOR(ptp, tegra_pt_logical_sectors_num)) { + TEGRA_PT_PARSE_ERR(ptp, "PT location: sector=%llu size=%llu\n", + sector, size); + return false; + } + + if (ptp->pt_entry_checked) { + TEGRA_PT_PARSE_ERR(ptp, "(duplicated) PT\n"); + return false; + } + + ptp->pt_entry_checked = true; + } + + if (sector + size < sector) { + TEGRA_PT_PARSE_ERR(ptp, "size: [%s] integer overflow sector=%llu size=%llu\n", + name[0], sector, size); + return false; + } + + /* validate allocation_policy=sequential (absolute unsupported) */ + if (p != prev && sect_end > sector) { + TEGRA_PT_PARSE_ERR(ptp, "allocation_policy: [%s] end=%llu [%s] sector=%llu size=%llu\n", + name[1], sect_end, name[0], sector, size); + return false; + } + + if (ptp->dev_instance != p->mount_info.device_instance) { + TEGRA_PT_PARSE_ERR(ptp, "device_instance: [%s] device_instance=%u|%u\n", + name[0], ptp->dev_instance, + 
p->mount_info.device_instance); + return false; + } + + if (ptp->dev_id != p->mount_info.device_id) { + TEGRA_PT_PARSE_ERR(ptp, "device_id: [%s] device_id=%u|%u\n", + name[0], ptp->dev_id, + p->mount_info.device_id); + return false; + } + + if (p->partition_id > 127) { + TEGRA_PT_PARSE_ERR(ptp, "partition_id: [%s] partition_id=%u\n", + name[0], p->partition_id); + return false; + } + + sect_end = get_capacity(ptp->state->bdev->bd_disk); + + /* eMMC boot partitions are below ptp->boot_offset */ + if (sector < ptp->boot_offset) { + sect_end += ptp->boot_offset; + type = "boot"; + } else { + sector -= ptp->boot_offset; + type = "main"; + } + + /* validate size */ + if (!size || sector + size > sect_end) { + TEGRA_PT_PARSE_ERR(ptp, "size: [%s] %s partition boot_offt=%d end=%llu sector=%llu size=%llu\n", + name[0], type, ptp->boot_offset, sect_end, + sector, size); + return false; + } + + return true; +} + +static bool tegra_partitions_parsed(struct tegra_partition_table_parser *ptp, + bool check_only) +{ + struct parsed_partitions *state = ptp->state; + struct tegra_partition_table *pt = ptp->pt; + sector_t sector, size; + int i, slot = 1; + + ptp->pt_entry_checked = false; + + for (i = 0; i < pt->secure.num_partitions; i++) { + struct tegra_partition *p = &pt->partitions[i]; + struct tegra_partition *prev = &pt->partitions[max(i - 1, 0)]; + struct tegra_partition_info *pi = &p->part_info; + + if (slot == state->limit && !check_only) + break; + + sector = TEGRA_PT_SECTOR(ptp, pi->logical_sector_address); + size = TEGRA_PT_SECTOR(ptp, pi->logical_sectors_num); + + if (check_only && + !tegra_partition_valid(ptp, p, prev, sector, size)) + return false; + + if (check_only || + tegra_partition_skip(p, ptp, sector)) + continue; + + put_partition(state, slot++, sector - ptp->boot_offset, size); + } + + if (check_only && !ptp->pt_entry_checked) { + TEGRA_PT_PARSE_ERR(ptp, "PT: table entry not found\n"); + return false; + } + + return true; +} + +static bool 
+tegra_partition_table_parsed(struct tegra_partition_table_parser *ptp) +{ + if (ptp->pt->secure.num_partitions == 0 || + ptp->pt->secure.num_partitions > TEGRA_PT_MAX_PARTITIONS(ptp)) { + TEGRA_PT_PARSE_ERR(ptp, "num_partitions=%u\n", + ptp->pt->secure.num_partitions); + return false; + } + + return tegra_partitions_parsed(ptp, true) && + tegra_partitions_parsed(ptp, false); +} + +static int +tegra_partition_table_insec_hdr_valid(struct tegra_partition_table_parser *ptp) +{ + if (ptp->pt->insecure.magic != TEGRA_PT_MAGIC || + ptp->pt->insecure.version != TEGRA_PT_VERSION) { + TEGRA_PT_PARSE_ERR(ptp, "insecure header: magic=0x%llx ver=0x%x\n", + ptp->pt->insecure.magic, + ptp->pt->insecure.version); + return 0; + } + + return 1; +} + +static int +tegra_partition_table_sec_hdr_valid(struct tegra_partition_table_parser *ptp) +{ + size_t pt_size = ptp->pt->secure.num_partitions; + + pt_size *= sizeof(ptp->pt->partitions[0]); + pt_size += TEGRA_PT_HEADER_SIZE; + + if (ptp->pt->secure.magic != TEGRA_PT_MAGIC || + ptp->pt->secure.version != TEGRA_PT_VERSION || + ptp->pt->secure.length != ptp->pt->insecure.length || + ptp->pt->secure.length < pt_size) { + TEGRA_PT_PARSE_ERR(ptp, "secure header: magic=0x%llx ver=0x%x length=%u|%u|%zu\n", + ptp->pt->secure.magic, + ptp->pt->secure.version, + ptp->pt->secure.length, + ptp->pt->insecure.length, + pt_size); + return 0; + } + + return 1; +} + +static int +tegra_partition_table_unencrypted(struct tegra_partition_table_parser *ptp) +{ + /* AES IV, all zeros if unencrypted */ + if (ptp->pt->secure.random_data[0] || ptp->pt->secure.random_data[1] || + ptp->pt->secure.random_data[2] || ptp->pt->secure.random_data[3]) { + pr_err_once("encrypted partition table unsupported\n"); + return 0; + } + + return 1; +} + +static int tegra_read_partition_table(struct tegra_partition_table_parser *ptp) +{ + union tegra_partition_table_u *ptu = (typeof(ptu))ptp->pt; + unsigned int i; + Sector sect; + void *part; + + for (i = 0; i < 
ptp->logical_sector_size / SZ_512; i++) { + /* + * Partition table takes at maximum 4096 bytes, but + * read_part_sector() guarantees only that SECTOR_SIZE will + * be read at minimum. + */ + part = read_part_sector(ptp->state, ptp->sector + i, &sect); + if (!part) { + TEGRA_PT_ERR(ptp, "failed to read sector %llu\n", + ptp->sector + i); + return 0; + } + + memcpy(ptu->pt_parts[i], part, SZ_512); + put_dev_sector(sect); + } + + return 1; +} + +static int tegra_partition_scan(struct tegra_partition_table_parser *ptp) +{ + sector_t start_sector, num_sectors; + int ret = 0; + + num_sectors = TEGRA_PT_SECTOR(ptp, tegra_pt_logical_sectors_num); + start_sector = TEGRA_PT_SECTOR(ptp, tegra_pt_logical_sector_address); + + if (start_sector < ptp->boot_offset) { + TEGRA_PT_ERR(ptp, + "scanning eMMC boot partitions unimplemented\n"); + return 0; + } + + ptp->sector = start_sector - ptp->boot_offset; + + /* + * Partition table is duplicated for num_sectors. + * If first table is corrupted, we will try next.
+ */ + while (num_sectors--) { + ret = tegra_read_partition_table(ptp); + if (!ret) + goto next_sector; + + ret = tegra_partition_table_insec_hdr_valid(ptp); + if (!ret) + goto next_sector; + + ret = tegra_partition_table_unencrypted(ptp); + if (!ret) + goto next_sector; + + ret = tegra_partition_table_sec_hdr_valid(ptp); + if (!ret) + goto next_sector; + + ret = tegra_partition_table_parsed(ptp); + if (ret) + break; +next_sector: + ptp->sector += TEGRA_PT_SECTOR_SIZE(ptp); + } + + return ret; +} + +static const u32 tegra20_sdhci_bases[TEGRA_PT_SDHCI_DEVICE_INSTANCES] = { + 0xc8000000, 0xc8000200, 0xc8000400, 0xc8000600, +}; + +static const u32 tegra30_sdhci_bases[TEGRA_PT_SDHCI_DEVICE_INSTANCES] = { + 0x78000000, 0x78000200, 0x78000400, 0x78000600, +}; + +static const u32 tegra124_sdhci_bases[TEGRA_PT_SDHCI_DEVICE_INSTANCES] = { + 0x700b0000, 0x700b0200, 0x700b0400, 0x700b0600, +}; static const struct of_device_id tegra_sdhci_match[] = { - { .compatible = "nvidia,tegra20-sdhci", }, - { .compatible = "nvidia,tegra30-sdhci", }, - { .compatible = "nvidia,tegra114-sdhci", }, - { .compatible = "nvidia,tegra124-sdhci", }, + { .compatible = "nvidia,tegra20-sdhci", .data = tegra20_sdhci_bases, }, + { .compatible = "nvidia,tegra30-sdhci", .data = tegra30_sdhci_bases, }, + { .compatible = "nvidia,tegra114-sdhci", .data = tegra30_sdhci_bases, }, + { .compatible = "nvidia,tegra124-sdhci", .data = tegra124_sdhci_bases, }, {} }; static int -tegra_partition_table_emmc_boot_offset(struct parsed_partitions *state) +tegra_partition_table_emmc_boot_offset(struct tegra_partition_table_parser *ptp) { - struct mmc_card *card = mmc_bdev_to_card(state->bdev); + struct mmc_card *card = mmc_bdev_to_card(ptp->state->bdev); + const struct of_device_id *matched; + const u32 *sdhci_bases; + const __be32 *addrp; + u32 sdhci_base; + unsigned int i; - /* filter out unrelated and untested boot sources */ + /* filter out unexpected/untested boot sources */ if (!card || card->ext_csd.rev < 3 || 
!mmc_card_is_blockaddr(card) || mmc_card_is_removable(card->host) || - bdev_logical_block_size(state->bdev) != SZ_512 || - !of_match_node(tegra_sdhci_match, card->host->parent->of_node)) { - TEGRA_PT_ERR(state, "unexpected boot source\n"); + bdev_logical_block_size(ptp->state->bdev) != SZ_512) { + TEGRA_PT_ERR(ptp, "unexpected boot source\n"); return -1; } + /* skip everything unrelated to Tegra eMMC */ + matched = of_match_node(tegra_sdhci_match, card->host->parent->of_node); + if (!matched) + return -1; + + sdhci_bases = matched->data; + + /* figure out SDHCI instance ID by the base address */ + addrp = of_get_address(card->host->parent->of_node, 0, NULL, NULL); + if (!addrp) + return -1; + + sdhci_base = of_translate_address(card->host->parent->of_node, addrp); + + for (i = 0; i < TEGRA_PT_SDHCI_DEVICE_INSTANCES; i++) { + if (sdhci_base == sdhci_bases[i]) + break; + } + + if (i == TEGRA_PT_SDHCI_DEVICE_INSTANCES) + return -1; + + ptp->dev_id = TEGRA_PT_SDHCI_DEVICE_ID; + ptp->dev_instance = i; + /* * eMMC storage has two special boot partitions in addition to the * main one. NVIDIA's bootloader linearizes eMMC boot0->boot1->main @@ -59,6 +515,67 @@ tegra_partition_table_emmc_boot_offset(struct parsed_partitions *state) SZ_512 * MMC_NUM_BOOT_PARTITION; } +/* + * Logical sector size may vary per device model and apparently there is no + * way to get information about the size from kernel. The info is hardcoded + * into bootloader and it doesn't tell us, so we'll just try all possible + * well-known sizes until succeed. + * + * For example Samsung Galaxy Tab 10.1 uses 2K sectors. While Acer A500, + * Nexus 7 and Ouya are using 4K sectors. + */ +static const unsigned int tegra_pt_logical_sector_sizes[] = { + SZ_4K, SZ_2K, +}; + +/* + * The 'tegraboot=' command line option is provided to kernel + * by NVIDIA's proprietary bootloader on most Tegra devices. If it isn't + * provided, then it should be added to the cmdline via device-tree bootargs + * or by other means. 
+ */ +static bool tegra_boot_sdmmc; +static int __init tegra_boot_fn(char *str) +{ + tegra_boot_sdmmc = !strcmp(str, "sdmmc"); + return 1; +} +__setup("tegraboot=", tegra_boot_fn); + +int tegra_partition(struct parsed_partitions *state) +{ + struct tegra_partition_table_parser ptp = {}; + unsigned int i; + int ret; + + if (!soc_is_tegra() || !tegra_boot_sdmmc) + return 0; + + ptp.state = state; + + ptp.boot_offset = tegra_partition_table_emmc_boot_offset(&ptp); + if (ptp.boot_offset < 0) + return 0; + + ptp.pt = kmalloc(SZ_4K, GFP_KERNEL); + if (!ptp.pt) + return 0; + + for (i = 0; i < ARRAY_SIZE(tegra_pt_logical_sector_sizes); i++) { + ptp.logical_sector_size = tegra_pt_logical_sector_sizes[i]; + + ret = tegra_partition_scan(&ptp); + if (ret == 1) { + strlcat(state->pp_buf, "\n", PAGE_SIZE); + break; + } + } + + kfree(ptp.pt); + + return ret; +} + /* * This allows a kernel command line option 'gpt_sector=' to * enable GPT header lookup at a non-standard location. This option @@ -74,13 +591,16 @@ __setup("gpt_sector=", tegra_gpt_sector_fn); int tegra_partition_forced_gpt(struct parsed_partitions *state) { - int ret, boot_offset; + struct tegra_partition_table_parser ptp = {}; + int ret = 0; if (!soc_is_tegra()) return 0; - boot_offset = tegra_partition_table_emmc_boot_offset(state); - if (boot_offset < 0) + ptp.state = state; + + ptp.boot_offset = tegra_partition_table_emmc_boot_offset(&ptp); + if (ptp.boot_offset < 0) return 0; /* @@ -104,7 +624,7 @@ int tegra_partition_forced_gpt(struct parsed_partitions *state) state->force_gpt_sector = tegra_gpt_sector; } else { state->force_gpt_sector = get_capacity(state->bdev->bd_disk); - state->force_gpt_sector -= boot_offset + 1; + state->force_gpt_sector -= ptp.boot_offset + 1; } ret = efi_partition(state); diff --git a/include/soc/tegra/bootdata.h b/include/soc/tegra/bootdata.h new file mode 100644 index 0000000000000..7be207cb25198 --- /dev/null +++ b/include/soc/tegra/bootdata.h @@ -0,0 +1,46 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_TEGRA_BOOTDATA_H__ +#define __SOC_TEGRA_BOOTDATA_H__ + +#include +#include + +#define TEGRA_BOOTDATA_VERSION_T20 NVBOOT_BOOTDATA_VERSION(0x2, 0x1) +#define TEGRA_BOOTDATA_VERSION_T30 NVBOOT_BOOTDATA_VERSION(0x3, 0x1) + +#define NVBOOT_BOOTDATA_VERSION(a, b) ((((a) & 0xffff) << 16) | \ + ((b) & 0xffff)) +#define NVBOOT_CMAC_AES_HASH_LENGTH 4 + +struct tegra20_boot_info_table { + u32 unused_data1[14]; + u32 bct_size; + u32 bct_ptr; +} __packed; + +struct tegra20_boot_config_table { + u32 crypto_hash[NVBOOT_CMAC_AES_HASH_LENGTH]; + u32 random_aes_blk[NVBOOT_CMAC_AES_HASH_LENGTH]; + u32 boot_data_version; + u32 unused_data1[712]; + u32 unused_consumer_data1; + u16 partition_table_logical_sector_address; + u16 partition_table_num_logical_sectors; + u32 unused_consumer_data[294]; + u32 unused_data[3]; +} __packed; + +struct tegra30_boot_config_table { + u32 crypto_hash[NVBOOT_CMAC_AES_HASH_LENGTH]; + u32 random_aes_blk[NVBOOT_CMAC_AES_HASH_LENGTH]; + u32 boot_data_version; + u32 unused_data1[1016]; + u32 unused_consumer_data1; + u16 partition_table_logical_sector_address; + u16 partition_table_num_logical_sectors; + u32 unused_consumer_data[502]; + u32 unused_data[3]; +} __packed; + +#endif /* __SOC_TEGRA_BOOTDATA_H__ */ diff --git a/include/soc/tegra/partition.h b/include/soc/tegra/partition.h new file mode 100644 index 0000000000000..e8fcce1a725d0 --- /dev/null +++ b/include/soc/tegra/partition.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_TEGRA_PARTITION_H__ +#define __SOC_TEGRA_PARTITION_H__ + +#include +#include + +#define TEGRA_PT_MAGIC 0xffffffff8f9e8d8bULL +#define TEGRA_PT_VERSION 0x100 +#define TEGRA_PT_AES_HASH_SIZE 4 +#define TEGRA_PT_NAME_SIZE 4 + +#define TEGRA_PT_SDHCI_DEVICE_ID 18 +#define TEGRA_PT_SDHCI_DEVICE_INSTANCES 4 + +#define TEGRA_PT_PART_TYPE_BCT 1 +#define TEGRA_PT_PART_TYPE_EBT 2 +#define TEGRA_PT_PART_TYPE_PT 3 +#define TEGRA_PT_PART_TYPE_GENERIC 6 
+#define TEGRA_PT_PART_TYPE_GP1 9 +#define TEGRA_PT_PART_TYPE_GPT 10 + +struct tegra_partition_mount_info { + u32 device_id; + u32 device_instance; + u32 device_attr; + u8 mount_path[TEGRA_PT_NAME_SIZE]; + u32 file_system_type; + u32 file_system_attr; +} __packed; + +struct tegra_partition_info { + u32 partition_attr; + u32 __pad1; + u64 logical_sector_address; + u64 logical_sectors_num; + u64 physical_sector_address; + u64 physical_sectors_num; + u32 partition_type; + u32 __pad2; +} __packed; + +struct tegra_partition { + u32 partition_id; + u8 partition_name[TEGRA_PT_NAME_SIZE]; + struct tegra_partition_mount_info mount_info; + struct tegra_partition_info part_info; +} __packed; + +struct tegra_partition_header_insecure { + u64 magic; + u32 version; + u32 length; + u32 signature[TEGRA_PT_AES_HASH_SIZE]; +} __packed; + +struct tegra_partition_header_secure { + u32 random_data[TEGRA_PT_AES_HASH_SIZE]; + u64 magic; + u32 version; + u32 length; + u32 num_partitions; + u32 __pad; +} __packed; + +struct tegra_partition_table { + struct tegra_partition_header_insecure insecure; + struct tegra_partition_header_secure secure; + struct tegra_partition partitions[]; +} __packed; + +#ifdef CONFIG_TEGRA_PARTITION +void tegra_partition_table_setup(unsigned int logical_sector_address, + unsigned int logical_sectors_num); +#else +static inline void +tegra_partition_table_setup(unsigned int logical_sector_address, + unsigned int logical_sectors_num) +{ +} +#endif + +#endif /* __SOC_TEGRA_PARTITION_H__ */ From 214d9dbb64547aca303550239c91e29748a0638a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 14 May 2020 14:48:22 +0300 Subject: [PATCH 754/851] soc/tegra: Expose Boot Configuration Table via sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's quite useful to have unencrypted BCT exposed to userspace for debugging purposes, so let's expose it via sysfs. 
The BCT data will be present in '/sys/tegra/boot_config_table' binary file if BCT is available. Suggested-by: Michał Mirosław Signed-off-by: Dmitry Osipenko --- arch/arm/mach-tegra/tegra.c | 4 +++ drivers/soc/tegra/Makefile | 1 + drivers/soc/tegra/bootdata.c | 67 ++++++++++++++++++++++++++++++++++++ drivers/soc/tegra/common.c | 15 ++++++++ include/soc/tegra/bootdata.h | 2 ++ include/soc/tegra/common.h | 3 ++ 6 files changed, 92 insertions(+) create mode 100644 drivers/soc/tegra/bootdata.c diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index 06a516c7e19c3..575dca04d77bb 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -72,6 +72,7 @@ static void __init tegra_boot_config_table_init(void) u32 iram_end = TEGRA_IRAM_BASE + TEGRA_IRAM_SIZE; u32 iram_start = TEGRA_IRAM_BASE; u32 pt_addr, pt_size, bct_size; + void __iomem *bct_ptr; t20_bit = IO_ADDRESS(TEGRA_IRAM_BASE); @@ -90,6 +91,7 @@ static void __init tegra_boot_config_table_init(void) pt_addr = t20_bct->partition_table_logical_sector_address; pt_size = t20_bct->partition_table_num_logical_sectors; + bct_ptr = t20_bct; } else if (of_machine_is_compatible("nvidia,tegra30")) { bct_size = sizeof(*t30_bct); @@ -106,12 +108,14 @@ static void __init tegra_boot_config_table_init(void) pt_addr = t30_bct->partition_table_logical_sector_address; pt_size = t30_bct->partition_table_num_logical_sectors; + bct_ptr = t30_bct; } else { return; } pr_info("%s: BCT found in IRAM\n", __func__); + tegra_bootdata_bct_setup(bct_ptr, bct_size); tegra_partition_table_setup(pt_addr, pt_size); } diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile index 9c809c1814bd4..8be2bfb4d95d6 100644 --- a/drivers/soc/tegra/Makefile +++ b/drivers/soc/tegra/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += fuse/ +obj-y += bootdata.o obj-y += common.o obj-$(CONFIG_SOC_TEGRA_FLOWCTRL) += flowctrl.o obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o diff --git 
a/drivers/soc/tegra/bootdata.c b/drivers/soc/tegra/bootdata.c new file mode 100644 index 0000000000000..c42d98332f080 --- /dev/null +++ b/drivers/soc/tegra/bootdata.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +#include +#include + +union tegra_bct_entry { + struct tegra20_boot_config_table t20; + struct tegra30_boot_config_table t30; +}; + +/* + * spare_bct will be released once kernel is booted, hence not wasting + * kernel space if BCT is missing. The tegra_bct can't be allocated during + * of BCT setting up because it's too early for the slab allocator. + */ +static union tegra_bct_entry spare_bct __initdata; +static union tegra_bct_entry *tegra_bct; + +static ssize_t boot_config_table_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + memcpy(buf, (u8 *)tegra_bct + off, count); + return count; +} +static BIN_ATTR_RO(boot_config_table, 0); + +static int __init tegra_bootdata_bct_sysfs_init(void) +{ + int err; + + if (!bin_attr_boot_config_table.size) + return 0; + + tegra_bct = kmalloc(bin_attr_boot_config_table.size, GFP_KERNEL); + if (!tegra_bct) + return -ENOMEM; + + memcpy(tegra_bct, &spare_bct, bin_attr_boot_config_table.size); + + err = sysfs_create_bin_file(tegra_soc_kobj, + &bin_attr_boot_config_table); + if (err) + goto free_bct; + + return 0; + +free_bct: + kfree(tegra_bct); + + return err; +} +late_initcall(tegra_bootdata_bct_sysfs_init) + +void __init tegra_bootdata_bct_setup(void __iomem *bct_ptr, size_t bct_size) +{ + memcpy_fromio(&spare_bct, bct_ptr, bct_size); + bin_attr_boot_config_table.size = bct_size; +} diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c index a42d4f98c0783..39fea68fe844e 100644 --- a/drivers/soc/tegra/common.c +++ b/drivers/soc/tegra/common.c @@ -10,10 +10,13 @@ #include #include #include +#include #include #include +struct kobject *tegra_soc_kobj; + static 
const struct of_device_id tegra_machine_match[] = { { .compatible = "nvidia,tegra20", }, { .compatible = "nvidia,tegra30", }, @@ -126,3 +129,15 @@ int devm_tegra_core_dev_init_opp_table(struct device *dev, return 0; } EXPORT_SYMBOL_GPL(devm_tegra_core_dev_init_opp_table); + +static int __init tegra_soc_sysfs_init(void) +{ + if (!soc_is_tegra()) + return 0; + + tegra_soc_kobj = kobject_create_and_add("tegra", NULL); + WARN_ON(!tegra_soc_kobj); + + return 0; +} +arch_initcall(tegra_soc_sysfs_init); diff --git a/include/soc/tegra/bootdata.h b/include/soc/tegra/bootdata.h index 7be207cb25198..d5c7a251517d1 100644 --- a/include/soc/tegra/bootdata.h +++ b/include/soc/tegra/bootdata.h @@ -43,4 +43,6 @@ struct tegra30_boot_config_table { u32 unused_data[3]; } __packed; +void tegra_bootdata_bct_setup(void __iomem *bct_ptr, size_t bct_size); + #endif /* __SOC_TEGRA_BOOTDATA_H__ */ diff --git a/include/soc/tegra/common.h b/include/soc/tegra/common.h index 265ad90e45a27..dd9215c76368a 100644 --- a/include/soc/tegra/common.h +++ b/include/soc/tegra/common.h @@ -8,6 +8,7 @@ #include #include +#include struct device; @@ -21,6 +22,8 @@ struct tegra_core_opp_params { }; #ifdef CONFIG_ARCH_TEGRA +extern struct kobject *tegra_soc_kobj; + bool soc_is_tegra(void); int devm_tegra_core_dev_init_opp_table(struct device *dev, From 0038f47bed38a983c84bfcebd9c6e2ff8343d6f3 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 5 Jul 2020 20:33:12 +0300 Subject: [PATCH 755/851] xxx: partitions/tegra: Enable debug by default The debug messages are useful for WIP devices, so let's enable them. 
Signed-off-by: Dmitry Osipenko --- block/partitions/tegra.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/partitions/tegra.c b/block/partitions/tegra.c index 193928f09c0f3..82678a998b3d7 100644 --- a/block/partitions/tegra.c +++ b/block/partitions/tegra.c @@ -13,6 +13,8 @@ * Stephen Warren (Useful suggestions about eMMC/etc) */ +#define DEBUG + #define pr_fmt(fmt) "tegra-partition: " fmt #include From ee5e6e916faa5292f39e4b49bd2f9514cfb30685 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 31 Mar 2020 01:09:15 +0300 Subject: [PATCH 756/851] ARM: tegra_defconfig: Enable CONFIG_ARM_APPENDED_DTB Downstream bootloader of Tegra20/30 doesn't support device-tree, so compiled DTB needs to be manually appended to the kernel's zImage. Signed-off-by: Dmitry Osipenko --- arch/arm/configs/tegra_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 0965ed0501dcc..5531b34fb4454 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -20,6 +20,8 @@ CONFIG_SLAB=y CONFIG_ARCH_TEGRA=y CONFIG_SMP=y CONFIG_HIGHMEM=y +CONFIG_ARM_APPENDED_DTB=y +CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_KEXEC=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y From 80f8a5b1dfa3cb51f298eb8e3ed02efc253b14a7 Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Fri, 14 Sep 2018 14:59:15 +0200 Subject: [PATCH 757/851] PCI: disable nv_msi_ht_cap_quirk_leaf quirk on arm/arm64 This patch disables the use of nv_msi_ht_cap_quirk_leaf quirk on arm and arm64 NVIDIA devices such as Tegra. This fixes the following output: "pci 0000:00:01.0: nv_msi_ht_cap_quirk didn't locate host bridge" as experienced on a Trimslice device with PCI host bridge enabled v3: exclude the quirk for arm and arm64 instead of only for x86 v2: use __maybe_unused to avoid a warning on nv_msi_ht_cap_quirk_leaf Signed-off-by: Nicolas Chauvet Reviewed-by: Manikanta Maddireddy Acked-by: Thierry Reding --- 
drivers/pci/quirks.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ab3de1551b503..f8fdef0742412 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2944,12 +2944,15 @@ static void nv_msi_ht_cap_quirk_all(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all); -static void nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev) +static void __maybe_unused nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev) { return __nv_msi_ht_cap_quirk(dev, 0); } +/* HyperTransport is not relevant on theses arches */ +#if !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf); DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf); +#endif static void quirk_msi_intx_disable_bug(struct pci_dev *dev) { From c476684a4d44978521ea275f1a9d42e68cd483ee Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 5 May 2020 04:55:08 +0300 Subject: [PATCH 758/851] drm/tegra: dc: Add legacy BO tiling compatibility The BO tiling isn't respected since the time when DRM modifiers became supported and as a result older userspace can't set tiling mode. This patch restores the legacy BO tiling flag support, which is useful for development purposes. Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/dc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 150c7b9c74097..25b9370b81791 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -763,6 +763,23 @@ static void tegra_plane_atomic_update(struct drm_plane *plane, */ if (i < 2) window.stride[i] = fb->pitches[i]; + + /* + * There are two ways to set tiling mode on Tegra: + * + * 1. New: using DRM modifiers + * 2. 
Old: using Tegra BO flags + * + * Older userspace doesn't support ADDFB2 IOCTL. Assume that + * legacy userspace is used if BO flag is set and FB modifier + * isn't set to maintain userspace compatibility. + */ + if (i == 0 && + window.tiling.mode == TEGRA_BO_TILING_MODE_PITCH && + window.tiling.value == 0) { + struct tegra_bo *bo = tegra_fb_get_plane(fb, i); + window.tiling.mode = bo->tiling.mode; + } } tegra_dc_setup_window(p, &window); From 9345c825cbb587bac927d806bd23e35c083cdcfc Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 26 May 2018 01:50:10 +0300 Subject: [PATCH 759/851] XXX: drm: Add generic colorkey properties for display planes Color keying is the action of replacing pixels matching a given color (or range of colors) with transparent pixels in an overlay when performing blitting. Depending on the hardware capabilities, the matching pixel can either become fully transparent or gain adjustment of the pixels component values. Color keying is found in a large number of devices whose capabilities often differ, but they still have enough common features in range to standardize color key properties. This commit adds new generic DRM plane properties related to the color keying, providing initial color keying support. 
Signed-off-by: Laurent Pinchart Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/drm_atomic_uapi.c | 20 ++++ drivers/gpu/drm/drm_blend.c | 152 ++++++++++++++++++++++++++++++ include/drm/drm_blend.h | 3 + include/drm/drm_plane.h | 92 ++++++++++++++++++ 4 files changed, 267 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 7e48d40600fff..39a6a6ec028b5 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -592,6 +592,16 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, state->rotation = val; } else if (property == plane->zpos_property) { state->zpos = val; + } else if (property == plane->colorkey.plane_mask_property) { + state->colorkey.plane_mask = val; + } else if (property == plane->colorkey.mode_property) { + state->colorkey.mode = val; + } else if (property == plane->colorkey.mask_property) { + state->colorkey.mask = val; + } else if (property == plane->colorkey.min_property) { + state->colorkey.min = val; + } else if (property == plane->colorkey.max_property) { + state->colorkey.max = val; } else if (property == plane->color_encoding_property) { state->color_encoding = val; } else if (property == plane->color_range_property) { @@ -658,6 +668,16 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->rotation; } else if (property == plane->zpos_property) { *val = state->zpos; + } else if (property == plane->colorkey.plane_mask_property) { + *val = state->colorkey.plane_mask; + } else if (property == plane->colorkey.mode_property) { + *val = state->colorkey.mode; + } else if (property == plane->colorkey.mask_property) { + *val = state->colorkey.mask; + } else if (property == plane->colorkey.min_property) { + *val = state->colorkey.min; + } else if (property == plane->colorkey.max_property) { + *val = state->colorkey.max; } else if (property == plane->color_encoding_property) { *val = state->color_encoding; } else if (property == 
plane->color_range_property) { diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index ec37cbfabb507..d011c9cd43613 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -185,6 +185,11 @@ * plane does not expose the "alpha" property, then this is * assumed to be 1.0 * + * colorkey: + * Color keying is set up with drm_plane_create_colorkey_properties(). + * It adds support for actions like replacing a range of colors with a + * transparent color in the plane. Color keying is disabled by default. + * * Note that all the property extensions described here apply either to the * plane or the CRTC (e.g. for the background color, which currently is not * exposed and assumed to be black). @@ -616,3 +621,150 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane, return 0; } EXPORT_SYMBOL(drm_plane_create_blend_mode_property); + +static const char * const plane_colorkey_mode_name[] = { + [DRM_PLANE_COLORKEY_MODE_DISABLED] = "disabled", + [DRM_PLANE_COLORKEY_MODE_TRANSPARENT] = "transparent", +}; + +/** + * drm_plane_create_colorkey_properties - create colorkey properties + * @plane: drm plane + * @supported_modes: bitmask of supported color keying modes + * + * This function creates the generic color keying properties and attaches them + * to the @plane to enable color keying control for blending operations. + * + * Glossary: + * + * Destination plane: + * Plane to which color keying properties are applied, this planes takes + * the effect of color keying operation. The effect is determined by a + * given color keying mode. + * + * Source plane: + * Pixels of this plane are the source for color key matching operation. + * + * Color keying is controlled by these properties: + * + * colorkey.plane_mask: + * The mask property specifies which planes participate in color key + * matching process, these planes are the color key sources. It is up + * to userspace to decide what planes it wants to select. 
+ * + * Drivers return an error from their plane atomic check if mask can't be + * handled. + * + * colorkey.mode: + * The mode is an enumerated property that controls how color keying + * operates. + * + * colorkey.mask: + * This property specifies the pixel components mask. Unmasked pixel + * components are not participating in the matching. This mask value is + * applied to colorkey.min / max values. The mask value is given in a + * 64-bit integer in ARGB16161616 format, where A is the alpha value and + * R, G and B correspond to the color components. Drivers shall convert + * ARGB16161616 value into appropriate format within planes atomic check. + * + * Drivers return an error from their plane atomic check if mask can't be + * handled. + * + * colorkey.min, colorkey.max: + * These two properties specify the colors that are treated as the color + * key. Pixel whose value is in the [min, max] range is the color key + * matching pixel. The minimum and maximum values are expressed as a + * 64-bit integer in ARGB16161616 format, where A is the alpha value and + * R, G and B correspond to the color components. Drivers shall convert + * ARGB16161616 value into appropriate format within planes atomic check. + * The converted value shall be *rounded up* to the nearest value. + * + * When a single color key is desired instead of a range, userspace shall + * set the min and max properties to the same value. + * + * Drivers return an error from their plane atomic check if range can't be + * handled. + * + * Returns: + * Zero on success, negative errno on failure. 
+ */ +int drm_plane_create_colorkey_properties(struct drm_plane *plane, + u32 supported_modes) +{ + struct drm_prop_enum_list modes_list[DRM_PLANE_COLORKEY_MODES_NUM]; + struct drm_device *dev = plane->dev; + struct drm_property *plane_mask_prop; + struct drm_property *mode_prop; + struct drm_property *mask_prop; + struct drm_property *min_prop; + struct drm_property *max_prop; + unsigned int modes_num = 0; + unsigned int i; + + /* modes are driver-specific, build the list of supported modes */ + for (i = 0; i < DRM_PLANE_COLORKEY_MODES_NUM; i++) { + if (!(supported_modes & BIT(i))) + continue; + + modes_list[modes_num].name = plane_colorkey_mode_name[i]; + modes_list[modes_num].type = i; + modes_num++; + } + + /* at least one mode should be supported */ + if (!modes_num) + return -EINVAL; + + plane_mask_prop = drm_property_create_range(dev, 0, + "colorkey.plane_mask", + 0, U64_MAX); + if (!plane_mask_prop) + return -ENOMEM; + + mode_prop = drm_property_create_enum(dev, 0, "colorkey.mode", + modes_list, modes_num); + if (!mode_prop) + goto err_destroy_plane_mask_prop; + + mask_prop = drm_property_create_range(dev, 0, "colorkey.mask", + 0, U64_MAX); + if (!mask_prop) + goto err_destroy_mode_prop; + + min_prop = drm_property_create_range(dev, 0, "colorkey.min", + 0, U64_MAX); + if (!min_prop) + goto err_destroy_mask_prop; + + max_prop = drm_property_create_range(dev, 0, "colorkey.max", + 0, U64_MAX); + if (!max_prop) + goto err_destroy_min_prop; + + drm_object_attach_property(&plane->base, plane_mask_prop, 0); + drm_object_attach_property(&plane->base, mode_prop, 0); + drm_object_attach_property(&plane->base, mask_prop, 0); + drm_object_attach_property(&plane->base, min_prop, 0); + drm_object_attach_property(&plane->base, max_prop, 0); + + plane->colorkey.plane_mask_property = plane_mask_prop; + plane->colorkey.mode_property = mode_prop; + plane->colorkey.mask_property = mask_prop; + plane->colorkey.min_property = min_prop; + plane->colorkey.max_property = max_prop; 
+ + return 0; + +err_destroy_min_prop: + drm_property_destroy(dev, min_prop); +err_destroy_mask_prop: + drm_property_destroy(dev, mask_prop); +err_destroy_mode_prop: + drm_property_destroy(dev, mode_prop); +err_destroy_plane_mask_prop: + drm_property_destroy(dev, plane_mask_prop); + + return -ENOMEM; +} +EXPORT_SYMBOL(drm_plane_create_colorkey_properties); + diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index 88bdfec3bd884..47e0c90d4177d 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -58,4 +58,7 @@ int drm_atomic_normalize_zpos(struct drm_device *dev, struct drm_atomic_state *state); int drm_plane_create_blend_mode_property(struct drm_plane *plane, unsigned int supported_modes); + +int drm_plane_create_colorkey_properties(struct drm_plane *plane, + u32 supported_modes); #endif diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 1294610e84f47..4c05b2a336c09 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -40,6 +40,52 @@ enum drm_scaling_filter { DRM_SCALING_FILTER_NEAREST_NEIGHBOR, }; +/** + * enum drm_plane_colorkey_mode - uapi plane colorkey mode enumeration + */ +enum drm_plane_colorkey_mode { + /** + * @DRM_PLANE_COLORKEY_MODE_DISABLED: + * + * No color keying performed in this mode. + */ + DRM_PLANE_COLORKEY_MODE_DISABLED, + + /** + * @DRM_PLANE_COLORKEY_MODE_TRANSPARENT: + * + * Destination plane pixels are completely transparent in areas + * where pixels of a source plane are matching a given color key + * range, in other cases pixels of a destination plane are unaffected. + * In areas where two or more source planes overlap, the topmost + * plane takes precedence. + */ + DRM_PLANE_COLORKEY_MODE_TRANSPARENT, + + /** + * @DRM_PLANE_COLORKEY_MODES_NUM: + * + * Total number of color keying modes. 
+ */ + DRM_PLANE_COLORKEY_MODES_NUM, +}; + +/** + * struct drm_plane_colorkey_state - plane color keying state + * @mode: color keying mode + * @plane_mask: source planes that participate in color key matching + * @mask: color key mask (in ARGB16161616 format) + * @min: color key range minimum (in ARGB16161616 format) + * @max: color key range maximum (in ARGB16161616 format) + */ +struct drm_plane_colorkey_state { + enum drm_plane_colorkey_mode mode; + u32 plane_mask; + u64 mask; + u64 min; + u64 max; +}; + /** * struct drm_plane_state - mutable plane state * @@ -165,6 +211,13 @@ struct drm_plane_state { */ unsigned int normalized_zpos; + /** + * @colorkey: + * Color keying of the plane. See drm_plane_create_colorkey_properties() + * for more details. + */ + struct drm_plane_colorkey_state colorkey; + /** * @color_encoding: * @@ -725,6 +778,19 @@ struct drm_plane { */ struct drm_property *blend_mode_property; + /** + * @colorkey: + * Optional color keying properties for this plane. See + * drm_plane_create_colorkey_properties(). + */ + struct { + struct drm_property *plane_mask_property; + struct drm_property *mode_property; + struct drm_property *mask_property; + struct drm_property *min_property; + struct drm_property *max_property; + } colorkey; + /** * @color_encoding_property: * @@ -930,4 +996,30 @@ drm_plane_get_damage_clips(const struct drm_plane_state *state) int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters); +/** + * drm_colorkey_extract_component - get color key component value + * @ckey64: 64bit color key value + * @comp_name: name of 16bit color component to extract + * @nbits: size in bits of extracted component value + * + * Extract 16bit color component of @ckey64 given by @comp_name (alpha, red, + * green or blue) and convert it to unsigned integer that has bit-width of + * @nbits (result is rounded-up). 
+ */ +#define drm_colorkey_extract_component(ckey64, comp_name, nbits) \ + __drm_ckey_extract(ckey64, __drm_ckey_ ## comp_name ## _shift, nbits) + +#define __drm_ckey_alpha_shift 48 +#define __drm_ckey_red_shift 32 +#define __drm_ckey_green_shift 16 +#define __drm_ckey_blue_shift 0 + +static inline u16 __drm_ckey_extract(u64 ckey64, u8 ckey_shift, u8 nbits) +{ + u16 mask = (1 << (16 - nbits)) - 1; + u32 ret = ((u16)(ckey64 >> ckey_shift) + mask) >> (16 - nbits); + + return min_t(u16, ret, (1 << nbits) - 1); +} + #endif From 7f5a08da3f8490bd3881ae1c9788c31369d4cba4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 21 Dec 2020 00:57:22 +0300 Subject: [PATCH 760/851] WIP: drm/grate: Add Host1x and DRM drivers with experimental changes The new drivers and UAPI provide all features that are necessary for implementing a proper userspace driver. The new driver takes a different approach in regards to working with sync points by making each submitted job take an individual sync point; this solves the problem with the sync point recovery that the old driver suffered from. The new Host1x driver and v2 UAPI are optimized for performance and minimal resource consumption. The new v2 UAPI exposes to userspace the HW cmdstream-related features that are necessary for a proper HW fencing. The staging v1 UAPI is kept functional and all of current userspace will continue to work with the new driver, note that some of the never-really-used features of v1 UAPI have been removed (like sync point increment IOCTL) and will return EPERM. The goal is to merge new features into the main driver once they are ready. Key moments of v2 UAPI: - Raw sync points are not exposed to userspace. - Job descriptors are embedded into the commands stream. Kernel driver parses the stream and patches the descriptors in-place. - Commands buffer is copied from userspace via usrptr (no BO allocation and uncached-reading overhead). 
- Channel is not restricted to a single client, thus 3d channel can take a multi-client job that uses 2d / 3d. - Supports explicit jobs fencing. DRM sync object and DRM scheduler are utilized to provide the fencing support. - Allows to use host1x gather opcode, which is restricted to data-upload usecase only. This allows userspace to pre-allocate a gather buffer, put shaders code / data there, and use two-word "gather" opcode to upload data from the buffer without pushing that data into the commands stream on each submission. Signed-off-by: Dmitry Osipenko --- drivers/gpu/Makefile | 1 + drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 3 +- drivers/gpu/drm/grate/Kconfig | 38 + drivers/gpu/drm/grate/Makefile | 38 + drivers/gpu/drm/grate/channel.c | 62 + drivers/gpu/drm/grate/channel.h | 40 + drivers/gpu/drm/grate/client.c | 93 + drivers/gpu/drm/grate/client.h | 51 + drivers/gpu/drm/grate/dc.c | 3362 ++++++++++++++ drivers/gpu/drm/grate/dc.h | 832 ++++ drivers/gpu/drm/grate/dp.c | 876 ++++ drivers/gpu/drm/grate/dp.h | 177 + drivers/gpu/drm/grate/dpaux.c | 823 ++++ drivers/gpu/drm/grate/dpaux.h | 73 + drivers/gpu/drm/grate/drm.c | 738 +++ drivers/gpu/drm/grate/drm.h | 177 + drivers/gpu/drm/grate/dsi.c | 1700 +++++++ drivers/gpu/drm/grate/dsi.h | 143 + drivers/gpu/drm/grate/falcon.c | 222 + drivers/gpu/drm/grate/falcon.h | 114 + drivers/gpu/drm/grate/fb.c | 423 ++ drivers/gpu/drm/grate/gart.c | 715 +++ drivers/gpu/drm/grate/gart.h | 73 + drivers/gpu/drm/grate/gem.c | 727 +++ drivers/gpu/drm/grate/gem.h | 88 + drivers/gpu/drm/grate/gr2d.c | 498 ++ drivers/gpu/drm/grate/gr2d.h | 29 + drivers/gpu/drm/grate/gr3d.c | 769 ++++ drivers/gpu/drm/grate/gr3d.h | 24 + drivers/gpu/drm/grate/hda.c | 63 + drivers/gpu/drm/grate/hda.h | 20 + drivers/gpu/drm/grate/hdmi.c | 1768 +++++++ drivers/gpu/drm/grate/hdmi.h | 557 +++ drivers/gpu/drm/grate/hub.c | 1051 +++++ drivers/gpu/drm/grate/hub.h | 98 + drivers/gpu/drm/grate/mipi-phy.c | 134 + drivers/gpu/drm/grate/mipi-phy.h | 48 + 
drivers/gpu/drm/grate/output.c | 265 ++ drivers/gpu/drm/grate/plane.c | 957 ++++ drivers/gpu/drm/grate/plane.h | 107 + drivers/gpu/drm/grate/rgb.c | 326 ++ drivers/gpu/drm/grate/sor.c | 4054 +++++++++++++++++ drivers/gpu/drm/grate/sor.h | 457 ++ drivers/gpu/drm/grate/trace.c | 2 + drivers/gpu/drm/grate/trace.h | 68 + drivers/gpu/drm/grate/uapi/debug.c | 45 + drivers/gpu/drm/grate/uapi/debug.h | 12 + drivers/gpu/drm/grate/uapi/job.h | 117 + drivers/gpu/drm/grate/uapi/job_v1.c | 665 +++ drivers/gpu/drm/grate/uapi/job_v2.c | 780 ++++ drivers/gpu/drm/grate/uapi/patching.c | 630 +++ drivers/gpu/drm/grate/uapi/scheduler.c | 250 + drivers/gpu/drm/grate/uapi/scheduler.h | 10 + drivers/gpu/drm/grate/uapi/uapi.c | 472 ++ drivers/gpu/drm/grate/uapi/uapi.h | 113 + drivers/gpu/drm/grate/vic.c | 262 ++ drivers/gpu/drm/grate/vic.h | 37 + drivers/gpu/drm/tegra/Kconfig | 5 +- drivers/gpu/drm/tegra/Makefile | 2 +- drivers/gpu/host1x-grate/Kconfig | 51 + drivers/gpu/host1x-grate/Makefile | 24 + drivers/gpu/host1x-grate/buffer_object.c | 101 + drivers/gpu/host1x-grate/bus.c | 930 ++++ drivers/gpu/host1x-grate/bus.h | 18 + drivers/gpu/host1x-grate/debug.c | 100 + drivers/gpu/host1x-grate/dma_pool.c | 123 + drivers/gpu/host1x-grate/fence.c | 104 + drivers/gpu/host1x-grate/host1x.c | 469 ++ drivers/gpu/host1x-grate/host1x.h | 81 + drivers/gpu/host1x-grate/iommu.c | 139 + drivers/gpu/host1x-grate/mipi.c | 554 +++ drivers/gpu/host1x-grate/soc/channel.c | 363 ++ drivers/gpu/host1x-grate/soc/channel_hw.c | 282 ++ drivers/gpu/host1x-grate/soc/debug.c | 168 + drivers/gpu/host1x-grate/soc/host1x01.c | 78 + drivers/gpu/host1x-grate/soc/host1x01.h | 29 + drivers/gpu/host1x-grate/soc/host1x02.c | 78 + drivers/gpu/host1x-grate/soc/host1x02.h | 30 + drivers/gpu/host1x-grate/soc/host1x04.c | 78 + drivers/gpu/host1x-grate/soc/host1x04.h | 30 + drivers/gpu/host1x-grate/soc/host1x05.c | 78 + drivers/gpu/host1x-grate/soc/host1x05.h | 30 + drivers/gpu/host1x-grate/soc/host1x06.c | 78 + 
drivers/gpu/host1x-grate/soc/host1x06.h | 30 + drivers/gpu/host1x-grate/soc/host1x07.c | 78 + drivers/gpu/host1x-grate/soc/host1x07.h | 30 + .../host1x-grate/soc/hw/host1x01_hardware.h | 143 + .../host1x-grate/soc/hw/host1x02_hardware.h | 142 + .../host1x-grate/soc/hw/host1x04_hardware.h | 142 + .../host1x-grate/soc/hw/host1x05_hardware.h | 142 + .../host1x-grate/soc/hw/host1x06_hardware.h | 147 + .../host1x-grate/soc/hw/host1x07_hardware.h | 147 + .../host1x-grate/soc/hw/hw_host1x01_channel.h | 123 + .../host1x-grate/soc/hw/hw_host1x01_sync.h | 255 ++ .../host1x-grate/soc/hw/hw_host1x01_uclass.h | 180 + .../host1x-grate/soc/hw/hw_host1x02_channel.h | 123 + .../host1x-grate/soc/hw/hw_host1x02_sync.h | 255 ++ .../host1x-grate/soc/hw/hw_host1x02_uclass.h | 181 + .../host1x-grate/soc/hw/hw_host1x04_channel.h | 135 + .../host1x-grate/soc/hw/hw_host1x04_sync.h | 255 ++ .../host1x-grate/soc/hw/hw_host1x04_uclass.h | 181 + .../host1x-grate/soc/hw/hw_host1x05_channel.h | 135 + .../host1x-grate/soc/hw/hw_host1x05_sync.h | 255 ++ .../host1x-grate/soc/hw/hw_host1x05_uclass.h | 181 + .../soc/hw/hw_host1x06_hypervisor.h | 32 + .../host1x-grate/soc/hw/hw_host1x06_uclass.h | 181 + .../gpu/host1x-grate/soc/hw/hw_host1x06_vm.h | 160 + .../soc/hw/hw_host1x07_hypervisor.h | 32 + .../host1x-grate/soc/hw/hw_host1x07_uclass.h | 181 + .../gpu/host1x-grate/soc/hw/hw_host1x07_vm.h | 159 + drivers/gpu/host1x-grate/soc/mlocks.c | 123 + drivers/gpu/host1x-grate/soc/mlocks_hw.c | 27 + drivers/gpu/host1x-grate/soc/pushbuf.c | 308 ++ drivers/gpu/host1x-grate/soc/syncpoints.c | 246 + drivers/gpu/host1x-grate/soc/syncpoints_hw.c | 261 ++ drivers/gpu/host1x/Kconfig | 1 + drivers/iommu/tegra-gart.c | 4 + drivers/video/Kconfig | 1 + include/linux/host1x-grate.h | 1841 ++++++++ include/uapi/drm/grate_drm.h | 1095 +++++ 121 files changed, 38210 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/grate/Kconfig create mode 100644 drivers/gpu/drm/grate/Makefile create mode 100644 
drivers/gpu/drm/grate/channel.c create mode 100644 drivers/gpu/drm/grate/channel.h create mode 100644 drivers/gpu/drm/grate/client.c create mode 100644 drivers/gpu/drm/grate/client.h create mode 100644 drivers/gpu/drm/grate/dc.c create mode 100644 drivers/gpu/drm/grate/dc.h create mode 100644 drivers/gpu/drm/grate/dp.c create mode 100644 drivers/gpu/drm/grate/dp.h create mode 100644 drivers/gpu/drm/grate/dpaux.c create mode 100644 drivers/gpu/drm/grate/dpaux.h create mode 100644 drivers/gpu/drm/grate/drm.c create mode 100644 drivers/gpu/drm/grate/drm.h create mode 100644 drivers/gpu/drm/grate/dsi.c create mode 100644 drivers/gpu/drm/grate/dsi.h create mode 100644 drivers/gpu/drm/grate/falcon.c create mode 100644 drivers/gpu/drm/grate/falcon.h create mode 100644 drivers/gpu/drm/grate/fb.c create mode 100644 drivers/gpu/drm/grate/gart.c create mode 100644 drivers/gpu/drm/grate/gart.h create mode 100644 drivers/gpu/drm/grate/gem.c create mode 100644 drivers/gpu/drm/grate/gem.h create mode 100644 drivers/gpu/drm/grate/gr2d.c create mode 100644 drivers/gpu/drm/grate/gr2d.h create mode 100644 drivers/gpu/drm/grate/gr3d.c create mode 100644 drivers/gpu/drm/grate/gr3d.h create mode 100644 drivers/gpu/drm/grate/hda.c create mode 100644 drivers/gpu/drm/grate/hda.h create mode 100644 drivers/gpu/drm/grate/hdmi.c create mode 100644 drivers/gpu/drm/grate/hdmi.h create mode 100644 drivers/gpu/drm/grate/hub.c create mode 100644 drivers/gpu/drm/grate/hub.h create mode 100644 drivers/gpu/drm/grate/mipi-phy.c create mode 100644 drivers/gpu/drm/grate/mipi-phy.h create mode 100644 drivers/gpu/drm/grate/output.c create mode 100644 drivers/gpu/drm/grate/plane.c create mode 100644 drivers/gpu/drm/grate/plane.h create mode 100644 drivers/gpu/drm/grate/rgb.c create mode 100644 drivers/gpu/drm/grate/sor.c create mode 100644 drivers/gpu/drm/grate/sor.h create mode 100644 drivers/gpu/drm/grate/trace.c create mode 100644 drivers/gpu/drm/grate/trace.h create mode 100644 
drivers/gpu/drm/grate/uapi/debug.c create mode 100644 drivers/gpu/drm/grate/uapi/debug.h create mode 100644 drivers/gpu/drm/grate/uapi/job.h create mode 100644 drivers/gpu/drm/grate/uapi/job_v1.c create mode 100644 drivers/gpu/drm/grate/uapi/job_v2.c create mode 100644 drivers/gpu/drm/grate/uapi/patching.c create mode 100644 drivers/gpu/drm/grate/uapi/scheduler.c create mode 100644 drivers/gpu/drm/grate/uapi/scheduler.h create mode 100644 drivers/gpu/drm/grate/uapi/uapi.c create mode 100644 drivers/gpu/drm/grate/uapi/uapi.h create mode 100644 drivers/gpu/drm/grate/vic.c create mode 100644 drivers/gpu/drm/grate/vic.h create mode 100644 drivers/gpu/host1x-grate/Kconfig create mode 100644 drivers/gpu/host1x-grate/Makefile create mode 100644 drivers/gpu/host1x-grate/buffer_object.c create mode 100644 drivers/gpu/host1x-grate/bus.c create mode 100644 drivers/gpu/host1x-grate/bus.h create mode 100644 drivers/gpu/host1x-grate/debug.c create mode 100644 drivers/gpu/host1x-grate/dma_pool.c create mode 100644 drivers/gpu/host1x-grate/fence.c create mode 100644 drivers/gpu/host1x-grate/host1x.c create mode 100644 drivers/gpu/host1x-grate/host1x.h create mode 100644 drivers/gpu/host1x-grate/iommu.c create mode 100644 drivers/gpu/host1x-grate/mipi.c create mode 100644 drivers/gpu/host1x-grate/soc/channel.c create mode 100644 drivers/gpu/host1x-grate/soc/channel_hw.c create mode 100644 drivers/gpu/host1x-grate/soc/debug.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x01.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x01.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x02.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x02.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x04.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x04.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x05.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x05.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x06.c create mode 100644 
drivers/gpu/host1x-grate/soc/host1x06.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x07.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x07.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x01_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x02_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x04_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x05_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x06_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x07_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x01_channel.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x01_sync.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x01_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x02_channel.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x02_sync.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x02_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x04_channel.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x04_sync.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x04_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x05_channel.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x05_sync.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x05_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x06_hypervisor.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x06_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x06_vm.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x07_hypervisor.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x07_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x07_vm.h create mode 100644 drivers/gpu/host1x-grate/soc/mlocks.c create mode 100644 drivers/gpu/host1x-grate/soc/mlocks_hw.c create 
mode 100644 drivers/gpu/host1x-grate/soc/pushbuf.c create mode 100644 drivers/gpu/host1x-grate/soc/syncpoints.c create mode 100644 drivers/gpu/host1x-grate/soc/syncpoints_hw.c create mode 100644 include/linux/host1x-grate.h create mode 100644 include/uapi/drm/grate_drm.h diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index 835c88318cec6..1d53cd887918b 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -3,6 +3,7 @@ # taken to initialize them in the correct order. Link order is the only way # to ensure this currently. obj-$(CONFIG_TEGRA_HOST1X) += host1x/ +obj-$(CONFIG_GRATE_HOST1X) += host1x-grate/ obj-y += drm/ vga/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ obj-$(CONFIG_TRACE_GPU_MEM) += trace/ diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f3bc90baca61e..6f052074ef2ee 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -329,6 +329,8 @@ source "drivers/gpu/drm/msm/Kconfig" source "drivers/gpu/drm/fsl-dcu/Kconfig" +source "drivers/gpu/drm/grate/Kconfig" + source "drivers/gpu/drm/tegra/Kconfig" source "drivers/gpu/drm/stm/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index ad11121548983..88031e4be1689 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -100,7 +100,8 @@ obj-y += tilcdc/ obj-$(CONFIG_DRM_QXL) += qxl/ obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/ obj-$(CONFIG_DRM_MSM) += msm/ -obj-$(CONFIG_DRM_TEGRA) += tegra/ +obj-$(CONFIG_DRM_TEGRA) += grate/ +obj-$(CONFIG_DRM_TEGRA_ORIG) += tegra/ obj-$(CONFIG_DRM_STM) += stm/ obj-$(CONFIG_DRM_STI) += sti/ obj-y += imx/ diff --git a/drivers/gpu/drm/grate/Kconfig b/drivers/gpu/drm/grate/Kconfig new file mode 100644 index 0000000000000..29fd0cb297a00 --- /dev/null +++ b/drivers/gpu/drm/grate/Kconfig @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_TEGRA + tristate "NVIDIA Tegra DRM (with experimental changes)" + depends on ARCH_TEGRA || (ARM && COMPILE_TEST) + depends on COMMON_CLK + depends on 
DRM + depends on OF + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select DRM_PANEL + select DRM_SCHED + select GRATE_HOST1X + select GRATE_HOST1X_DRV + select INTERCONNECT + select IOMMU_IOVA + select CEC_CORE if CEC_NOTIFIER + help + Choose this option if you have an NVIDIA Tegra SoC. + + To compile this driver as a module, choose M here: the module + will be called tegra-drm. + +if DRM_TEGRA + +config DRM_TEGRA_DEBUG + bool "NVIDIA Tegra DRM debug support" + help + Say yes here to enable debugging support. + +config DRM_TEGRA_STAGING + bool "Enable HOST1X interface" + depends on STAGING + help + Say yes if HOST1X should be available for userspace DRM users. + + If unsure, choose N. + +endif diff --git a/drivers/gpu/drm/grate/Makefile b/drivers/gpu/drm/grate/Makefile new file mode 100644 index 0000000000000..14c32a2159258 --- /dev/null +++ b/drivers/gpu/drm/grate/Makefile @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 +ccflags-y := -I $(srctree)/$(src) +ccflags-y += -I $(srctree)/$(src)/uapi +ccflags-y += -I $(srctree)/drivers/gpu/host1x-grate/soc/hw +ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG + +tegra-drm-y := \ + drm.o \ + gem.o \ + fb.o \ + dp.o \ + hub.o \ + plane.o \ + dc.o \ + output.o \ + rgb.o \ + hda.o \ + hdmi.o \ + mipi-phy.o \ + dsi.o \ + sor.o \ + dpaux.o \ + gr2d.o \ + gr3d.o \ + falcon.o \ + vic.o \ + trace.o \ + channel.o \ + client.o \ + gart.o \ + uapi/debug.o \ + uapi/job_v1.o \ + uapi/job_v2.o \ + uapi/patching.o \ + uapi/scheduler.o \ + uapi/uapi.o + +obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o diff --git a/drivers/gpu/drm/grate/channel.c b/drivers/gpu/drm/grate/channel.c new file mode 100644 index 0000000000000..1b7a7a5381a2d --- /dev/null +++ b/drivers/gpu/drm/grate/channel.c @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "channel.h" +#include "scheduler.h" + +struct tegra_drm_channel * +tegra_drm_open_channel(struct tegra_drm *tegra, + struct tegra_drm_client *drm_client, + u64 pipes_bitmask, + unsigned int 
num_pushbuf_words, + unsigned int hw_jobs_limit, + unsigned int job_hang_limit, + unsigned int timeout_msecs, + const char *name) +{ + struct drm_device *drm = tegra->drm; + struct host1x *host = dev_get_drvdata(drm->dev->parent); + struct host1x_client *client = &drm_client->base; + struct tegra_drm_channel *drm_channel; + int err; + + drm_channel = kzalloc(sizeof(*drm_channel), GFP_KERNEL); + if (!drm_channel) + return ERR_PTR(-ENOMEM); + + drm_channel->channel = host1x_channel_request(host, client->dev, + num_pushbuf_words); + if (IS_ERR(drm_channel->channel)) { + err = PTR_ERR(drm_channel->channel); + goto err_free_channel; + } + + drm_channel->acceptable_pipes = pipes_bitmask; + + err = drm_sched_init(&drm_channel->sched, + &tegra_drm_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(timeout_msecs / 2), + NULL, NULL, name); + if (err) + goto err_put_channel; + + list_add_tail(&drm_channel->list, &tegra->channels); + + return drm_channel; + +err_put_channel: + host1x_channel_put(drm_channel->channel); + +err_free_channel: + kfree(drm_channel); + + return ERR_PTR(err); +} + +void tegra_drm_close_channel(struct tegra_drm_channel *drm_channel) +{ + drm_sched_fini(&drm_channel->sched); + host1x_channel_put(drm_channel->channel); + list_del(&drm_channel->list); + kfree(drm_channel); +} diff --git a/drivers/gpu/drm/grate/channel.h b/drivers/gpu/drm/grate/channel.h new file mode 100644 index 0000000000000..42eb4886beb26 --- /dev/null +++ b/drivers/gpu/drm/grate/channel.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TEGRA_DRM_CHANNEL_H +#define __TEGRA_DRM_CHANNEL_H + +#include "drm.h" + +#define TEGRA_DRM_PIPE_2D BIT(DRM_TEGRA_PIPE_ID_2D) +#define TEGRA_DRM_PIPE_3D BIT(DRM_TEGRA_PIPE_ID_3D) +#define TEGRA_DRM_PIPE_VIC BIT(DRM_TEGRA_PIPE_ID_VIC) + +struct tegra_drm_channel { + struct drm_gpu_scheduler sched; + struct host1x_channel *channel; + struct list_head list; + u64 acceptable_pipes; +}; + +static inline struct 
tegra_drm_channel * +to_tegra_drm_channel(struct drm_gpu_scheduler *sched) +{ + return container_of(sched, struct tegra_drm_channel, sched); +} + +struct tegra_drm; +struct tegra_drm_client; + +struct tegra_drm_channel * +tegra_drm_open_channel(struct tegra_drm *tegra, + struct tegra_drm_client *drm_client, + u64 pipes_bitmask, + unsigned int num_pushbuf_words, + unsigned int hw_jobs_limit, + unsigned int job_hang_limit, + unsigned int timeout_msecs, + const char *name); + +void tegra_drm_close_channel(struct tegra_drm_channel *drm_channel); + +#endif diff --git a/drivers/gpu/drm/grate/client.c b/drivers/gpu/drm/grate/client.c new file mode 100644 index 0000000000000..cb42f4bb42c8e --- /dev/null +++ b/drivers/gpu/drm/grate/client.c @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "drm.h" + +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include +#endif + +int tegra_drm_register_client(struct tegra_drm *tegra, + struct tegra_drm_client *drm_client) +{ + struct drm_device *drm = tegra->drm; + struct host1x *host1x = dev_get_drvdata(drm->dev->parent); + struct host1x_client *client = &drm_client->base; + int err; + + drm_client->mlock = host1x_mlock_request(host1x, client->dev); + if (IS_ERR(drm_client->mlock)) { + err = PTR_ERR(drm_client->mlock); + return err; + } + + list_add_tail(&drm_client->list, &tegra->clients); + drm_client->drm = tegra; + + return 0; +} + +void tegra_drm_unregister_client(struct tegra_drm_client *drm_client) +{ + host1x_mlock_put(drm_client->mlock); + list_del(&drm_client->list); + drm_client->drm = NULL; +} + +struct iommu_group * +tegra_drm_client_iommu_attach(struct tegra_drm_client *drm_client, bool shared) +{ + struct host1x_client *client = &drm_client->base; + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + struct iommu_group *group = NULL; + int err; + + if (tegra->domain) { + group = iommu_group_get(client->dev); + if (!group) { + dev_err(client->dev, "failed to 
get IOMMU group\n"); + return ERR_PTR(-ENODEV); + } + + if (!shared || !tegra->group) { +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (client->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(client->dev); + arm_iommu_detach_device(client->dev); + arm_iommu_release_mapping(mapping); + } +#endif + err = iommu_attach_group(tegra->domain, group); + if (err < 0) { + iommu_group_put(group); + return ERR_PTR(err); + } + + if (shared) + tegra->group = group; + } + } + + return group; +} + +void tegra_drm_client_iommu_detach(struct tegra_drm_client *drm_client, + struct iommu_group *group, + bool shared) +{ + struct host1x_client *client = &drm_client->base; + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + + if (group) { + if (!shared || group == tegra->group) { + iommu_detach_group(tegra->domain, group); + + if (group == tegra->group) + tegra->group = NULL; + } + + iommu_group_put(group); + } +} diff --git a/drivers/gpu/drm/grate/client.h b/drivers/gpu/drm/grate/client.h new file mode 100644 index 0000000000000..2a7e3ee820dc1 --- /dev/null +++ b/drivers/gpu/drm/grate/client.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TEGRA_DRM_CLIENT_H +#define __TEGRA_DRM_CLIENT_H + +#include "drm.h" + +struct tegra_drm_job; + +struct tegra_drm_client { + struct host1x_client base; + struct host1x_mlock *mlock; + struct tegra_drm *drm; + struct list_head list; + const unsigned long *addr_regs; + unsigned int num_regs; + u64 pipe; + + int (*refine_class)(struct tegra_drm_client *client, u64 pipes, + unsigned int *classid); + + int (*prepare_job)(struct tegra_drm_client *client, + struct tegra_drm_job *job); + + int (*unprepare_job)(struct tegra_drm_client *client, + struct tegra_drm_job *job); + + int (*reset_hw)(struct tegra_drm_client *client); +}; + +static inline struct tegra_drm_client * +to_tegra_drm_client(struct host1x_client *client) +{ + return 
container_of(client, struct tegra_drm_client, base); +} + +struct tegra_drm; + +int tegra_drm_register_client(struct tegra_drm *tegra, + struct tegra_drm_client *drm_client); + +void tegra_drm_unregister_client(struct tegra_drm_client *drm_client); + +struct iommu_group * +tegra_drm_client_iommu_attach(struct tegra_drm_client *drm_client, bool shared); + +void tegra_drm_client_iommu_detach(struct tegra_drm_client *drm_client, + struct iommu_group *group, + bool shared); + +#endif diff --git a/drivers/gpu/drm/grate/dc.c b/drivers/gpu/drm/grate/dc.c new file mode 100644 index 0000000000000..dd11276deb464 --- /dev/null +++ b/drivers/gpu/drm/grate/dc.c @@ -0,0 +1,3362 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012 NVIDIA CORPORATION. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "dc.h" +#include "drm.h" +#include "gem.h" +#include "hub.h" +#include "plane.h" + +static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state); + +static void tegra_dc_stats_reset(struct tegra_dc_stats *stats) +{ + stats->frames = 0; + stats->vblank = 0; + stats->underflow = 0; + stats->overflow = 0; +} + +/* Reads the active copy of a register. 
*/ +static u32 tegra_dc_readl_active(struct tegra_dc *dc, unsigned long offset) +{ + u32 value; + + tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS); + value = tegra_dc_readl(dc, offset); + tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS); + + return value; +} + +static inline unsigned int tegra_plane_offset(struct tegra_plane *plane, + unsigned int offset) +{ + if (offset >= 0x500 && offset <= 0x638) { + offset = 0x000 + (offset - 0x500); + return plane->offset + offset; + } + + if (offset >= 0x700 && offset <= 0x719) { + offset = 0x180 + (offset - 0x700); + return plane->offset + offset; + } + + if (offset >= 0x800 && offset <= 0x839) { + offset = 0x1c0 + (offset - 0x800); + return plane->offset + offset; + } + + dev_WARN(plane->dc->dev, "invalid offset: %x\n", offset); + + return plane->offset + offset; +} + +static inline u32 tegra_plane_readl(struct tegra_plane *plane, + unsigned int offset) +{ + return tegra_dc_readl(plane->dc, tegra_plane_offset(plane, offset)); +} + +static inline void tegra_plane_writel(struct tegra_plane *plane, u32 value, + unsigned int offset) +{ + tegra_dc_writel(plane->dc, value, tegra_plane_offset(plane, offset)); +} + +bool tegra_dc_has_output(struct tegra_dc *dc, struct device *dev) +{ + struct device_node *np = dc->dev->of_node; + struct of_phandle_iterator it; + int err; + + of_for_each_phandle(&it, err, np, "nvidia,outputs", NULL, 0) + if (it.node == dev->of_node) + return true; + + return false; +} + +/* + * Double-buffered registers have two copies: ASSEMBLY and ACTIVE. When the + * *_ACT_REQ bits are set the ASSEMBLY copy is latched into the ACTIVE copy. + * Latching happens immediately if the display controller is in STOP mode or + * on the next frame boundary otherwise. + * + * Triple-buffered registers have three copies: ASSEMBLY, ARM and ACTIVE. The + * ASSEMBLY copy is latched into the ARM copy immediately after *_UPDATE bits + * are written. 
When the *_ACT_REQ bits are written, the ARM copy is latched + * into the ACTIVE copy, either immediately if the display controller is in + * STOP mode, or at the next frame boundary otherwise. + */ +void tegra_dc_commit(struct tegra_dc *dc) +{ + tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL); + tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL); +} + +static inline u32 compute_dda_inc(unsigned int in, unsigned int out, bool v, + unsigned int bpp) +{ + fixed20_12 outf = dfixed_init(out); + fixed20_12 inf = dfixed_init(in); + u32 dda_inc; + int max; + + if (v) + max = 15; + else { + switch (bpp) { + case 2: + max = 8; + break; + + default: + WARN_ON_ONCE(1); + fallthrough; + case 4: + max = 4; + break; + } + } + + outf.full = max_t(u32, outf.full - dfixed_const(1), dfixed_const(1)); + inf.full -= dfixed_const(1); + + dda_inc = dfixed_div(inf, outf); + dda_inc = min_t(u32, dda_inc, dfixed_const(max)); + + return dda_inc; +} + +static inline u32 compute_initial_dda(unsigned int in) +{ + fixed20_12 inf = dfixed_init(in); + return dfixed_frac(inf); +} + +static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane) +{ + u32 background[3] = { + BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE, + BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE, + BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE, + }; + u32 foreground = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255) | + BLEND_COLOR_KEY_NONE; + u32 blendnokey = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255); + enum drm_plane_colorkey_mode mode; + struct tegra_plane_state *state; + u32 blending[2]; + unsigned int i; + + /* disable blending for non-overlapping case */ + tegra_plane_writel(plane, blendnokey, DC_WIN_BLEND_NOKEY); + tegra_plane_writel(plane, foreground, DC_WIN_BLEND_1WIN); + + state = to_tegra_plane_state(plane->base.state); + mode = plane->base.state->colorkey.mode; + + /* setup color keying */ + if (mode == DRM_PLANE_COLORKEY_MODE_TRANSPARENT) { + /* color 
key matched areas are transparent */ + foreground = background[0] | BLEND_COLOR_KEY_0; + } + + /* setup alpha blending */ + if (state->opaque) { + /* + * Since custom fix-weight blending isn't utilized and weight + * of top window is set to max, we can enforce dependent + * blending which in this case results in transparent bottom + * window if top window is opaque and if top window enables + * alpha blending, then bottom window is getting alpha value + * of 1 minus the sum of alpha components of the overlapping + * plane. + */ + background[0] |= BLEND_CONTROL_DEPENDENT; + background[1] |= BLEND_CONTROL_DEPENDENT; + + /* + * The region where three windows overlap is the intersection + * of the two regions where two windows overlap. It contributes + * to the area if all of the windows on top of it have an alpha + * component. + */ + switch (state->base.normalized_zpos) { + case 0: + if (state->blending[0].alpha && + state->blending[1].alpha) + background[2] |= BLEND_CONTROL_DEPENDENT; + break; + + case 1: + background[2] |= BLEND_CONTROL_DEPENDENT; + break; + } + } else { + /* + * Enable alpha blending if pixel format has an alpha + * component. + */ + foreground |= BLEND_CONTROL_ALPHA; + + /* + * If any of the windows on top of this window is opaque, it + * will completely conceal this window within that area. If + * top window has an alpha component, it is blended over the + * bottom window. + */ + for (i = 0; i < 2; i++) { + if (state->blending[i].alpha && + state->blending[i].top) + background[i] |= BLEND_CONTROL_DEPENDENT; + } + + switch (state->base.normalized_zpos) { + case 0: + if (state->blending[0].alpha && + state->blending[1].alpha) + background[2] |= BLEND_CONTROL_DEPENDENT; + break; + + case 1: + /* + * When both middle and topmost windows have an alpha, + * these windows are mixed together and then the result + * is blended over the bottom window. 
+ */ + if (state->blending[0].alpha && + state->blending[0].top) + background[2] |= BLEND_CONTROL_ALPHA; + + if (state->blending[1].alpha && + state->blending[1].top) + background[2] |= BLEND_CONTROL_ALPHA; + break; + } + } + + switch (state->base.normalized_zpos) { + case 0: + tegra_plane_writel(plane, background[0], DC_WIN_BLEND_2WIN_X); + tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y); + tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY); + break; + + case 1: + /* + * If window B / C is topmost, then X / Y registers are + * matching the order of blending[...] state indices, + * otherwise a swap is required. + */ + if (!state->blending[0].top && state->blending[1].top) { + blending[0] = foreground; + blending[1] = background[1]; + } else { + blending[0] = background[0]; + blending[1] = foreground; + } + + tegra_plane_writel(plane, blending[0], DC_WIN_BLEND_2WIN_X); + tegra_plane_writel(plane, blending[1], DC_WIN_BLEND_2WIN_Y); + tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY); + break; + + case 2: + tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_X); + tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_Y); + tegra_plane_writel(plane, foreground, DC_WIN_BLEND_3WIN_XY); + break; + } +} + +static void tegra_plane_setup_blending(struct tegra_plane *plane, + const struct tegra_dc_window *window) +{ + u32 value; + + value = BLEND_FACTOR_DST_ALPHA_ZERO | BLEND_FACTOR_SRC_ALPHA_K2 | + BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC | + BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC; + tegra_plane_writel(plane, value, DC_WIN_BLEND_MATCH_SELECT); + + value = BLEND_FACTOR_DST_ALPHA_ZERO | BLEND_FACTOR_SRC_ALPHA_K2 | + BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC | + BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC; + tegra_plane_writel(plane, value, DC_WIN_BLEND_NOMATCH_SELECT); + + value = K2(255) | K1(255) | WINDOW_LAYER_DEPTH(255 - window->zpos); + tegra_plane_writel(plane, value, DC_WIN_BLEND_LAYER_CONTROL); +} + +static bool 
+tegra_plane_use_horizontal_filtering(struct tegra_plane *plane, + const struct tegra_dc_window *window) +{ + struct tegra_dc *dc = plane->dc; + + if (window->src.w == window->dst.w) + return false; + + if (plane->index == 0 && dc->soc->has_win_a_without_filters) + return false; + + return true; +} + +static bool +tegra_plane_use_vertical_filtering(struct tegra_plane *plane, + const struct tegra_dc_window *window) +{ + struct tegra_dc *dc = plane->dc; + + if (window->src.h == window->dst.h) + return false; + + if (plane->index == 0 && dc->soc->has_win_a_without_filters) + return false; + + if (plane->index == 2 && dc->soc->has_win_c_without_vert_filter) + return false; + + return true; +} + +static void tegra_dc_setup_window(struct tegra_plane *plane, + const struct tegra_dc_window *window) +{ + unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp; + struct tegra_dc *dc = plane->dc; + bool yuv, planar; + u32 value; + + /* + * For YUV planar modes, the number of bytes per pixel takes into + * account only the luma component and therefore is 1. + */ + yuv = tegra_plane_format_is_yuv(window->format, &planar); + if (!yuv) + bpp = window->bits_per_pixel / 8; + else + bpp = planar ? 
1 : 2; + + tegra_plane_writel(plane, window->format, DC_WIN_COLOR_DEPTH); + tegra_plane_writel(plane, window->swap, DC_WIN_BYTE_SWAP); + + value = V_POSITION(window->dst.y) | H_POSITION(window->dst.x); + tegra_plane_writel(plane, value, DC_WIN_POSITION); + + value = V_SIZE(window->dst.h) | H_SIZE(window->dst.w); + tegra_plane_writel(plane, value, DC_WIN_SIZE); + + h_offset = window->src.x * bpp; + v_offset = window->src.y; + h_size = window->src.w * bpp; + v_size = window->src.h; + + if (window->reflect_x) + h_offset += (window->src.w - 1) * bpp; + + if (window->reflect_y) + v_offset += window->src.h - 1; + + value = V_PRESCALED_SIZE(v_size) | H_PRESCALED_SIZE(h_size); + tegra_plane_writel(plane, value, DC_WIN_PRESCALED_SIZE); + + /* + * For DDA computations the number of bytes per pixel for YUV planar + * modes needs to take into account all Y, U and V components. + */ + if (yuv && planar) + bpp = 2; + + h_dda = compute_dda_inc(window->src.w, window->dst.w, false, bpp); + v_dda = compute_dda_inc(window->src.h, window->dst.h, true, bpp); + + value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda); + tegra_plane_writel(plane, value, DC_WIN_DDA_INC); + + h_dda = compute_initial_dda(window->src.x); + v_dda = compute_initial_dda(window->src.y); + + tegra_plane_writel(plane, h_dda, DC_WIN_H_INITIAL_DDA); + tegra_plane_writel(plane, v_dda, DC_WIN_V_INITIAL_DDA); + + tegra_plane_writel(plane, 0, DC_WIN_UV_BUF_STRIDE); + tegra_plane_writel(plane, 0, DC_WIN_BUF_STRIDE); + + tegra_plane_writel(plane, window->base[0], DC_WINBUF_START_ADDR); + + if (yuv && planar) { + tegra_plane_writel(plane, window->base[1], DC_WINBUF_START_ADDR_U); + tegra_plane_writel(plane, window->base[2], DC_WINBUF_START_ADDR_V); + value = window->stride[1] << 16 | window->stride[0]; + tegra_plane_writel(plane, value, DC_WIN_LINE_STRIDE); + } else { + tegra_plane_writel(plane, window->stride[0], DC_WIN_LINE_STRIDE); + } + + tegra_plane_writel(plane, h_offset, DC_WINBUF_ADDR_H_OFFSET); + tegra_plane_writel(plane, 
v_offset, DC_WINBUF_ADDR_V_OFFSET); + + if (dc->soc->supports_block_linear) { + unsigned long height = window->tiling.value; + + switch (window->tiling.mode) { + case TEGRA_BO_TILING_MODE_PITCH: + value = DC_WINBUF_SURFACE_KIND_PITCH; + break; + + case TEGRA_BO_TILING_MODE_TILED: + value = DC_WINBUF_SURFACE_KIND_TILED; + break; + + case TEGRA_BO_TILING_MODE_BLOCK: + value = DC_WINBUF_SURFACE_KIND_BLOCK_HEIGHT(height) | + DC_WINBUF_SURFACE_KIND_BLOCK; + break; + } + + tegra_plane_writel(plane, value, DC_WINBUF_SURFACE_KIND); + } else { + switch (window->tiling.mode) { + case TEGRA_BO_TILING_MODE_PITCH: + value = DC_WIN_BUFFER_ADDR_MODE_LINEAR_UV | + DC_WIN_BUFFER_ADDR_MODE_LINEAR; + break; + + case TEGRA_BO_TILING_MODE_TILED: + value = DC_WIN_BUFFER_ADDR_MODE_TILE_UV | + DC_WIN_BUFFER_ADDR_MODE_TILE; + break; + + case TEGRA_BO_TILING_MODE_BLOCK: + /* + * No need to handle this here because ->atomic_check + * will already have filtered it out. + */ + break; + } + + tegra_plane_writel(plane, value, DC_WIN_BUFFER_ADDR_MODE); + } + + value = WIN_ENABLE; + + if (yuv) { + /* setup colorspace conversion coefficients */ + tegra_plane_writel(plane, window->csc.yof, DC_WIN_CSC_YOF); + tegra_plane_writel(plane, window->csc.kyrgb, DC_WIN_CSC_KYRGB); + tegra_plane_writel(plane, window->csc.kur, DC_WIN_CSC_KUR); + tegra_plane_writel(plane, window->csc.kvr, DC_WIN_CSC_KVR); + tegra_plane_writel(plane, window->csc.kug, DC_WIN_CSC_KUG); + tegra_plane_writel(plane, window->csc.kvg, DC_WIN_CSC_KVG); + tegra_plane_writel(plane, window->csc.kub, DC_WIN_CSC_KUB); + tegra_plane_writel(plane, window->csc.kvb, DC_WIN_CSC_KVB); + + value |= CSC_ENABLE; + } else if (window->bits_per_pixel < 24) { + value |= COLOR_EXPAND; + } + + if (window->reflect_x) + value |= H_DIRECTION; + + if (window->reflect_y) + value |= V_DIRECTION; + + if (tegra_plane_use_horizontal_filtering(plane, window)) { + /* + * Enable horizontal 6-tap filter and set filtering + * coefficients to the default values defined in 
TRM. + */ + tegra_plane_writel(plane, 0x00008000, DC_WIN_H_FILTER_P(0)); + tegra_plane_writel(plane, 0x3e087ce1, DC_WIN_H_FILTER_P(1)); + tegra_plane_writel(plane, 0x3b117ac1, DC_WIN_H_FILTER_P(2)); + tegra_plane_writel(plane, 0x591b73aa, DC_WIN_H_FILTER_P(3)); + tegra_plane_writel(plane, 0x57256d9a, DC_WIN_H_FILTER_P(4)); + tegra_plane_writel(plane, 0x552f668b, DC_WIN_H_FILTER_P(5)); + tegra_plane_writel(plane, 0x73385e8b, DC_WIN_H_FILTER_P(6)); + tegra_plane_writel(plane, 0x72435583, DC_WIN_H_FILTER_P(7)); + tegra_plane_writel(plane, 0x714c4c8b, DC_WIN_H_FILTER_P(8)); + tegra_plane_writel(plane, 0x70554393, DC_WIN_H_FILTER_P(9)); + tegra_plane_writel(plane, 0x715e389b, DC_WIN_H_FILTER_P(10)); + tegra_plane_writel(plane, 0x71662faa, DC_WIN_H_FILTER_P(11)); + tegra_plane_writel(plane, 0x536d25ba, DC_WIN_H_FILTER_P(12)); + tegra_plane_writel(plane, 0x55731bca, DC_WIN_H_FILTER_P(13)); + tegra_plane_writel(plane, 0x387a11d9, DC_WIN_H_FILTER_P(14)); + tegra_plane_writel(plane, 0x3c7c08f1, DC_WIN_H_FILTER_P(15)); + + value |= H_FILTER; + } + + if (tegra_plane_use_vertical_filtering(plane, window)) { + unsigned int i, k; + + /* + * Enable vertical 2-tap filter and set filtering + * coefficients to the default values defined in TRM. 
+ */ + for (i = 0, k = 128; i < 16; i++, k -= 8) + tegra_plane_writel(plane, k, DC_WIN_V_FILTER_P(i)); + + value |= V_FILTER; + } + + tegra_plane_writel(plane, value, DC_WIN_WIN_OPTIONS); + + if (dc->soc->has_legacy_blending) + tegra_plane_setup_blending_legacy(plane); + else + tegra_plane_setup_blending(plane, window); +} + +static const u32 tegra20_primary_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* non-native formats */ + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB8888, +}; + +static const u64 tegra20_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED, + DRM_FORMAT_MOD_INVALID +}; + +static const u32 tegra114_primary_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* new on Tegra114 */ + DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, +}; + +static const u32 tegra124_primary_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* new on Tegra114 */ + DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + /* new on Tegra124 */ + DRM_FORMAT_RGBX8888, + DRM_FORMAT_BGRX8888, +}; + +static const u64 tegra124_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1), + 
DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5), + DRM_FORMAT_MOD_INVALID +}; + +static int tegra_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state); + unsigned int supported_rotation = DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y; + unsigned int rotation = new_plane_state->rotation; + struct tegra_bo_tiling *tiling = &plane_state->tiling; + struct tegra_plane *tegra = to_tegra_plane(plane); + struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc); + int err; + + plane_state->peak_memory_bandwidth = 0; + plane_state->avg_memory_bandwidth = 0; + + /* no need for further checks if the plane is being disabled */ + if (!new_plane_state->crtc) { + plane_state->total_peak_memory_bandwidth = 0; + return 0; + } + + err = tegra_plane_format(new_plane_state->fb->format->format, + &plane_state->format, + &plane_state->swap); + if (err < 0) + return err; + + /* + * Tegra20 and Tegra30 are special cases here because they support + * only variants of specific formats with an alpha component, but not + * the corresponding opaque formats. However, the opaque formats can + * be emulated by disabling alpha blending for the plane. 
+ */ + if (dc->soc->has_legacy_blending) { + err = tegra_plane_setup_legacy_state(tegra, plane_state); + if (err < 0) + return err; + } + + err = tegra_fb_get_tiling(new_plane_state->fb, tiling); + if (err < 0) + return err; + + if (tiling->mode == TEGRA_BO_TILING_MODE_BLOCK && + !dc->soc->supports_block_linear) { + DRM_ERROR("hardware doesn't support block linear mode\n"); + return -EINVAL; + } + + /* + * Older userspace used custom BO flag in order to specify the Y + * reflection, while modern userspace uses the generic DRM rotation + * property in order to achieve the same result. The legacy BO flag + * duplicates the DRM rotation property when both are set. + */ + if (tegra_fb_is_bottom_up(new_plane_state->fb)) + rotation |= DRM_MODE_REFLECT_Y; + + rotation = drm_rotation_simplify(rotation, supported_rotation); + + if (rotation & DRM_MODE_REFLECT_X) + plane_state->reflect_x = true; + else + plane_state->reflect_x = false; + + if (rotation & DRM_MODE_REFLECT_Y) + plane_state->reflect_y = true; + else + plane_state->reflect_y = false; + + /* + * Tegra doesn't support different strides for U and V planes so we + * error out if the user tries to display a framebuffer with such a + * configuration. 
+ */ + if (new_plane_state->fb->format->num_planes > 2) { + if (new_plane_state->fb->pitches[2] != new_plane_state->fb->pitches[1]) { + DRM_ERROR("unsupported UV-plane configuration\n"); + return -EINVAL; + } + } + + err = tegra_plane_state_add(tegra, new_plane_state); + if (err < 0) + return err; + + return 0; +} + +static void tegra_plane_atomic_disable(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); + struct tegra_plane *p = to_tegra_plane(plane); + u32 value; + + /* rien ne va plus */ + if (!old_state || !old_state->crtc) + return; + + value = tegra_plane_readl(p, DC_WIN_WIN_OPTIONS); + value &= ~WIN_ENABLE; + tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS); +} + +static void tegra_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + plane); + struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state); + struct drm_framebuffer *fb = new_state->fb; + struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_dc_window window; + const struct drm_tegra_plane_csc_blob *csc; + unsigned int i; + + /* rien ne va plus */ + if (!new_state->crtc || !new_state->fb) + return; + + if (!new_state->visible) + return tegra_plane_atomic_disable(plane, state); + + memset(&window, 0, sizeof(window)); + window.src.x = new_state->src.x1 >> 16; + window.src.y = new_state->src.y1 >> 16; + window.src.w = drm_rect_width(&new_state->src) >> 16; + window.src.h = drm_rect_height(&new_state->src) >> 16; + window.dst.x = new_state->dst.x1; + window.dst.y = new_state->dst.y1; + window.dst.w = drm_rect_width(&new_state->dst); + window.dst.h = drm_rect_height(&new_state->dst); + window.bits_per_pixel = fb->format->cpp[0] * 8; + window.reflect_x = tegra_plane_state->reflect_x; + window.reflect_y = tegra_plane_state->reflect_y; + + /* copy from state */ + window.zpos = 
new_state->normalized_zpos; + window.tiling = tegra_plane_state->tiling; + window.format = tegra_plane_state->format; + window.swap = tegra_plane_state->swap; + + for (i = 0; i < fb->format->num_planes; i++) { + window.base[i] = tegra_plane_state->iova[i] + fb->offsets[i]; + + /* + * Tegra uses a shared stride for UV planes. Framebuffers are + * already checked for this in the tegra_plane_atomic_check() + * function, so it's safe to ignore the V-plane pitch here. + */ + if (i < 2) + window.stride[i] = fb->pitches[i]; + + /* + * There are two ways to set tiling mode on Tegra: + * + * 1. New: using DRM modifiers + * 2. Old: using Tegra BO flags + * + * Older userspace doesn't support ADDFB2 IOCTL. Assume that + * legacy userspace is used if BO flag is set and FB modifier + * isn't set to maintain userspace compatibility. + */ + if (i == 0 && + window.tiling.mode == TEGRA_BO_TILING_MODE_PITCH && + window.tiling.value == 0) { + struct tegra_bo *bo = tegra_fb_get_plane(fb, i); + window.tiling.mode = bo->tiling.mode; + } + } + + if (tegra_plane_state->csc_blob) { + csc = tegra_plane_state->csc_blob->data; + + window.csc.yof = csc->yof; + window.csc.kyrgb = csc->kyrgb; + window.csc.kur = csc->kur; + window.csc.kvr = csc->kvr; + window.csc.kug = csc->kug; + window.csc.kvg = csc->kvg; + window.csc.kub = csc->kub; + window.csc.kvb = csc->kvb; + } else { + window.csc.yof = 0x00f0; + window.csc.kyrgb = 0x012a; + window.csc.kur = 0x0000; + window.csc.kvr = 0x0198; + window.csc.kug = 0x039b; + window.csc.kvg = 0x032f; + window.csc.kub = 0x0204; + window.csc.kvb = 0x0000; + } + + tegra_dc_setup_window(p, &window); +} + +static int tegra_plane_atomic_async_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state; + int err; + + /* + * It is not obvious whether it's fine for framebuffer to disappear + * while display controller could be accessing it or hardware could + * cope with that somehow. Let's just disallow that to happen. 
+ */ + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + if (!new_plane_state->fb) + return -EINVAL; + + /* the rest should be fine to change asynchronously */ + err = tegra_plane_atomic_check(plane, state); + if (err) + return err; + + return 0; +} + +static inline void tegra_plane_clear_latching(struct drm_plane *plane) +{ + struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_dc *dc = p->dc; + + /* clear pending latching request from the previous async update */ + tegra_dc_writel(dc, 0, DC_CMD_STATE_CONTROL); +} + +static inline void tegra_plane_atomic_flush(struct drm_plane *plane) +{ + struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_dc *dc = p->dc; + + /* latch updated registers and activate the new state */ + tegra_dc_writel(dc, 1 << (p->index + 9), DC_CMD_STATE_CONTROL); + tegra_dc_writel(dc, 1 << (p->index + 1), DC_CMD_STATE_CONTROL); +} + +static void tegra_plane_atomic_async_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state; + + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + + tegra_plane_clear_latching(plane); + tegra_plane_copy_state(plane, new_plane_state); + tegra_plane_atomic_update(plane, state); + tegra_plane_atomic_flush(plane); +} + +static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = { + .prepare_fb = tegra_plane_prepare_fb, + .cleanup_fb = tegra_plane_cleanup_fb, + .atomic_check = tegra_plane_atomic_check, + .atomic_disable = tegra_plane_atomic_disable, + .atomic_update = tegra_plane_atomic_update, + .atomic_async_check = tegra_plane_atomic_async_check, + .atomic_async_update = tegra_plane_atomic_async_update, +}; + +static unsigned long tegra_plane_get_possible_crtcs(struct drm_device *drm) +{ + /* + * Ideally this would use drm_crtc_mask(), but that would require the + * CRTC to already be in the mode_config's list of CRTCs. 
However, it + * will only be added to that list in the drm_crtc_init_with_planes() + * (in tegra_dc_init()), which in turn requires registration of these + * planes. So we have ourselves a nice little chicken and egg problem + * here. + * + * We work around this by manually creating the mask from the number + * of CRTCs that have been registered, and should therefore always be + * the same as drm_crtc_index() after registration. + */ + return 1 << drm->mode_config.num_crtc; +} + +/* + * Create the "YUV to RGB CSC" blob property for @plane, attach it to the + * plane object with an initial value of 0 and keep a blob holding the + * default coefficients in plane->csc_default. Failures are only logged: + * the function then returns without attaching the property, and the + * default blob is released again if the property could not be created. + */ +static void tegra_plane_create_csc_property(struct tegra_plane *plane) +{ + /* set default colorspace conversion coefficients to ITU-R BT.601 */ + struct drm_tegra_plane_csc_blob csc_bt601 = { + .yof = 0x00f0, + .kyrgb = 0x012a, + .kur = 0x0000, + .kvr = 0x0198, + .kug = 0x039b, + .kvg = 0x032f, + .kub = 0x0204, + .kvb = 0x0000, + }; + struct drm_property_blob *blob; + + blob = drm_property_create_blob(plane->base.dev, sizeof(csc_bt601), + &csc_bt601); + /* drm_property_create_blob() returns an ERR_PTR() on failure, never NULL */ + if (IS_ERR(blob)) { + dev_err(plane->dc->dev, "failed to create CSC BLOB\n"); + return; + } + + plane->props.csc_blob = drm_property_create( + plane->base.dev, DRM_MODE_PROP_BLOB, "YUV to RGB CSC", 0); + + if (!plane->props.csc_blob) { + dev_err(plane->dc->dev, "failed to create CSC property\n"); + drm_property_blob_put(blob); + return; + } + + drm_object_attach_property(&plane->base.base, plane->props.csc_blob, 0); + + plane->csc_default = blob; +} + +static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm, + struct tegra_dc *dc) +{ + unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm); + enum drm_plane_type type = DRM_PLANE_TYPE_PRIMARY; + struct tegra_plane *plane; + unsigned int num_formats; + const u64 *modifiers; + const u32 *formats; + int err; + + plane = kzalloc(sizeof(*plane), GFP_KERNEL); + if (!plane) + return ERR_PTR(-ENOMEM); + + /* Always use window A as primary window */ + plane->offset = 0xa00; + plane->index = 0; + plane->dc = dc; + + num_formats = dc->soc->num_primary_formats; + formats =
dc->soc->primary_formats; + modifiers = dc->soc->modifiers; + + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + + err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, + &tegra_plane_funcs, formats, + num_formats, modifiers, type, NULL); + if (err < 0) { + kfree(plane); + return ERR_PTR(err); + } + + drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs); + drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255); + + err = drm_plane_create_rotation_property(&plane->base, + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); + if (err < 0) + dev_err(dc->dev, "failed to create rotation property: %d\n", + err); + + if (dc->soc->has_legacy_blending) + drm_plane_create_colorkey_properties(&plane->base, + BIT(DRM_PLANE_COLORKEY_MODE_DISABLED) | + BIT(DRM_PLANE_COLORKEY_MODE_TRANSPARENT)); + + if (dc->soc->has_win_a_csc) + tegra_plane_create_csc_property(plane); + + return &plane->base; +} + +static const u32 tegra_legacy_cursor_plane_formats[] = { + DRM_FORMAT_RGBA8888, +}; + +static const u32 tegra_cursor_plane_formats[] = { + DRM_FORMAT_ARGB8888, +}; + +static int tegra_cursor_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state); + struct tegra_plane *tegra = to_tegra_plane(plane); + int err; + + plane_state->peak_memory_bandwidth = 0; + plane_state->avg_memory_bandwidth = 0; + + /* no need for further checks if the plane is being disabled */ + if (!new_plane_state->crtc) { + plane_state->total_peak_memory_bandwidth = 0; + return 0; + } + + /* scaling not supported for cursor */ + if ((new_plane_state->src_w >> 16 != new_plane_state->crtc_w) || + (new_plane_state->src_h >> 16 != new_plane_state->crtc_h)) + return -EINVAL; + + /* only 
square cursors supported */ + if (new_plane_state->src_w != new_plane_state->src_h) + return -EINVAL; + + if (new_plane_state->crtc_w != 32 && new_plane_state->crtc_w != 64 && + new_plane_state->crtc_w != 128 && new_plane_state->crtc_w != 256) + return -EINVAL; + + err = tegra_plane_state_add(tegra, new_plane_state); + if (err < 0) + return err; + + return 0; +} + +static void tegra_cursor_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + plane); + struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state); + struct tegra_dc *dc = to_tegra_dc(new_state->crtc); + struct tegra_drm *tegra = plane->dev->dev_private; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + u64 dma_mask = *dc->dev->dma_mask; +#endif + unsigned int x, y; + u32 value = 0; + + /* rien ne va plus */ + if (!new_state->crtc || !new_state->fb) + return; + + /* + * Legacy display supports hardware clipping of the cursor, but + * nvdisplay relies on software to clip the cursor to the screen. 
+ */ + if (!dc->soc->has_nvdisplay) + value |= CURSOR_CLIP_DISPLAY; + + switch (new_state->crtc_w) { + case 32: + value |= CURSOR_SIZE_32x32; + break; + + case 64: + value |= CURSOR_SIZE_64x64; + break; + + case 128: + value |= CURSOR_SIZE_128x128; + break; + + case 256: + value |= CURSOR_SIZE_256x256; + break; + + default: + WARN(1, "cursor size %ux%u not supported\n", + new_state->crtc_w, new_state->crtc_h); + return; + } + + value |= (tegra_plane_state->iova[0] >> 10) & 0x3fffff; + tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR); + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + value = (tegra_plane_state->iova[0] >> 32) & (dma_mask >> 32); + tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI); +#endif + + /* enable cursor and set blend mode */ + value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); + value |= CURSOR_ENABLE; + tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); + + value = tegra_dc_readl(dc, DC_DISP_BLEND_CURSOR_CONTROL); + value &= ~CURSOR_DST_BLEND_MASK; + value &= ~CURSOR_SRC_BLEND_MASK; + + if (dc->soc->has_nvdisplay) + value &= ~CURSOR_COMPOSITION_MODE_XOR; + else + value |= CURSOR_MODE_NORMAL; + + value |= CURSOR_DST_BLEND_NEG_K1_TIMES_SRC; + value |= CURSOR_SRC_BLEND_K1_TIMES_SRC; + value |= CURSOR_ALPHA; + tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL); + + /* nvdisplay relies on software for clipping */ + if (dc->soc->has_nvdisplay) { + struct drm_rect src; + + x = new_state->dst.x1; + y = new_state->dst.y1; + + drm_rect_fp_to_int(&src, &new_state->src); + + value = (src.y1 & tegra->vmask) << 16 | (src.x1 & tegra->hmask); + tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR); + + value = (drm_rect_height(&src) & tegra->vmask) << 16 | + (drm_rect_width(&src) & tegra->hmask); + tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR); + } else { + x = new_state->crtc_x; + y = new_state->crtc_y; + } + + /* position the cursor */ + value = ((y & tegra->vmask) << 16) | (x & tegra->hmask); 
+ tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION); +} + +static void tegra_cursor_atomic_disable(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); + struct tegra_dc *dc; + u32 value; + + /* rien ne va plus */ + if (!old_state || !old_state->crtc) + return; + + dc = to_tegra_dc(old_state->crtc); + + value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); + value &= ~CURSOR_ENABLE; + tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); +} + +static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = { + .prepare_fb = tegra_plane_prepare_fb, + .cleanup_fb = tegra_plane_cleanup_fb, + .atomic_check = tegra_cursor_atomic_check, + .atomic_update = tegra_cursor_atomic_update, + .atomic_disable = tegra_cursor_atomic_disable, +}; + +static const uint64_t linear_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + +static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm, + struct tegra_dc *dc) +{ + unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm); + struct tegra_plane *plane; + unsigned int num_formats; + const u32 *formats; + int err; + + plane = kzalloc(sizeof(*plane), GFP_KERNEL); + if (!plane) + return ERR_PTR(-ENOMEM); + + /* + * This index is kind of fake. The cursor isn't a regular plane, but + * its update and activation request bits in DC_CMD_STATE_CONTROL do + * use the same programming. Setting this fake index here allows the + * code in tegra_add_plane_state() to do the right thing without the + * need to special-casing the cursor plane. 
+ */ + plane->index = 6; + plane->dc = dc; + + if (!dc->soc->has_nvdisplay) { + num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats); + formats = tegra_legacy_cursor_plane_formats; + } else { + num_formats = ARRAY_SIZE(tegra_cursor_plane_formats); + formats = tegra_cursor_plane_formats; + } + + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + + err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, + &tegra_plane_funcs, formats, + num_formats, linear_modifiers, + DRM_PLANE_TYPE_CURSOR, NULL); + if (err < 0) { + kfree(plane); + return ERR_PTR(err); + } + + drm_plane_helper_add(&plane->base, &tegra_cursor_plane_helper_funcs); + drm_plane_create_zpos_immutable_property(&plane->base, 255); + + return &plane->base; +} + +static const u32 tegra20_overlay_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* non-native formats */ + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB8888, + /* planar formats */ + DRM_FORMAT_UYVY, + DRM_FORMAT_YUYV, + DRM_FORMAT_YUV420, + DRM_FORMAT_YUV422, +}; + +static const u32 tegra114_overlay_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* new on Tegra114 */ + DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + /* planar formats */ + DRM_FORMAT_UYVY, + DRM_FORMAT_YUYV, + DRM_FORMAT_YUV420, + DRM_FORMAT_YUV422, +}; + +static const u32 tegra124_overlay_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* new on 
Tegra114 */ + DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + /* new on Tegra124 */ + DRM_FORMAT_RGBX8888, + DRM_FORMAT_BGRX8888, + /* planar formats */ + DRM_FORMAT_UYVY, + DRM_FORMAT_YUYV, + DRM_FORMAT_YUV420, + DRM_FORMAT_YUV422, +}; + +static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm, + struct tegra_dc *dc, + unsigned int index, + bool cursor) +{ + unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm); + struct tegra_plane *plane; + unsigned int num_formats; + enum drm_plane_type type; + const u32 *formats; + int err; + + plane = kzalloc(sizeof(*plane), GFP_KERNEL); + if (!plane) + return ERR_PTR(-ENOMEM); + + plane->offset = 0xa00 + 0x200 * index; + plane->index = index; + plane->dc = dc; + + num_formats = dc->soc->num_overlay_formats; + formats = dc->soc->overlay_formats; + + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + + if (!cursor) + type = DRM_PLANE_TYPE_OVERLAY; + else + type = DRM_PLANE_TYPE_CURSOR; + + err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, + &tegra_plane_funcs, formats, + num_formats, linear_modifiers, + type, NULL); + if (err < 0) { + kfree(plane); + return ERR_PTR(err); + } + + drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs); + drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255); + tegra_plane_create_csc_property(plane); + + err = drm_plane_create_rotation_property(&plane->base, + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); + if (err < 0) + dev_err(dc->dev, "failed to create rotation property: %d\n", + err); + + if (dc->soc->has_legacy_blending) + drm_plane_create_colorkey_properties(&plane->base, + 
BIT(DRM_PLANE_COLORKEY_MODE_DISABLED) | + BIT(DRM_PLANE_COLORKEY_MODE_TRANSPARENT)); + + return &plane->base; +} + +static struct drm_plane *tegra_dc_add_shared_planes(struct drm_device *drm, + struct tegra_dc *dc) +{ + struct drm_plane *plane, *primary = NULL; + unsigned int i, j; + + for (i = 0; i < dc->soc->num_wgrps; i++) { + const struct tegra_windowgroup_soc *wgrp = &dc->soc->wgrps[i]; + + if (wgrp->dc == dc->pipe) { + for (j = 0; j < wgrp->num_windows; j++) { + unsigned int index = wgrp->windows[j]; + + plane = tegra_shared_plane_create(drm, dc, + wgrp->index, + index); + if (IS_ERR(plane)) + return plane; + + /* + * Choose the first shared plane owned by this + * head as the primary plane. + */ + if (!primary) { + plane->type = DRM_PLANE_TYPE_PRIMARY; + primary = plane; + } + } + } + } + + return primary; +} + +static struct drm_plane *tegra_dc_add_planes(struct drm_device *drm, + struct tegra_dc *dc) +{ + struct drm_plane *planes[2], *primary; + unsigned int planes_num; + unsigned int i; + int err; + + primary = tegra_primary_plane_create(drm, dc); + if (IS_ERR(primary)) + return primary; + + if (dc->soc->supports_cursor) + planes_num = 2; + else + planes_num = 1; + + for (i = 0; i < planes_num; i++) { + planes[i] = tegra_dc_overlay_plane_create(drm, dc, 1 + i, + false); + if (IS_ERR(planes[i])) { + err = PTR_ERR(planes[i]); + + while (i--) + tegra_plane_funcs.destroy(planes[i]); + + tegra_plane_funcs.destroy(primary); + return ERR_PTR(err); + } + } + + return primary; +} + +static void tegra_dc_destroy(struct drm_crtc *crtc) +{ + drm_crtc_cleanup(crtc); +} + +/* + * Replace the CRTC's current state with a freshly zeroed one: any existing + * state is destroyed first, then the new one is handed to the atomic helper. + * If the allocation fails, NULL is passed to the helper explicitly instead + * of an address computed from a NULL pointer. + */ +static void tegra_crtc_reset(struct drm_crtc *crtc) +{ + struct tegra_dc_state *state = kzalloc(sizeof(*state), GFP_KERNEL); + + if (crtc->state) + tegra_crtc_atomic_destroy_state(crtc, crtc->state); + + if (state) + __drm_atomic_helper_crtc_reset(crtc, &state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); +} + +static struct drm_crtc_state * +tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc) +{ + struct tegra_dc_state *state =
to_dc_state(crtc->state); + struct tegra_dc_state *copy; + + copy = kmalloc(sizeof(*copy), GFP_KERNEL); + if (!copy) + return NULL; + + /* + * kmalloc() does not zero *copy: the helper is handed ->base, and the + * driver-private members below are carried over by hand. + */ + __drm_atomic_helper_crtc_duplicate_state(crtc, &copy->base); + copy->ckey = state->ckey; + copy->clk = state->clk; + copy->pclk = state->pclk; + copy->div = state->div; + copy->planes = state->planes; + + return &copy->base; +} + +static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + __drm_atomic_helper_crtc_destroy_state(state); + kfree(state); +} + +#define DEBUGFS_REG32(_name) { .name = #_name, .offset = _name } + +static const struct debugfs_reg32 tegra_dc_regs[] = { + DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT), + DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT_CNTRL), + DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT_ERROR), + DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT), + DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT_CNTRL), + DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT_ERROR), + DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT), + DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT_CNTRL), + DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT_ERROR), + DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT), + DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT_CNTRL), + DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT_ERROR), + DEBUGFS_REG32(DC_CMD_CONT_SYNCPT_VSYNC), + DEBUGFS_REG32(DC_CMD_DISPLAY_COMMAND_OPTION0), + DEBUGFS_REG32(DC_CMD_DISPLAY_COMMAND), + DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE), + DEBUGFS_REG32(DC_CMD_DISPLAY_POWER_CONTROL), + DEBUGFS_REG32(DC_CMD_INT_STATUS), + DEBUGFS_REG32(DC_CMD_INT_MASK), + DEBUGFS_REG32(DC_CMD_INT_ENABLE), + DEBUGFS_REG32(DC_CMD_INT_TYPE), + DEBUGFS_REG32(DC_CMD_INT_POLARITY), + DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE1), + DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE2), + DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE3), + DEBUGFS_REG32(DC_CMD_STATE_ACCESS), + DEBUGFS_REG32(DC_CMD_STATE_CONTROL), + DEBUGFS_REG32(DC_CMD_DISPLAY_WINDOW_HEADER), + DEBUGFS_REG32(DC_CMD_REG_ACT_CONTROL), + DEBUGFS_REG32(DC_COM_CRC_CONTROL), + DEBUGFS_REG32(DC_COM_CRC_CHECKSUM), +
DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(0)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(1)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(2)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(3)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(0)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(1)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(2)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(3)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(0)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(1)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(2)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(3)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(0)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(1)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(2)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(3)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_DATA(0)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_DATA(1)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(0)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(1)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(2)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(3)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(4)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(5)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(6)), + DEBUGFS_REG32(DC_COM_PIN_MISC_CONTROL), + DEBUGFS_REG32(DC_COM_PIN_PM0_CONTROL), + DEBUGFS_REG32(DC_COM_PIN_PM0_DUTY_CYCLE), + DEBUGFS_REG32(DC_COM_PIN_PM1_CONTROL), + DEBUGFS_REG32(DC_COM_PIN_PM1_DUTY_CYCLE), + DEBUGFS_REG32(DC_COM_SPI_CONTROL), + DEBUGFS_REG32(DC_COM_SPI_START_BYTE), + DEBUGFS_REG32(DC_COM_HSPI_WRITE_DATA_AB), + DEBUGFS_REG32(DC_COM_HSPI_WRITE_DATA_CD), + DEBUGFS_REG32(DC_COM_HSPI_CS_DC), + DEBUGFS_REG32(DC_COM_SCRATCH_REGISTER_A), + DEBUGFS_REG32(DC_COM_SCRATCH_REGISTER_B), + DEBUGFS_REG32(DC_COM_GPIO_CTRL), + DEBUGFS_REG32(DC_COM_GPIO_DEBOUNCE_COUNTER), + DEBUGFS_REG32(DC_COM_CRC_CHECKSUM_LATCHED), + DEBUGFS_REG32(DC_DISP_DISP_SIGNAL_OPTIONS0), + DEBUGFS_REG32(DC_DISP_DISP_SIGNAL_OPTIONS1), + DEBUGFS_REG32(DC_DISP_DISP_WIN_OPTIONS), + DEBUGFS_REG32(DC_DISP_DISP_MEM_HIGH_PRIORITY), + 
DEBUGFS_REG32(DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER), + DEBUGFS_REG32(DC_DISP_DISP_TIMING_OPTIONS), + DEBUGFS_REG32(DC_DISP_REF_TO_SYNC), + DEBUGFS_REG32(DC_DISP_SYNC_WIDTH), + DEBUGFS_REG32(DC_DISP_BACK_PORCH), + DEBUGFS_REG32(DC_DISP_ACTIVE), + DEBUGFS_REG32(DC_DISP_FRONT_PORCH), + DEBUGFS_REG32(DC_DISP_H_PULSE0_CONTROL), + DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_A), + DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_B), + DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_C), + DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_D), + DEBUGFS_REG32(DC_DISP_H_PULSE1_CONTROL), + DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_A), + DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_B), + DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_C), + DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_D), + DEBUGFS_REG32(DC_DISP_H_PULSE2_CONTROL), + DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_A), + DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_B), + DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_C), + DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_D), + DEBUGFS_REG32(DC_DISP_V_PULSE0_CONTROL), + DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_A), + DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_B), + DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_C), + DEBUGFS_REG32(DC_DISP_V_PULSE1_CONTROL), + DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_A), + DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_B), + DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_C), + DEBUGFS_REG32(DC_DISP_V_PULSE2_CONTROL), + DEBUGFS_REG32(DC_DISP_V_PULSE2_POSITION_A), + DEBUGFS_REG32(DC_DISP_V_PULSE3_CONTROL), + DEBUGFS_REG32(DC_DISP_V_PULSE3_POSITION_A), + DEBUGFS_REG32(DC_DISP_M0_CONTROL), + DEBUGFS_REG32(DC_DISP_M1_CONTROL), + DEBUGFS_REG32(DC_DISP_DI_CONTROL), + DEBUGFS_REG32(DC_DISP_PP_CONTROL), + DEBUGFS_REG32(DC_DISP_PP_SELECT_A), + DEBUGFS_REG32(DC_DISP_PP_SELECT_B), + DEBUGFS_REG32(DC_DISP_PP_SELECT_C), + DEBUGFS_REG32(DC_DISP_PP_SELECT_D), + DEBUGFS_REG32(DC_DISP_DISP_CLOCK_CONTROL), + DEBUGFS_REG32(DC_DISP_DISP_INTERFACE_CONTROL), + DEBUGFS_REG32(DC_DISP_DISP_COLOR_CONTROL), + DEBUGFS_REG32(DC_DISP_SHIFT_CLOCK_OPTIONS), + 
DEBUGFS_REG32(DC_DISP_DATA_ENABLE_OPTIONS), + DEBUGFS_REG32(DC_DISP_SERIAL_INTERFACE_OPTIONS), + DEBUGFS_REG32(DC_DISP_LCD_SPI_OPTIONS), + DEBUGFS_REG32(DC_DISP_BORDER_COLOR), + DEBUGFS_REG32(DC_DISP_COLOR_KEY0_LOWER), + DEBUGFS_REG32(DC_DISP_COLOR_KEY0_UPPER), + DEBUGFS_REG32(DC_DISP_COLOR_KEY1_LOWER), + DEBUGFS_REG32(DC_DISP_COLOR_KEY1_UPPER), + DEBUGFS_REG32(DC_DISP_CURSOR_FOREGROUND), + DEBUGFS_REG32(DC_DISP_CURSOR_BACKGROUND), + DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR), + DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR_NS), + DEBUGFS_REG32(DC_DISP_CURSOR_POSITION), + DEBUGFS_REG32(DC_DISP_CURSOR_POSITION_NS), + DEBUGFS_REG32(DC_DISP_INIT_SEQ_CONTROL), + DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_A), + DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_B), + DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_C), + DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_D), + DEBUGFS_REG32(DC_DISP_DC_MCCIF_FIFOCTRL), + DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY0A_HYST), + DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY0B_HYST), + DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY1A_HYST), + DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY1B_HYST), + DEBUGFS_REG32(DC_DISP_DAC_CRT_CTRL), + DEBUGFS_REG32(DC_DISP_DISP_MISC_CONTROL), + DEBUGFS_REG32(DC_DISP_SD_CONTROL), + DEBUGFS_REG32(DC_DISP_SD_CSC_COEFF), + DEBUGFS_REG32(DC_DISP_SD_LUT(0)), + DEBUGFS_REG32(DC_DISP_SD_LUT(1)), + DEBUGFS_REG32(DC_DISP_SD_LUT(2)), + DEBUGFS_REG32(DC_DISP_SD_LUT(3)), + DEBUGFS_REG32(DC_DISP_SD_LUT(4)), + DEBUGFS_REG32(DC_DISP_SD_LUT(5)), + DEBUGFS_REG32(DC_DISP_SD_LUT(6)), + DEBUGFS_REG32(DC_DISP_SD_LUT(7)), + DEBUGFS_REG32(DC_DISP_SD_LUT(8)), + DEBUGFS_REG32(DC_DISP_SD_FLICKER_CONTROL), + DEBUGFS_REG32(DC_DISP_DC_PIXEL_COUNT), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(0)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(1)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(2)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(3)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(4)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(5)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(6)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(7)), + 
DEBUGFS_REG32(DC_DISP_SD_BL_TF(0)), + DEBUGFS_REG32(DC_DISP_SD_BL_TF(1)), + DEBUGFS_REG32(DC_DISP_SD_BL_TF(2)), + DEBUGFS_REG32(DC_DISP_SD_BL_TF(3)), + DEBUGFS_REG32(DC_DISP_SD_BL_CONTROL), + DEBUGFS_REG32(DC_DISP_SD_HW_K_VALUES), + DEBUGFS_REG32(DC_DISP_SD_MAN_K_VALUES), + DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR_HI), + DEBUGFS_REG32(DC_DISP_BLEND_CURSOR_CONTROL), + DEBUGFS_REG32(DC_WIN_WIN_OPTIONS), + DEBUGFS_REG32(DC_WIN_BYTE_SWAP), + DEBUGFS_REG32(DC_WIN_BUFFER_CONTROL), + DEBUGFS_REG32(DC_WIN_COLOR_DEPTH), + DEBUGFS_REG32(DC_WIN_POSITION), + DEBUGFS_REG32(DC_WIN_SIZE), + DEBUGFS_REG32(DC_WIN_PRESCALED_SIZE), + DEBUGFS_REG32(DC_WIN_H_INITIAL_DDA), + DEBUGFS_REG32(DC_WIN_V_INITIAL_DDA), + DEBUGFS_REG32(DC_WIN_DDA_INC), + DEBUGFS_REG32(DC_WIN_LINE_STRIDE), + DEBUGFS_REG32(DC_WIN_BUF_STRIDE), + DEBUGFS_REG32(DC_WIN_UV_BUF_STRIDE), + DEBUGFS_REG32(DC_WIN_BUFFER_ADDR_MODE), + DEBUGFS_REG32(DC_WIN_DV_CONTROL), + DEBUGFS_REG32(DC_WIN_BLEND_NOKEY), + DEBUGFS_REG32(DC_WIN_BLEND_1WIN), + DEBUGFS_REG32(DC_WIN_BLEND_2WIN_X), + DEBUGFS_REG32(DC_WIN_BLEND_2WIN_Y), + DEBUGFS_REG32(DC_WIN_BLEND_3WIN_XY), + DEBUGFS_REG32(DC_WIN_HP_FETCH_CONTROL), + DEBUGFS_REG32(DC_WINBUF_START_ADDR), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_NS), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_U), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_U_NS), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_V), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_V_NS), + DEBUGFS_REG32(DC_WINBUF_ADDR_H_OFFSET), + DEBUGFS_REG32(DC_WINBUF_ADDR_H_OFFSET_NS), + DEBUGFS_REG32(DC_WINBUF_ADDR_V_OFFSET), + DEBUGFS_REG32(DC_WINBUF_ADDR_V_OFFSET_NS), + DEBUGFS_REG32(DC_WINBUF_UFLOW_STATUS), + DEBUGFS_REG32(DC_WINBUF_AD_UFLOW_STATUS), + DEBUGFS_REG32(DC_WINBUF_BD_UFLOW_STATUS), + DEBUGFS_REG32(DC_WINBUF_CD_UFLOW_STATUS), +}; + +static int tegra_dc_show_regs(struct seq_file *s, void *data) +{ + struct drm_info_node *node = s->private; + struct tegra_dc *dc = node->info_ent->data; + unsigned int i; + int err = 0; + + drm_modeset_lock(&dc->base.mutex, NULL); + + if 
(!dc->base.state->active) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tegra_dc_regs); i++) {
+		unsigned int offset = tegra_dc_regs[i].offset;
+
+		seq_printf(s, "%-40s %#05x %08x\n", tegra_dc_regs[i].name,
+			   offset, tegra_dc_readl(dc, offset));
+	}
+
+unlock:
+	drm_modeset_unlock(&dc->base.mutex);
+	return err;
+}
+
+/*
+ * debugfs "crc" entry: enable CRC generation, wait for two VBLANKs, print
+ * the contents of DC_COM_CRC_CHECKSUM and disable CRC generation again.
+ *
+ * Returns 0 on success or -EBUSY if the CRTC is not active.
+ */
+static int tegra_dc_show_crc(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct tegra_dc *dc = node->info_ent->data;
+	int err = 0;
+	u32 value;
+
+	drm_modeset_lock(&dc->base.mutex, NULL);
+
+	if (!dc->base.state->active) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	value = DC_COM_CRC_CONTROL_ACTIVE_DATA | DC_COM_CRC_CONTROL_ENABLE;
+	tegra_dc_writel(dc, value, DC_COM_CRC_CONTROL);
+	tegra_dc_commit(dc);
+
+	drm_crtc_wait_one_vblank(&dc->base);
+	drm_crtc_wait_one_vblank(&dc->base);
+
+	value = tegra_dc_readl(dc, DC_COM_CRC_CHECKSUM);
+	seq_printf(s, "%08x\n", value);
+
+	tegra_dc_writel(dc, 0, DC_COM_CRC_CONTROL);
+
+unlock:
+	drm_modeset_unlock(&dc->base.mutex);
+	return err;
+}
+
+/*
+ * debugfs "stats" entry: print the counters kept in dc->stats, first the
+ * frames/vblank/underflow/overflow set and then the matching "total" set.
+ *
+ * Always returns 0.
+ */
+static int tegra_dc_show_stats(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct tegra_dc *dc = node->info_ent->data;
+
+	seq_printf(s, "frames: %lu\n", dc->stats.frames);
+	seq_printf(s, "vblank: %lu\n", dc->stats.vblank);
+	seq_printf(s, "underflow: %lu\n", dc->stats.underflow);
+	seq_printf(s, "overflow: %lu\n", dc->stats.overflow);
+
+	seq_printf(s, "frames total: %lu\n", dc->stats.frames_total);
+	seq_printf(s, "vblank total: %lu\n", dc->stats.vblank_total);
+	seq_printf(s, "underflow total: %lu\n", dc->stats.underflow_total);
+	seq_printf(s, "overflow total: %lu\n", dc->stats.overflow_total);
+
+	return 0;
+}
+
+/*
+ * Template for the per-CRTC debugfs entries. It is never written to:
+ * tegra_dc_late_register() duplicates it with kmemdup() and fills in the
+ * .data pointer of the copy, so the template itself can be const.
+ */
+static const struct drm_info_list debugfs_files[] = {
+	{ "regs", tegra_dc_show_regs, 0, NULL },
+	{ "crc", tegra_dc_show_crc, 0, NULL },
+	{ "stats", tegra_dc_show_stats, 0, NULL },
+};
+
+static int tegra_dc_late_register(struct drm_crtc *crtc)
+{
+	unsigned int i, count
= ARRAY_SIZE(debugfs_files);
+	struct drm_minor *minor = crtc->dev->primary;
+	struct dentry *root;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+
+#ifdef CONFIG_DEBUG_FS
+	root = crtc->debugfs_entry;
+#else
+	root = NULL;
+#endif
+
+	dc->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
+				    GFP_KERNEL);
+	if (!dc->debugfs_files)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++)
+		dc->debugfs_files[i].data = dc;
+
+	drm_debugfs_create_files(dc->debugfs_files, count, root, minor);
+
+	return 0;
+}
+
+/*
+ * Undo tegra_dc_late_register(): remove the debugfs entries from the primary
+ * minor and release the per-controller copy of the entry table.
+ */
+static void tegra_dc_early_unregister(struct drm_crtc *crtc)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+
+	drm_debugfs_remove_files(dc->debugfs_files, ARRAY_SIZE(debugfs_files),
+				 crtc->dev->primary);
+	kfree(dc->debugfs_files);
+	dc->debugfs_files = NULL;
+}
+
+/*
+ * Return the current VBLANK count, taken from the syncpoint when one is
+ * usable and from the DRM core's software counter otherwise.
+ */
+static u32 tegra_dc_get_vblank_counter(struct drm_crtc *crtc)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+
+	/*
+	 * fallback to software emulated VBLANK counter when there is no
+	 * syncpoint (XXX vblank syncpoints don't work with nvdisplay yet)
+	 */
+	if (!dc->syncpt || dc->soc->has_nvdisplay)
+		return (u32)drm_crtc_vblank_count(&dc->base);
+
+	return host1x_syncpt_read(dc->syncpt);
+}
+
+/* Set VBLANK_INT in DC_CMD_INT_MASK; always returns 0. */
+static int tegra_dc_enable_vblank(struct drm_crtc *crtc)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	u32 mask = tegra_dc_readl(dc, DC_CMD_INT_MASK);
+
+	tegra_dc_writel(dc, mask | VBLANK_INT, DC_CMD_INT_MASK);
+
+	return 0;
+}
+
+/* Clear VBLANK_INT in DC_CMD_INT_MASK. */
+static void tegra_dc_disable_vblank(struct drm_crtc *crtc)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	u32 mask = tegra_dc_readl(dc, DC_CMD_INT_MASK);
+
+	tegra_dc_writel(dc, mask & ~VBLANK_INT, DC_CMD_INT_MASK);
+}
+
+static const struct drm_crtc_funcs tegra_crtc_funcs = {
+	.page_flip = drm_atomic_helper_page_flip,
+	.set_config = drm_atomic_helper_set_config,
+	.destroy = tegra_dc_destroy,
+	.reset = tegra_crtc_reset,
+	.atomic_duplicate_state = tegra_crtc_atomic_duplicate_state,
+	.atomic_destroy_state
= tegra_crtc_atomic_destroy_state,
+	.late_register = tegra_dc_late_register,
+	.early_unregister = tegra_dc_early_unregister,
+	.get_vblank_counter = tegra_dc_get_vblank_counter,
+	.enable_vblank = tegra_dc_enable_vblank,
+	.disable_vblank = tegra_dc_disable_vblank,
+};
+
+/*
+ * Program the sync width, back porch, front porch and active size of @mode
+ * into the display controller. Every register takes the vertical value
+ * shifted left by 16 and the horizontal value in the low bits. The
+ * ref-to-sync setup is skipped on nvdisplay.
+ *
+ * Always returns 0.
+ */
+static int tegra_dc_set_timings(struct tegra_dc *dc,
+				struct drm_display_mode *mode)
+{
+	const unsigned int h_ref_to_sync = 1, v_ref_to_sync = 1;
+	unsigned long timing;
+
+	if (!dc->soc->has_nvdisplay) {
+		tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
+
+		timing = (v_ref_to_sync << 16) | h_ref_to_sync;
+		tegra_dc_writel(dc, timing, DC_DISP_REF_TO_SYNC);
+	}
+
+	/* sync pulse: from sync start to sync end */
+	timing = ((mode->vsync_end - mode->vsync_start) << 16) |
+		 (mode->hsync_end - mode->hsync_start);
+	tegra_dc_writel(dc, timing, DC_DISP_SYNC_WIDTH);
+
+	/* back porch: from sync end to the end of the line/frame */
+	timing = ((mode->vtotal - mode->vsync_end) << 16) |
+		 (mode->htotal - mode->hsync_end);
+	tegra_dc_writel(dc, timing, DC_DISP_BACK_PORCH);
+
+	/* front porch: from the end of the active area to sync start */
+	timing = ((mode->vsync_start - mode->vdisplay) << 16) |
+		 (mode->hsync_start - mode->hdisplay);
+	tegra_dc_writel(dc, timing, DC_DISP_FRONT_PORCH);
+
+	timing = (mode->vdisplay << 16) | mode->hdisplay;
+	tegra_dc_writel(dc, timing, DC_DISP_ACTIVE);
+
+	return 0;
+}
+
+/**
+ * tegra_dc_state_setup_clock - check clock settings and store them in atomic
+ * state
+ * @dc: display controller
+ * @crtc_state: CRTC atomic state
+ * @clk: parent clock for display controller
+ * @pclk: pixel clock
+ * @div: shift clock divider
+ *
+ * Returns:
+ * 0 on success or a negative error-code on failure.
+ */
+int tegra_dc_state_setup_clock(struct tegra_dc *dc,
+			       struct drm_crtc_state *crtc_state,
+			       struct clk *clk, unsigned long pclk,
+			       unsigned int div)
+{
+	struct tegra_dc_state *dc_state;
+
+	/* -EINVAL unless clk_has_parent() accepts @clk for the DC clock */
+	if (!clk_has_parent(dc->clk, clk))
+		return -EINVAL;
+
+	dc_state = to_dc_state(crtc_state);
+	dc_state->clk = clk;
+	dc_state->pclk = pclk;
+	dc_state->div = div;
+
+	return 0;
+}
+
+/*
+ * Pick the OPP matching the effective pixel clock and apply its required
+ * performance state to the power domain of the display controller. Does
+ * nothing when the controller has no OPP table; failures are only logged.
+ */
+static void tegra_dc_update_voltage_state(struct tegra_dc *dc,
+					  struct tegra_dc_state *state)
+{
+	unsigned long pixel_rate, perf_state;
+	struct dev_pm_opp *opp;
+	int ret;
+
+	if (!dc->has_opp_table)
+		return;
+
+	/* calculate actual pixel clock rate which depends on internal divider */
+	pixel_rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2);
+
+	/* find suitable OPP for the rate, retrying with the floor on -ERANGE */
+	opp = dev_pm_opp_find_freq_ceil(dc->dev, &pixel_rate);
+	if (opp == ERR_PTR(-ERANGE))
+		opp = dev_pm_opp_find_freq_floor(dc->dev, &pixel_rate);
+
+	if (IS_ERR(opp)) {
+		dev_err(dc->dev, "failed to find OPP for %luHz: %pe\n",
+			pixel_rate, opp);
+		return;
+	}
+
+	perf_state = dev_pm_opp_get_required_pstate(opp, 0);
+	dev_pm_opp_put(opp);
+
+	/*
+	 * The minimum core voltage depends on the pixel clock rate (which
+	 * depends on internal clock divider of the CRTC) and not on the
+	 * rate of the display controller clock. This is why we're not using
+	 * dev_pm_opp_set_rate() API and instead controlling the power domain
+	 * directly.
+	 */
+	ret = dev_pm_genpd_set_performance_state(dc->dev, perf_state);
+	if (!ret)
+		return;
+
+	dev_err(dc->dev, "failed to set power domain state to %lu: %d\n",
+		perf_state, ret);
+}
+
+static void tegra_dc_commit_state(struct tegra_dc *dc,
+				  struct tegra_dc_state *state)
+{
+	u32 value;
+	int err;
+
+	err = clk_set_parent(dc->clk, state->clk);
+	if (err < 0)
+		dev_err(dc->dev, "failed to set parent clock: %d\n", err);
+
+	/*
+	 * Outputs may not want to change the parent clock rate. This is only
+	 * relevant to Tegra20 where only a single display PLL is available.
+	 * Since that PLL would typically be used for HDMI, an internal LVDS
+	 * panel would need to be driven by some other clock such as PLL_P
+	 * which is shared with other peripherals. Changing the clock rate
+	 * should therefore be avoided.
+	 */
+	if (state->pclk > 0) {
+		err = clk_set_rate(state->clk, state->pclk);
+		if (err < 0)
+			dev_err(dc->dev,
+				"failed to set clock rate to %lu Hz\n",
+				state->pclk);
+
+		err = clk_set_rate(dc->clk, state->pclk);
+		if (err < 0)
+			dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n",
+				dc->clk, state->pclk, err);
+	}
+
+	DRM_DEBUG_KMS("rate: %lu, div: %u\n", clk_get_rate(dc->clk),
+		      state->div);
+	DRM_DEBUG_KMS("pclk: %lu\n", state->pclk);
+
+	if (!dc->soc->has_nvdisplay) {
+		value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1;
+		tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
+	}
+
+	tegra_dc_update_voltage_state(dc, state);
+}
+
+/*
+ * Clear the display mode field of DC_CMD_DISPLAY_COMMAND and commit the
+ * change.
+ */
+static void tegra_dc_stop(struct tegra_dc *dc)
+{
+	u32 value;
+
+	/* stop the display controller */
+	value = tegra_dc_readl(dc, DC_CMD_DISPLAY_COMMAND);
+	value &= ~DISP_CTRL_MODE_MASK;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND);
+
+	tegra_dc_commit(dc);
+}
+
+/*
+ * Returns true when the display mode field read back through
+ * tegra_dc_readl_active() is zero.
+ */
+static bool tegra_dc_idle(struct tegra_dc *dc)
+{
+	u32 value;
+
+	value = tegra_dc_readl_active(dc, DC_CMD_DISPLAY_COMMAND);
+
+	return (value & DISP_CTRL_MODE_MASK) == 0;
+}
+
+/*
+ * Poll tegra_dc_idle() every 1-2 ms for up to @timeout milliseconds.
+ *
+ * Returns 0 once the controller is idle or -ETIMEDOUT otherwise.
+ */
+static int tegra_dc_wait_idle(struct tegra_dc *dc, unsigned long timeout)
+{
+	timeout = jiffies + msecs_to_jiffies(timeout);
+
+	while (time_before(jiffies, timeout)) {
+		if (tegra_dc_idle(dc))
+			return 0;
+
+		usleep_range(1000, 2000);
+	}
+
+	/*
+	 * The last sleep may have overrun the deadline (or the timeout may
+	 * have been too short to enter the loop at all), so look at the
+	 * hardware one final time before declaring a timeout.
+	 */
+	if (tegra_dc_idle(dc))
+		return 0;
+
+	dev_dbg(dc->dev, "timeout waiting for DC to become idle\n");
+	return -ETIMEDOUT;
+}
+
+static void
+tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state,
+				   bool prepare_bandwidth_transition)
+{
+	const struct tegra_plane_state *old_tegra_state, *new_tegra_state;
+	const struct tegra_dc_state *old_dc_state, *new_dc_state;
+	u32 i, new_avg_bw, old_avg_bw,
new_peak_bw, old_peak_bw; + const struct drm_plane_state *old_plane_state; + const struct drm_crtc_state *old_crtc_state; + struct tegra_dc_window window, old_window; + struct tegra_dc *dc = to_tegra_dc(crtc); + struct tegra_plane *tegra; + struct drm_plane *plane; + + if (dc->soc->has_nvdisplay) + return; + + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); + old_dc_state = to_const_dc_state(old_crtc_state); + new_dc_state = to_const_dc_state(crtc->state); + + if (!crtc->state->active) { + if (!old_crtc_state->active) + return; + + /* + * When CRTC is disabled on DPMS, the state of attached planes + * is kept unchanged. Hence we need to enforce removal of the + * bandwidths from the ICC paths. + */ + drm_atomic_crtc_for_each_plane(plane, crtc) { + tegra = to_tegra_plane(plane); + + icc_set_bw(tegra->icc_mem, 0, 0); + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); + } + + return; + } + + for_each_old_plane_in_state(old_crtc_state->state, plane, + old_plane_state, i) { + old_tegra_state = to_const_tegra_plane_state(old_plane_state); + new_tegra_state = to_const_tegra_plane_state(plane->state); + tegra = to_tegra_plane(plane); + + /* + * We're iterating over the global atomic state and it contains + * planes from another CRTC, hence we need to filter out the + * planes unrelated to this CRTC. + */ + if (tegra->dc != dc) + continue; + + new_avg_bw = new_tegra_state->avg_memory_bandwidth; + old_avg_bw = old_tegra_state->avg_memory_bandwidth; + + new_peak_bw = new_tegra_state->total_peak_memory_bandwidth; + old_peak_bw = old_tegra_state->total_peak_memory_bandwidth; + + /* + * See the comment related to !crtc->state->active above, + * which explains why bandwidths need to be updated when + * CRTC is turning ON. 
+ */ + if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw && + old_crtc_state->active) + continue; + + window.src.h = drm_rect_height(&plane->state->src) >> 16; + window.dst.h = drm_rect_height(&plane->state->dst); + + old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16; + old_window.dst.h = drm_rect_height(&old_plane_state->dst); + + /* + * During the preparation phase (atomic_begin), the memory + * freq should go high before the DC changes are committed + * if bandwidth requirement goes up, otherwise memory freq + * should to stay high if BW requirement goes down. The + * opposite applies to the completion phase (post_commit). + */ + if (prepare_bandwidth_transition) { + new_avg_bw = max(old_avg_bw, new_avg_bw); + new_peak_bw = max(old_peak_bw, new_peak_bw); + + if (tegra_plane_use_vertical_filtering(tegra, &old_window)) + window = old_window; + } + + icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw); + + if (tegra_plane_use_vertical_filtering(tegra, &window)) + icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw); + else + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); + } +} + +static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct tegra_dc *dc = to_tegra_dc(crtc); + u32 value; + int err; + + if (!tegra_dc_idle(dc)) { + tegra_dc_stop(dc); + + /* + * Ignore the return value, there isn't anything useful to do + * in case this fails. + */ + tegra_dc_wait_idle(dc, 100); + } + + /* + * This should really be part of the RGB encoder driver, but clearing + * these bits has the side-effect of stopping the display controller. + * When that happens no VBLANK interrupts will be raised. At the same + * time the encoder is disabled before the display controller, so the + * above code is always going to timeout waiting for the controller + * to go idle. + * + * Given the close coupling between the RGB encoder and the display + * controller doing it here is still kind of okay. 
None of the other
+	 * encoder drivers require these bits to be cleared.
+	 *
+	 * XXX: Perhaps given that the display controller is switched off at
+	 * this point anyway maybe clearing these bits isn't even useful for
+	 * the RGB encoder?
+	 */
+	if (dc->rgb) {
+		value = tegra_dc_readl(dc, DC_CMD_DISPLAY_POWER_CONTROL);
+		value &= ~(PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
+			   PW4_ENABLE | PM0_ENABLE | PM1_ENABLE);
+		tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
+	}
+
+	tegra_dc_stats_reset(&dc->stats);
+	drm_crtc_vblank_off(crtc);
+
+	spin_lock_irq(&crtc->dev->event_lock);
+
+	if (crtc->state->event) {
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+	}
+
+	spin_unlock_irq(&crtc->dev->event_lock);
+
+	err = host1x_client_suspend(&dc->client);
+	if (err < 0)
+		dev_err(dc->dev, "failed to suspend: %d\n", err);
+
+	dev_pm_genpd_set_performance_state(dc->dev, 0);
+}
+
+/*
+ * Resume the host1x client and program the display controller for the
+ * adjusted mode of the current CRTC state. Returns early, leaving the
+ * controller untouched, if the client cannot be resumed.
+ */
+static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
+				     struct drm_atomic_state *state)
+{
+	struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+	struct tegra_dc_state *crtc_state = to_dc_state(crtc->state);
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	u32 value;
+	int err;
+
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
+		return;
+	}
+
+	/* initialize display controller */
+	if (dc->syncpt) {
+		u32 syncpt = dc->syncpt->id, enable;
+
+		/*
+		 * Use unsigned shifts: "1 << 31" moves a bit into the sign
+		 * bit of a signed int, which is undefined behaviour in C.
+		 */
+		if (dc->soc->has_nvdisplay)
+			enable = 1U << 31;
+		else
+			enable = 1U << 8;
+
+		value = SYNCPT_CNTRL_NO_STALL;
+		tegra_dc_writel(dc, value, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
+
+		value = enable | syncpt;
+		tegra_dc_writel(dc, value, DC_CMD_CONT_SYNCPT_VSYNC);
+	}
+
+	if (dc->soc->has_nvdisplay) {
+		value = DSC_TO_UF_INT | DSC_BBUF_UF_INT | DSC_RBUF_UF_INT |
+			DSC_OBUF_UF_INT;
+		tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
+
+		value = DSC_TO_UF_INT | DSC_BBUF_UF_INT | DSC_RBUF_UF_INT |
+			DSC_OBUF_UF_INT | SD3_BUCKET_WALK_DONE_INT |
HEAD_UF_INT | MSF_INT | REG_TMOUT_INT | + REGION_CRC_INT | V_PULSE2_INT | V_PULSE3_INT | + VBLANK_INT | FRAME_END_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY); + + value = SD3_BUCKET_WALK_DONE_INT | HEAD_UF_INT | VBLANK_INT | + FRAME_END_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE); + + value = HEAD_UF_INT | REG_TMOUT_INT | FRAME_END_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_MASK); + + tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS); + } else { + value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | + WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_TYPE); + + value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | + WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY); + + /* initialize timer */ + value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) | + WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20); + tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY); + + value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) | + WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1); + tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER); + + value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | + WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE); + + value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | + WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_MASK); + } + + if (dc->soc->supports_background_color) + tegra_dc_writel(dc, 0, DC_DISP_BLEND_BACKGROUND_COLOR); + else + tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR); + + /* apply PLL and pixel clock changes */ + tegra_dc_commit_state(dc, crtc_state); + + /* program display mode */ + tegra_dc_set_timings(dc, mode); + + /* interlacing isn't supported yet, so disable it */ + if (dc->soc->supports_interlacing) { + value = tegra_dc_readl(dc, DC_DISP_INTERLACE_CONTROL); + value &= ~INTERLACE_ENABLE; + tegra_dc_writel(dc, value, 
DC_DISP_INTERLACE_CONTROL); + } + + value = tegra_dc_readl(dc, DC_CMD_DISPLAY_COMMAND); + value &= ~DISP_CTRL_MODE_MASK; + value |= DISP_CTRL_MODE_C_DISPLAY; + tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND); + + if (!dc->soc->has_nvdisplay) { + value = tegra_dc_readl(dc, DC_CMD_DISPLAY_POWER_CONTROL); + value |= PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE | + PW4_ENABLE | PM0_ENABLE | PM1_ENABLE; + tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL); + } + + /* enable underflow reporting and display red for missing pixels */ + if (dc->soc->has_nvdisplay) { + value = UNDERFLOW_MODE_RED | UNDERFLOW_REPORT_ENABLE; + tegra_dc_writel(dc, value, DC_COM_RG_UNDERFLOW); + } + + /* + * TC358768 DPI to DSI bridge, used on Asus TF700T, requires to have + * a usable PCLK output before encoder is enabled because bridge is + * clocked by PCLK and bridge is programmed before encoder is enabled. + * Hence the PCLK clock shifter must be programmed here, otherwise + * output clock is not usable and bridge hangs because of it. + */ + if (dc->rgb) { + /* XXX: parameterize? 
*/
+		value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE;
+		tegra_dc_writel(dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS);
+	}
+
+	tegra_dc_commit(dc);
+
+	drm_crtc_vblank_on(crtc);
+}
+
+/*
+ * Raise the memory bandwidth for the upcoming update and hand off the
+ * pending VBLANK event of the CRTC state, if there is one: it is sent
+ * immediately when no VBLANK reference can be taken and armed otherwise.
+ */
+static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
+				    struct drm_atomic_state *state)
+{
+	unsigned long irqflags;
+
+	tegra_crtc_update_memory_bandwidth(crtc, state, true);
+
+	if (!crtc->state->event)
+		return;
+
+	spin_lock_irqsave(&crtc->dev->event_lock, irqflags);
+
+	if (drm_crtc_vblank_get(crtc) == 0)
+		drm_crtc_arm_vblank_event(crtc, crtc->state->event);
+	else
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+
+	spin_unlock_irqrestore(&crtc->dev->event_lock, irqflags);
+
+	crtc->state->event = NULL;
+}
+
+/*
+ * Write the colour key range (legacy blending only) and then request the
+ * update and activation of the planes recorded in the new CRTC state. Each
+ * DC_CMD_STATE_CONTROL write is followed by a read of the same register.
+ */
+static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
+				    struct drm_atomic_state *state)
+{
+	struct tegra_dc_state *dc_state =
+		to_dc_state(drm_atomic_get_new_crtc_state(state, crtc));
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+
+	if (dc->soc->has_legacy_blending) {
+		tegra_dc_writel(dc, dc_state->ckey.min, DC_DISP_COLOR_KEY0_LOWER);
+		tegra_dc_writel(dc, dc_state->ckey.max, DC_DISP_COLOR_KEY0_UPPER);
+	}
+
+	tegra_dc_writel(dc, dc_state->planes << 8 | GENERAL_UPDATE,
+			DC_CMD_STATE_CONTROL);
+	tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+
+	tegra_dc_writel(dc, dc_state->planes | GENERAL_ACT_REQ,
+			DC_CMD_STATE_CONTROL);
+	tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+}
+
+static bool tegra_plane_is_cursor(const struct drm_plane_state *state)
+{
+	const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc;
+	const struct drm_format_info *fmt = state->fb->format;
+	unsigned int src_w = drm_rect_width(&state->src) >> 16;
+	unsigned int dst_w = drm_rect_width(&state->dst);
+
+	if (state->plane->type != DRM_PLANE_TYPE_CURSOR)
+		return false;
+
+	if (soc->supports_cursor)
+		return true;
+
+	if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256)
return false;
+
+	return true;
+}
+
+/*
+ * Build a bitmask of the indices of all planes in @state whose destination
+ * rectangle intersects the one of @plane_state. Invisible planes, planes
+ * without a framebuffer and cursor planes never contribute, and the result
+ * is 0 when @plane_state itself is one of those.
+ */
+static unsigned long
+tegra_plane_overlap_mask(struct drm_crtc_state *state,
+			 const struct drm_plane_state *plane_state)
+{
+	const struct drm_plane_state *other_state;
+	unsigned long mask = 0;
+	struct drm_plane *plane;
+
+	if (!plane_state->visible || !plane_state->fb)
+		return 0;
+
+	/*
+	 * Data-prefetch FIFO will easily help to overcome temporal memory
+	 * pressure if other plane overlaps with the cursor plane.
+	 */
+	if (tegra_plane_is_cursor(plane_state))
+		return 0;
+
+	drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) {
+		struct drm_rect rect;
+
+		if (!other_state->visible || !other_state->fb)
+			continue;
+
+		/*
+		 * Ignore cursor plane overlaps because it's not practical to
+		 * assume that it contributes to the bandwidth in overlapping
+		 * area if window width is small.
+		 */
+		if (tegra_plane_is_cursor(other_state))
+			continue;
+
+		/* intersect on a scratch copy, taken afresh for every plane */
+		rect = plane_state->dst;
+		if (drm_rect_intersect(&rect, &other_state->dst))
+			mask |= BIT(to_tegra_plane(other_state->plane)->index);
+	}
+
+	return mask;
+}
+
+static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc,
+						 struct drm_atomic_state *state)
+{
+	ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask;
+	u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {};
+	bool all_planes_overlap_simultaneously = true;
+	const struct tegra_plane_state *tegra_state;
+	const struct drm_plane_state *plane_state;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	const struct drm_crtc_state *old_state;
+	struct drm_crtc_state *new_state;
+	struct tegra_plane *tegra;
+	struct drm_plane *plane;
+
+	/*
+	 * The nv-display uses shared planes. The algorithm below assumes
+	 * maximum 3 planes per-CRTC, this assumption isn't applicable to
+	 * the nv-display. Note that T124 support has additional windows,
+	 * but currently they aren't supported by the driver.
+ */ + if (dc->soc->has_nvdisplay) + return 0; + + new_state = drm_atomic_get_new_crtc_state(state, crtc); + old_state = drm_atomic_get_old_crtc_state(state, crtc); + + /* + * For overlapping planes pixel's data is fetched for each plane at + * the same time, hence bandwidths are accumulated in this case. + * This needs to be taken into account for calculating total bandwidth + * consumed by all planes. + * + * Here we get the overlapping state of each plane, which is a + * bitmask of plane indices telling with what planes there is an + * overlap. Note that bitmask[plane] includes BIT(plane) in order + * to make further code nicer and simpler. + */ + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) { + tegra_state = to_const_tegra_plane_state(plane_state); + tegra = to_tegra_plane(plane); + + if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM)) + return -EINVAL; + + plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth; + mask = tegra_plane_overlap_mask(new_state, plane_state); + overlap_mask[tegra->index] = mask; + + if (hweight_long(mask) != 3) + all_planes_overlap_simultaneously = false; + } + + /* + * Then we calculate maximum bandwidth of each plane state. + * The bandwidth includes the plane BW + BW of the "simultaneously" + * overlapping planes, where "simultaneously" means areas where DC + * fetches from the planes simultaneously during of scan-out process. + * + * For example, if plane A overlaps with planes B and C, but B and C + * don't overlap, then the peak bandwidth will be either in area where + * A-and-B or A-and-C planes overlap. + * + * The plane_peak_bw[] contains peak memory bandwidth values of + * each plane, this information is needed by interconnect provider + * in order to set up latency allowance based on the peak BW, see + * tegra_crtc_update_memory_bandwidth(). 
+	 */
+	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+		u32 i, old_peak_bw, new_peak_bw, overlap_bw = 0;
+
+		/*
+		 * Note that plane's atomic check doesn't touch the
+		 * total_peak_memory_bandwidth of enabled plane, hence the
+		 * current state contains the old bandwidth state from the
+		 * previous CRTC commit.
+		 */
+		tegra_state = to_const_tegra_plane_state(plane_state);
+		tegra = to_tegra_plane(plane);
+
+		for_each_set_bit(i, &overlap_mask[tegra->index], 3) {
+			if (i == tegra->index)
+				continue;
+
+			if (all_planes_overlap_simultaneously)
+				overlap_bw += plane_peak_bw[i];
+			else
+				overlap_bw = max(overlap_bw, plane_peak_bw[i]);
+		}
+
+		new_peak_bw = plane_peak_bw[tegra->index] + overlap_bw;
+		old_peak_bw = tegra_state->total_peak_memory_bandwidth;
+
+		/*
+		 * If plane's peak bandwidth changed (for example plane isn't
+		 * overlapped anymore) and plane isn't in the atomic state,
+		 * then add plane to the state in order to have the bandwidth
+		 * updated.
+		 */
+		if (old_peak_bw != new_peak_bw) {
+			struct tegra_plane_state *new_tegra_state;
+			struct drm_plane_state *new_plane_state;
+
+			new_plane_state = drm_atomic_get_plane_state(state, plane);
+			if (IS_ERR(new_plane_state))
+				return PTR_ERR(new_plane_state);
+
+			new_tegra_state = to_tegra_plane_state(new_plane_state);
+			new_tegra_state->total_peak_memory_bandwidth = new_peak_bw;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * CRTC atomic check: the only check performed is the memory bandwidth
+ * calculation, whose result (0 or a negative error code) is passed on.
+ */
+static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state)
+{
+	return tegra_crtc_calculate_memory_bandwidth(crtc, state);
+}
+
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state)
+{
+	/*
+	 * Display bandwidth is allowed to go down only once hardware state
+	 * is known to be armed, i.e. state was committed and VBLANK event
+	 * received.
+	 */
+	tegra_crtc_update_memory_bandwidth(crtc, state, false);
+}
+
+static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
+	.atomic_check = tegra_crtc_atomic_check,
+	.atomic_begin = tegra_crtc_atomic_begin,
+	.atomic_flush = tegra_crtc_atomic_flush,
+	.atomic_enable = tegra_crtc_atomic_enable,
+	.atomic_disable = tegra_crtc_atomic_disable,
+};
+
+/*
+ * Interrupt handler of the display controller. The pending status bits are
+ * read and written back to DC_CMD_INT_STATUS, then the statistics counters
+ * are updated and VBLANKs are forwarded to the DRM core.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t tegra_dc_irq(int irq, void *data)
+{
+	const unsigned long win_underflow = WIN_A_UF_INT | WIN_B_UF_INT |
+					    WIN_C_UF_INT;
+	const unsigned long win_overflow = WIN_A_OF_INT | WIN_B_OF_INT |
+					   WIN_C_OF_INT;
+	struct tegra_dc *dc = data;
+	unsigned long pending;
+
+	pending = tegra_dc_readl(dc, DC_CMD_INT_STATUS);
+	tegra_dc_writel(dc, pending, DC_CMD_INT_STATUS);
+
+	if (pending & FRAME_END_INT) {
+		dc->stats.frames_total++;
+		dc->stats.frames++;
+	}
+
+	if (pending & VBLANK_INT) {
+		drm_crtc_handle_vblank(&dc->base);
+		dc->stats.vblank_total++;
+		dc->stats.vblank++;
+	}
+
+	if (pending & win_underflow) {
+		dc->stats.underflow_total++;
+		dc->stats.underflow++;
+	}
+
+	if (pending & win_overflow) {
+		dc->stats.overflow_total++;
+		dc->stats.overflow++;
+	}
+
+	/* a head underflow is counted in addition to any window underflow */
+	if (pending & HEAD_UF_INT) {
+		dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__);
+		dc->stats.underflow_total++;
+		dc->stats.underflow++;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Returns true if the SoC has no window group table at all, or if at least
+ * one window group with a non-zero window count is assigned to this
+ * controller's pipe.
+ */
+static bool tegra_dc_has_window_groups(struct tegra_dc *dc)
+{
+	const struct tegra_windowgroup_soc *wgrp = dc->soc->wgrps;
+	unsigned int remaining;
+
+	if (!wgrp)
+		return true;
+
+	for (remaining = dc->soc->num_wgrps; remaining > 0; remaining--, wgrp++) {
+		if (wgrp->dc == dc->pipe && wgrp->num_windows > 0)
+			return true;
+	}
+
+	return false;
+}
+
+static int tegra_dc_early_init(struct host1x_client *client)
+{
+	struct drm_device *drm = dev_get_drvdata(client->host);
+	struct tegra_drm *tegra = drm->dev_private;
+
+	tegra->num_crtcs++;
+
+ return 0; +} + +static int tegra_dc_init(struct host1x_client *client) +{ + struct tegra_drm_client *drm_client = to_tegra_drm_client(client); + struct drm_device *drm = dev_get_drvdata(client->host); + struct host1x *host = dev_get_drvdata(drm->dev->parent); + struct tegra_dc *dc = host1x_client_to_dc(client); + struct tegra_drm *tegra = drm->dev_private; + struct drm_plane *primary = NULL; + struct drm_plane *cursor = NULL; + int err; + + /* + * XXX do not register DCs with no window groups because we cannot + * assign a primary plane to them, which in turn will cause KMS to + * crash. + */ + if (!tegra_dc_has_window_groups(dc)) + return 0; + + /* + * Set the display hub as the host1x client parent for the display + * controller. This is needed for the runtime reference counting that + * ensures the display hub is always powered when any of the display + * controllers are. + */ + if (dc->soc->has_nvdisplay) + client->parent = &tegra->hub->client; + + dc->syncpt = host1x_syncpt_request(host); + if (IS_ERR(dc->syncpt)) { + dev_warn(dc->dev, "failed to allocate syncpoint %ld\n", + PTR_ERR(dc->syncpt)); + dc->syncpt = NULL; + } else { + host1x_syncpt_associate_device(dc->syncpt, dc->dev); + host1x_syncpt_reset(dc->syncpt, 0); + } + + dc->group = tegra_drm_client_iommu_attach(drm_client, true); + if (IS_ERR(dc->group)) { + err = PTR_ERR(dc->group); + dev_err(client->dev, "failed to attach to domain: %d\n", err); + return err; + } + + if (dc->soc->wgrps) + primary = tegra_dc_add_shared_planes(drm, dc); + else + primary = tegra_dc_add_planes(drm, dc); + + if (IS_ERR(primary)) { + err = PTR_ERR(primary); + goto cleanup; + } + + if (dc->soc->supports_cursor) { + cursor = tegra_dc_cursor_plane_create(drm, dc); + if (IS_ERR(cursor)) { + err = PTR_ERR(cursor); + goto cleanup; + } + } else { + /* dedicate one overlay to mouse cursor */ + cursor = tegra_dc_overlay_plane_create(drm, dc, 2, true); + if (IS_ERR(cursor)) { + err = PTR_ERR(cursor); + goto cleanup; + } + } + + err 
= drm_crtc_init_with_planes(drm, &dc->base, primary, cursor, + &tegra_crtc_funcs, NULL); + if (err < 0) + goto cleanup; + + drm_crtc_helper_add(&dc->base, &tegra_crtc_helper_funcs); + + /* + * Keep track of the minimum pitch alignment across all display + * controllers. + */ + if (dc->soc->pitch_align > tegra->pitch_align) + tegra->pitch_align = dc->soc->pitch_align; + + /* track maximum resolution */ + if (dc->soc->has_nvdisplay) + drm->mode_config.max_width = drm->mode_config.max_height = 16384; + else + drm->mode_config.max_width = drm->mode_config.max_height = 4096; + + err = tegra_dc_rgb_init(drm, dc); + if (err < 0 && err != -ENODEV) { + dev_err(dc->dev, "failed to initialize RGB output: %d\n", err); + goto cleanup; + } + + err = devm_request_irq(dc->dev, dc->irq, tegra_dc_irq, 0, + dev_name(dc->dev), dc); + if (err < 0) { + dev_err(dc->dev, "failed to request IRQ#%u: %d\n", dc->irq, + err); + goto cleanup; + } + + /* + * Inherit the DMA parameters (such as maximum segment size) from the + * parent host1x device. 
+	 */
+	client->dev->dma_parms = client->host->dma_parms;
+
+	return 0;
+
+cleanup:
+	if (!IS_ERR_OR_NULL(cursor))
+		drm_plane_cleanup(cursor);
+
+	if (!IS_ERR(primary))
+		drm_plane_cleanup(primary);
+
+	tegra_drm_client_iommu_detach(drm_client, dc->group, true);
+	host1x_syncpt_put(dc->syncpt);
+
+	return err;
+}
+/*
+ * host1x client ->exit() callback (see dc_client_ops).
+ *
+ * Does nothing and returns 0 when tegra_dc_has_window_groups() reports no
+ * window groups for this controller. Otherwise it clears the device's
+ * dma_parms pointer, frees the IRQ, shuts down the RGB output, detaches the
+ * client from the IOMMU and drops the syncpoint.
+ *
+ * Returns 0 on success. If tegra_dc_rgb_exit() fails its error is logged and
+ * returned right away, so the IOMMU detach and syncpoint put are not reached.
+ */
+static int tegra_dc_exit(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm_client = to_tegra_drm_client(client);
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	int err;
+
+	if (!tegra_dc_has_window_groups(dc))
+		return 0;
+
+	/* avoid a dangling pointer just in case this disappears */
+	client->dev->dma_parms = NULL;
+
+	devm_free_irq(dc->dev, dc->irq, dc);
+
+	err = tegra_dc_rgb_exit(dc);
+	if (err) {
+		dev_err(dc->dev, "failed to shutdown RGB output: %d\n", err);
+		return err;
+	}
+
+	tegra_drm_client_iommu_detach(drm_client, dc->group, true);
+	host1x_syncpt_put(dc->syncpt);
+
+	return 0;
+}
+/*
+ * host1x client ->late_exit() callback (see dc_client_ops): decrements the
+ * num_crtcs counter of the tegra_drm instance. Always returns 0.
+ */
+static int tegra_dc_late_exit(struct host1x_client *client)
+{
+	struct drm_device *drm = dev_get_drvdata(client->host);
+	struct tegra_drm *tegra = drm->dev_private;
+
+	tegra->num_crtcs--;
+
+	return 0;
+}
+/*
+ * host1x client ->suspend() callback (see dc_client_ops).
+ *
+ * Asserts the controller reset, powers off the power partition on SoCs with
+ * has_powergate set, disables the clock and drops the runtime PM reference.
+ *
+ * Returns 0 on success. If asserting the reset fails, the error is logged and
+ * returned before any of the remaining steps run.
+ */
+static int tegra_dc_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = reset_control_assert(dc->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
+
+	if (dc->soc->has_powergate)
+		tegra_powergate_power_off(dc->powergate);
+
+	clk_disable_unprepare(dc->clk);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+/*
+ * host1x client ->resume() callback (see dc_client_ops).
+ *
+ * Takes a runtime PM reference first. SoCs with has_powergate set then run the
+ * powergate power-up sequence; all others enable the clock and deassert the
+ * reset. On failure the steps already taken are undone and the error returned.
+ */
+static int tegra_dc_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	if (dc->soc->has_powergate) {
+		err =
tegra_powergate_sequence_power_up(dc->powergate, dc->clk, + dc->rst); + if (err < 0) { + dev_err(dev, "failed to power partition: %d\n", err); + goto put_rpm; + } + } else { + err = clk_prepare_enable(dc->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto put_rpm; + } + + err = reset_control_deassert(dc->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + } + + return 0; + +disable_clk: + clk_disable_unprepare(dc->clk); +put_rpm: + pm_runtime_put_sync(dev); + return err; +} + +static const struct host1x_client_ops dc_client_ops = { + .early_init = tegra_dc_early_init, + .init = tegra_dc_init, + .exit = tegra_dc_exit, + .late_exit = tegra_dc_late_exit, + .suspend = tegra_dc_runtime_suspend, + .resume = tegra_dc_runtime_resume, +}; + +static const struct tegra_dc_soc_info tegra20_dc_soc_info = { + .has_win_a_csc = false, + .supports_background_color = false, + .supports_interlacing = false, + .supports_cursor = false, + .supports_block_linear = false, + .supports_sector_layout = false, + .has_legacy_blending = true, + .pitch_align = 8, + .has_powergate = false, + .coupled_pm = true, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra20_primary_formats), + .primary_formats = tegra20_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats), + .overlay_formats = tegra20_overlay_formats, + .modifiers = tegra20_modifiers, + .has_win_a_without_filters = true, + .has_win_b_vfilter_mem_client = true, + .has_win_c_without_vert_filter = true, + .plane_tiled_memory_bandwidth_x2 = false, +}; + +static const struct tegra_dc_soc_info tegra30_dc_soc_info = { + .has_win_a_csc = false, + .supports_background_color = false, + .supports_interlacing = false, + .supports_cursor = false, + .supports_block_linear = false, + .supports_sector_layout = false, + .has_legacy_blending = true, + .pitch_align = 8, + .has_powergate = false, + .coupled_pm = false, + .has_nvdisplay 
= false, + .num_primary_formats = ARRAY_SIZE(tegra20_primary_formats), + .primary_formats = tegra20_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats), + .overlay_formats = tegra20_overlay_formats, + .modifiers = tegra20_modifiers, + .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = true, + .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = true, +}; + +static const struct tegra_dc_soc_info tegra114_dc_soc_info = { + .has_win_a_csc = true, + .supports_background_color = false, + .supports_interlacing = false, + .supports_cursor = false, + .supports_block_linear = false, + .supports_sector_layout = false, + .has_legacy_blending = true, + .pitch_align = 64, + .has_powergate = true, + .coupled_pm = false, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra114_primary_formats), + .primary_formats = tegra114_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats), + .overlay_formats = tegra114_overlay_formats, + .modifiers = tegra20_modifiers, + .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, + .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = true, +}; + +static const struct tegra_dc_soc_info tegra124_dc_soc_info = { + .has_win_a_csc = true, + .supports_background_color = true, + .supports_interlacing = true, + .supports_cursor = true, + .supports_block_linear = true, + .supports_sector_layout = false, + .has_legacy_blending = false, + .pitch_align = 64, + .has_powergate = true, + .coupled_pm = false, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra124_primary_formats), + .primary_formats = tegra124_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats), + .overlay_formats = tegra124_overlay_formats, + .modifiers = tegra124_modifiers, + .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, + .has_win_c_without_vert_filter = false, + 
.plane_tiled_memory_bandwidth_x2 = false, +}; + +static const struct tegra_dc_soc_info tegra210_dc_soc_info = { + .has_win_a_csc = true, + .supports_background_color = true, + .supports_interlacing = true, + .supports_cursor = true, + .supports_block_linear = true, + .supports_sector_layout = false, + .has_legacy_blending = false, + .pitch_align = 64, + .has_powergate = true, + .coupled_pm = false, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra114_primary_formats), + .primary_formats = tegra114_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats), + .overlay_formats = tegra114_overlay_formats, + .modifiers = tegra124_modifiers, + .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, + .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = false, +}; + +static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = { + { + .index = 0, + .dc = 0, + .windows = (const unsigned int[]) { 0 }, + .num_windows = 1, + }, { + .index = 1, + .dc = 1, + .windows = (const unsigned int[]) { 1 }, + .num_windows = 1, + }, { + .index = 2, + .dc = 1, + .windows = (const unsigned int[]) { 2 }, + .num_windows = 1, + }, { + .index = 3, + .dc = 2, + .windows = (const unsigned int[]) { 3 }, + .num_windows = 1, + }, { + .index = 4, + .dc = 2, + .windows = (const unsigned int[]) { 4 }, + .num_windows = 1, + }, { + .index = 5, + .dc = 2, + .windows = (const unsigned int[]) { 5 }, + .num_windows = 1, + }, +}; + +static const struct tegra_dc_soc_info tegra186_dc_soc_info = { + .supports_background_color = true, + .supports_interlacing = true, + .supports_cursor = true, + .supports_block_linear = true, + .supports_sector_layout = false, + .has_legacy_blending = false, + .pitch_align = 64, + .has_powergate = false, + .coupled_pm = false, + .has_nvdisplay = true, + .wgrps = tegra186_dc_wgrps, + .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps), + .plane_tiled_memory_bandwidth_x2 = false, +}; + +static const 
struct tegra_windowgroup_soc tegra194_dc_wgrps[] = { + { + .index = 0, + .dc = 0, + .windows = (const unsigned int[]) { 0 }, + .num_windows = 1, + }, { + .index = 1, + .dc = 1, + .windows = (const unsigned int[]) { 1 }, + .num_windows = 1, + }, { + .index = 2, + .dc = 1, + .windows = (const unsigned int[]) { 2 }, + .num_windows = 1, + }, { + .index = 3, + .dc = 2, + .windows = (const unsigned int[]) { 3 }, + .num_windows = 1, + }, { + .index = 4, + .dc = 2, + .windows = (const unsigned int[]) { 4 }, + .num_windows = 1, + }, { + .index = 5, + .dc = 2, + .windows = (const unsigned int[]) { 5 }, + .num_windows = 1, + }, +}; + +static const struct tegra_dc_soc_info tegra194_dc_soc_info = { + .supports_background_color = true, + .supports_interlacing = true, + .supports_cursor = true, + .supports_block_linear = true, + .supports_sector_layout = true, + .has_legacy_blending = false, + .pitch_align = 64, + .has_powergate = false, + .coupled_pm = false, + .has_nvdisplay = true, + .wgrps = tegra194_dc_wgrps, + .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps), + .plane_tiled_memory_bandwidth_x2 = false, +}; + +static const struct of_device_id tegra_dc_of_match[] = { + { + .compatible = "nvidia,tegra194-dc", + .data = &tegra194_dc_soc_info, + }, { + .compatible = "nvidia,tegra186-dc", + .data = &tegra186_dc_soc_info, + }, { + .compatible = "nvidia,tegra210-dc", + .data = &tegra210_dc_soc_info, + }, { + .compatible = "nvidia,tegra124-dc", + .data = &tegra124_dc_soc_info, + }, { + .compatible = "nvidia,tegra114-dc", + .data = &tegra114_dc_soc_info, + }, { + .compatible = "nvidia,tegra30-dc", + .data = &tegra30_dc_soc_info, + }, { + .compatible = "nvidia,tegra20-dc", + .data = &tegra20_dc_soc_info, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, tegra_dc_of_match); + +static int tegra_dc_parse_dt(struct tegra_dc *dc) +{ + struct device_node *np; + u32 value = 0; + int err; + + err = of_property_read_u32(dc->dev->of_node, "nvidia,head", &value); + if (err < 0) { + 
dev_err(dc->dev, "missing \"nvidia,head\" property\n"); + + /* + * If the nvidia,head property isn't present, try to find the + * correct head number by looking up the position of this + * display controller's node within the device tree. Assuming + * that the nodes are ordered properly in the DTS file and + * that the translation into a flattened device tree blob + * preserves that ordering this will actually yield the right + * head number. + * + * If those assumptions don't hold, this will still work for + * cases where only a single display controller is used. + */ + for_each_matching_node(np, tegra_dc_of_match) { + if (np == dc->dev->of_node) { + of_node_put(np); + break; + } + + value++; + } + } + + dc->pipe = value; + + return 0; +} + +static int tegra_dc_match_by_pipe(struct device *dev, const void *data) +{ + struct tegra_dc *dc = dev_get_drvdata(dev); + unsigned int pipe = (unsigned long)(void *)data; + + return dc->pipe == pipe; +} + +static int tegra_dc_couple(struct tegra_dc *dc) +{ + /* + * On Tegra20, DC1 requires DC0 to be taken out of reset in order to + * be enabled, otherwise CPU hangs on writing to CMD_DISPLAY_COMMAND / + * POWER_CONTROL registers during CRTC enabling. 
+ */ + if (dc->soc->coupled_pm && dc->pipe == 1) { + struct device *companion; + struct tegra_dc *parent; + + companion = driver_find_device(dc->dev->driver, NULL, (const void *)0, + tegra_dc_match_by_pipe); + if (!companion) + return -EPROBE_DEFER; + + parent = dev_get_drvdata(companion); + dc->client.parent = &parent->client; + + dev_dbg(dc->dev, "coupled to %s\n", dev_name(companion)); + } + + return 0; +} + +static int tegra_dc_init_opp_table(struct tegra_dc *dc) +{ + struct tegra_core_opp_params opp_params = {}; + int err; + + err = devm_tegra_core_dev_init_opp_table(dc->dev, &opp_params); + if (err && err != -ENODEV) + return err; + + if (err) + dc->has_opp_table = false; + else + dc->has_opp_table = true; + + return 0; +} + +static int tegra_dc_probe(struct platform_device *pdev) +{ + u64 dma_mask = dma_get_mask(pdev->dev.parent); + struct tegra_dc *dc; + int err; + + err = dma_coerce_mask_and_coherent(&pdev->dev, dma_mask); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + return err; + } + + dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL); + if (!dc) + return -ENOMEM; + + dc->soc = of_device_get_match_data(&pdev->dev); + + INIT_LIST_HEAD(&dc->list); + dc->dev = &pdev->dev; + + err = tegra_dc_parse_dt(dc); + if (err < 0) + return err; + + err = tegra_dc_couple(dc); + if (err < 0) + return err; + + dc->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dc->clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + return PTR_ERR(dc->clk); + } + + dc->rst = devm_reset_control_get(&pdev->dev, "dc"); + if (IS_ERR(dc->rst)) { + dev_err(&pdev->dev, "failed to get reset\n"); + return PTR_ERR(dc->rst); + } + + /* assert reset and disable clock */ + err = clk_prepare_enable(dc->clk); + if (err < 0) + return err; + + usleep_range(2000, 4000); + + err = reset_control_assert(dc->rst); + if (err < 0) + return err; + + usleep_range(2000, 4000); + + clk_disable_unprepare(dc->clk); + + if (dc->soc->has_powergate) { + if (dc->pipe == 0) + 
dc->powergate = TEGRA_POWERGATE_DIS; + else + dc->powergate = TEGRA_POWERGATE_DISB; + + tegra_powergate_power_off(dc->powergate); + } + + err = tegra_dc_init_opp_table(dc); + if (err < 0) + return err; + + dc->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dc->regs)) + return PTR_ERR(dc->regs); + + dc->irq = platform_get_irq(pdev, 0); + if (dc->irq < 0) + return -ENXIO; + + err = tegra_dc_rgb_probe(dc); + if (err < 0 && err != -ENODEV) + return dev_err_probe(&pdev->dev, err, + "failed to probe RGB output\n"); + + platform_set_drvdata(pdev, dc); + pm_runtime_enable(&pdev->dev); + + INIT_LIST_HEAD(&dc->client.list); + dc->client.ops = &dc_client_ops; + dc->client.dev = &pdev->dev; + + err = host1x_client_register(&dc->client); + if (err < 0) { + dev_err(&pdev->dev, "failed to register host1x client: %d\n", + err); + goto disable_pm; + } + + return 0; + +disable_pm: + pm_runtime_disable(&pdev->dev); + tegra_dc_rgb_remove(dc); + + return err; +} + +static int tegra_dc_remove(struct platform_device *pdev) +{ + struct tegra_dc *dc = platform_get_drvdata(pdev); + int err; + + err = host1x_client_unregister(&dc->client); + if (err < 0) { + dev_err(&pdev->dev, "failed to unregister host1x client: %d\n", + err); + return err; + } + + err = tegra_dc_rgb_remove(dc); + if (err < 0) { + dev_err(&pdev->dev, "failed to remove RGB output: %d\n", err); + return err; + } + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +struct platform_driver tegra_dc_driver = { + .driver = { + .name = "tegra-dc", + .of_match_table = tegra_dc_of_match, + }, + .probe = tegra_dc_probe, + .remove = tegra_dc_remove, +}; diff --git a/drivers/gpu/drm/grate/dc.h b/drivers/gpu/drm/grate/dc.h new file mode 100644 index 0000000000000..22ab755e73c9f --- /dev/null +++ b/drivers/gpu/drm/grate/dc.h @@ -0,0 +1,832 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012 NVIDIA CORPORATION. All rights reserved. 
+ */
+
+#ifndef TEGRA_DC_H
+#define TEGRA_DC_H 1
+
+#include /* NOTE(review): header name is missing on this and the next #include - looks stripped during extraction; restore from the original patch */
+
+#include
+
+#include "drm.h"
+
+struct tegra_output;
+
+#define TEGRA_DC_LEGACY_PLANES_NUM 7
+
+struct tegra_dc_color_key_state {
+	u32 min;
+	u32 max;
+};
+
+struct tegra_dc_state {
+	struct drm_crtc_state base;
+
+	struct clk *clk;
+	unsigned long pclk;
+	unsigned int div;
+
+	struct tegra_dc_color_key_state ckey;
+
+	u32 planes;
+};
+/*
+ * Get the tegra_dc_state that embeds @state as its ->base member.
+ * A NULL @state yields NULL.
+ */
+static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state)
+{
+	if (state)
+		return container_of(state, struct tegra_dc_state, base);
+
+	return NULL;
+}
+/*
+ * Const flavour of to_dc_state(): the const is cast away only to reuse that
+ * helper, and the result is handed back as a pointer to const again.
+ */
+static inline const struct tegra_dc_state *
+to_const_dc_state(const struct drm_crtc_state *state)
+{
+	return to_dc_state((struct drm_crtc_state *)state);
+}
+
+struct tegra_dc_stats {
+	unsigned long frames;
+	unsigned long vblank;
+	unsigned long underflow;
+	unsigned long overflow;
+
+	unsigned long frames_total;
+	unsigned long vblank_total;
+	unsigned long underflow_total;
+	unsigned long overflow_total;
+};
+
+struct tegra_windowgroup_soc {
+	unsigned int index;
+	unsigned int dc;
+	const unsigned int *windows;
+	unsigned int num_windows;
+};
+
+struct tegra_dc_soc_info {
+	bool has_win_a_csc;
+	bool supports_background_color;
+	bool supports_interlacing;
+	bool supports_cursor;
+	bool supports_block_linear;
+	bool supports_sector_layout;
+	bool has_legacy_blending;
+	unsigned int pitch_align;
+	bool has_powergate;
+	bool coupled_pm;
+	bool has_nvdisplay;
+	const struct tegra_windowgroup_soc *wgrps;
+	unsigned int num_wgrps;
+	const u32 *primary_formats;
+	unsigned int num_primary_formats;
+	const u32 *overlay_formats;
+	unsigned int num_overlay_formats;
+	const u64 *modifiers;
+	bool has_win_a_without_filters;
+	bool has_win_b_vfilter_mem_client;
+	bool has_win_c_without_vert_filter;
+	bool plane_tiled_memory_bandwidth_x2;
+};
+
+struct tegra_dc {
+	struct host1x_client client;
+	struct host1x_syncpt *syncpt;
+	struct device *dev;
+
+	struct iommu_group *group;
+
+	struct drm_crtc base;
+	unsigned int powergate;
+	int pipe;
+
+	struct clk *clk;
+	struct reset_control *rst;
+	void __iomem *regs;
+	int irq;
+
+	struct tegra_output *rgb;
+
+	struct tegra_dc_stats stats;
+	struct list_head list;
+
+	struct drm_info_list *debugfs_files;
+
+	const struct tegra_dc_soc_info *soc;
+
+	bool has_opp_table;
+};
+/*
+ * Get the tegra_dc that embeds @client as its ->client member. There is no
+ * NULL check here, unlike to_tegra_dc().
+ */
+static inline struct tegra_dc *
+host1x_client_to_dc(struct host1x_client *client)
+{
+	return container_of(client, struct tegra_dc, client);
+}
+/*
+ * Get the tegra_dc that embeds @crtc as its ->base member.
+ * A NULL @crtc yields NULL.
+ */
+static inline struct tegra_dc *to_tegra_dc(struct drm_crtc *crtc)
+{
+	return crtc ? container_of(crtc, struct tegra_dc, base) : NULL;
+}
+/*
+ * Emit the dc_writel trace event, then write @value to the register at
+ * @offset. @offset is shifted left by two before being added to dc->regs,
+ * so it counts 32-bit registers rather than bytes.
+ */
+static inline void tegra_dc_writel(struct tegra_dc *dc, u32 value,
+				   unsigned int offset)
+{
+	trace_dc_writel(dc->dev, offset, value);
+	writel(value, dc->regs + (offset << 2));
+}
+/*
+ * Read the register at @offset (shifted left by two, as in
+ * tegra_dc_writel()), emit the dc_readl trace event and return the value.
+ */
+static inline u32 tegra_dc_readl(struct tegra_dc *dc, unsigned int offset)
+{
+	u32 value = readl(dc->regs + (offset << 2));
+
+	trace_dc_readl(dc->dev, offset, value);
+
+	return value;
+}
+
+struct tegra_dc_window {
+	struct {
+		unsigned int yof;
+		unsigned int kyrgb;
+		unsigned int kur;
+		unsigned int kvr;
+		unsigned int kug;
+		unsigned int kvg;
+		unsigned int kub;
+		unsigned int kvb;
+	} csc;
+	struct {
+		unsigned int x;
+		unsigned int y;
+		unsigned int w;
+		unsigned int h;
+	} src;
+	struct {
+		unsigned int x;
+		unsigned int y;
+		unsigned int w;
+		unsigned int h;
+	} dst;
+	unsigned int bits_per_pixel;
+	unsigned int stride[2];
+	unsigned long base[3];
+	unsigned int zpos;
+	bool reflect_x;
+	bool reflect_y;
+
+	struct tegra_bo_tiling tiling;
+	u32 format;
+	u32 swap;
+};
+
+/* from dc.c */
+bool tegra_dc_has_output(struct tegra_dc *dc, struct device *dev);
+void tegra_dc_commit(struct tegra_dc *dc);
+int tegra_dc_state_setup_clock(struct tegra_dc *dc,
+			       struct drm_crtc_state *crtc_state,
+			       struct clk *clk, unsigned long pclk,
+			       unsigned int div);
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state);
+
+/* from rgb.c */
+int
tegra_dc_rgb_probe(struct tegra_dc *dc); +int tegra_dc_rgb_remove(struct tegra_dc *dc); +int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc); +int tegra_dc_rgb_exit(struct tegra_dc *dc); + +#define DC_CMD_GENERAL_INCR_SYNCPT 0x000 +#define DC_CMD_GENERAL_INCR_SYNCPT_CNTRL 0x001 +#define SYNCPT_CNTRL_NO_STALL (1 << 8) +#define SYNCPT_CNTRL_SOFT_RESET (1 << 0) +#define DC_CMD_GENERAL_INCR_SYNCPT_ERROR 0x002 +#define DC_CMD_WIN_A_INCR_SYNCPT 0x008 +#define DC_CMD_WIN_A_INCR_SYNCPT_CNTRL 0x009 +#define DC_CMD_WIN_A_INCR_SYNCPT_ERROR 0x00a +#define DC_CMD_WIN_B_INCR_SYNCPT 0x010 +#define DC_CMD_WIN_B_INCR_SYNCPT_CNTRL 0x011 +#define DC_CMD_WIN_B_INCR_SYNCPT_ERROR 0x012 +#define DC_CMD_WIN_C_INCR_SYNCPT 0x018 +#define DC_CMD_WIN_C_INCR_SYNCPT_CNTRL 0x019 +#define DC_CMD_WIN_C_INCR_SYNCPT_ERROR 0x01a +#define DC_CMD_CONT_SYNCPT_VSYNC 0x028 +#define SYNCPT_VSYNC_ENABLE (1 << 8) +#define DC_CMD_DISPLAY_COMMAND_OPTION0 0x031 +#define DC_CMD_DISPLAY_COMMAND 0x032 +#define DISP_CTRL_MODE_STOP (0 << 5) +#define DISP_CTRL_MODE_C_DISPLAY (1 << 5) +#define DISP_CTRL_MODE_NC_DISPLAY (2 << 5) +#define DISP_CTRL_MODE_MASK (3 << 5) +#define DC_CMD_SIGNAL_RAISE 0x033 +#define DC_CMD_DISPLAY_POWER_CONTROL 0x036 +#define PW0_ENABLE (1 << 0) +#define PW1_ENABLE (1 << 2) +#define PW2_ENABLE (1 << 4) +#define PW3_ENABLE (1 << 6) +#define PW4_ENABLE (1 << 8) +#define PM0_ENABLE (1 << 16) +#define PM1_ENABLE (1 << 18) + +#define DC_CMD_INT_STATUS 0x037 +#define DC_CMD_INT_MASK 0x038 +#define DC_CMD_INT_ENABLE 0x039 +#define DC_CMD_INT_TYPE 0x03a +#define DC_CMD_INT_POLARITY 0x03b +#define CTXSW_INT (1 << 0) +#define FRAME_END_INT (1 << 1) +#define VBLANK_INT (1 << 2) +#define V_PULSE3_INT (1 << 4) +#define V_PULSE2_INT (1 << 5) +#define REGION_CRC_INT (1 << 6) +#define REG_TMOUT_INT (1 << 7) +#define WIN_A_UF_INT (1 << 8) +#define WIN_B_UF_INT (1 << 9) +#define WIN_C_UF_INT (1 << 10) +#define MSF_INT (1 << 12) +#define WIN_A_OF_INT (1 << 14) +#define WIN_B_OF_INT (1 << 15) 
+#define WIN_C_OF_INT (1 << 16) +#define HEAD_UF_INT (1 << 23) +#define SD3_BUCKET_WALK_DONE_INT (1 << 24) +#define DSC_OBUF_UF_INT (1 << 26) +#define DSC_RBUF_UF_INT (1 << 27) +#define DSC_BBUF_UF_INT (1 << 28) +#define DSC_TO_UF_INT (1 << 29) + +#define DC_CMD_SIGNAL_RAISE1 0x03c +#define DC_CMD_SIGNAL_RAISE2 0x03d +#define DC_CMD_SIGNAL_RAISE3 0x03e + +#define DC_CMD_STATE_ACCESS 0x040 +#define READ_MUX (1 << 0) +#define WRITE_MUX (1 << 2) + +#define DC_CMD_STATE_CONTROL 0x041 +#define GENERAL_ACT_REQ (1 << 0) +#define WIN_A_ACT_REQ (1 << 1) +#define WIN_B_ACT_REQ (1 << 2) +#define WIN_C_ACT_REQ (1 << 3) +#define CURSOR_ACT_REQ (1 << 7) +#define GENERAL_UPDATE (1 << 8) +#define WIN_A_UPDATE (1 << 9) +#define WIN_B_UPDATE (1 << 10) +#define WIN_C_UPDATE (1 << 11) +#define CURSOR_UPDATE (1 << 15) +#define COMMON_ACTREQ (1 << 16) +#define COMMON_UPDATE (1 << 17) +#define NC_HOST_TRIG (1 << 24) + +#define DC_CMD_DISPLAY_WINDOW_HEADER 0x042 +#define WINDOW_A_SELECT (1 << 4) +#define WINDOW_B_SELECT (1 << 5) +#define WINDOW_C_SELECT (1 << 6) + +#define DC_CMD_REG_ACT_CONTROL 0x043 + +#define DC_COM_CRC_CONTROL 0x300 +#define DC_COM_CRC_CONTROL_ALWAYS (1 << 3) +#define DC_COM_CRC_CONTROL_FULL_FRAME (0 << 2) +#define DC_COM_CRC_CONTROL_ACTIVE_DATA (1 << 2) +#define DC_COM_CRC_CONTROL_WAIT (1 << 1) +#define DC_COM_CRC_CONTROL_ENABLE (1 << 0) +#define DC_COM_CRC_CHECKSUM 0x301 +#define DC_COM_PIN_OUTPUT_ENABLE(x) (0x302 + (x)) +#define DC_COM_PIN_OUTPUT_POLARITY(x) (0x306 + (x)) +#define LVS_OUTPUT_POLARITY_LOW (1 << 28) +#define LHS_OUTPUT_POLARITY_LOW (1 << 30) +#define DC_COM_PIN_OUTPUT_DATA(x) (0x30a + (x)) +#define DC_COM_PIN_INPUT_ENABLE(x) (0x30e + (x)) +#define DC_COM_PIN_INPUT_DATA(x) (0x312 + (x)) +#define DC_COM_PIN_OUTPUT_SELECT(x) (0x314 + (x)) + +#define DC_COM_PIN_MISC_CONTROL 0x31b +#define DC_COM_PIN_PM0_CONTROL 0x31c +#define DC_COM_PIN_PM0_DUTY_CYCLE 0x31d +#define DC_COM_PIN_PM1_CONTROL 0x31e +#define DC_COM_PIN_PM1_DUTY_CYCLE 0x31f + +#define 
DC_COM_SPI_CONTROL 0x320 +#define DC_COM_SPI_START_BYTE 0x321 +#define DC_COM_HSPI_WRITE_DATA_AB 0x322 +#define DC_COM_HSPI_WRITE_DATA_CD 0x323 +#define DC_COM_HSPI_CS_DC 0x324 +#define DC_COM_SCRATCH_REGISTER_A 0x325 +#define DC_COM_SCRATCH_REGISTER_B 0x326 +#define DC_COM_GPIO_CTRL 0x327 +#define DC_COM_GPIO_DEBOUNCE_COUNTER 0x328 +#define DC_COM_CRC_CHECKSUM_LATCHED 0x329 + +#define DC_COM_RG_UNDERFLOW 0x365 +#define UNDERFLOW_MODE_RED (1 << 8) +#define UNDERFLOW_REPORT_ENABLE (1 << 0) + +#define DC_DISP_DISP_SIGNAL_OPTIONS0 0x400 +#define H_PULSE0_ENABLE (1 << 8) +#define H_PULSE1_ENABLE (1 << 10) +#define H_PULSE2_ENABLE (1 << 12) + +#define DC_DISP_DISP_SIGNAL_OPTIONS1 0x401 + +#define DC_DISP_DISP_WIN_OPTIONS 0x402 +#define HDMI_ENABLE (1 << 30) +#define DSI_ENABLE (1 << 29) +#define SOR1_TIMING_CYA (1 << 27) +#define CURSOR_ENABLE (1 << 16) + +#define SOR_ENABLE(x) (1 << (25 + (((x) > 1) ? ((x) + 1) : (x)))) + +#define DC_DISP_DISP_MEM_HIGH_PRIORITY 0x403 +#define CURSOR_THRESHOLD(x) (((x) & 0x03) << 24) +#define WINDOW_A_THRESHOLD(x) (((x) & 0x7f) << 16) +#define WINDOW_B_THRESHOLD(x) (((x) & 0x7f) << 8) +#define WINDOW_C_THRESHOLD(x) (((x) & 0xff) << 0) + +#define DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER 0x404 +#define CURSOR_DELAY(x) (((x) & 0x3f) << 24) +#define WINDOW_A_DELAY(x) (((x) & 0x3f) << 16) +#define WINDOW_B_DELAY(x) (((x) & 0x3f) << 8) +#define WINDOW_C_DELAY(x) (((x) & 0x3f) << 0) + +#define DC_DISP_DISP_TIMING_OPTIONS 0x405 +#define VSYNC_H_POSITION(x) ((x) & 0xfff) + +#define DC_DISP_REF_TO_SYNC 0x406 +#define DC_DISP_SYNC_WIDTH 0x407 +#define DC_DISP_BACK_PORCH 0x408 +#define DC_DISP_ACTIVE 0x409 +#define DC_DISP_FRONT_PORCH 0x40a +#define DC_DISP_H_PULSE0_CONTROL 0x40b +#define DC_DISP_H_PULSE0_POSITION_A 0x40c +#define DC_DISP_H_PULSE0_POSITION_B 0x40d +#define DC_DISP_H_PULSE0_POSITION_C 0x40e +#define DC_DISP_H_PULSE0_POSITION_D 0x40f +#define DC_DISP_H_PULSE1_CONTROL 0x410 +#define DC_DISP_H_PULSE1_POSITION_A 0x411 +#define 
DC_DISP_H_PULSE1_POSITION_B 0x412 +#define DC_DISP_H_PULSE1_POSITION_C 0x413 +#define DC_DISP_H_PULSE1_POSITION_D 0x414 +#define DC_DISP_H_PULSE2_CONTROL 0x415 +#define DC_DISP_H_PULSE2_POSITION_A 0x416 +#define DC_DISP_H_PULSE2_POSITION_B 0x417 +#define DC_DISP_H_PULSE2_POSITION_C 0x418 +#define DC_DISP_H_PULSE2_POSITION_D 0x419 +#define DC_DISP_V_PULSE0_CONTROL 0x41a +#define DC_DISP_V_PULSE0_POSITION_A 0x41b +#define DC_DISP_V_PULSE0_POSITION_B 0x41c +#define DC_DISP_V_PULSE0_POSITION_C 0x41d +#define DC_DISP_V_PULSE1_CONTROL 0x41e +#define DC_DISP_V_PULSE1_POSITION_A 0x41f +#define DC_DISP_V_PULSE1_POSITION_B 0x420 +#define DC_DISP_V_PULSE1_POSITION_C 0x421 +#define DC_DISP_V_PULSE2_CONTROL 0x422 +#define DC_DISP_V_PULSE2_POSITION_A 0x423 +#define DC_DISP_V_PULSE3_CONTROL 0x424 +#define DC_DISP_V_PULSE3_POSITION_A 0x425 +#define DC_DISP_M0_CONTROL 0x426 +#define DC_DISP_M1_CONTROL 0x427 +#define DC_DISP_DI_CONTROL 0x428 +#define DC_DISP_PP_CONTROL 0x429 +#define DC_DISP_PP_SELECT_A 0x42a +#define DC_DISP_PP_SELECT_B 0x42b +#define DC_DISP_PP_SELECT_C 0x42c +#define DC_DISP_PP_SELECT_D 0x42d + +#define PULSE_MODE_NORMAL (0 << 3) +#define PULSE_MODE_ONE_CLOCK (1 << 3) +#define PULSE_POLARITY_HIGH (0 << 4) +#define PULSE_POLARITY_LOW (1 << 4) +#define PULSE_QUAL_ALWAYS (0 << 6) +#define PULSE_QUAL_VACTIVE (2 << 6) +#define PULSE_QUAL_VACTIVE1 (3 << 6) +#define PULSE_LAST_START_A (0 << 8) +#define PULSE_LAST_END_A (1 << 8) +#define PULSE_LAST_START_B (2 << 8) +#define PULSE_LAST_END_B (3 << 8) +#define PULSE_LAST_START_C (4 << 8) +#define PULSE_LAST_END_C (5 << 8) +#define PULSE_LAST_START_D (6 << 8) +#define PULSE_LAST_END_D (7 << 8) + +#define PULSE_START(x) (((x) & 0xfff) << 0) +#define PULSE_END(x) (((x) & 0xfff) << 16) + +#define DC_DISP_DISP_CLOCK_CONTROL 0x42e +#define PIXEL_CLK_DIVIDER_PCD1 (0 << 8) +#define PIXEL_CLK_DIVIDER_PCD1H (1 << 8) +#define PIXEL_CLK_DIVIDER_PCD2 (2 << 8) +#define PIXEL_CLK_DIVIDER_PCD3 (3 << 8) +#define PIXEL_CLK_DIVIDER_PCD4 (4 
<< 8) +#define PIXEL_CLK_DIVIDER_PCD6 (5 << 8) +#define PIXEL_CLK_DIVIDER_PCD8 (6 << 8) +#define PIXEL_CLK_DIVIDER_PCD9 (7 << 8) +#define PIXEL_CLK_DIVIDER_PCD12 (8 << 8) +#define PIXEL_CLK_DIVIDER_PCD16 (9 << 8) +#define PIXEL_CLK_DIVIDER_PCD18 (10 << 8) +#define PIXEL_CLK_DIVIDER_PCD24 (11 << 8) +#define PIXEL_CLK_DIVIDER_PCD13 (12 << 8) +#define SHIFT_CLK_DIVIDER(x) ((x) & 0xff) + +#define DC_DISP_DISP_INTERFACE_CONTROL 0x42f +#define DISP_DATA_FORMAT_DF1P1C (0 << 0) +#define DISP_DATA_FORMAT_DF1P2C24B (1 << 0) +#define DISP_DATA_FORMAT_DF1P2C18B (2 << 0) +#define DISP_DATA_FORMAT_DF1P2C16B (3 << 0) +#define DISP_DATA_FORMAT_DF2S (4 << 0) +#define DISP_DATA_FORMAT_DF3S (5 << 0) +#define DISP_DATA_FORMAT_DFSPI (6 << 0) +#define DISP_DATA_FORMAT_DF1P3C24B (7 << 0) +#define DISP_DATA_FORMAT_DF1P3C18B (8 << 0) +#define DISP_ALIGNMENT_MSB (0 << 8) +#define DISP_ALIGNMENT_LSB (1 << 8) +#define DISP_ORDER_RED_BLUE (0 << 9) +#define DISP_ORDER_BLUE_RED (1 << 9) + +#define DC_DISP_DISP_COLOR_CONTROL 0x430 +#define BASE_COLOR_SIZE666 ( 0 << 0) +#define BASE_COLOR_SIZE111 ( 1 << 0) +#define BASE_COLOR_SIZE222 ( 2 << 0) +#define BASE_COLOR_SIZE333 ( 3 << 0) +#define BASE_COLOR_SIZE444 ( 4 << 0) +#define BASE_COLOR_SIZE555 ( 5 << 0) +#define BASE_COLOR_SIZE565 ( 6 << 0) +#define BASE_COLOR_SIZE332 ( 7 << 0) +#define BASE_COLOR_SIZE888 ( 8 << 0) +#define BASE_COLOR_SIZE101010 (10 << 0) +#define BASE_COLOR_SIZE121212 (12 << 0) +#define DITHER_CONTROL_MASK (3 << 8) +#define DITHER_CONTROL_DISABLE (0 << 8) +#define DITHER_CONTROL_ORDERED (2 << 8) +#define DITHER_CONTROL_ERRDIFF (3 << 8) +#define BASE_COLOR_SIZE_MASK (0xf << 0) +#define BASE_COLOR_SIZE_666 ( 0 << 0) +#define BASE_COLOR_SIZE_111 ( 1 << 0) +#define BASE_COLOR_SIZE_222 ( 2 << 0) +#define BASE_COLOR_SIZE_333 ( 3 << 0) +#define BASE_COLOR_SIZE_444 ( 4 << 0) +#define BASE_COLOR_SIZE_555 ( 5 << 0) +#define BASE_COLOR_SIZE_565 ( 6 << 0) +#define BASE_COLOR_SIZE_332 ( 7 << 0) +#define BASE_COLOR_SIZE_888 ( 8 << 0) 
+#define BASE_COLOR_SIZE_101010 ( 10 << 0) +#define BASE_COLOR_SIZE_121212 ( 12 << 0) + +#define DC_DISP_SHIFT_CLOCK_OPTIONS 0x431 +#define SC1_H_QUALIFIER_NONE (1 << 16) +#define SC0_H_QUALIFIER_NONE (1 << 0) + +#define DC_DISP_DATA_ENABLE_OPTIONS 0x432 +#define DE_SELECT_ACTIVE_BLANK (0 << 0) +#define DE_SELECT_ACTIVE (1 << 0) +#define DE_SELECT_ACTIVE_IS (2 << 0) +#define DE_CONTROL_ONECLK (0 << 2) +#define DE_CONTROL_NORMAL (1 << 2) +#define DE_CONTROL_EARLY_EXT (2 << 2) +#define DE_CONTROL_EARLY (3 << 2) +#define DE_CONTROL_ACTIVE_BLANK (4 << 2) + +#define DC_DISP_SERIAL_INTERFACE_OPTIONS 0x433 +#define DC_DISP_LCD_SPI_OPTIONS 0x434 +#define DC_DISP_BORDER_COLOR 0x435 +#define DC_DISP_COLOR_KEY0_LOWER 0x436 +#define DC_DISP_COLOR_KEY0_UPPER 0x437 +#define DC_DISP_COLOR_KEY1_LOWER 0x438 +#define DC_DISP_COLOR_KEY1_UPPER 0x439 + +#define DC_DISP_CURSOR_FOREGROUND 0x43c +#define DC_DISP_CURSOR_BACKGROUND 0x43d + +#define DC_DISP_CURSOR_START_ADDR 0x43e +#define CURSOR_CLIP_DISPLAY (0 << 28) +#define CURSOR_CLIP_WIN_A (1 << 28) +#define CURSOR_CLIP_WIN_B (2 << 28) +#define CURSOR_CLIP_WIN_C (3 << 28) +#define CURSOR_SIZE_32x32 (0 << 24) +#define CURSOR_SIZE_64x64 (1 << 24) +#define CURSOR_SIZE_128x128 (2 << 24) +#define CURSOR_SIZE_256x256 (3 << 24) +#define DC_DISP_CURSOR_START_ADDR_NS 0x43f + +#define DC_DISP_CURSOR_POSITION 0x440 +#define DC_DISP_CURSOR_POSITION_NS 0x441 + +#define DC_DISP_INIT_SEQ_CONTROL 0x442 +#define DC_DISP_SPI_INIT_SEQ_DATA_A 0x443 +#define DC_DISP_SPI_INIT_SEQ_DATA_B 0x444 +#define DC_DISP_SPI_INIT_SEQ_DATA_C 0x445 +#define DC_DISP_SPI_INIT_SEQ_DATA_D 0x446 + +#define DC_DISP_DC_MCCIF_FIFOCTRL 0x480 +#define DC_DISP_MCCIF_DISPLAY0A_HYST 0x481 +#define DC_DISP_MCCIF_DISPLAY0B_HYST 0x482 +#define DC_DISP_MCCIF_DISPLAY1A_HYST 0x483 +#define DC_DISP_MCCIF_DISPLAY1B_HYST 0x484 + +#define DC_DISP_DAC_CRT_CTRL 0x4c0 +#define DC_DISP_DISP_MISC_CONTROL 0x4c1 +#define DC_DISP_SD_CONTROL 0x4c2 +#define DC_DISP_SD_CSC_COEFF 0x4c3 +#define 
DC_DISP_SD_LUT(x) (0x4c4 + (x)) +#define DC_DISP_SD_FLICKER_CONTROL 0x4cd +#define DC_DISP_DC_PIXEL_COUNT 0x4ce +#define DC_DISP_SD_HISTOGRAM(x) (0x4cf + (x)) +#define DC_DISP_SD_BL_PARAMETERS 0x4d7 +#define DC_DISP_SD_BL_TF(x) (0x4d8 + (x)) +#define DC_DISP_SD_BL_CONTROL 0x4dc +#define DC_DISP_SD_HW_K_VALUES 0x4dd +#define DC_DISP_SD_MAN_K_VALUES 0x4de + +#define DC_DISP_BLEND_BACKGROUND_COLOR 0x4e4 +#define BACKGROUND_COLOR_ALPHA(x) (((x) & 0xff) << 24) +#define BACKGROUND_COLOR_BLUE(x) (((x) & 0xff) << 16) +#define BACKGROUND_COLOR_GREEN(x) (((x) & 0xff) << 8) +#define BACKGROUND_COLOR_RED(x) (((x) & 0xff) << 0) + +#define DC_DISP_INTERLACE_CONTROL 0x4e5 +#define INTERLACE_STATUS (1 << 2) +#define INTERLACE_START (1 << 1) +#define INTERLACE_ENABLE (1 << 0) + +#define DC_DISP_CURSOR_START_ADDR_HI 0x4ec +#define DC_DISP_BLEND_CURSOR_CONTROL 0x4f1 +#define CURSOR_COMPOSITION_MODE_BLEND (0 << 25) +#define CURSOR_COMPOSITION_MODE_XOR (1 << 25) +#define CURSOR_MODE_LEGACY (0 << 24) +#define CURSOR_MODE_NORMAL (1 << 24) +#define CURSOR_DST_BLEND_ZERO (0 << 16) +#define CURSOR_DST_BLEND_K1 (1 << 16) +#define CURSOR_DST_BLEND_NEG_K1_TIMES_SRC (2 << 16) +#define CURSOR_DST_BLEND_MASK (3 << 16) +#define CURSOR_SRC_BLEND_K1 (0 << 8) +#define CURSOR_SRC_BLEND_K1_TIMES_SRC (1 << 8) +#define CURSOR_SRC_BLEND_MASK (3 << 8) +#define CURSOR_ALPHA 0xff + +#define DC_WIN_CORE_ACT_CONTROL 0x50e +#define VCOUNTER (0 << 0) +#define HCOUNTER (1 << 0) + +#define DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLA 0x543 +#define LATENCY_CTL_MODE_ENABLE (1 << 2) + +#define DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLB 0x544 +#define WATERMARK_MASK 0x1fffffff + +#define DC_WIN_CORE_PRECOMP_WGRP_PIPE_METER 0x560 +#define PIPE_METER_INT(x) (((x) & 0xff) << 8) +#define PIPE_METER_FRAC(x) (((x) & 0xff) << 0) + +#define DC_WIN_CORE_IHUB_WGRP_POOL_CONFIG 0x561 +#define MEMPOOL_ENTRIES(x) (((x) & 0xffff) << 0) + +#define DC_WIN_CORE_IHUB_WGRP_FETCH_METER 0x562 +#define SLOTS(x) (((x) & 0xff) << 0) + +#define 
DC_WIN_CORE_IHUB_LINEBUF_CONFIG 0x563 +#define MODE_TWO_LINES (0 << 14) +#define MODE_FOUR_LINES (1 << 14) + +#define DC_WIN_CORE_IHUB_THREAD_GROUP 0x568 +#define THREAD_NUM_MASK (0x1f << 1) +#define THREAD_NUM(x) (((x) & 0x1f) << 1) +#define THREAD_GROUP_ENABLE (1 << 0) + +#define DC_WIN_H_FILTER_P(p) (0x601 + (p)) +#define DC_WIN_V_FILTER_P(p) (0x619 + (p)) + +#define DC_WIN_CSC_YOF 0x611 +#define DC_WIN_CSC_KYRGB 0x612 +#define DC_WIN_CSC_KUR 0x613 +#define DC_WIN_CSC_KVR 0x614 +#define DC_WIN_CSC_KUG 0x615 +#define DC_WIN_CSC_KVG 0x616 +#define DC_WIN_CSC_KUB 0x617 +#define DC_WIN_CSC_KVB 0x618 + +#define DC_WIN_WIN_OPTIONS 0x700 +#define H_DIRECTION (1 << 0) +#define V_DIRECTION (1 << 2) +#define COLOR_EXPAND (1 << 6) +#define H_FILTER (1 << 8) +#define V_FILTER (1 << 10) +#define CSC_ENABLE (1 << 18) +#define WIN_ENABLE (1 << 30) + +#define DC_WIN_BYTE_SWAP 0x701 +#define BYTE_SWAP_NOSWAP (0 << 0) +#define BYTE_SWAP_SWAP2 (1 << 0) +#define BYTE_SWAP_SWAP4 (2 << 0) +#define BYTE_SWAP_SWAP4HW (3 << 0) + +#define DC_WIN_BUFFER_CONTROL 0x702 +#define BUFFER_CONTROL_HOST (0 << 0) +#define BUFFER_CONTROL_VI (1 << 0) +#define BUFFER_CONTROL_EPP (2 << 0) +#define BUFFER_CONTROL_MPEGE (3 << 0) +#define BUFFER_CONTROL_SB2D (4 << 0) + +#define DC_WIN_COLOR_DEPTH 0x703 +#define WIN_COLOR_DEPTH_P1 0 +#define WIN_COLOR_DEPTH_P2 1 +#define WIN_COLOR_DEPTH_P4 2 +#define WIN_COLOR_DEPTH_P8 3 +#define WIN_COLOR_DEPTH_B4G4R4A4 4 +#define WIN_COLOR_DEPTH_B5G5R5A1 5 +#define WIN_COLOR_DEPTH_B5G6R5 6 +#define WIN_COLOR_DEPTH_A1B5G5R5 7 +#define WIN_COLOR_DEPTH_B8G8R8A8 12 +#define WIN_COLOR_DEPTH_R8G8B8A8 13 +#define WIN_COLOR_DEPTH_B6x2G6x2R6x2A8 14 +#define WIN_COLOR_DEPTH_R6x2G6x2B6x2A8 15 +#define WIN_COLOR_DEPTH_YCbCr422 16 +#define WIN_COLOR_DEPTH_YUV422 17 +#define WIN_COLOR_DEPTH_YCbCr420P 18 +#define WIN_COLOR_DEPTH_YUV420P 19 +#define WIN_COLOR_DEPTH_YCbCr422P 20 +#define WIN_COLOR_DEPTH_YUV422P 21 +#define WIN_COLOR_DEPTH_YCbCr422R 22 +#define WIN_COLOR_DEPTH_YUV422R 23 
+#define WIN_COLOR_DEPTH_YCbCr422RA 24 +#define WIN_COLOR_DEPTH_YUV422RA 25 +#define WIN_COLOR_DEPTH_R4G4B4A4 27 +#define WIN_COLOR_DEPTH_R5G5B5A 28 +#define WIN_COLOR_DEPTH_AR5G5B5 29 +#define WIN_COLOR_DEPTH_B5G5R5X1 30 +#define WIN_COLOR_DEPTH_X1B5G5R5 31 +#define WIN_COLOR_DEPTH_R5G5B5X1 32 +#define WIN_COLOR_DEPTH_X1R5G5B5 33 +#define WIN_COLOR_DEPTH_R5G6B5 34 +#define WIN_COLOR_DEPTH_A8R8G8B8 35 +#define WIN_COLOR_DEPTH_A8B8G8R8 36 +#define WIN_COLOR_DEPTH_B8G8R8X8 37 +#define WIN_COLOR_DEPTH_R8G8B8X8 38 +#define WIN_COLOR_DEPTH_X8B8G8R8 65 +#define WIN_COLOR_DEPTH_X8R8G8B8 66 + +#define DC_WIN_POSITION 0x704 +#define H_POSITION(x) (((x) & 0x1fff) << 0) /* XXX 0x7fff on Tegra186 */ +#define V_POSITION(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */ + +#define DC_WIN_SIZE 0x705 +#define H_SIZE(x) (((x) & 0x1fff) << 0) /* XXX 0x7fff on Tegra186 */ +#define V_SIZE(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */ + +#define DC_WIN_PRESCALED_SIZE 0x706 +#define H_PRESCALED_SIZE(x) (((x) & 0x7fff) << 0) +#define V_PRESCALED_SIZE(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */ + +#define DC_WIN_H_INITIAL_DDA 0x707 +#define DC_WIN_V_INITIAL_DDA 0x708 +#define DC_WIN_DDA_INC 0x709 +#define H_DDA_INC(x) (((x) & 0xffff) << 0) +#define V_DDA_INC(x) (((x) & 0xffff) << 16) + +#define DC_WIN_LINE_STRIDE 0x70a +#define DC_WIN_BUF_STRIDE 0x70b +#define DC_WIN_UV_BUF_STRIDE 0x70c +#define DC_WIN_BUFFER_ADDR_MODE 0x70d +#define DC_WIN_BUFFER_ADDR_MODE_LINEAR (0 << 0) +#define DC_WIN_BUFFER_ADDR_MODE_TILE (1 << 0) +#define DC_WIN_BUFFER_ADDR_MODE_LINEAR_UV (0 << 16) +#define DC_WIN_BUFFER_ADDR_MODE_TILE_UV (1 << 16) + +#define DC_WIN_DV_CONTROL 0x70e + +#define DC_WIN_BLEND_NOKEY 0x70f +#define BLEND_WEIGHT1(x) (((x) & 0xff) << 16) +#define BLEND_WEIGHT0(x) (((x) & 0xff) << 8) + +#define DC_WIN_BLEND_1WIN 0x710 +#define BLEND_CONTROL_FIX (0 << 2) +#define BLEND_CONTROL_ALPHA (1 << 2) +#define BLEND_COLOR_KEY_NONE (0 << 0) +#define BLEND_COLOR_KEY_0 (1 << 0) 
+#define BLEND_COLOR_KEY_1 (2 << 0) +#define BLEND_COLOR_KEY_BOTH (3 << 0) + +#define DC_WIN_BLEND_2WIN_X 0x711 +#define BLEND_CONTROL_DEPENDENT (2 << 2) + +#define DC_WIN_BLEND_2WIN_Y 0x712 +#define DC_WIN_BLEND_3WIN_XY 0x713 + +#define DC_WIN_HP_FETCH_CONTROL 0x714 + +#define DC_WINBUF_START_ADDR 0x800 +#define DC_WINBUF_START_ADDR_NS 0x801 +#define DC_WINBUF_START_ADDR_U 0x802 +#define DC_WINBUF_START_ADDR_U_NS 0x803 +#define DC_WINBUF_START_ADDR_V 0x804 +#define DC_WINBUF_START_ADDR_V_NS 0x805 + +#define DC_WINBUF_ADDR_H_OFFSET 0x806 +#define DC_WINBUF_ADDR_H_OFFSET_NS 0x807 +#define DC_WINBUF_ADDR_V_OFFSET 0x808 +#define DC_WINBUF_ADDR_V_OFFSET_NS 0x809 + +#define DC_WINBUF_UFLOW_STATUS 0x80a +#define DC_WINBUF_SURFACE_KIND 0x80b +#define DC_WINBUF_SURFACE_KIND_PITCH (0 << 0) +#define DC_WINBUF_SURFACE_KIND_TILED (1 << 0) +#define DC_WINBUF_SURFACE_KIND_BLOCK (2 << 0) +#define DC_WINBUF_SURFACE_KIND_BLOCK_HEIGHT(x) (((x) & 0x7) << 4) + +#define DC_WINBUF_START_ADDR_HI 0x80d + +#define DC_WINBUF_CDE_CONTROL 0x82f +#define ENABLE_SURFACE (1 << 0) + +#define DC_WINBUF_AD_UFLOW_STATUS 0xbca +#define DC_WINBUF_BD_UFLOW_STATUS 0xdca +#define DC_WINBUF_CD_UFLOW_STATUS 0xfca + +/* Tegra186 and later */ +#define DC_DISP_CORE_SOR_SET_CONTROL(x) (0x403 + (x)) +#define PROTOCOL_MASK (0xf << 8) +#define PROTOCOL_SINGLE_TMDS_A (0x1 << 8) + +#define DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR 0x442 +#define DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR 0x446 + +#define DC_WIN_CORE_WINDOWGROUP_SET_CONTROL 0x702 +#define OWNER_MASK (0xf << 0) +#define OWNER(x) (((x) & 0xf) << 0) + +#define DC_WIN_CROPPED_SIZE 0x706 + +#define DC_WIN_PLANAR_STORAGE 0x709 +#define PITCH(x) (((x) >> 6) & 0x1fff) + +#define DC_WIN_SET_PARAMS 0x70d +#define CLAMP_BEFORE_BLEND (1 << 15) +#define DEGAMMA_NONE (0 << 13) +#define DEGAMMA_SRGB (1 << 13) +#define DEGAMMA_YUV8_10 (2 << 13) +#define DEGAMMA_YUV12 (3 << 13) +#define INPUT_RANGE_BYPASS (0 << 10) +#define INPUT_RANGE_LIMITED (1 << 10) 
+#define INPUT_RANGE_FULL (2 << 10) +#define COLOR_SPACE_RGB (0 << 8) +#define COLOR_SPACE_YUV_601 (1 << 8) +#define COLOR_SPACE_YUV_709 (2 << 8) +#define COLOR_SPACE_YUV_2020 (3 << 8) + +#define DC_WIN_WINDOWGROUP_SET_CONTROL_INPUT_SCALER 0x70e +#define HORIZONTAL_TAPS_2 (1 << 3) +#define HORIZONTAL_TAPS_5 (4 << 3) +#define VERTICAL_TAPS_2 (1 << 0) +#define VERTICAL_TAPS_5 (4 << 0) + +#define DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_USAGE 0x711 +#define INPUT_SCALER_USE422 (1 << 2) +#define INPUT_SCALER_VBYPASS (1 << 1) +#define INPUT_SCALER_HBYPASS (1 << 0) + +#define DC_WIN_BLEND_LAYER_CONTROL 0x716 +#define COLOR_KEY_NONE (0 << 25) +#define COLOR_KEY_SRC (1 << 25) +#define COLOR_KEY_DST (2 << 25) +#define BLEND_BYPASS (1 << 24) +#define K2(x) (((x) & 0xff) << 16) +#define K1(x) (((x) & 0xff) << 8) +#define WINDOW_LAYER_DEPTH(x) (((x) & 0xff) << 0) + +#define DC_WIN_BLEND_MATCH_SELECT 0x717 +#define BLEND_FACTOR_DST_ALPHA_ZERO (0 << 12) +#define BLEND_FACTOR_DST_ALPHA_ONE (1 << 12) +#define BLEND_FACTOR_DST_ALPHA_NEG_K1_TIMES_SRC (2 << 12) +#define BLEND_FACTOR_DST_ALPHA_K2 (3 << 12) +#define BLEND_FACTOR_SRC_ALPHA_ZERO (0 << 8) +#define BLEND_FACTOR_SRC_ALPHA_K1 (1 << 8) +#define BLEND_FACTOR_SRC_ALPHA_K2 (2 << 8) +#define BLEND_FACTOR_SRC_ALPHA_NEG_K1_TIMES_DST (3 << 8) +#define BLEND_FACTOR_DST_COLOR_ZERO (0 << 4) +#define BLEND_FACTOR_DST_COLOR_ONE (1 << 4) +#define BLEND_FACTOR_DST_COLOR_K1 (2 << 4) +#define BLEND_FACTOR_DST_COLOR_K2 (3 << 4) +#define BLEND_FACTOR_DST_COLOR_K1_TIMES_DST (4 << 4) +#define BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_DST (5 << 4) +#define BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC (6 << 4) +#define BLEND_FACTOR_DST_COLOR_NEG_K1 (7 << 4) +#define BLEND_FACTOR_SRC_COLOR_ZERO (0 << 0) +#define BLEND_FACTOR_SRC_COLOR_ONE (1 << 0) +#define BLEND_FACTOR_SRC_COLOR_K1 (2 << 0) +#define BLEND_FACTOR_SRC_COLOR_K1_TIMES_DST (3 << 0) +#define BLEND_FACTOR_SRC_COLOR_NEG_K1_TIMES_DST (4 << 0) +#define BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC (5 << 0) + +#define 
DC_WIN_BLEND_NOMATCH_SELECT 0x718
+
+#define DC_WIN_PRECOMP_WGRP_PARAMS 0x724
+#define SWAP_UV (1 << 0)
+
+#define DC_WIN_WINDOW_SET_CONTROL 0x730
+#define CONTROL_CSC_ENABLE (1 << 5)
+
+#define DC_WINBUF_CROPPED_POINT 0x806
+#define OFFSET_Y(x) (((x) & 0xffff) << 16)
+#define OFFSET_X(x) (((x) & 0xffff) << 0)
+
+#endif /* TEGRA_DC_H */
diff --git a/drivers/gpu/drm/grate/dp.c b/drivers/gpu/drm/grate/dp.c
new file mode 100644
index 0000000000000..70dfb7d1dec55
--- /dev/null
+++ b/drivers/gpu/drm/grate/dp.c
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2013-2019 NVIDIA Corporation
+ * Copyright (C) 2015 Rob Clark
+ */
+
+#include
+#include
+#include
+
+#include "dp.h"
+
+static const u8 drm_dp_edp_revisions[] = { 0x11, 0x12, 0x13, 0x14 };
+
+/*
+ * Mark every capability as unsupported. struct drm_dp_link_caps (dp.h) holds
+ * nothing but boolean flags, so zeroing the whole structure clears them all.
+ */
+static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
+{
+	*caps = (struct drm_dp_link_caps){ 0 };
+}
+
+/* Copy the full set of capability flags from @src into @dest. */
+void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
+			   const struct drm_dp_link_caps *src)
+{
+	*dest = *src;
+}
+
+/*
+ * Forget everything learned about the sink and the selected configuration.
+ * The ops, aux and train members are left untouched. A NULL @link is
+ * ignored.
+ */
+static void drm_dp_link_reset(struct drm_dp_link *link)
+{
+	unsigned int n = DP_MAX_SUPPORTED_RATES;
+
+	if (!link)
+		return;
+
+	/* what the sink reported */
+	link->revision = 0;
+	link->max_rate = 0;
+	link->max_lanes = 0;
+	drm_dp_link_caps_reset(&link->caps);
+	link->aux_rd_interval.cr = 0;
+	link->aux_rd_interval.ce = 0;
+	link->edp = 0;
+
+	/* what was selected for the link */
+	link->rate = 0;
+	link->lanes = 0;
+
+	/* table of additional link rates */
+	while (n--)
+		link->rates[n] = 0;
+
+	link->num_rates = 0;
+}
+
+/**
+ * drm_dp_link_add_rate() - add a rate to the list of supported rates
+ * @link: the link to add the rate to
+ * @rate: the rate to add
+ *
+ * Add a link rate to the list of
supported link rates.
+ * The new rate is inserted in front of the first entry that is not smaller
+ * than it.
+ *
+ * Returns:
+ * 0 on success or one of the following negative error codes on failure:
+ * - ENOSPC if the maximum number of supported rates has been reached
+ * - EEXIST if the link already supports this rate
+ *
+ * See also:
+ * drm_dp_link_remove_rate()
+ */
+int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate)
+{
+	unsigned int count = link->num_rates;
+	unsigned int pos = 0, k;
+
+	/* a full table rejects every request, duplicates included */
+	if (count == DP_MAX_SUPPORTED_RATES)
+		return -ENOSPC;
+
+	/* skip all entries that are smaller than the new rate */
+	while (pos < count && link->rates[pos] < rate)
+		pos++;
+
+	if (pos < count && link->rates[pos] == rate)
+		return -EEXIST;
+
+	/* shift the remaining entries up by one to open a slot at @pos */
+	for (k = count; k > pos; k--)
+		link->rates[k] = link->rates[k - 1];
+
+	link->rates[pos] = rate;
+	link->num_rates = count + 1;
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_remove_rate() - remove a rate from the list of supported rates
+ * @link: the link from which to remove the rate
+ * @rate: the rate to remove
+ *
+ * Drops the first entry equal to @rate from the list of supported link rates
+ * and moves the entries behind it down by one.
+ *
+ * Returns:
+ * 0 on success or one of the following negative error codes on failure:
+ * - EINVAL if the specified rate is not among the supported rates
+ *
+ * See also:
+ * drm_dp_link_add_rate()
+ */
+int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate)
+{
+	unsigned int pos = 0;
+
+	while (pos < link->num_rates && link->rates[pos] != rate)
+		pos++;
+
+	if (pos == link->num_rates)
+		return -EINVAL;
+
+	/* close the gap; the slot behind the new end keeps its old value */
+	for (link->num_rates--; pos < link->num_rates; pos++)
+		link->rates[pos] = link->rates[pos + 1];
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_update_rates() - normalize the supported link rates array
+ * @link: the link for which to normalize the supported link rates
+ *
+ * Users should call this function after they've manually modified the array
+ * of supported link rates. This function removes any stale entries, compacts
+ * the array and updates the supported link rate count.
Note that calling the
+ * drm_dp_link_remove_rate() function already does this janitorial work.
+ *
+ * See also:
+ * drm_dp_link_add_rate(), drm_dp_link_remove_rate()
+ */
+void drm_dp_link_update_rates(struct drm_dp_link *link)
+{
+	unsigned int i, count = 0;
+
+	/* move every non-zero entry to the front, preserving the order */
+	for (i = 0; i < link->num_rates; i++) {
+		if (link->rates[i] != 0)
+			link->rates[count++] = link->rates[i];
+	}
+
+	/* clear the slots that are no longer in use */
+	for (i = count; i < link->num_rates; i++)
+		link->rates[i] = 0;
+
+	link->num_rates = count;
+}
+
+/**
+ * drm_dp_link_probe() - probe a DisplayPort link for capabilities
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to structure in which to return link capabilities
+ *
+ * Resets @link, reads the receiver capabilities from the DPCD and fills in
+ * the revision, maximum rate and lane count, capability flags, AUX read
+ * intervals (in microseconds) and, for eDP 1.4 sinks, the table of supported
+ * link rates. The current rate and lane count are initialized to the maximum
+ * values.
+ *
+ * The structure filled in by this function can usually be passed directly
+ * into drm_dp_link_power_up() and drm_dp_link_configure() to power up and
+ * configure the link based on the link's capabilities.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 dpcd[DP_RECEIVER_CAP_SIZE], value;
+	unsigned int rd_interval;
+	int err;
+
+	drm_dp_link_reset(link);
+
+	err = drm_dp_dpcd_read(aux, DP_DPCD_REV, dpcd, sizeof(dpcd));
+	if (err < 0)
+		return err;
+
+	link->revision = dpcd[DP_DPCD_REV];
+	link->max_rate = drm_dp_max_link_rate(dpcd);
+	link->max_lanes = drm_dp_max_lane_count(dpcd);
+
+	link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(dpcd);
+	link->caps.tps3_supported = drm_dp_tps3_supported(dpcd);
+	link->caps.fast_training = drm_dp_fast_training_cap(dpcd);
+	link->caps.channel_coding = drm_dp_channel_coding_supported(dpcd);
+
+	if (drm_dp_alternate_scrambler_reset_cap(dpcd)) {
+		link->caps.alternate_scrambler_reset = true;
+
+		err = drm_dp_dpcd_readb(aux, DP_EDP_DPCD_REV, &value);
+		if (err < 0)
+			return err;
+
+		/* the register value indexes the table of known revisions */
+		if (value >= ARRAY_SIZE(drm_dp_edp_revisions))
+			DRM_ERROR("unsupported eDP version: %02x\n", value);
+		else
+			link->edp = drm_dp_edp_revisions[value];
+	}
+
+	/*
+	 * The DPCD stores the AUX read interval in units of 4 ms. There are
+	 * two special cases:
+	 *
+	 * 1) if the TRAINING_AUX_RD_INTERVAL field is 0, the clock recovery
+	 *    and channel equalization should use 100 us or 400 us AUX read
+	 *    intervals, respectively
+	 *
+	 * 2) for DP v1.4 and above, clock recovery should always use 100 us
+	 *    AUX read intervals
+	 */
+	rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+			   DP_TRAINING_AUX_RD_MASK;
+
+	if (rd_interval > 4) {
+		DRM_DEBUG_KMS("AUX interval %u out of range (max. 4)\n",
+			      rd_interval);
+		rd_interval = 4;
+	}
+
+	/* convert from units of 4 ms to microseconds */
+	rd_interval *= 4 * USEC_PER_MSEC;
+
+	/*
+	 * Start out with the interval requested by the sink for both phases;
+	 * without this a non-zero interval would be dropped and the training
+	 * code would not wait at all. The special cases override it below.
+	 */
+	link->aux_rd_interval.cr = rd_interval;
+	link->aux_rd_interval.ce = rd_interval;
+
+	if (rd_interval == 0 || link->revision >= DP_DPCD_REV_14)
+		link->aux_rd_interval.cr = 100;
+
+	if (rd_interval == 0)
+		link->aux_rd_interval.ce = 400;
+
+	link->rate = link->max_rate;
+	link->lanes = link->max_lanes;
+
+	/* Parse SUPPORTED_LINK_RATES from eDP 1.4 */
+	if (link->edp >= 0x14) {
+		u8 supported_rates[DP_MAX_SUPPORTED_RATES * 2];
+		unsigned int i;
+		u16 rate;
+
+		err = drm_dp_dpcd_read(aux, DP_SUPPORTED_LINK_RATES,
+				       supported_rates,
+				       sizeof(supported_rates));
+		if (err < 0)
+			return err;
+
+		for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++) {
+			/* entries are 16-bit little-endian, in units of 200 kHz */
+			rate = supported_rates[i * 2 + 1] << 8 |
+			       supported_rates[i * 2 + 0];
+
+			/*
+			 * Unused entries read back as zero and follow the
+			 * valid ones; stop at the first of them instead of
+			 * recording a bogus 0 kHz rate.
+			 */
+			if (rate == 0)
+				break;
+
+			/*
+			 * The table was emptied by drm_dp_link_reset(), so
+			 * the only possible failure is a duplicate entry,
+			 * which is safe to ignore.
+			 */
+			drm_dp_link_add_rate(link, rate * 200);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_power_up() - power up a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 power;
+	int ret;
+
+	/* DP_SET_POWER register is only available on DPCD v1.1 and later */
+	if (link->revision < 0x11)
+		return 0;
+
+	ret = drm_dp_dpcd_readb(aux, DP_SET_POWER, &power);
+	if (ret < 0)
+		return ret;
+
+	/* replace only the power state field and keep all other bits */
+	power = (power & ~DP_SET_POWER_MASK) | DP_SET_POWER_D0;
+
+	ret = drm_dp_dpcd_writeb(aux, DP_SET_POWER, power);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * According to the DP 1.1 specification, a "Sink Device must exit the
+	 * power saving state within 1 ms" (Section 2.5.3.1, Table 5-52, "Sink
+	 * Control Field", register 0x600).
+	 */
+	usleep_range(1000, 2000);
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_power_down() - power down a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Requests power state D3 from the sink. Nothing is done for sinks that
+ * report a DPCD revision older than 1.1.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 power;
+	int ret;
+
+	/* DP_SET_POWER register is only available on DPCD v1.1 and later */
+	if (link->revision < 0x11)
+		return 0;
+
+	ret = drm_dp_dpcd_readb(aux, DP_SET_POWER, &power);
+	if (ret < 0)
+		return ret;
+
+	/* replace only the power state field and keep all other bits */
+	power = (power & ~DP_SET_POWER_MASK) | DP_SET_POWER_D3;
+
+	ret = drm_dp_dpcd_writeb(aux, DP_SET_POWER, power);
+
+	return ret < 0 ? ret : 0;
+}
+
+/**
+ * drm_dp_link_configure() - configure a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 bw_lanes[2], coding;
+	int ret;
+
+	/* give the driver a chance to set up its side of the link first */
+	if (link->ops && link->ops->configure) {
+		ret = link->ops->configure(link);
+		if (ret < 0) {
+			DRM_ERROR("failed to configure DP link: %d\n", ret);
+			return ret;
+		}
+	}
+
+	/* DP_LINK_BW_SET is written together with the register behind it */
+	bw_lanes[0] = drm_dp_link_rate_to_bw_code(link->rate);
+	bw_lanes[1] = link->caps.enhanced_framing ?
+			link->lanes | DP_LANE_COUNT_ENHANCED_FRAME_EN :
+			link->lanes;
+
+	ret = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, bw_lanes,
+				sizeof(bw_lanes));
+	if (ret < 0)
+		return ret;
+
+	coding = link->caps.channel_coding ? DP_SET_ANSI_8B10B : 0;
+
+	ret = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET, coding);
+	if (ret < 0)
+		return ret;
+
+	/* the alternate scrambler reset is only enabled if the sink has it */
+	if (!link->caps.alternate_scrambler_reset)
+		return 0;
+
+	ret = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET,
+				 DP_ALTERNATE_SCRAMBLER_RESET_ENABLE);
+
+	return ret < 0 ? ret : 0;
+}
+
+/**
+ * drm_dp_link_choose() - choose the lowest possible configuration for a mode
+ * @link: DRM DP link object
+ * @mode: DRM display mode
+ * @info: DRM display information
+ *
+ * According to the eDP specification, a source should select a configuration
+ * with the lowest number of lanes and the lowest possible link rate that can
+ * match the bitrate requirements of a video mode. However it must ensure not
+ * to exceed the capabilities of the sink.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_choose(struct drm_dp_link *link,
+		       const struct drm_display_mode *mode,
+		       const struct drm_display_info *info)
+{
+	/* available link symbol clock rates */
+	static const unsigned int rates[3] = { 162000, 270000, 540000 };
+	/* available number of lanes */
+	static const unsigned int lanes[3] = { 1, 2, 4 };
+	unsigned long requirement, capacity;
+	unsigned int max_rate = link->max_rate;
+	unsigned int l, r;
+
+	/* bandwidth requirement: pixel clock times three components of bpc bits */
+	requirement = mode->clock * info->bpc * 3;
+
+	/* lane counts are tried first, so fewer lanes win over a lower rate */
+	for (l = 0; l < ARRAY_SIZE(lanes); l++) {
+		if (lanes[l] > link->max_lanes)
+			break;
+
+		for (r = 0; r < ARRAY_SIZE(rates); r++) {
+			if (rates[r] > max_rate)
+				break;
+
+			/*
+			 * Capacity for this combination of lanes and rate,
+			 * factoring in the ANSI 8B/10B encoding.
+			 *
+			 * Link rates in the DRM DP helpers are really link
+			 * symbol frequencies, so a tenth of the actual rate
+			 * of the link.
+			 */
+			capacity = lanes[l] * (rates[r] * 10) * 8 / 10;
+
+			if (capacity < requirement)
+				continue;
+
+			DRM_DEBUG_KMS("using %u lanes at %u kHz (%lu/%lu kbps)\n",
+				      lanes[l], rates[r], requirement,
+				      capacity);
+			link->lanes = lanes[l];
+			link->rate = rates[r];
+
+			return 0;
+		}
+	}
+
+	/* no combination within the sink's limits can carry the mode */
+	return -ERANGE;
+}
+
+/**
+ * DOC: Link training
+ *
+ * These functions contain common logic and helpers to implement DisplayPort
+ * link training.
+ */
+
+/**
+ * drm_dp_link_train_init() - initialize DisplayPort link training state
+ * @train: DisplayPort link training state
+ *
+ * Clears the requested and adjusted drive settings of all four lanes,
+ * disables the training pattern and marks both training phases as not
+ * completed.
+ */
+void drm_dp_link_train_init(struct drm_dp_link_train *train)
+{
+	struct drm_dp_link_train_set *request = &train->request;
+	struct drm_dp_link_train_set *adjust = &train->adjust;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++) {
+		request->voltage_swing[i] = 0;
+		adjust->voltage_swing[i] = 0;
+
+		request->pre_emphasis[i] = 0;
+		adjust->pre_emphasis[i] = 0;
+
+		request->post_cursor[i] = 0;
+		adjust->post_cursor[i] = 0;
+	}
+
+	train->pattern = DP_TRAINING_PATTERN_DISABLE;
+	train->clock_recovered = false;
+	train->channel_equalized = false;
+}
+
+/* Training state is usable only if both phases completed successfully. */
+static bool drm_dp_link_train_valid(const struct drm_dp_link_train *train)
+{
+	return train->clock_recovered && train->channel_equalized;
+}
+
+/*
+ * Push the currently requested drive settings and training pattern to the
+ * driver (via the apply_training operation, if one is provided) and to the
+ * sink's DPCD. Returns 0 on success or a negative error code on failure.
+ */
+static int drm_dp_link_apply_training(struct drm_dp_link *link)
+{
+	struct drm_dp_link_train_set *request = &link->train.request;
+	unsigned int lanes = link->lanes, *vs, *pe, *pc, i;
+	struct drm_dp_aux *aux = link->aux;
+	u8 values[4], pattern = 0;
+	int err;
+
+	/* values[] and the per-lane settings only have room for four lanes */
+	if (lanes > ARRAY_SIZE(values))
+		return -EINVAL;
+
+	/* the driver hook is optional, like in drm_dp_link_configure() */
+	if (link->ops && link->ops->apply_training) {
+		err = link->ops->apply_training(link);
+		if (err < 0) {
+			DRM_ERROR("failed to apply link training: %d\n", err);
+			return err;
+		}
+	}
+
+	vs = request->voltage_swing;
+	pe = request->pre_emphasis;
+	pc = request->post_cursor;
+
+	/* write currently selected voltage-swing and pre-emphasis levels */
+	for (i = 0; i < lanes; i++)
+		values[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL(vs[i]) |
+			    DP_TRAIN_PRE_EMPHASIS_LEVEL(pe[i]);
+
+	err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values, lanes);
+	if (err < 0) {
+		DRM_ERROR("failed to set training parameters: %d\n", err);
+		return err;
+	}
+
+	/* write currently selected post-cursor level (if supported) */
+	if (link->revision >= 0x12 && link->rate == 540000) {
+		values[0] = values[1] = 0;
+
+		/* two lanes share one register */
+		for (i = 0; i < lanes; i++)
+			values[i / 2] |= DP_LANE_POST_CURSOR(i, pc[i]);
+
+		err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_1_SET2, values,
+					DIV_ROUND_UP(lanes, 2));
+		if (err < 0) {
+			DRM_ERROR("failed to set post-cursor: %d\n", err);
+			return err;
+		}
+	}
+
+	/* write link pattern; scrambling is off while a pattern is active */
+	if (link->train.pattern != DP_TRAINING_PATTERN_DISABLE)
+		pattern |= DP_LINK_SCRAMBLING_DISABLE;
+
+	pattern |= link->train.pattern;
+
+	err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern);
+	if (err < 0) {
+		DRM_ERROR("failed to set training pattern: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Sleep for the AUX read interval that belongs to the active training
+ * pattern. Nothing happens if the interval is zero or no pattern is active.
+ */
+static void drm_dp_link_train_wait(struct drm_dp_link *link)
+{
+	unsigned long min = 0;
+
+	switch (link->train.pattern) {
+	case DP_TRAINING_PATTERN_1:
+		min = link->aux_rd_interval.cr;
+		break;
+
+	case DP_TRAINING_PATTERN_2:
+	case DP_TRAINING_PATTERN_3:
+		min = link->aux_rd_interval.ce;
+		break;
+
+	default:
+		break;
+	}
+
+	if (min > 0)
+		usleep_range(min, 2 * min);
+}
+
+/*
+ * Extract the per-lane drive settings requested by the sink from the link
+ * status and store them in the "adjust" set.
+ */
+static void drm_dp_link_get_adjustments(struct drm_dp_link *link,
+					u8 status[DP_LINK_STATUS_SIZE])
+{
+	struct drm_dp_link_train_set *adjust = &link->train.adjust;
+	unsigned int i;
+
+	for (i = 0; i < link->lanes; i++) {
+		adjust->voltage_swing[i] =
+			drm_dp_get_adjust_request_voltage(status, i) >>
+				DP_TRAIN_VOLTAGE_SWING_SHIFT;
+
+		adjust->pre_emphasis[i] =
+			drm_dp_get_adjust_request_pre_emphasis(status, i) >>
+				DP_TRAIN_PRE_EMPHASIS_SHIFT;
+
+		adjust->post_cursor[i] =
+			drm_dp_get_adjust_request_post_cursor(status, i);
+	}
+}
+
+/* Make the adjustments requested by the sink the new requested settings. */
+static void drm_dp_link_train_adjust(struct drm_dp_link_train *train)
+{
+	struct drm_dp_link_train_set *request = &train->request;
+	struct drm_dp_link_train_set *adjust = &train->adjust;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++) {
+		request->voltage_swing[i] = adjust->voltage_swing[i];
+		request->pre_emphasis[i] = adjust->pre_emphasis[i];
+		request->post_cursor[i] = adjust->post_cursor[i];
+	}
+}
+
+/*
+ * One clock recovery attempt: apply the settings, wait, then either record
+ * success or collect the sink's adjustment requests for the next attempt.
+ */
+static int drm_dp_link_recover_clock(struct drm_dp_link *link)
+{
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		return err;
+
+	drm_dp_link_train_wait(link);
+
+	err = drm_dp_dpcd_read_link_status(link->aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		return err;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes))
+		drm_dp_link_get_adjustments(link, status);
+	else
+		link->train.clock_recovered = true;
+
+	return 0;
+}
+
+/*
+ * Clock recovery phase: up to four attempts using training pattern 1. The
+ * outcome is reported in link->train.clock_recovered; the return value only
+ * signals errors while talking to the sink or the driver.
+ */
+static int drm_dp_link_clock_recovery(struct drm_dp_link *link)
+{
+	unsigned int repeat;
+	int err;
+
+	/*
+	 * Drop the result of any earlier run. Full training retries at a
+	 * lower rate and a stale "recovered" flag would end the loop below
+	 * after a single attempt, whether it succeeded or not.
+	 */
+	link->train.clock_recovered = false;
+
+	/* start clock recovery using training pattern 1 */
+	link->train.pattern = DP_TRAINING_PATTERN_1;
+
+	for (repeat = 1; repeat < 5; repeat++) {
+		err = drm_dp_link_recover_clock(link);
+		if (err < 0) {
+			DRM_ERROR("failed to recover clock: %d\n", err);
+			return err;
+		}
+
+		if (link->train.clock_recovered)
+			break;
+
+		drm_dp_link_train_adjust(&link->train);
+	}
+
+	return 0;
+}
+
+/*
+ * One channel equalization attempt. Losing clock recovery clears
+ * link->train.clock_recovered and is not reported as an error.
+ */
+static int drm_dp_link_equalize_channel(struct drm_dp_link *link)
+{
+	struct drm_dp_aux *aux = link->aux;
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		return err;
+
+	drm_dp_link_train_wait(link);
+
+	err = drm_dp_dpcd_read_link_status(aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		return err;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes)) {
+		DRM_ERROR("clock recovery lost while equalizing channel\n");
+		link->train.clock_recovered = false;
+		return 0;
+	}
+
+	if (!drm_dp_channel_eq_ok(status, link->lanes))
+		drm_dp_link_get_adjustments(link, status);
+	else
+		link->train.channel_equalized = true;
+
+	return 0;
+}
+
+/*
+ * Channel equalization phase: up to four attempts using training pattern 3
+ * if the sink supports it and pattern 2 otherwise. The outcome is reported
+ * in link->train.channel_equalized.
+ */
+static int drm_dp_link_channel_equalization(struct drm_dp_link *link)
+{
+	unsigned int repeat;
+	int err;
+
+	/* drop the result of any earlier run, see clock recovery */
+	link->train.channel_equalized = false;
+
+	/* start channel equalization using pattern 2 or 3 */
+	if (link->caps.tps3_supported)
+		link->train.pattern = DP_TRAINING_PATTERN_3;
+	else
+		link->train.pattern = DP_TRAINING_PATTERN_2;
+
+	for (repeat = 1; repeat < 5; repeat++) {
+		err = drm_dp_link_equalize_channel(link);
+		if (err < 0) {
+			DRM_ERROR("failed to equalize channel: %d\n", err);
+			return err;
+		}
+
+		if (link->train.channel_equalized)
+			break;
+
+		drm_dp_link_train_adjust(&link->train);
+	}
+
+	return 0;
+}
+
+/*
+ * Step the link rate down to the next lower standard rate.
+ *
+ * Returns 0 if the rate was lowered or -EINVAL if it cannot be lowered,
+ * either because the link already runs at 162000 or because the current
+ * rate is not one of the standard rates. Reporting the latter as an error
+ * matters: full training retries for as long as this function succeeds, so
+ * silently keeping the rate would make it retry forever.
+ */
+static int drm_dp_link_downgrade(struct drm_dp_link *link)
+{
+	switch (link->rate) {
+	case 810000:
+		link->rate = 540000;
+		break;
+
+	case 540000:
+		link->rate = 270000;
+		break;
+
+	case 270000:
+		link->rate = 162000;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Switch the training pattern off again; failures are only logged. */
+static void drm_dp_link_train_disable(struct drm_dp_link *link)
+{
+	int err;
+
+	link->train.pattern = DP_TRAINING_PATTERN_DISABLE;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		DRM_ERROR("failed to disable link training: %d\n", err);
+}
+
+/*
+ * Full link training: configure the link, then run clock recovery followed
+ * by channel equalization. If either phase does not complete, the link rate
+ * is lowered and the sequence starts over; training fails once the rate
+ * cannot be lowered any further. The training pattern is disabled before
+ * returning unless the initial link configuration failed.
+ */
+static int drm_dp_link_train_full(struct drm_dp_link *link)
+{
+	int err;
+
+retry:
+	DRM_DEBUG_KMS("full-training link: %u lane%s at %u MHz\n",
+		      link->lanes, (link->lanes > 1) ? "s" : "",
+		      link->rate / 100);
+
+	err = drm_dp_link_configure(link->aux, link);
+	if (err < 0) {
+		DRM_ERROR("failed to configure DP link: %d\n", err);
+		return err;
+	}
+
+	err = drm_dp_link_clock_recovery(link);
+	if (err < 0) {
+		DRM_ERROR("clock recovery failed: %d\n", err);
+		goto out;
+	}
+
+	if (!link->train.clock_recovered) {
+		DRM_ERROR("clock recovery failed, downgrading link\n");
+
+		err = drm_dp_link_downgrade(link);
+		if (err < 0)
+			goto out;
+
+		goto retry;
+	}
+
+	DRM_DEBUG_KMS("clock recovery succeeded\n");
+
+	err = drm_dp_link_channel_equalization(link);
+	if (err < 0) {
+		DRM_ERROR("channel equalization failed: %d\n", err);
+		goto out;
+	}
+
+	if (!link->train.channel_equalized) {
+		DRM_ERROR("channel equalization failed, downgrading link\n");
+
+		err = drm_dp_link_downgrade(link);
+		if (err < 0)
+			goto out;
+
+		goto retry;
+	}
+
+	DRM_DEBUG_KMS("channel equalization succeeded\n");
+
+out:
+	drm_dp_link_train_disable(link);
+	return err;
+}
+
+/*
+ * Fast link training: transmit the training patterns for a fixed time using
+ * the stored drive settings and check the link status once at the end,
+ * without adjusting anything. A negative return value means failure.
+ */
+static int drm_dp_link_train_fast(struct drm_dp_link *link)
+{
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	DRM_DEBUG_KMS("fast-training link: %u lane%s at %u MHz\n",
+		      link->lanes, (link->lanes > 1) ? "s" : "",
+		      link->rate / 100);
+
+	err = drm_dp_link_configure(link->aux, link);
+	if (err < 0) {
+		DRM_ERROR("failed to configure DP link: %d\n", err);
+		return err;
+	}
+
+	/* transmit training pattern 1 for 500 microseconds */
+	link->train.pattern = DP_TRAINING_PATTERN_1;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		goto out;
+
+	usleep_range(500, 1000);
+
+	/* transmit training pattern 2 or 3 for 500 microseconds */
+	if (link->caps.tps3_supported)
+		link->train.pattern = DP_TRAINING_PATTERN_3;
+	else
+		link->train.pattern = DP_TRAINING_PATTERN_2;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		goto out;
+
+	usleep_range(500, 1000);
+
+	err = drm_dp_dpcd_read_link_status(link->aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		goto out;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes)) {
+		DRM_ERROR("clock recovery failed\n");
+		err = -EIO;
+	}
+
+	if (!drm_dp_channel_eq_ok(status, link->lanes)) {
+		DRM_ERROR("channel equalization failed\n");
+		err = -EIO;
+	}
+
+out:
+	drm_dp_link_train_disable(link);
+	return err;
+}
+
+/**
+ * drm_dp_link_train() - perform DisplayPort link training
+ * @link: a DP link object
+ *
+ * Uses the context stored in the DP link object to perform link training. It
+ * is expected that drivers will call drm_dp_link_probe() to obtain the link
+ * capabilities before performing link training.
+ *
+ * If the sink supports fast link training (no AUX CH handshake) and valid
+ * training settings are available, this function will try to perform fast
+ * link training and fall back to full link training on failure.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_train(struct drm_dp_link *link)
+{
+	int err;
+
+	/*
+	 * NOTE(review): this clears clock_recovered and channel_equalized,
+	 * and drm_dp_link_train_valid() below requires both to be set. As
+	 * written the fast-training branch is therefore never taken and every
+	 * call ends up in full training - confirm whether that is intended.
+	 */
+	drm_dp_link_train_init(&link->train);
+
+	if (link->caps.fast_training) {
+		if (drm_dp_link_train_valid(&link->train)) {
+			err = drm_dp_link_train_fast(link);
+			if (err < 0)
+				DRM_ERROR("fast link training failed: %d\n",
+					  err);
+			else
+				return 0;
+		} else {
+			DRM_DEBUG_KMS("training parameters not available\n");
+		}
+	} else {
+		DRM_DEBUG_KMS("fast link training not supported\n");
+	}
+
+	/* reached if fast training was not possible or did not succeed */
+	err = drm_dp_link_train_full(link);
+	if (err < 0)
+		DRM_ERROR("full link training failed: %d\n", err);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/grate/dp.h b/drivers/gpu/drm/grate/dp.h
new file mode 100644
index 0000000000000..cb12ed0c54e7d
--- /dev/null
+++ b/drivers/gpu/drm/grate/dp.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2013-2019 NVIDIA Corporation.
+ * Copyright (C) 2015 Rob Clark
+ */
+
+#ifndef DRM_TEGRA_DP_H
+#define DRM_TEGRA_DP_H 1
+
+#include
+
+struct drm_display_info;
+struct drm_display_mode;
+struct drm_dp_aux;
+struct drm_dp_link;
+
+/**
+ * struct drm_dp_link_caps - DP link capabilities
+ */
+struct drm_dp_link_caps {
+	/**
+	 * @enhanced_framing:
+	 *
+	 * enhanced framing capability (mandatory as of DP 1.2)
+	 */
+	bool enhanced_framing;
+
+	/**
+	 * @tps3_supported:
+	 *
+	 * training pattern sequence 3 supported for equalization
+	 */
+	bool tps3_supported;
+
+	/**
+	 * @fast_training:
+	 *
+	 * AUX CH handshake not required for link training
+	 */
+	bool fast_training;
+
+	/**
+	 * @channel_coding:
+	 *
+	 * ANSI 8B/10B channel coding capability
+	 */
+	bool channel_coding;
+
+	/**
+	 * @alternate_scrambler_reset:
+	 *
+	 * eDP alternate scrambler reset capability
+	 */
+	bool alternate_scrambler_reset;
+};
+
+void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
+			   const struct drm_dp_link_caps *src);
+
+/**
+ * struct drm_dp_link_ops - DP link operations
+ */
+struct drm_dp_link_ops {
+	/**
+	 * @apply_training:
+	 *
+	 * called by the link training code before the requested drive
+	 * settings and training pattern are written to the sink's DPCD; a
+	 * negative return value aborts the operation
+	 */
+	int (*apply_training)(struct drm_dp_link *link);
+
+	/**
+	 * @configure:
+	 *
+	 * optional; if set, called at the start of drm_dp_link_configure(),
+	 * before anything is written to the sink's DPCD; a negative return
+	 * value aborts the configuration
+	 */
+	int (*configure)(struct drm_dp_link *link);
+};
+
+/* voltage swing level in bits 0 and up, pre-emphasis level in bits 3 and up */
+#define DP_TRAIN_VOLTAGE_SWING_LEVEL(x) ((x) << 0)
+#define DP_TRAIN_PRE_EMPHASIS_LEVEL(x) ((x) << 3)
+/* 2-bit post-cursor level of lane i: bits 1:0 for even, 5:4 for odd lanes */
+#define DP_LANE_POST_CURSOR(i, x) (((x) & 0x3) << (((i) & 1) << 2))
+
+/**
+ * struct drm_dp_link_train_set - link training settings
+ * @voltage_swing: per-lane voltage swing
+ * @pre_emphasis: per-lane pre-emphasis
+ * @post_cursor: per-lane post-cursor
+ */
+struct drm_dp_link_train_set {
+	unsigned int voltage_swing[4];
+	unsigned int pre_emphasis[4];
+	unsigned int post_cursor[4];
+};
+
+/**
+ * struct drm_dp_link_train - link training state information
+ * @request: currently requested settings
+ * @adjust: adjustments requested by sink
+ * @pattern: currently requested training pattern
+ * @clock_recovered: flag to track if clock recovery has completed
+ * @channel_equalized: flag to track if channel equalization has completed
+ */
+struct drm_dp_link_train {
+	struct drm_dp_link_train_set request;
+	struct drm_dp_link_train_set adjust;
+
+	unsigned int pattern;
+
+	bool clock_recovered;
+	bool channel_equalized;
+};
+
+/**
+ * struct drm_dp_link - DP link capabilities and configuration
+ * @revision: DP specification revision supported on the link
+ * @max_rate: maximum clock rate supported on the link
+ * @max_lanes: maximum number of lanes supported on the link
+ * @caps: capabilities supported on the link (see &drm_dp_link_caps)
+ * @aux_rd_interval: AUX read interval to use for training (in microseconds)
+ * @edp: eDP revision (0x11: eDP 1.1, 0x12: eDP 1.2, ...)
+ * @rate: currently configured link rate + * @lanes: currently configured number of lanes + * @rates: additional supported link rates in kHz (eDP 1.4) + * @num_rates: number of additional supported link rates (eDP 1.4) + */ +struct drm_dp_link { + unsigned char revision; + unsigned int max_rate; + unsigned int max_lanes; + + struct drm_dp_link_caps caps; + + /** + * @cr: clock recovery read interval + * @ce: channel equalization read interval + */ + struct { + unsigned int cr; + unsigned int ce; + } aux_rd_interval; + + unsigned char edp; + + unsigned int rate; + unsigned int lanes; + + unsigned long rates[DP_MAX_SUPPORTED_RATES]; + unsigned int num_rates; + + /** + * @ops: DP link operations + */ + const struct drm_dp_link_ops *ops; + + /** + * @aux: DP AUX channel + */ + struct drm_dp_aux *aux; + + /** + * @train: DP link training state + */ + struct drm_dp_link_train train; +}; + +int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate); +int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate); +void drm_dp_link_update_rates(struct drm_dp_link *link); + +int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link); +int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link); +int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link); +int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link); +int drm_dp_link_choose(struct drm_dp_link *link, + const struct drm_display_mode *mode, + const struct drm_display_info *info); + +void drm_dp_link_train_init(struct drm_dp_link_train *train); +int drm_dp_link_train(struct drm_dp_link *link); + +#endif diff --git a/drivers/gpu/drm/grate/dpaux.c b/drivers/gpu/drm/grate/dpaux.c new file mode 100644 index 0000000000000..7d7cc90b6fc94 --- /dev/null +++ b/drivers/gpu/drm/grate/dpaux.c @@ -0,0 +1,823 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 NVIDIA Corporation + */ + +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "dp.h" +#include "dpaux.h" +#include "drm.h" +#include "trace.h" + +static DEFINE_MUTEX(dpaux_lock); +static LIST_HEAD(dpaux_list); + +struct tegra_dpaux_soc { + unsigned int cmh; + unsigned int drvz; + unsigned int drvi; +}; + +struct tegra_dpaux { + struct drm_dp_aux aux; + struct device *dev; + + const struct tegra_dpaux_soc *soc; + + void __iomem *regs; + int irq; + + struct tegra_output *output; + + struct reset_control *rst; + struct clk *clk_parent; + struct clk *clk; + + struct regulator *vdd; + + struct completion complete; + struct work_struct work; + struct list_head list; + +#ifdef CONFIG_GENERIC_PINCONF + struct pinctrl_dev *pinctrl; + struct pinctrl_desc desc; +#endif +}; + +static inline struct tegra_dpaux *to_dpaux(struct drm_dp_aux *aux) +{ + return container_of(aux, struct tegra_dpaux, aux); +} + +static inline struct tegra_dpaux *work_to_dpaux(struct work_struct *work) +{ + return container_of(work, struct tegra_dpaux, work); +} + +static inline u32 tegra_dpaux_readl(struct tegra_dpaux *dpaux, + unsigned int offset) +{ + u32 value = readl(dpaux->regs + (offset << 2)); + + trace_dpaux_readl(dpaux->dev, offset, value); + + return value; +} + +static inline void tegra_dpaux_writel(struct tegra_dpaux *dpaux, + u32 value, unsigned int offset) +{ + trace_dpaux_writel(dpaux->dev, offset, value); + writel(value, dpaux->regs + (offset << 2)); +} + +static void tegra_dpaux_write_fifo(struct tegra_dpaux *dpaux, const u8 *buffer, + size_t size) +{ + size_t i, j; + + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); + u32 value = 0; + + for (j = 0; j < num; j++) + value |= buffer[i * 4 + j] << (j * 8); + + tegra_dpaux_writel(dpaux, value, DPAUX_DP_AUXDATA_WRITE(i)); + } +} + +static void tegra_dpaux_read_fifo(struct tegra_dpaux *dpaux, u8 *buffer, + size_t size) +{ + size_t 
i, j; + + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); + u32 value; + + value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXDATA_READ(i)); + + for (j = 0; j < num; j++) + buffer[i * 4 + j] = value >> (j * 8); + } +} +
+/*
+ * Perform one DP AUX (native or I2C-over-AUX) transaction.
+ *
+ * Programs the address and command registers, fills the transmit FIFO for
+ * write-type requests, kicks off the transaction and then waits up to 250 ms
+ * for the interrupt handler to signal completion. On success the decoded
+ * reply code of the sink is stored in msg->reply.
+ *
+ * Returns the number of payload bytes transferred (0 for address-only
+ * transactions and for reads that were not acknowledged) or a negative
+ * error code:
+ *   -EINVAL    message exceeds the 16 byte FIFO or the request is unsupported
+ *   -ETIMEDOUT no completion within 250 ms or the hardware flagged a timeout
+ *   -EIO       the hardware flagged an RX, sink status or no-stop error
+ *   -EBUSY     the sink returned a different number of bytes than requested
+ */
+static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux,
+				    struct drm_dp_aux_msg *msg)
+{
+	unsigned long timeout = msecs_to_jiffies(250);
+	struct tegra_dpaux *dpaux = to_dpaux(aux);
+	unsigned long status;
+	ssize_t ret = 0;
+	u8 reply = 0;
+	u32 value;
+
+	/* Tegra has 4x4 byte DP AUX transmit and receive FIFOs. */
+	if (msg->size > 16)
+		return -EINVAL;
+
+	/*
+	 * Allow zero-sized messages only for I2C, in which case they specify
+	 * address-only transactions.
+	 */
+	if (msg->size < 1) {
+		switch (msg->request & ~DP_AUX_I2C_MOT) {
+		case DP_AUX_I2C_WRITE_STATUS_UPDATE:
+		case DP_AUX_I2C_WRITE:
+		case DP_AUX_I2C_READ:
+			value = DPAUX_DP_AUXCTL_CMD_ADDRESS_ONLY;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	} else {
+		/* For non-zero-sized messages, set the CMDLEN field. */
+		value = DPAUX_DP_AUXCTL_CMDLEN(msg->size - 1);
+	}
+
+	/* Translate the request (and its MOT flag) into the command field. */
+	switch (msg->request & ~DP_AUX_I2C_MOT) {
+	case DP_AUX_I2C_WRITE:
+		if (msg->request & DP_AUX_I2C_MOT)
+			value |= DPAUX_DP_AUXCTL_CMD_MOT_WR;
+		else
+			value |= DPAUX_DP_AUXCTL_CMD_I2C_WR;
+
+		break;
+
+	case DP_AUX_I2C_READ:
+		if (msg->request & DP_AUX_I2C_MOT)
+			value |= DPAUX_DP_AUXCTL_CMD_MOT_RD;
+		else
+			value |= DPAUX_DP_AUXCTL_CMD_I2C_RD;
+
+		break;
+
+	case DP_AUX_I2C_WRITE_STATUS_UPDATE:
+		if (msg->request & DP_AUX_I2C_MOT)
+			value |= DPAUX_DP_AUXCTL_CMD_MOT_RQ;
+		else
+			value |= DPAUX_DP_AUXCTL_CMD_I2C_RQ;
+
+		break;
+
+	case DP_AUX_NATIVE_WRITE:
+		value |= DPAUX_DP_AUXCTL_CMD_AUX_WR;
+		break;
+
+	case DP_AUX_NATIVE_READ:
+		value |= DPAUX_DP_AUXCTL_CMD_AUX_RD;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	tegra_dpaux_writel(dpaux, msg->address, DPAUX_DP_AUXADDR);
+	tegra_dpaux_writel(dpaux, value, DPAUX_DP_AUXCTL);
+
+	/* Requests without the read bit carry payload towards the sink. */
+	if ((msg->request & DP_AUX_I2C_READ) == 0) {
+		tegra_dpaux_write_fifo(dpaux, msg->buffer, msg->size);
+		ret = msg->size;
+	}
+
+	/* start transaction */
+	value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXCTL);
+	value |= DPAUX_DP_AUXCTL_TRANSACTREQ;
+	tegra_dpaux_writel(dpaux, value, DPAUX_DP_AUXCTL);
+
+	status = wait_for_completion_timeout(&dpaux->complete, timeout);
+	if (!status)
+		return -ETIMEDOUT;
+
+	/* read status and clear errors */
+	value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXSTAT);
+	tegra_dpaux_writel(dpaux,
+			   DPAUX_DP_AUXSTAT_NO_STOP_ERROR |
+			   DPAUX_DP_AUXSTAT_SINKSTAT_ERROR |
+			   DPAUX_DP_AUXSTAT_RX_ERROR |
+			   DPAUX_DP_AUXSTAT_TIMEOUT_ERROR,
+			   DPAUX_DP_AUXSTAT);
+
+	if (value & DPAUX_DP_AUXSTAT_TIMEOUT_ERROR)
+		return -ETIMEDOUT;
+
+	if (value & (DPAUX_DP_AUXSTAT_RX_ERROR |
+		     DPAUX_DP_AUXSTAT_SINKSTAT_ERROR |
+		     DPAUX_DP_AUXSTAT_NO_STOP_ERROR))
+		return -EIO;
+
+	/*
+	 * Decode the reply type reported by the hardware. Codes not listed
+	 * here leave reply at its initial value of 0.
+	 */
+	switch ((value & DPAUX_DP_AUXSTAT_REPLY_TYPE_MASK) >> 16) {
+	case 0x00:
+		reply = DP_AUX_NATIVE_REPLY_ACK;
+		break;
+
+	case 0x01:
+		reply = DP_AUX_NATIVE_REPLY_NACK;
+		break;
+
+	case 0x02:
+		reply = DP_AUX_NATIVE_REPLY_DEFER;
+		break;
+
+	case 0x04:
+		reply = DP_AUX_I2C_REPLY_NACK;
+		break;
+
+	case 0x08:
+		reply = DP_AUX_I2C_REPLY_DEFER;
+		break;
+	}
+
+	/*
+	 * Only drain the receive FIFO when *this* transaction was ACKed. The
+	 * decision must be based on the reply decoded above; msg->reply still
+	 * holds whatever the caller left in it (e.g. the reply of a previous
+	 * attempt) until it is updated below.
+	 */
+	if ((msg->size > 0) && (reply == DP_AUX_NATIVE_REPLY_ACK)) {
+		if (msg->request & DP_AUX_I2C_READ) {
+			size_t count = value & DPAUX_DP_AUXSTAT_REPLY_MASK;
+
+			/*
+			 * There might be a smarter way to do this, but since
+			 * the DP helpers will already retry transactions for
+			 * an -EBUSY return value, simply reuse that instead.
+			 */
+			if (count != msg->size) {
+				ret = -EBUSY;
+				goto out;
+			}
+
+			tegra_dpaux_read_fifo(dpaux, msg->buffer, count);
+			ret = count;
+		}
+	}
+
+	msg->reply = reply;
+
+out:
+	return ret;
+}
+ +static void tegra_dpaux_hotplug(struct work_struct *work) +{ + struct tegra_dpaux *dpaux = work_to_dpaux(work); + + if (dpaux->output) + drm_helper_hpd_irq_event(dpaux->output->connector.dev); +} + +static irqreturn_t tegra_dpaux_irq(int irq, void *data) +{ + struct tegra_dpaux *dpaux = data; + irqreturn_t ret = IRQ_HANDLED; + u32 value; + + /* clear interrupts */ + value = tegra_dpaux_readl(dpaux, DPAUX_INTR_AUX); + tegra_dpaux_writel(dpaux, value, DPAUX_INTR_AUX); + + if (value & (DPAUX_INTR_PLUG_EVENT | DPAUX_INTR_UNPLUG_EVENT)) + schedule_work(&dpaux->work); + + if (value & DPAUX_INTR_IRQ_EVENT) { + /* TODO: handle this */ + } + + if (value & DPAUX_INTR_AUX_DONE) + complete(&dpaux->complete); + + return ret; +} + +enum tegra_dpaux_functions { + DPAUX_PADCTL_FUNC_AUX, + DPAUX_PADCTL_FUNC_I2C, + DPAUX_PADCTL_FUNC_OFF, +}; + +static void tegra_dpaux_pad_power_down(struct tegra_dpaux *dpaux) +{ + u32 value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE); + + value |= DPAUX_HYBRID_SPARE_PAD_POWER_DOWN; + + tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE); +} + +static void tegra_dpaux_pad_power_up(struct tegra_dpaux *dpaux) +{ + u32 value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE); + + value &= ~DPAUX_HYBRID_SPARE_PAD_POWER_DOWN; + + tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE); +} + +static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function) +{ + u32 value; + + switch 
(function) { + case DPAUX_PADCTL_FUNC_AUX: + value = DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) | + DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) | + DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) | + DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV | + DPAUX_HYBRID_PADCTL_MODE_AUX; + break; + + case DPAUX_PADCTL_FUNC_I2C: + value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV | + DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV | + DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) | + DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) | + DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) | + DPAUX_HYBRID_PADCTL_MODE_I2C; + break; + + case DPAUX_PADCTL_FUNC_OFF: + tegra_dpaux_pad_power_down(dpaux); + return 0; + + default: + return -ENOTSUPP; + } + + tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_PADCTL); + tegra_dpaux_pad_power_up(dpaux); + + return 0; +} + +#ifdef CONFIG_GENERIC_PINCONF +static const struct pinctrl_pin_desc tegra_dpaux_pins[] = { + PINCTRL_PIN(0, "DP_AUX_CHx_P"), + PINCTRL_PIN(1, "DP_AUX_CHx_N"), +}; + +static const unsigned tegra_dpaux_pin_numbers[] = { 0, 1 }; + +static const char * const tegra_dpaux_groups[] = { + "dpaux-io", +}; + +static const char * const tegra_dpaux_functions[] = { + "aux", + "i2c", + "off", +}; + +static int tegra_dpaux_get_groups_count(struct pinctrl_dev *pinctrl) +{ + return ARRAY_SIZE(tegra_dpaux_groups); +} + +static const char *tegra_dpaux_get_group_name(struct pinctrl_dev *pinctrl, + unsigned int group) +{ + return tegra_dpaux_groups[group]; +} + +static int tegra_dpaux_get_group_pins(struct pinctrl_dev *pinctrl, + unsigned group, const unsigned **pins, + unsigned *num_pins) +{ + *pins = tegra_dpaux_pin_numbers; + *num_pins = ARRAY_SIZE(tegra_dpaux_pin_numbers); + + return 0; +} + +static const struct pinctrl_ops tegra_dpaux_pinctrl_ops = { + .get_groups_count = tegra_dpaux_get_groups_count, + .get_group_name = tegra_dpaux_get_group_name, + .get_group_pins = tegra_dpaux_get_group_pins, + .dt_node_to_map = pinconf_generic_dt_node_to_map_group, + .dt_free_map 
= pinconf_generic_dt_free_map, +}; + +static int tegra_dpaux_get_functions_count(struct pinctrl_dev *pinctrl) +{ + return ARRAY_SIZE(tegra_dpaux_functions); +} + +static const char *tegra_dpaux_get_function_name(struct pinctrl_dev *pinctrl, + unsigned int function) +{ + return tegra_dpaux_functions[function]; +} + +static int tegra_dpaux_get_function_groups(struct pinctrl_dev *pinctrl, + unsigned int function, + const char * const **groups, + unsigned * const num_groups) +{ + *num_groups = ARRAY_SIZE(tegra_dpaux_groups); + *groups = tegra_dpaux_groups; + + return 0; +} + +static int tegra_dpaux_set_mux(struct pinctrl_dev *pinctrl, + unsigned int function, unsigned int group) +{ + struct tegra_dpaux *dpaux = pinctrl_dev_get_drvdata(pinctrl); + + return tegra_dpaux_pad_config(dpaux, function); +} + +static const struct pinmux_ops tegra_dpaux_pinmux_ops = { + .get_functions_count = tegra_dpaux_get_functions_count, + .get_function_name = tegra_dpaux_get_function_name, + .get_function_groups = tegra_dpaux_get_function_groups, + .set_mux = tegra_dpaux_set_mux, +}; +#endif + +
+/*
+ * Bind a DPAUX platform device.
+ *
+ * Acquires the register window, interrupt, optional reset, clocks and the
+ * optional VDD supply, enables runtime PM and takes a runtime PM reference,
+ * requests the interrupt (left disabled until an output attaches), puts the
+ * pads into I2C mode, optionally registers the pin controller, enables the
+ * DPAUX interrupts and finally adds the instance to the global dpaux_list.
+ *
+ * Returns 0 on success or a negative error code. Every failure after
+ * runtime PM has been enabled drops the runtime PM reference and disables
+ * runtime PM again, mirroring what tegra_dpaux_remove() does.
+ */
+static int tegra_dpaux_probe(struct platform_device *pdev)
+{
+	struct tegra_dpaux *dpaux;
+	struct resource *regs;
+	u32 value;
+	int err;
+
+	dpaux = devm_kzalloc(&pdev->dev, sizeof(*dpaux), GFP_KERNEL);
+	if (!dpaux)
+		return -ENOMEM;
+
+	dpaux->soc = of_device_get_match_data(&pdev->dev);
+	INIT_WORK(&dpaux->work, tegra_dpaux_hotplug);
+	init_completion(&dpaux->complete);
+	INIT_LIST_HEAD(&dpaux->list);
+	dpaux->dev = &pdev->dev;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dpaux->regs = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(dpaux->regs))
+		return PTR_ERR(dpaux->regs);
+
+	dpaux->irq = platform_get_irq(pdev, 0);
+	if (dpaux->irq < 0) {
+		/*
+		 * Hand the real error code back to the driver core instead of
+		 * flattening it to -ENXIO, so that probe deferral keeps
+		 * working; stay quiet for deferral like the VDD lookup below.
+		 */
+		if (dpaux->irq != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to get IRQ\n");
+
+		return dpaux->irq;
+	}
+
+	/* The reset is only looked up when no PM domain is attached. */
+	if (!pdev->dev.pm_domain) {
+		dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux");
+		if (IS_ERR(dpaux->rst)) {
+			dev_err(&pdev->dev,
+				"failed to get reset control: %ld\n",
+				PTR_ERR(dpaux->rst));
+			return PTR_ERR(dpaux->rst);
+		}
+	}
+
+	dpaux->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dpaux->clk)) {
+		dev_err(&pdev->dev, "failed to get module clock: %ld\n",
+			PTR_ERR(dpaux->clk));
+		return PTR_ERR(dpaux->clk);
+	}
+
+	dpaux->clk_parent = devm_clk_get(&pdev->dev, "parent");
+	if (IS_ERR(dpaux->clk_parent)) {
+		dev_err(&pdev->dev, "failed to get parent clock: %ld\n",
+			PTR_ERR(dpaux->clk_parent));
+		return PTR_ERR(dpaux->clk_parent);
+	}
+
+	err = clk_set_rate(dpaux->clk_parent, 270000000);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set clock to 270 MHz: %d\n",
+			err);
+		return err;
+	}
+
+	/* The VDD supply is optional: -ENODEV simply means "not present". */
+	dpaux->vdd = devm_regulator_get_optional(&pdev->dev, "vdd");
+	if (IS_ERR(dpaux->vdd)) {
+		if (PTR_ERR(dpaux->vdd) != -ENODEV) {
+			if (PTR_ERR(dpaux->vdd) != -EPROBE_DEFER)
+				dev_err(&pdev->dev,
+					"failed to get VDD supply: %ld\n",
+					PTR_ERR(dpaux->vdd));
+
+			return PTR_ERR(dpaux->vdd);
+		}
+
+		dpaux->vdd = NULL;
+	}
+
+	platform_set_drvdata(pdev, dpaux);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	/* From here on failures must unwind runtime PM. */
+	err = devm_request_irq(dpaux->dev, dpaux->irq, tegra_dpaux_irq, 0,
+			       dev_name(dpaux->dev), dpaux);
+	if (err < 0) {
+		dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n",
+			dpaux->irq, err);
+		goto err_pm_disable;
+	}
+
+	/* Kept disabled here; drm_dp_aux_attach() enables it. */
+	disable_irq(dpaux->irq);
+
+	dpaux->aux.transfer = tegra_dpaux_transfer;
+	dpaux->aux.dev = &pdev->dev;
+
+	drm_dp_aux_init(&dpaux->aux);
+
+	/*
+	 * Assume that by default the DPAUX/I2C pads will be used for HDMI,
+	 * so power them up and configure them in I2C mode.
+	 *
+	 * The DPAUX code paths reconfigure the pads in AUX mode, but there
+	 * is no possibility to perform the I2C mode configuration in the
+	 * HDMI path.
+	 */
+	err = tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_I2C);
+	if (err < 0)
+		goto err_pm_disable;
+
+#ifdef CONFIG_GENERIC_PINCONF
+	dpaux->desc.name = dev_name(&pdev->dev);
+	dpaux->desc.pins = tegra_dpaux_pins;
+	dpaux->desc.npins = ARRAY_SIZE(tegra_dpaux_pins);
+	dpaux->desc.pctlops = &tegra_dpaux_pinctrl_ops;
+	dpaux->desc.pmxops = &tegra_dpaux_pinmux_ops;
+	dpaux->desc.owner = THIS_MODULE;
+
+	dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux);
+	if (IS_ERR(dpaux->pinctrl)) {
+		dev_err(&pdev->dev, "failed to register pincontrol\n");
+		err = PTR_ERR(dpaux->pinctrl);
+		goto err_pm_disable;
+	}
+#endif
+	/* enable and clear all interrupts */
+	value = DPAUX_INTR_AUX_DONE | DPAUX_INTR_IRQ_EVENT |
+		DPAUX_INTR_UNPLUG_EVENT | DPAUX_INTR_PLUG_EVENT;
+	tegra_dpaux_writel(dpaux, value, DPAUX_INTR_EN_AUX);
+	tegra_dpaux_writel(dpaux, value, DPAUX_INTR_AUX);
+
+	mutex_lock(&dpaux_lock);
+	list_add_tail(&dpaux->list, &dpaux_list);
+	mutex_unlock(&dpaux_lock);
+
+	return 0;
+
+err_pm_disable:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return err;
+}
+ +static int tegra_dpaux_remove(struct platform_device *pdev) +{ + struct tegra_dpaux *dpaux = platform_get_drvdata(pdev); + + cancel_work_sync(&dpaux->work); + + /* make sure pads are powered down when not in use */ + tegra_dpaux_pad_power_down(dpaux); + + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + mutex_lock(&dpaux_lock); + list_del(&dpaux->list); + mutex_unlock(&dpaux_lock); + + return 0; +} + +#ifdef CONFIG_PM +static int tegra_dpaux_suspend(struct device *dev) +{ + struct tegra_dpaux *dpaux = dev_get_drvdata(dev); + int err = 0; + + if (dpaux->rst) { + err = reset_control_assert(dpaux->rst); + if (err < 0) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + } + + usleep_range(1000, 2000); + + clk_disable_unprepare(dpaux->clk_parent); + clk_disable_unprepare(dpaux->clk); + + return err; +} + +static int tegra_dpaux_resume(struct device *dev) +{ + struct tegra_dpaux *dpaux = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(dpaux->clk); 
+ if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + return err; + } + + err = clk_prepare_enable(dpaux->clk_parent); + if (err < 0) { + dev_err(dev, "failed to enable parent clock: %d\n", err); + goto disable_clk; + } + + usleep_range(1000, 2000); + + if (dpaux->rst) { + err = reset_control_deassert(dpaux->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_parent; + } + + usleep_range(1000, 2000); + } + + return 0; + +disable_parent: + clk_disable_unprepare(dpaux->clk_parent); +disable_clk: + clk_disable_unprepare(dpaux->clk); + return err; +} +#endif + +static const struct dev_pm_ops tegra_dpaux_pm_ops = { + SET_RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL) +}; + +static const struct tegra_dpaux_soc tegra124_dpaux_soc = { + .cmh = 0x02, + .drvz = 0x04, + .drvi = 0x18, +}; + +static const struct tegra_dpaux_soc tegra210_dpaux_soc = { + .cmh = 0x02, + .drvz = 0x04, + .drvi = 0x30, +}; + +static const struct tegra_dpaux_soc tegra194_dpaux_soc = { + .cmh = 0x02, + .drvz = 0x04, + .drvi = 0x2c, +}; + +static const struct of_device_id tegra_dpaux_of_match[] = { + { .compatible = "nvidia,tegra194-dpaux", .data = &tegra194_dpaux_soc }, + { .compatible = "nvidia,tegra186-dpaux", .data = &tegra210_dpaux_soc }, + { .compatible = "nvidia,tegra210-dpaux", .data = &tegra210_dpaux_soc }, + { .compatible = "nvidia,tegra124-dpaux", .data = &tegra124_dpaux_soc }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_dpaux_of_match); + +struct platform_driver tegra_dpaux_driver = { + .driver = { + .name = "tegra-dpaux", + .of_match_table = tegra_dpaux_of_match, + .pm = &tegra_dpaux_pm_ops, + }, + .probe = tegra_dpaux_probe, + .remove = tegra_dpaux_remove, +}; + +struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np) +{ + struct tegra_dpaux *dpaux; + + mutex_lock(&dpaux_lock); + + list_for_each_entry(dpaux, &dpaux_list, list) + if (np == dpaux->dev->of_node) { + mutex_unlock(&dpaux_lock); + return 
&dpaux->aux; + } + + mutex_unlock(&dpaux_lock); + + return NULL; +} + +int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + unsigned long timeout; + int err; + + aux->drm_dev = output->connector.dev; + err = drm_dp_aux_register(aux); + if (err < 0) + return err; + + output->connector.polled = DRM_CONNECTOR_POLL_HPD; + dpaux->output = output; + + if (output->panel) { + enum drm_connector_status status; + + if (dpaux->vdd) { + err = regulator_enable(dpaux->vdd); + if (err < 0) + return err; + } + + timeout = jiffies + msecs_to_jiffies(250); + + while (time_before(jiffies, timeout)) { + status = drm_dp_aux_detect(aux); + + if (status == connector_status_connected) + break; + + usleep_range(1000, 2000); + } + + if (status != connector_status_connected) + return -ETIMEDOUT; + } + + enable_irq(dpaux->irq); + return 0; +} + +int drm_dp_aux_detach(struct drm_dp_aux *aux) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + unsigned long timeout; + int err; + + drm_dp_aux_unregister(aux); + disable_irq(dpaux->irq); + + if (dpaux->output->panel) { + enum drm_connector_status status; + + if (dpaux->vdd) { + err = regulator_disable(dpaux->vdd); + if (err < 0) + return err; + } + + timeout = jiffies + msecs_to_jiffies(250); + + while (time_before(jiffies, timeout)) { + status = drm_dp_aux_detect(aux); + + if (status == connector_status_disconnected) + break; + + usleep_range(1000, 2000); + } + + if (status != connector_status_disconnected) + return -ETIMEDOUT; + + dpaux->output = NULL; + } + + return 0; +} + +enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + u32 value; + + value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXSTAT); + + if (value & DPAUX_DP_AUXSTAT_HPD_STATUS) + return connector_status_connected; + + return connector_status_disconnected; +} + +int drm_dp_aux_enable(struct drm_dp_aux *aux) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); 
+ + return tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_AUX); +} + +int drm_dp_aux_disable(struct drm_dp_aux *aux) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + + tegra_dpaux_pad_power_down(dpaux); + + return 0; +} diff --git a/drivers/gpu/drm/grate/dpaux.h b/drivers/gpu/drm/grate/dpaux.h new file mode 100644 index 0000000000000..5eced10fad37d --- /dev/null +++ b/drivers/gpu/drm/grate/dpaux.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013 NVIDIA Corporation + */ + +#ifndef DRM_TEGRA_DPAUX_H +#define DRM_TEGRA_DPAUX_H + +#define DPAUX_CTXSW 0x00 + +#define DPAUX_INTR_EN_AUX 0x01 +#define DPAUX_INTR_AUX 0x05 +#define DPAUX_INTR_AUX_DONE (1 << 3) +#define DPAUX_INTR_IRQ_EVENT (1 << 2) +#define DPAUX_INTR_UNPLUG_EVENT (1 << 1) +#define DPAUX_INTR_PLUG_EVENT (1 << 0) + +#define DPAUX_DP_AUXDATA_WRITE(x) (0x09 + ((x) << 2)) +#define DPAUX_DP_AUXDATA_READ(x) (0x19 + ((x) << 2)) +#define DPAUX_DP_AUXADDR 0x29 + +#define DPAUX_DP_AUXCTL 0x2d +#define DPAUX_DP_AUXCTL_TRANSACTREQ (1 << 16) +#define DPAUX_DP_AUXCTL_CMD_AUX_RD (9 << 12) +#define DPAUX_DP_AUXCTL_CMD_AUX_WR (8 << 12) +#define DPAUX_DP_AUXCTL_CMD_MOT_RQ (6 << 12) +#define DPAUX_DP_AUXCTL_CMD_MOT_RD (5 << 12) +#define DPAUX_DP_AUXCTL_CMD_MOT_WR (4 << 12) +#define DPAUX_DP_AUXCTL_CMD_I2C_RQ (2 << 12) +#define DPAUX_DP_AUXCTL_CMD_I2C_RD (1 << 12) +#define DPAUX_DP_AUXCTL_CMD_I2C_WR (0 << 12) +#define DPAUX_DP_AUXCTL_CMD_ADDRESS_ONLY (1 << 8) +#define DPAUX_DP_AUXCTL_CMDLEN(x) ((x) & 0xff) + +#define DPAUX_DP_AUXSTAT 0x31 +#define DPAUX_DP_AUXSTAT_HPD_STATUS (1 << 28) +#define DPAUX_DP_AUXSTAT_REPLY_TYPE_MASK (0xf0000) +#define DPAUX_DP_AUXSTAT_NO_STOP_ERROR (1 << 11) +#define DPAUX_DP_AUXSTAT_SINKSTAT_ERROR (1 << 10) +#define DPAUX_DP_AUXSTAT_RX_ERROR (1 << 9) +#define DPAUX_DP_AUXSTAT_TIMEOUT_ERROR (1 << 8) +#define DPAUX_DP_AUXSTAT_REPLY_MASK (0xff) + +#define DPAUX_DP_AUX_SINKSTAT_LO 0x35 +#define DPAUX_DP_AUX_SINKSTAT_HI 0x39 + +#define DPAUX_HPD_CONFIG 0x3d 
+#define DPAUX_HPD_CONFIG_UNPLUG_MIN_TIME(x) (((x) & 0xffff) << 16) +#define DPAUX_HPD_CONFIG_PLUG_MIN_TIME(x) ((x) & 0xffff) + +#define DPAUX_HPD_IRQ_CONFIG 0x41 +#define DPAUX_HPD_IRQ_CONFIG_MIN_LOW_TIME(x) ((x) & 0xffff) + +#define DPAUX_DP_AUX_CONFIG 0x45 + +#define DPAUX_HYBRID_PADCTL 0x49 +#define DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV (1 << 15) +#define DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV (1 << 14) +#define DPAUX_HYBRID_PADCTL_AUX_CMH(x) (((x) & 0x3) << 12) +#define DPAUX_HYBRID_PADCTL_AUX_DRVZ(x) (((x) & 0x7) << 8) +#define DPAUX_HYBRID_PADCTL_AUX_DRVI(x) (((x) & 0x3f) << 2) +#define DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV (1 << 1) +#define DPAUX_HYBRID_PADCTL_MODE_I2C (1 << 0) +#define DPAUX_HYBRID_PADCTL_MODE_AUX (0 << 0) + +#define DPAUX_HYBRID_SPARE 0x4d +#define DPAUX_HYBRID_SPARE_PAD_POWER_DOWN (1 << 0) + +#define DPAUX_SCRATCH_REG0 0x51 +#define DPAUX_SCRATCH_REG1 0x55 +#define DPAUX_SCRATCH_REG2 0x59 + +#endif diff --git a/drivers/gpu/drm/grate/drm.c b/drivers/gpu/drm/grate/drm.c new file mode 100644 index 0000000000000..1899212728bb3 --- /dev/null +++ b/drivers/gpu/drm/grate/drm.c @@ -0,0 +1,738 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012-2016 NVIDIA CORPORATION. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dc.h" +#include "drm.h" +#include "uapi.h" + +#define DRIVER_NAME "tegra" +#define DRIVER_DESC "NVIDIA Tegra graphics" +#define DRIVER_DATE "20120330" +#define DRIVER_MAJOR GRATE_KERNEL_DRM_VERSION +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 0 + +#define CARVEOUT_SZ SZ_64M + +static int tegra_atomic_check(struct drm_device *drm, + struct drm_atomic_state *state) +{ + int err; + + err = drm_atomic_helper_check(drm, state); + if (err < 0) + return err; + + return tegra_display_hub_atomic_check(drm, state); +} + +static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = { + .fb_create = tegra_fb_create, +#ifdef CONFIG_DRM_FBDEV_EMULATION + .output_poll_changed = drm_fb_helper_output_poll_changed, +#endif + .atomic_check = tegra_atomic_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static void tegra_atomic_post_commit(struct drm_device *drm, + struct drm_atomic_state *old_state) +{ + struct drm_crtc_state *old_crtc_state __maybe_unused; + struct drm_crtc *crtc; + unsigned int i; + + for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i) + tegra_crtc_atomic_post_commit(crtc, old_state); +} + +static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state) +{ + struct drm_device *drm = old_state->dev; + struct tegra_drm *tegra = drm->dev_private; + + if (tegra->hub) { + bool fence_cookie = dma_fence_begin_signalling(); + + drm_atomic_helper_commit_modeset_disables(drm, old_state); + tegra_display_hub_atomic_commit(drm, old_state); + drm_atomic_helper_commit_planes(drm, old_state, 0); + drm_atomic_helper_commit_modeset_enables(drm, old_state); + drm_atomic_helper_commit_hw_done(old_state); + dma_fence_end_signalling(fence_cookie); + drm_atomic_helper_wait_for_vblanks(drm, old_state); + drm_atomic_helper_cleanup_planes(drm, old_state); + } else { + 
drm_atomic_helper_commit_tail_rpm(old_state); + } + + tegra_atomic_post_commit(drm, old_state); +} + +static const struct drm_mode_config_helper_funcs +tegra_drm_mode_config_helpers = { + .atomic_commit_tail = tegra_atomic_commit_tail, +}; + +
+/*
+ * Per-open-file setup for the DRM device.
+ *
+ * Allocates the per-file private data, creates one scheduler entity for
+ * every channel on tegra->channels (indexed by the host1x channel ID) and
+ * reserves a context ID (>= 1) in tegra->drm_contexts.
+ *
+ * Returns 0 on success or a negative error code; on failure everything
+ * allocated here is released again and filp->driver_priv is cleared.
+ */
+static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
+{
+	struct host1x *host = dev_get_drvdata(drm->dev->parent);
+	struct tegra_drm *tegra = drm->dev_private;
+	struct tegra_drm_channel *drm_channel;
+	struct drm_gpu_scheduler *sched;
+	struct host1x_channel *channel;
+	struct tegra_drm_file *fpriv;
+	int err;
+
+	fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
+	if (!fpriv)
+		return -ENOMEM;
+
+	filp->driver_priv = fpriv;
+
+	/* each host1x channel has its own per-context job-queue */
+	fpriv->sched_entities = kcalloc(host->soc->nb_channels,
+					sizeof(*fpriv->sched_entities),
+					GFP_KERNEL);
+	if (!fpriv->sched_entities) {
+		err = -ENOMEM;
+		goto err_free_fpriv;
+	}
+
+	list_for_each_entry(drm_channel, &tegra->channels, list) {
+		channel = drm_channel->channel;
+		sched = &drm_channel->sched;
+
+		/*
+		 * NOTE(review): &sched is the address of a local variable;
+		 * confirm drm_sched_entity_init() does not keep the list
+		 * pointer for a single-scheduler entity.
+		 */
+		err = drm_sched_entity_init(&fpriv->sched_entities[channel->id],
+					    DRM_SCHED_PRIORITY_NORMAL, &sched,
+					    1, NULL);
+		if (err)
+			goto err_destroy_sched_entities;
+	}
+
+	/* Preload outside the spinlock so the allocation below can't sleep. */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&tegra->context_lock);
+
+	err = idr_alloc(&tegra->drm_contexts, fpriv, 1, 0, GFP_ATOMIC);
+
+	spin_unlock(&tegra->context_lock);
+	idr_preload_end();
+
+	if (err < 0)
+		goto err_destroy_sched_entities;
+
+	fpriv->drm_context = err;
+
+	idr_init(&fpriv->uapi_v1_contexts);
+
+	return 0;
+
+err_destroy_sched_entities:
+	/*
+	 * Walk backwards from the current position: this tears down exactly
+	 * the entities initialized so far (all of them if the loop finished).
+	 */
+	list_for_each_entry_continue_reverse(drm_channel, &tegra->channels,
+					     list) {
+		channel = drm_channel->channel;
+		drm_sched_entity_destroy(&fpriv->sched_entities[channel->id]);
+	}
+
+	/* The entity array is a separate allocation and must not be leaked. */
+	kfree(fpriv->sched_entities);
+
+err_free_fpriv:
+	/* Do not leave a dangling pointer behind in the file. */
+	filp->driver_priv = NULL;
+	kfree(fpriv);
+
+	return err;
+}
+ +static const struct drm_ioctl_desc tegra_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_uapi_gem_create, \ + DRM_RENDER_ALLOW), \ + 
DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_uapi_gem_mmap, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_uapi_syncpt_read, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_uapi_syncpt_incr, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_WAIT, tegra_uapi_syncpt_wait, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_OPEN_CHANNEL, tegra_uapi_open_channel, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_CLOSE_CHANNEL, tegra_uapi_close_channel, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT, tegra_uapi_get_syncpt, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_SUBMIT, tegra_uapi_v1_submit, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT_BASE, tegra_uapi_get_syncpt_base, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_TILING, tegra_uapi_gem_set_tiling, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_TILING, tegra_uapi_gem_get_tiling, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_FLAGS, tegra_uapi_gem_set_flags, \ + DRM_RENDER_ALLOW), \ + DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_FLAGS, tegra_uapi_gem_get_flags, \ + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_GEM_CPU_PREP, tegra_uapi_gem_cpu_prep, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_SUBMIT_V2, tegra_uapi_v2_submit, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_VERSION, tegra_uapi_version, + DRM_RENDER_ALLOW), +}; + +static const struct file_operations tegra_drm_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .mmap = tegra_drm_mmap, + .poll = drm_poll, + .read = drm_read, + .compat_ioctl = drm_compat_ioctl, + .llseek = noop_llseek, +}; + +static int tegra_uapi_v1_contexts_cleanup(int id, void *p, void *data) +{ + struct tegra_drm_context_v1 *context = p; + tegra_uapi_v1_free_context(context); + return 0; +} + +static void tegra_drm_postclose(struct drm_device *drm, struct drm_file *file) +{ + struct tegra_drm_file *fpriv = 
file->driver_priv; + struct tegra_drm *tegra = drm->dev_private; + struct tegra_drm_channel *drm_channel; + struct host1x_channel *channel; + int val, err; + + spin_lock(&tegra->context_lock); + idr_remove(&tegra->drm_contexts, fpriv->drm_context); + spin_unlock(&tegra->context_lock); + + list_for_each_entry(drm_channel, &tegra->channels, list) { + channel = drm_channel->channel; + drm_sched_entity_destroy(&fpriv->sched_entities[channel->id]); + } + + /* job's completion is asynchronous, see tegra_drm_work_free_job() */ + err = readx_poll_timeout(atomic_read, &fpriv->num_active_jobs, + val, val == 0, 100000, 30 * 1000 * 1000); + WARN_ON_ONCE(err); + + spin_lock(&tegra->context_lock); + idr_for_each(&fpriv->uapi_v1_contexts, tegra_uapi_v1_contexts_cleanup, + NULL); + spin_unlock(&tegra->context_lock); + + idr_destroy(&fpriv->uapi_v1_contexts); + + kfree(fpriv->sched_entities); + kfree(fpriv); +} + +#ifdef CONFIG_DEBUG_FS +static int tegra_debugfs_framebuffers(struct seq_file *s, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)s->private; + struct drm_device *drm = node->minor->dev; + struct drm_framebuffer *fb; + + mutex_lock(&drm->mode_config.fb_lock); + + list_for_each_entry(fb, &drm->mode_config.fb_list, head) { + seq_printf(s, "%3d: user size: %d x %d, depth %d, %d bpp, refcount %d\n", + fb->base.id, fb->width, fb->height, + fb->format->depth, + fb->format->cpp[0] * 8, + drm_framebuffer_read_refcount(fb)); + } + + mutex_unlock(&drm->mode_config.fb_lock); + + return 0; +} + +static int tegra_debugfs_iova(struct seq_file *s, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)s->private; + struct drm_device *drm = node->minor->dev; + struct tegra_drm *tegra = drm->dev_private; + struct drm_printer p = drm_seq_file_printer(s); + + if (tegra->domain) { + mutex_lock(&tegra->mm_lock); + drm_mm_print(&tegra->mm, &p); + mutex_unlock(&tegra->mm_lock); + } + + return 0; +} + +static struct drm_info_list tegra_debugfs_list[] = { + { 
"framebuffers", tegra_debugfs_framebuffers, 0 }, + { "iova", tegra_debugfs_iova, 0 }, +}; + +static void tegra_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(tegra_debugfs_list, + ARRAY_SIZE(tegra_debugfs_list), + minor->debugfs_root, minor); +} +#endif + +static struct drm_driver tegra_drm_driver = { + .driver_features = DRIVER_MODESET | DRIVER_GEM | + DRIVER_ATOMIC | DRIVER_RENDER | + DRIVER_SYNCOBJ, + .open = tegra_drm_open, + .postclose = tegra_drm_postclose, + .lastclose = drm_fb_helper_lastclose, + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = tegra_debugfs_init, +#endif + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = tegra_gem_prime_import, + + .dumb_create = tegra_bo_dumb_create, + + .ioctls = tegra_drm_ioctls, + .num_ioctls = ARRAY_SIZE(tegra_drm_ioctls), + .fops = &tegra_drm_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma) +{ + struct iova *alloc; + void *virt; + gfp_t gfp; + int err; + + if (!tegra->carveout.inited) + return NULL; + + if (tegra->domain) + size = iova_align(&tegra->carveout.domain, size); + else + size = PAGE_ALIGN(size); + + gfp = GFP_KERNEL | __GFP_ZERO; + if (!tegra->domain) { + /* + * Many units only support 32-bit addresses, even on 64-bit + * SoCs. If there is no IOMMU to translate into a 32-bit IO + * virtual address space, force allocations to be in the + * lower 32-bit range. + */ + gfp |= GFP_DMA; + } + + virt = (void *)__get_free_pages(gfp, get_order(size)); + if (!virt) + return ERR_PTR(-ENOMEM); + + if (!tegra->domain) { + /* + * If IOMMU is disabled, devices address physical memory + * directly. 
+ */ + *dma = virt_to_phys(virt); + return virt; + } + + alloc = alloc_iova(&tegra->carveout.domain, + size >> tegra->carveout.shift, + tegra->carveout.limit, true); + if (!alloc) { + err = -EBUSY; + goto free_pages; + } + + *dma = iova_dma_addr(&tegra->carveout.domain, alloc); + err = iommu_map(tegra->domain, *dma, virt_to_phys(virt), + size, IOMMU_READ | IOMMU_WRITE); + if (err < 0) + goto free_iova; + + return virt; + +free_iova: + __free_iova(&tegra->carveout.domain, alloc); +free_pages: + free_pages((unsigned long)virt, get_order(size)); + + return ERR_PTR(err); +} + +void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, + dma_addr_t dma) +{ + if (tegra->domain) + size = iova_align(&tegra->carveout.domain, size); + else + size = PAGE_ALIGN(size); + + if (tegra->domain) { + iommu_unmap(tegra->domain, dma, size); + free_iova(&tegra->carveout.domain, + iova_pfn(&tegra->carveout.domain, dma)); + } + + free_pages((unsigned long)virt, get_order(size)); +} + +static int host1x_drm_probe(struct host1x_device *dev) +{ + struct drm_driver *driver = &tegra_drm_driver; + struct tegra_drm *tegra; + struct drm_device *drm; + int err; + + drm = drm_dev_alloc(driver, &dev->dev); + if (IS_ERR(drm)) + return PTR_ERR(drm); + + tegra = kzalloc(sizeof(*tegra), GFP_KERNEL); + if (!tegra) { + err = -ENOMEM; + goto put; + } + + if (iommu_present(&platform_bus_type)) { + tegra->domain = iommu_domain_alloc(&platform_bus_type); + if (!tegra->domain) { + err = -ENOMEM; + goto free; + } + + err = iova_cache_get(); + if (err < 0) + goto domain; + } + + INIT_LIST_HEAD(&tegra->clients); + INIT_LIST_HEAD(&tegra->channels); + INIT_LIST_HEAD(&tegra->mm_eviction_list); + + mutex_init(&tegra->mm_lock); + idr_init_base(&tegra->drm_contexts, 1); + spin_lock_init(&tegra->context_lock); + init_completion(&tegra->gart_free_up); + + dev_set_drvdata(&dev->dev, drm); + drm->dev_private = tegra; + tegra->drm = drm; + + drm_mode_config_init(drm); + + drm->mode_config.min_width = 0; + 
drm->mode_config.min_height = 0; + + drm->mode_config.max_width = 4096; + drm->mode_config.max_height = 4096; + + drm->mode_config.allow_fb_modifiers = true; + + drm->mode_config.normalize_zpos = true; + + drm->mode_config.funcs = &tegra_drm_mode_config_funcs; + drm->mode_config.helper_private = &tegra_drm_mode_config_helpers; + + err = tegra_drm_fb_prepare(drm); + if (err < 0) + goto config; + + drm_kms_helper_poll_init(drm); + + err = host1x_device_init(dev); + if (err < 0) + goto fbdev; + + if (tegra->domain) { + u64 carveout_start, carveout_end, gem_start, gem_end; + u64 dma_mask = dma_get_mask(&dev->dev); + dma_addr_t start, end; + unsigned long order; + bool need_carveout; + + start = tegra->domain->geometry.aperture_start & dma_mask; + end = tegra->domain->geometry.aperture_end & dma_mask; + + if (of_machine_is_compatible("nvidia,tegra20")) + tegra->has_gart = true; + + /* + * Carveout isn't needed on pre-Tegra124, especially on Tegra20 + * as it uses GART that has very limited amount of IOVA space. 
+ */ + if (of_machine_is_compatible("nvidia,tegra20") || + of_machine_is_compatible("nvidia,tegra30") || + of_machine_is_compatible("nvidia,tegra114")) + need_carveout = false; + else + need_carveout = true; + + gem_start = start; + gem_end = end; + + if (need_carveout) { + gem_end -= CARVEOUT_SZ; + carveout_start = gem_end + 1; + carveout_end = end; + + order = __ffs(tegra->domain->pgsize_bitmap); + init_iova_domain(&tegra->carveout.domain, 1UL << order, + carveout_start >> order); + + tegra->carveout.shift = + iova_shift(&tegra->carveout.domain); + tegra->carveout.limit = + carveout_end >> tegra->carveout.shift; + + tegra->carveout.inited = 1; + } + + drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1); + + DRM_DEBUG_DRIVER("IOMMU apertures:\n"); + DRM_DEBUG_DRIVER(" GEM: %#llx-%#llx\n", gem_start, gem_end); + + if (need_carveout) + DRM_DEBUG_DRIVER(" Carveout: %#llx-%#llx\n", + carveout_start, carveout_end); + } + + if (tegra->hub) { + err = tegra_display_hub_prepare(tegra->hub); + if (err < 0) + goto device; + } + + /* syncpoints are used for full 32-bit hardware VBLANK counters */ + drm->max_vblank_count = 0xffffffff; + + err = drm_vblank_init(drm, drm->mode_config.num_crtc); + if (err < 0) + goto hub; + + drm_mode_config_reset(drm); + + err = drm_aperture_remove_framebuffers(false, &tegra_drm_driver); + if (err < 0) + goto hub; + + err = tegra_drm_fb_init(drm); + if (err < 0) + goto hub; + + err = drm_dev_register(drm, 0); + if (err < 0) + goto fb; + + return 0; + +fb: + tegra_drm_fb_exit(drm); +hub: + if (tegra->hub) + tegra_display_hub_cleanup(tegra->hub); +device: + if (tegra->domain) { + drm_mm_takedown(&tegra->mm); + if (tegra->carveout.inited) + put_iova_domain(&tegra->carveout.domain); + iova_cache_put(); + } + + host1x_device_exit(dev); +fbdev: + drm_kms_helper_poll_fini(drm); + tegra_drm_fb_free(drm); +config: + drm_mode_config_cleanup(drm); + + idr_destroy(&tegra->drm_contexts); + mutex_destroy(&tegra->mm_lock); +domain: + if (tegra->domain) 
+ iommu_domain_free(tegra->domain); +free: + kfree(tegra); +put: + drm_dev_put(drm); + return err; +} + +static int host1x_drm_remove(struct host1x_device *dev) +{ + struct drm_device *drm = dev_get_drvdata(&dev->dev); + struct tegra_drm *tegra = drm->dev_private; + int err; + + drm_dev_unregister(drm); + + drm_kms_helper_poll_fini(drm); + tegra_drm_fb_exit(drm); + drm_atomic_helper_shutdown(drm); + drm_mode_config_cleanup(drm); + + if (tegra->hub) + tegra_display_hub_cleanup(tegra->hub); + + err = host1x_device_exit(dev); + if (err < 0) + dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err); + + if (tegra->domain) { + drm_mm_takedown(&tegra->mm); + if (tegra->carveout.inited) + put_iova_domain(&tegra->carveout.domain); + iova_cache_put(); + iommu_domain_free(tegra->domain); + } + + idr_destroy(&tegra->drm_contexts); + mutex_destroy(&tegra->mm_lock); + + kfree(tegra); + drm_dev_put(drm); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int host1x_drm_suspend(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + + return drm_mode_config_helper_suspend(drm); +} + +static int host1x_drm_resume(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + + return drm_mode_config_helper_resume(drm); +} +#endif + +static SIMPLE_DEV_PM_OPS(host1x_drm_pm_ops, host1x_drm_suspend, + host1x_drm_resume); + +static const struct of_device_id host1x_drm_subdevs[] = { + { .compatible = "nvidia,tegra20-dc", }, + { .compatible = "nvidia,tegra20-hdmi", }, + { .compatible = "nvidia,tegra20-gr2d", }, + { .compatible = "nvidia,tegra20-gr3d", }, + { .compatible = "nvidia,tegra30-dc", }, + { .compatible = "nvidia,tegra30-hdmi", }, + { .compatible = "nvidia,tegra30-gr2d", }, + { .compatible = "nvidia,tegra30-gr3d", }, + { .compatible = "nvidia,tegra114-dc", }, + { .compatible = "nvidia,tegra114-dsi", }, + { .compatible = "nvidia,tegra114-hdmi", }, + { .compatible = "nvidia,tegra114-gr2d", }, + { .compatible = "nvidia,tegra114-gr3d", }, + { 
.compatible = "nvidia,tegra124-dc", }, + { .compatible = "nvidia,tegra124-sor", }, + { .compatible = "nvidia,tegra124-hdmi", }, + { .compatible = "nvidia,tegra124-dsi", }, + { .compatible = "nvidia,tegra124-vic", }, + { .compatible = "nvidia,tegra132-dsi", }, + { .compatible = "nvidia,tegra210-dc", }, + { .compatible = "nvidia,tegra210-dsi", }, + { .compatible = "nvidia,tegra210-sor", }, + { .compatible = "nvidia,tegra210-sor1", }, + { .compatible = "nvidia,tegra210-vic", }, + { .compatible = "nvidia,tegra186-display", }, + { .compatible = "nvidia,tegra186-dc", }, + { .compatible = "nvidia,tegra186-sor", }, + { .compatible = "nvidia,tegra186-sor1", }, + { .compatible = "nvidia,tegra186-vic", }, + { .compatible = "nvidia,tegra194-display", }, + { .compatible = "nvidia,tegra194-dc", }, + { .compatible = "nvidia,tegra194-sor", }, + { .compatible = "nvidia,tegra194-vic", }, + { /* sentinel */ } +}; + +static struct host1x_driver host1x_drm_driver = { + .driver = { + .name = "drm", + .pm = &host1x_drm_pm_ops, + }, + .probe = host1x_drm_probe, + .remove = host1x_drm_remove, + .subdevs = host1x_drm_subdevs, +}; + +static struct platform_driver * const drivers[] = { + &tegra_display_hub_driver, + &tegra_dc_driver, + &tegra_hdmi_driver, + &tegra_dsi_driver, + &tegra_dpaux_driver, + &tegra_sor_driver, + &tegra_gr2d_driver, + &tegra_gr3d_driver, + &tegra_vic_driver, +}; + +static int __init host1x_drm_init(void) +{ + int err; + + err = host1x_driver_register(&host1x_drm_driver); + if (err < 0) + return err; + + err = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); + if (err < 0) + goto unregister_host1x; + + return 0; + +unregister_host1x: + host1x_driver_unregister(&host1x_drm_driver); + return err; +} +module_init(host1x_drm_init); + +static void __exit host1x_drm_exit(void) +{ + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); + host1x_driver_unregister(&host1x_drm_driver); +} +module_exit(host1x_drm_exit); + +MODULE_AUTHOR("Thierry Reding "); 
+MODULE_DESCRIPTION("NVIDIA Tegra DRM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/grate/drm.h b/drivers/gpu/drm/grate/drm.h new file mode 100644 index 0000000000000..2f91c3d1939a6 --- /dev/null +++ b/drivers/gpu/drm/grate/drm.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012-2013 NVIDIA CORPORATION. All rights reserved. + */ + +#ifndef HOST1X_DRM_H +#define HOST1X_DRM_H 1 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gem.h" +#include "channel.h" +#include "client.h" +#include "hub.h" +#include "trace.h" + +#define GRATE_KERNEL_DRM_VERSION (99991 + 6) + +/* XXX move to include/uapi/drm/drm_fourcc.h? */ +#define DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT BIT_ULL(22) + +struct reset_control; + +#ifdef CONFIG_DRM_FBDEV_EMULATION +struct tegra_fbdev { + struct drm_fb_helper base; + struct drm_framebuffer *fb; +}; +#endif + +struct tegra_drm { + struct drm_device *drm; + + struct iommu_domain *domain; + struct iommu_group *group; + struct mutex mm_lock; + struct drm_mm mm; + struct list_head mm_eviction_list; + + struct { + struct iova_domain domain; + unsigned long shift; + unsigned long limit; + bool inited : 1; + } carveout; + + struct list_head clients; + struct list_head channels; + + spinlock_t context_lock; + struct idr drm_contexts; + +#ifdef CONFIG_DRM_FBDEV_EMULATION + struct tegra_fbdev *fbdev; +#endif + + unsigned int hmask, vmask; + unsigned int pitch_align; + unsigned int num_crtcs; + + struct tegra_display_hub *hub; + + struct completion gart_free_up; + + bool has_gart; +}; + +struct tegra_drm_file { + struct drm_sched_entity *sched_entities; + struct idr uapi_v1_contexts; + atomic_t num_active_jobs; + u64 drm_context; +}; + +int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm); +int 
tegra_drm_exit(struct tegra_drm *tegra); + +void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *iova); +void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, + dma_addr_t iova); + +struct cec_notifier; + +struct tegra_output { + struct device_node *of_node; + struct device *dev; + + struct drm_bridge *bridge; + struct drm_panel *panel; + struct i2c_adapter *ddc; + const struct edid *edid; + struct cec_notifier *cec; + unsigned int hpd_irq; + struct gpio_desc *hpd_gpio; + + struct drm_encoder encoder; + struct drm_connector connector; +}; + +static inline struct tegra_output *encoder_to_output(struct drm_encoder *e) +{ + return container_of(e, struct tegra_output, encoder); +} + +static inline struct tegra_output *connector_to_output(struct drm_connector *c) +{ + return container_of(c, struct tegra_output, connector); +} + +/* from output.c */ +int tegra_output_probe(struct tegra_output *output); +void tegra_output_remove(struct tegra_output *output); +int tegra_output_init(struct drm_device *drm, struct tegra_output *output); +void tegra_output_exit(struct tegra_output *output); +void tegra_output_find_possible_crtcs(struct tegra_output *output, + struct drm_device *drm); +int tegra_output_suspend(struct tegra_output *output); +int tegra_output_resume(struct tegra_output *output); + +int tegra_output_connector_get_modes(struct drm_connector *connector); +enum drm_connector_status +tegra_output_connector_detect(struct drm_connector *connector, bool force); +void tegra_output_connector_destroy(struct drm_connector *connector); + +/* from dpaux.c */ +struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np); +enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux); +int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output); +int drm_dp_aux_detach(struct drm_dp_aux *aux); +int drm_dp_aux_enable(struct drm_dp_aux *aux); +int drm_dp_aux_disable(struct drm_dp_aux *aux); + +/* from fb.c */ +struct 
tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer, + unsigned int index); +bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer); +int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, + struct tegra_bo_tiling *tiling); +struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, + struct drm_file *file, + const struct drm_mode_fb_cmd2 *cmd); +int tegra_drm_fb_prepare(struct drm_device *drm); +void tegra_drm_fb_free(struct drm_device *drm); +int tegra_drm_fb_init(struct drm_device *drm); +void tegra_drm_fb_exit(struct drm_device *drm); + +extern struct platform_driver tegra_display_hub_driver; +extern struct platform_driver tegra_dc_driver; +extern struct platform_driver tegra_hdmi_driver; +extern struct platform_driver tegra_dsi_driver; +extern struct platform_driver tegra_dpaux_driver; +extern struct platform_driver tegra_sor_driver; +extern struct platform_driver tegra_gr2d_driver; +extern struct platform_driver tegra_gr3d_driver; +extern struct platform_driver tegra_vic_driver; + +#endif /* HOST1X_DRM_H */ diff --git a/drivers/gpu/drm/grate/dsi.c b/drivers/gpu/drm/grate/dsi.c new file mode 100644 index 0000000000000..f6d13602e1b78 --- /dev/null +++ b/drivers/gpu/drm/grate/dsi.c @@ -0,0 +1,1700 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 NVIDIA Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include