Tuesday, April 27, 2021

Merge tag 'arm-apple-m1-5.13' of Linux

@@ -0,0 +1,64 @@

+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause

+%YAML 1.2

+---

+$id: http://devicetree.org/schemas/arm/apple.yaml#

+$schema: http://devicetree.org/meta-schemas/core.yaml#

+

+title: Apple ARM Machine Device Tree Bindings

+

+maintainers:

+ - Hector Martin <marcan@marcan.st>

+

+description: |

+ ARM platforms using SoCs designed by Apple Inc., branded "Apple Silicon".

+

+ This currently includes devices based on the "M1" SoC, starting with the

+ three Mac models released in late 2020:

+

+ - Mac mini (M1, 2020)

+ - MacBook Pro (13-inch, M1, 2020)

+ - MacBook Air (M1, 2020)

+

+ The compatible property should follow this format:

+

+ compatible = "apple,<targettype>", "apple,<socid>", "apple,arm-platform";

+

+ <targettype> represents the board/device and comes from the `target-type`

+ property of the root node of the Apple Device Tree, lowercased. It can be

+ queried on macOS using the following command:

+

+ $ ioreg -d2 -l | grep target-type

+

+ <socid> is the lowercased SoC ID. Apple uses at least *five* different

+ names for their SoCs:

+

+ - Marketing name ("M1")

+ - Internal name ("H13G")

+ - Codename ("Tonga")

+ - SoC ID ("T8103")

+ - Package/IC part number ("APL1102")

+

+ Devicetrees should use the lowercased SoC ID, to avoid confusion if

+ multiple SoCs share the same marketing name. This can be obtained from

+ the `compatible` property of the arm-io node of the Apple Device Tree,

+ which can be queried as follows on macOS:

+

+ $ ioreg -n arm-io | grep compatible

+

+properties:

+ $nodename:

+ const: "/"

+ compatible:

+ oneOf:

+ - description: Apple M1 SoC based platforms

+ items:

+ - enum:

+ - apple,j274 # Mac mini (M1, 2020)

+ - apple,j293 # MacBook Pro (13-inch, M1, 2020)

+ - apple,j313 # MacBook Air (M1, 2020)

+ - const: apple,t8103

+ - const: apple,arm-platform

+

+additionalProperties: true

+

+...

@@ -85,6 +85,8 @@ properties:

compatible:

enum:

+ - apple,icestorm

+ - apple,firestorm

- arm,arm710t

- arm,arm720t

- arm,arm740t

@@ -54,6 +54,7 @@ properties:

compatible:

items:

- enum:

+ - apple,simple-framebuffer

- allwinner,simple-framebuffer

- amlogic,simple-framebuffer

- const: simple-framebuffer

@@ -84,9 +85,13 @@ properties:

Format of the framebuffer:

* `a8b8g8r8` - 32-bit pixels, d[31:24]=a, d[23:16]=b, d[15:8]=g, d[7:0]=r

* `r5g6b5` - 16-bit pixels, d[15:11]=r, d[10:5]=g, d[4:0]=b

+ * `x2r10g10b10` - 32-bit pixels, d[29:20]=r, d[19:10]=g, d[9:0]=b

+ * `x8r8g8b8` - 32-bit pixels, d[23:16]=r, d[15:8]=g, d[7:0]=b

enum:

- a8b8g8r8

- r5g6b5

+ - x2r10g10b10

+ - x8r8g8b8

display:

$ref: /schemas/types.yaml#/definitions/phandle

@@ -0,0 +1,88 @@

+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)

+%YAML 1.2

+---

+$id: http://devicetree.org/schemas/interrupt-controller/apple,aic.yaml#

+$schema: http://devicetree.org/meta-schemas/core.yaml#

+

+title: Apple Interrupt Controller

+

+maintainers:

+ - Hector Martin <marcan@marcan.st>

+

+description: |

+ The Apple Interrupt Controller is a simple interrupt controller present on

+ Apple ARM SoC platforms, including various iPhone and iPad devices and the

+ "Apple Silicon" Macs.

+

+ It provides the following features:

+

+ - Level-triggered hardware IRQs wired to SoC blocks

+ - Single mask bit per IRQ

+ - Per-IRQ affinity setting

+ - Automatic masking on event delivery (auto-ack)

+ - Software triggering (ORed with hw line)

+ - 2 per-CPU IPIs (meant as "self" and "other", but they are interchangeable

+ if not symmetric)

+ - Automatic prioritization (single event/ack register per CPU, lower IRQs =

+ higher priority)

+ - Automatic masking on ack

+ - Default "this CPU" register view and explicit per-CPU views

+

+ This device also represents the FIQ interrupt sources on platforms using AIC,

+ which do not go through a discrete interrupt controller.

+

+allOf:

+ - $ref: /schemas/interrupt-controller.yaml#

+

+properties:

+ compatible:

+ items:

+ - const: apple,t8103-aic

+ - const: apple,aic

+

+ interrupt-controller: true

+

+ '#interrupt-cells':

+ const: 3

+ description: |

+ The 1st cell contains the interrupt type:

+ - 0: Hardware IRQ

+ - 1: FIQ

+

+ The 2nd cell contains the interrupt number.

+ - HW IRQs: interrupt number

+ - FIQs:

+ - 0: physical HV timer

+ - 1: virtual HV timer

+ - 2: physical guest timer

+ - 3: virtual guest timer

+

+ The 3rd cell contains the interrupt flags. This is normally

+ IRQ_TYPE_LEVEL_HIGH (4).

+

+ reg:

+ description: |

+ Specifies base physical address and size of the AIC registers.

+ maxItems: 1

+

+required:

+ - compatible

+ - '#interrupt-cells'

+ - interrupt-controller

+ - reg

+

+additionalProperties: false

+

+examples:

+ - |

+ soc {

+ #address-cells = <2>;

+ #size-cells = <2>;

+

+ aic: interrupt-controller@23b100000 {

+ compatible = "apple,t8103-aic", "apple,aic";

+ #interrupt-cells = <3>;

+ interrupt-controller;

+ reg = <0x2 0x3b100000 0x0 0x8000>;

+ };

+ };

@@ -34,11 +34,30 @@ properties:

- arm,armv8-timer

interrupts:

+ minItems: 1

+ maxItems: 5

items:

- description: secure timer irq

- description: non-secure timer irq

- description: virtual timer irq

- description: hypervisor timer irq

+ - description: hypervisor virtual timer irq

+

+ interrupt-names:

+ oneOf:

+ - minItems: 2

+ items:

+ - const: phys

+ - const: virt

+ - const: hyp-phys

+ - const: hyp-virt

+ - minItems: 3

+ items:

+ - const: sec-phys

+ - const: phys

+ - const: virt

+ - const: hyp-phys

+ - const: hyp-virt

clock-frequency:

description: The frequency of the main counter, in Hz. Should be present

@@ -103,6 +103,8 @@ patternProperties:

description: Anvo-Systems Dresden GmbH

"^apm,.*":

description: Applied Micro Circuits Corporation (APM)

+ "^apple,.*":

+ description: Apple Inc.

"^aptina,.*":

description: Aptina Imaging

"^arasan,.*":

@@ -146,6 +146,362 @@ There are also equivalents to memcpy. The ins() and

outs() functions copy bytes, words or longs to the given

port.

+__iomem pointer tokens

+======================

+

+The data type for an MMIO address is an ``__iomem`` qualified pointer, such as

+``void __iomem *reg``. On most architectures it is a regular pointer that

+points to a virtual memory address and can be offset or dereferenced, but in

+portable code, it must only be passed from and to functions that explicitly

+operate on an ``__iomem`` token, in particular the ioremap() and

+readl()/writel() functions. The 'sparse' semantic code checker can be used to

+verify that this is done correctly.
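
A minimal sketch of that contract, with an invented device and register offset; the ``__iomem`` token goes from ioremap() straight into readl()/writel() and is never dereferenced::

    #include <linux/errno.h>
    #include <linux/io.h>

    #define MYDEV_CTRL	0x00	/* hypothetical control register offset */

    static int mydev_enable(phys_addr_t base, size_t size)
    {
            void __iomem *regs;

            regs = ioremap(base, size);	/* produces the __iomem token */
            if (!regs)
                    return -ENOMEM;

            writel(1, regs + MYDEV_CTRL);	/* offsetting the token is fine */
            iounmap(regs);
            return 0;
    }

Building with ``make C=1`` runs sparse, which flags any place where such a token is mixed with plain pointers.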

+

+While on most architectures, ioremap() creates a page table entry for an

+uncached virtual address pointing to the physical MMIO address, some

+architectures require special instructions for MMIO, and the ``__iomem`` pointer

+just encodes the physical address or an offsettable cookie that is interpreted

+by readl()/writel().

+

+Differences between I/O access functions

+========================================

+

+readq(), readl(), readw(), readb(), writeq(), writel(), writew(), writeb()

+

+ These are the most generic accessors, providing serialization against other

+ MMIO accesses and DMA accesses as well as fixed endianness for accessing

+ little-endian PCI devices and on-chip peripherals. Portable device drivers

+ should generally use these for any access to ``__iomem`` pointers.

+

+ Note that posted writes are not strictly ordered against a spinlock, see

+ Documentation/driver-api/io_ordering.rst.

+

+readq_relaxed(), readl_relaxed(), readw_relaxed(), readb_relaxed(),

+writeq_relaxed(), writel_relaxed(), writew_relaxed(), writeb_relaxed()

+

+ On architectures that require an expensive barrier for serializing against

+ DMA, these "relaxed" versions of the MMIO accessors only serialize against

+ each other, but contain a less expensive barrier operation. A device driver

+ might use these in a particularly performance sensitive fast path, with a

+ comment that explains why the usage in a specific location is safe without

+ the extra barriers.

+

+ See memory-barriers.txt for a more detailed discussion on the precise ordering

+ guarantees of the non-relaxed and relaxed versions.
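
For illustration, a sketch of such a fast path; the device and its FIFO/status register offsets are made up::

    /* Drain a hypothetical RX FIFO. No DMA buffer is involved, so the
     * reads only need to be ordered against each other. */
    static void mydev_drain_fifo(void __iomem *regs, u32 *buf, int words)
    {
            int i;

            for (i = 0; i < words; i++)
                    buf[i] = readl_relaxed(regs + 0x10);	/* FIFO register */

            /* One non-relaxed access to order the batch against later DMA. */
            (void)readl(regs + 0x14);			/* status register */
    }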

+

+ioread64(), ioread32(), ioread16(), ioread8(),

+iowrite64(), iowrite32(), iowrite16(), iowrite8()

+

+ These are an alternative to the normal readl()/writel() functions, with almost

+ identical behavior, but they can also operate on ``__iomem`` tokens returned

+ for mapping PCI I/O space with pci_iomap() or ioport_map(). On architectures

+ that require special instructions for I/O port access, this adds a small

+ overhead for an indirect function call implemented in lib/iomap.c, while on

+ other architectures, these are simply aliases.

+

+ioread64be(), ioread32be(), ioread16be(),

+iowrite64be(), iowrite32be(), iowrite16be()

+

+ These behave in the same way as the ioread32()/iowrite32() family, but with

+ reversed byte order, for accessing devices with big-endian MMIO registers.

+ Device drivers that can operate on either big-endian or little-endian

+ registers may have to implement a custom wrapper function that picks one or

+ the other depending on which device was found.

+

+ Note: On some architectures, the normal readl()/writel() functions

+ traditionally assume that devices are the same endianness as the CPU, while

+ using a hardware byte-reverse on the PCI bus when running a big-endian kernel.

+ Drivers that use readl()/writel() this way are generally not portable, but

+ tend to be limited to a particular SoC.
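
A sketch of such a wrapper; the ``big_endian`` flag and how it gets set are assumptions for illustration::

    struct mydev {
            void __iomem *regs;
            bool big_endian;	/* e.g. decided from DT match data at probe time */
    };

    static u32 mydev_read(struct mydev *dev, unsigned int reg)
    {
            if (dev->big_endian)
                    return ioread32be(dev->regs + reg);
            return ioread32(dev->regs + reg);
    }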

+

+hi_lo_readq(), lo_hi_readq(), hi_lo_readq_relaxed(), lo_hi_readq_relaxed(),

+ioread64_lo_hi(), ioread64_hi_lo(), ioread64be_lo_hi(), ioread64be_hi_lo(),

+hi_lo_writeq(), lo_hi_writeq(), hi_lo_writeq_relaxed(), lo_hi_writeq_relaxed(),

+iowrite64_lo_hi(), iowrite64_hi_lo(), iowrite64be_lo_hi(), iowrite64be_hi_lo()

+

+ Some device drivers have 64-bit registers that cannot be accessed atomically

+ on 32-bit architectures but allow two consecutive 32-bit accesses instead.

+ Since it depends on the particular device which of the two halves has to be

+ accessed first, a helper is provided for each combination of 64-bit accessors

+ with either low/high or high/low word ordering. A device driver must include

+ either <linux/io-64-nonatomic-lo-hi.h> or <linux/io-64-nonatomic-hi-lo.h> to

+ get the function definitions along with helpers that redirect the normal

+ readq()/writeq() to them on architectures that do not provide 64-bit access

+ natively.
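
A minimal sketch (the counter register offset is invented); the chosen header is the driver's statement about which half the device wants first::

    #include <linux/io-64-nonatomic-lo-hi.h>	/* this device wants the low word first */

    static u64 mydev_read_counter(void __iomem *regs)
    {
            /* One 64-bit load on 64-bit kernels; two 32-bit loads, low
             * half first, on 32-bit kernels, courtesy of the header above. */
            return readq(regs + 0x40);
    }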

+

+__raw_readq(), __raw_readl(), __raw_readw(), __raw_readb(),

+__raw_writeq(), __raw_writel(), __raw_writew(), __raw_writeb()

+

+ These are low-level MMIO accessors without barriers or byteorder changes and

+ architecture specific behavior. Accesses are usually atomic in the sense that

+ a four-byte __raw_readl() does not get split into individual byte loads, but

+ multiple consecutive accesses can be combined on the bus. In portable code, it

+ is only safe to use these to access memory behind a device bus but not MMIO

+ registers, as there are no ordering guarantees with regard to other MMIO

+ accesses or even spinlocks. The byte order is generally the same as for normal

+ memory, so unlike the other functions, these can be used to copy data between

+ kernel memory and device memory.
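
As a hedged sketch of that one portable use, copying out of a hypothetical device-memory buffer rather than registers::

    static void mydev_copy_from(u32 *dst, const void __iomem *src, int words)
    {
            int i;

            /* Byte order matches normal memory, so no swapping is needed. */
            for (i = 0; i < words; i++)
                    dst[i] = __raw_readl(src + 4 * i);
    }

In practice, memcpy_fromio()/memcpy_toio() already wrap this pattern and are usually the better choice.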

+

+inl(), inw(), inb(), outl(), outw(), outb()

+

+ PCI I/O port resources traditionally require separate helpers as they are

+ implemented using special instructions on the x86 architecture. On most other

+ architectures, these are mapped to readl()/writel() style accessors

+ internally, usually pointing to a fixed area in virtual memory. Instead of an

+ ``__iomem`` pointer, the address is a 32-bit integer token to identify a port

+ number. PCI requires I/O port access to be non-posted, meaning that an outb()

+ must complete before the following code executes, while a normal writeb() may

+ still be in progress. On architectures that correctly implement this, I/O port

+ access is therefore ordered against spinlocks. Many non-x86 PCI host bridge

+ implementations and CPU architectures however fail to implement non-posted I/O

+ space on PCI, so they can end up being posted on such hardware.

+

+ In some architectures, the I/O port number space has a 1:1 mapping to

+ ``__iomem`` pointers, but this is not recommended and device drivers should

+ not rely on that for portability. Similarly, an I/O port number as described

+ in a PCI base address register may not correspond to the port number as seen

+ by a device driver. Portable drivers need to read the port number for the

+ resource provided by the kernel.

+

+ There are no direct 64-bit I/O port accessors, but pci_iomap() in combination

+ with ioread64/iowrite64 can be used instead.
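
A sketch of the portable pattern; the BAR number, register offset, and expected ID value are all invented::

    #include <linux/pci.h>

    #define MYDEV_ID	0x1234abcd	/* made-up identification value */

    static int mydev_check_id(struct pci_dev *pdev)
    {
            void __iomem *p;
            u32 id;

            p = pci_iomap(pdev, 0, 0);	/* works for MMIO and I/O BARs alike */
            if (!p)
                    return -ENOMEM;

            id = ioread32(p + 0x10);	/* dispatches to inl() on an I/O BAR */
            pci_iounmap(pdev, p);

            return id == MYDEV_ID ? 0 : -ENODEV;
    }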

+

+inl_p(), inw_p(), inb_p(), outl_p(), outw_p(), outb_p()

+

+ On ISA devices that require specific timing, the _p versions of the I/O

+ accessors add a small delay. On architectures that do not have ISA buses,

+ these are aliases to the normal inb/outb helpers.

+

+readsq, readsl, readsw, readsb

+writesq, writesl, writesw, writesb

+ioread64_rep, ioread32_rep, ioread16_rep, ioread8_rep

+iowrite64_rep, iowrite32_rep, iowrite16_rep, iowrite8_rep

+insl, insw, insb, outsl, outsw, outsb

+

+ These are helpers that access the same address multiple times, usually to copy

+ data between kernel memory byte stream and a FIFO buffer. Unlike the normal

+ MMIO accessors, these do not perform a byteswap on big-endian kernels, so the

+ first byte in the FIFO register corresponds to the first byte in the memory

+ buffer regardless of the architecture.
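
For illustration, a sketch that drains a byte stream from a hypothetical 32-bit FIFO register::

    #include <linux/kernel.h>	/* DIV_ROUND_UP */

    static void mydev_read_frame(void __iomem *regs, void *buf, int bytes)
    {
            /* Reads the same register repeatedly, with no byteswap, so the
             * byte stream lands in bus order on any kernel endianness. */
            readsl(regs + 0x20, buf, DIV_ROUND_UP(bytes, 4));
    }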

+

+Device memory mapping modes

+===========================

+

+Some architectures support multiple modes for mapping device memory.

+ioremap_*() variants provide a common abstraction around these

+architecture-specific modes, with a shared set of semantics.

+

+ioremap() is the most common mapping type, and is applicable to typical device

+memory (e.g. I/O registers). Other modes can offer weaker or stronger

+guarantees, if supported by the architecture. From most to least common, they

+are as follows:

+

+ioremap()

+---------

+

+The default mode, suitable for most memory-mapped devices, e.g. control

+registers. Memory mapped using ioremap() has the following characteristics:

+

+* Uncached - CPU-side caches are bypassed, and all reads and writes are handled

+ directly by the device

+* No speculative operations - the CPU may not issue a read or write to this

+ memory, unless the instruction that does so has been reached in committed

+ program flow.

+* No reordering - The CPU may not reorder accesses to this memory mapping with

+ respect to each other. On some architectures, this relies on barriers in

+ readl_relaxed()/writel_relaxed().

+* No repetition - The CPU may not issue multiple reads or writes for a single

+ program instruction.

+* No write-combining - Each I/O operation results in one discrete read or write

+ being issued to the device, and multiple writes are not combined into larger

+ writes. This may or may not be enforced when using __raw I/O accessors or

+ pointer dereferences.

+* Non-executable - The CPU is not allowed to speculate instruction execution

+ from this memory (it probably goes without saying, but you're also not

+ allowed to jump into device memory).

+

+On many platforms and buses (e.g. PCI), writes issued through ioremap()

+mappings are posted, which means that the CPU does not wait for the write to

+actually reach the target device before retiring the write instruction.

+

+On many platforms, I/O accesses must be aligned with respect to the access

+size; failure to do so will result in an exception or unpredictable results.

+

+ioremap_wc()

+------------

+

+Maps I/O memory as normal memory with write combining. Unlike ioremap(),

+

+* The CPU may speculatively issue reads from the device that the program

+ didn't actually execute, and may choose to basically read whatever it wants.

+* The CPU may reorder operations as long as the result is consistent from the

+ program's point of view.

+* The CPU may write to the same location multiple times, even when the program

+ issued a single write.

+* The CPU may combine several writes into a single larger write.

+

+This mode is typically used for video framebuffers, where it can increase

+performance of writes. It can also be used for other blocks of memory in

+devices (e.g. buffers or shared memory), but care must be taken as accesses are

+not guaranteed to be ordered with respect to normal ioremap() MMIO register

+accesses without explicit barriers.

+

+On a PCI bus, it is usually safe to use ioremap_wc() on MMIO areas marked as

+``IORESOURCE_PREFETCH``, but it may not be used on those without the flag.

+For on-chip devices, there is no corresponding flag, but a driver can use

+ioremap_wc() on a device that is known to be safe.
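
A sketch of the PCI case just described; treating BAR 0 as the framebuffer is an assumption about the device::

    static void __iomem *mydev_map_fb(struct pci_dev *pdev)
    {
            /* Only prefetchable BARs are known to be safe to map this way. */
            if (!(pci_resource_flags(pdev, 0) & IORESOURCE_PREFETCH))
                    return NULL;

            return pci_ioremap_wc_bar(pdev, 0);
    }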

+

+ioremap_wt()

+------------

+

+Maps I/O memory as normal memory with write-through caching. Like ioremap_wc(),

+but also,

+

+* The CPU may cache writes issued to and reads from the device, and serve reads

+ from that cache.

+

+This mode is sometimes used for video framebuffers, where drivers still expect

+writes to reach the device in a timely manner (and not be stuck in the CPU

+cache), but reads may be served from the cache for efficiency. However, it is

+rarely useful these days, as framebuffer drivers usually perform writes only,

+for which ioremap_wc() is more efficient (as it doesn't needlessly trash the

+cache). Most drivers should not use this.

+

+ioremap_np()

+------------

+

+Like ioremap(), but explicitly requests non-posted write semantics. On some

+architectures and buses, ioremap() mappings have posted write semantics, which

+means that writes can appear to "complete" from the point of view of the

+CPU before the written data actually arrives at the target device. Writes are

+still ordered with respect to other writes and reads from the same device, but

+due to the posted write semantics, this is not the case with respect to other

+devices. ioremap_np() explicitly requests non-posted semantics, which means

+that the write instruction will not appear to complete until the device has

+received (and to some platform-specific extent acknowledged) the written data.

+

+This mapping mode primarily exists to cater for platforms with bus fabrics that

+require this particular mapping mode to work correctly. These platforms set the

+``IORESOURCE_MEM_NONPOSTED`` flag for a resource that requires ioremap_np()

+semantics and portable drivers should use an abstraction that automatically

+selects it where appropriate (see the `Higher-level ioremap abstractions`_

+section below).

+

+The bare ioremap_np() is only available on some architectures; on others, it

+always returns NULL. Drivers should not normally use it, unless they are

+platform-specific or they derive benefit from non-posted writes where

+supported, and can fall back to ioremap() otherwise. The normal approach to

+ensure posted write completion is to do a dummy read after a write as

+explained in `Accessing the device`_, which works with ioremap() on all

+platforms.
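
A sketch of that opt-in pattern for a platform-specific driver (error handling trimmed)::

    static void __iomem *mydev_map(struct resource *res)
    {
            void __iomem *regs;

            regs = ioremap_np(res->start, resource_size(res));
            if (!regs)	/* NULL on architectures without ioremap_np() */
                    regs = ioremap(res->start, resource_size(res));
            return regs;
    }

With the posted fallback in use, write completion still has to be forced by a dummy read where the driver depends on it.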

+

+ioremap_np() should never be used for PCI drivers. PCI memory space writes are

+always posted, even on architectures that otherwise implement ioremap_np().

+Using ioremap_np() for PCI BARs will at best result in posted write semantics,

+and at worst result in complete breakage.

+

+Note that non-posted write semantics are orthogonal to CPU-side ordering

+guarantees. A CPU may still choose to issue other reads or writes before a

+non-posted write instruction retires. See the previous section on MMIO access

+functions for details on the CPU side of things.

+

+ioremap_uc()

+------------

+

+ioremap_uc() behaves like ioremap() except that on the x86 architecture without

+'PAT' mode, it marks memory as uncached even when the MTRR has designated

+it as cacheable, see Documentation/x86/pat.rst.

+

+Portable drivers should avoid the use of ioremap_uc().

+

+ioremap_cache()

+---------------

+

+ioremap_cache() effectively maps I/O memory as normal RAM. CPU write-back

+caches can be used, and the CPU is free to treat the device as if it were a

+block of RAM. This should never be used for device memory which has side

+effects of any kind, or which does not return the data previously written on

+read.

+

+It should also not be used for actual RAM, as the returned pointer is an

+``__iomem`` token. memremap() can be used for mapping normal RAM that is outside

+of the linear kernel memory area to a regular pointer.

+

+Portable drivers should avoid the use of ioremap_cache().
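
For contrast, a sketch of the memremap() alternative mentioned above; the physical address and size are invented::

    #include <linux/io.h>
    #include <linux/sizes.h>

    static void *map_carveout(void)
    {
            /* Returns a regular void *, not an __iomem token, which is the
             * right shape for a RAM carveout. */
            return memremap(0x80000000, SZ_1M, MEMREMAP_WB);
    }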

+

+Architecture example

+--------------------

+

+Here is how the above modes map to memory attribute settings on the ARM64

+architecture:

+

++------------------------+--------------------------------------------+

+| API | Memory region type and cacheability |

++------------------------+--------------------------------------------+

+| ioremap_np() | Device-nGnRnE |

++------------------------+--------------------------------------------+

+| ioremap() | Device-nGnRE |

++------------------------+--------------------------------------------+

+| ioremap_uc() | (not implemented) |

++------------------------+--------------------------------------------+

+| ioremap_wc() | Normal-Non Cacheable |

++------------------------+--------------------------------------------+

+| ioremap_wt() | (not implemented; fallback to ioremap) |

++------------------------+--------------------------------------------+

+| ioremap_cache() | Normal-Write-Back Cacheable |

++------------------------+--------------------------------------------+

+

+Higher-level ioremap abstractions

+=================================

+

+Instead of using the above raw ioremap() modes, drivers are encouraged to use

+higher-level APIs. These APIs may implement platform-specific logic to

+automatically choose an appropriate ioremap mode on any given bus, allowing for

+a platform-agnostic driver to work on those platforms without any special

+cases. At the time of this writing, the following ioremap() wrappers have such

+logic:

+

+devm_ioremap_resource()

+

+ Can automatically select ioremap_np() over ioremap() according to platform

+ requirements, if the ``IORESOURCE_MEM_NONPOSTED`` flag is set on the struct

+ resource. Uses devres to automatically unmap the resource when the driver

+ probe() function fails or a device is unbound from its driver.

+

+ Documented in Documentation/driver-api/driver-model/devres.rst.

+

+of_address_to_resource()

+

+ Automatically sets the ``IORESOURCE_MEM_NONPOSTED`` flag for platforms that

+ require non-posted writes for certain buses (see the nonposted-mmio and

+ posted-mmio device tree properties).

+

+of_iomap()

+

+ Maps the resource described in a ``reg`` property in the device tree, doing

+ all required translations. Automatically selects ioremap_np() according to

+ platform requirements, as above.

+

+pci_ioremap_bar(), pci_ioremap_wc_bar()

+

+ Maps the resource described in a PCI base address without having to extract

+ the physical address first.

+

+pci_iomap(), pci_iomap_wc()

+

+ Like pci_ioremap_bar()/pci_ioremap_wc_bar(), but also works on I/O space when

+ used together with ioread32()/iowrite32() and similar accessors.

+

+pcim_iomap()

+

+ Like pci_iomap(), but uses devres to automatically unmap the resource when

+ the driver probe() function fails or a device is unbound from its driver.

+

+ Documented in Documentation/driver-api/driver-model/devres.rst.

+

+Not using these wrappers may make drivers unusable on certain platforms with

+stricter rules for mapping I/O memory.
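
To make that concrete, a sketch of a probe() function using the devres wrapper; the device and its register-0 reset are placeholders::

    #include <linux/err.h>
    #include <linux/io.h>
    #include <linux/platform_device.h>

    static int mydev_probe(struct platform_device *pdev)
    {
            void __iomem *regs;

            /* Resolves the reg resource and silently upgrades to
             * ioremap_np() when IORESOURCE_MEM_NONPOSTED is set. */
            regs = devm_platform_ioremap_resource(pdev, 0);
            if (IS_ERR(regs))
                    return PTR_ERR(regs);

            writel(0, regs);
            return 0;
    }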

+

Public Functions Provided

=========================

@@ -310,6 +310,7 @@ IOMAP

devm_ioremap()

devm_ioremap_uc()

devm_ioremap_wc()

+ devm_ioremap_np()

devm_ioremap_resource() : checks resource, requests memory region, ioremaps

devm_ioremap_resource_wc()

devm_platform_ioremap_resource() : calls devm_ioremap_resource() for platform device

diff --git a/MAINTAINERS b/MAINTAINERS

index dec3739031fe7..1162b0917630a 100644

--- a/MAINTAINERS

+++ b/MAINTAINERS

@@ -1649,6 +1649,20 @@ F: arch/arm/mach-alpine/

F: arch/arm64/boot/dts/amazon/

F: drivers/*/*alpine*

+ARM/APPLE MACHINE SUPPORT

+M: Hector Martin <marcan@marcan.st>

+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)

+S: Maintained

+W: https://asahilinux.org

+B: https://github.com/AsahiLinux/linux/issues

+C: irc://chat.freenode.net/asahi-dev

+T: git https://github.com/AsahiLinux/linux.git

+F: Documentation/devicetree/bindings/arm/apple.yaml

+F: Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml

+F: arch/arm64/boot/dts/apple/

+F: drivers/irqchip/irq-apple-aic.c

+F: include/dt-bindings/interrupt-controller/apple-aic.h

+

ARM/ARTPEC MACHINE SUPPORT

M: Jesper Nilsson <jesper.nilsson@axis.com>

M: Lars Persson <lars.persson@axis.com>

@@ -26,6 +26,13 @@ config ARCH_ALPINE

This enables support for the Annapurna Labs Alpine

Soc family.

+config ARCH_APPLE

+ bool "Apple Silicon SoC family"

+ select APPLE_AIC

+ help

+ This enables support for Apple's in-house ARM SoC family, starting

+ with the Apple M1.

+

config ARCH_BCM2835

bool "Broadcom BCM2835 family"

select TIMER_OF

@@ -6,6 +6,7 @@ subdir-y += amazon

subdir-y += amd

subdir-y += amlogic

subdir-y += apm

+subdir-y += apple

subdir-y += arm

subdir-y += bitmain

subdir-y += broadcom

diff --git a/arch/arm64/boot/dts/apple/Makefile b/arch/arm64/boot/dts/apple/Makefile

new file mode 100644

index 0000000000000..cbbd701ebf05b

--- /dev/null

+++ b/arch/arm64/boot/dts/apple/Makefile

@@ -0,0 +1,2 @@

+# SPDX-License-Identifier: GPL-2.0

+dtb-$(CONFIG_ARCH_APPLE) += t8103-j274.dtb

diff --git a/arch/arm64/boot/dts/apple/t8103-j274.dts b/arch/arm64/boot/dts/apple/t8103-j274.dts

new file mode 100644

index 0000000000000..e0f6775b98783

--- /dev/null

+++ b/arch/arm64/boot/dts/apple/t8103-j274.dts

@@ -0,0 +1,45 @@

+// SPDX-License-Identifier: GPL-2.0+ OR MIT

+/*

+ * Apple Mac mini (M1, 2020)

+ *

+ * target-type: J274

+ *

+ * Copyright The Asahi Linux Contributors

+ */

+

+/dts-v1/;

+

+#include "t8103.dtsi"

+

+/ {

+ compatible = "apple,j274", "apple,t8103", "apple,arm-platform";

+ model = "Apple Mac mini (M1, 2020)";

+

+ aliases {

+ serial0 = &serial0;

+ };

+

+ chosen {

+ #address-cells = <2>;

+ #size-cells = <2>;

+ ranges;

+

+ stdout-path = "serial0";

+

+ framebuffer0: framebuffer@0 {

+ compatible = "apple,simple-framebuffer", "simple-framebuffer";

+ reg = <0 0 0 0>; /* To be filled by loader */

+ /* Format properties will be added by loader */

+ status = "disabled";

+ };

+ };

+

+ memory@800000000 {

+ device_type = "memory";

+ reg = <0x8 0 0x2 0>; /* To be filled by loader */

+ };

+};

+

+&serial0 {

+ status = "okay";

+};

diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi

new file mode 100644

index 0000000000000..a1e22a2ea2e53

--- /dev/null

+++ b/arch/arm64/boot/dts/apple/t8103.dtsi

@@ -0,0 +1,135 @@

+// SPDX-License-Identifier: GPL-2.0+ OR MIT

+/*

+ * Apple T8103 "M1" SoC

+ *

+ * Other names: H13G, "Tonga"

+ *

+ * Copyright The Asahi Linux Contributors

+ */

+

+#include <dt-bindings/interrupt-controller/apple-aic.h>

+#include <dt-bindings/interrupt-controller/irq.h>

+

+/ {

+ compatible = "apple,t8103", "apple,arm-platform";

+

+ #address-cells = <2>;

+ #size-cells = <2>;

+

+ cpus {

+ #address-cells = <2>;

+ #size-cells = <0>;

+

+ cpu0: cpu@0 {

+ compatible = "apple,icestorm";

+ device_type = "cpu";

+ reg = <0x0 0x0>;

+ enable-method = "spin-table";

+ cpu-release-addr = <0 0>; /* To be filled by loader */

+ };

+

+ cpu1: cpu@1 {

+ compatible = "apple,icestorm";

+ device_type = "cpu";

+ reg = <0x0 0x1>;

+ enable-method = "spin-table";

+ cpu-release-addr = <0 0>; /* To be filled by loader */

+ };

+

+ cpu2: cpu@2 {

+ compatible = "apple,icestorm";

+ device_type = "cpu";

+ reg = <0x0 0x2>;

+ enable-method = "spin-table";

+ cpu-release-addr = <0 0>; /* To be filled by loader */

+ };

+

+ cpu3: cpu@3 {

+ compatible = "apple,icestorm";

+ device_type = "cpu";

+ reg = <0x0 0x3>;

+ enable-method = "spin-table";

+ cpu-release-addr = <0 0>; /* To be filled by loader */

+ };

+

+ cpu4: cpu@10100 {

+ compatible = "apple,firestorm";

+ device_type = "cpu";

+ reg = <0x0 0x10100>;

+ enable-method = "spin-table";

+ cpu-release-addr = <0 0>; /* To be filled by loader */

+ };

+

+ cpu5: cpu@10101 {

+ compatible = "apple,firestorm";

+ device_type = "cpu";

+ reg = <0x0 0x10101>;

+ enable-method = "spin-table";

+ cpu-release-addr = <0 0>; /* To be filled by loader */

+ };

+

+ cpu6: cpu@10102 {

+ compatible = "apple,firestorm";

+ device_type = "cpu";

+ reg = <0x0 0x10102>;

+ enable-method = "spin-table";

+ cpu-release-addr = <0 0>; /* To be filled by loader */

+ };

+

+ cpu7: cpu@10103 {

+ compatible = "apple,firestorm";

+ device_type = "cpu";

+ reg = <0x0 0x10103>;

+ enable-method = "spin-table";

+ cpu-release-addr = <0 0>; /* To be filled by loader */

+ };

+ };

+

+ timer {

+ compatible = "arm,armv8-timer";

+ interrupt-parent = <&aic>;

+ interrupt-names = "phys", "virt", "hyp-phys", "hyp-virt";

+ interrupts = <AIC_FIQ AIC_TMR_GUEST_PHYS IRQ_TYPE_LEVEL_HIGH>,

+ <AIC_FIQ AIC_TMR_GUEST_VIRT IRQ_TYPE_LEVEL_HIGH>,

+ <AIC_FIQ AIC_TMR_HV_PHYS IRQ_TYPE_LEVEL_HIGH>,

+ <AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>;

+ };

+

+ clk24: clock-24m {

+ compatible = "fixed-clock";

+ #clock-cells = <0>;

+ clock-frequency = <24000000>;

+ clock-output-names = "clk24";

+ };

+

+ soc {

+ compatible = "simple-bus";

+ #address-cells = <2>;

+ #size-cells = <2>;

+

+ ranges;

+ nonposted-mmio;

+

+ serial0: serial@235200000 {

+ compatible = "apple,s5l-uart";

+ reg = <0x2 0x35200000 0x0 0x1000>;

+ reg-io-width = <4>;

+ interrupt-parent = <&aic>;

+ interrupts = <AIC_IRQ 605 IRQ_TYPE_LEVEL_HIGH>;

+ /*

+ * TODO: figure out the clocking properly, there may

+ * be a third selectable clock.

+ */

+ clocks = <&clk24>, <&clk24>;

+ clock-names = "uart", "clk_uart_baud0";

+ status = "disabled";

+ };

+

+ aic: interrupt-controller@23b100000 {

+ compatible = "apple,t8103-aic", "apple,aic";

+ #interrupt-cells = <3>;

+ interrupt-controller;

+ reg = <0x2 0x3b100000 0x0 0x8000>;

+ };

+ };

+};

@@ -32,6 +32,7 @@ CONFIG_ARCH_AGILEX=y

CONFIG_ARCH_N5X=y

CONFIG_ARCH_SUNXI=y

CONFIG_ARCH_ALPINE=y

+CONFIG_ARCH_APPLE=y

CONFIG_ARCH_BCM2835=y

CONFIG_ARCH_BCM4908=y

CONFIG_ARCH_BCM_IPROC=y

@@ -59,6 +59,7 @@

#define ARM_CPU_IMP_NVIDIA 0x4E

#define ARM_CPU_IMP_FUJITSU 0x46

#define ARM_CPU_IMP_HISI 0x48

+#define ARM_CPU_IMP_APPLE 0x61

#define ARM_CPU_PART_AEM_V8 0xD0F

#define ARM_CPU_PART_FOUNDATION 0xD00

@@ -99,6 +100,9 @@

#define HISI_CPU_PART_TSV110 0xD01

+#define APPLE_CPU_PART_M1_ICESTORM 0x022

+#define APPLE_CPU_PART_M1_FIRESTORM 0x023

+

#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)

#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)

#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)

@@ -127,6 +131,8 @@

#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)

#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)

#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)

+#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)

+#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)

/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */

#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX

@@ -169,16 +169,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);

#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))

#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))

-

-/*

- * PCI configuration space mapping function.

- *

- * The PCI specification disallows posted write configuration transactions.

- * Add an arch specific pci_remap_cfgspace() definition that is implemented

- * through nGnRnE device memory attribute as recommended by the ARM v8

- * Architecture reference manual Issue A.k B2.8.2 "Device memory".

- */

-#define pci_remap_cfgspace(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))

+#define ioremap_np(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))

/*

* io{read,write}{16,32,64}be() macros

@@ -1041,6 +1041,66 @@

#define TRFCR_ELx_ExTRE BIT(1)

#define TRFCR_ELx_E0TRE BIT(0)

+

+/* GIC Hypervisor interface registers */

+/* ICH_MISR_EL2 bit definitions */

+#define ICH_MISR_EOI (1 << 0)

+#define ICH_MISR_U (1 << 1)

+

+/* ICH_LR*_EL2 bit definitions */

+#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1)

+

+#define ICH_LR_EOI (1ULL << 41)

+#define ICH_LR_GROUP (1ULL << 60)

+#define ICH_LR_HW (1ULL << 61)

+#define ICH_LR_STATE (3ULL << 62)

+#define ICH_LR_PENDING_BIT (1ULL << 62)

+#define ICH_LR_ACTIVE_BIT (1ULL << 63)

+#define ICH_LR_PHYS_ID_SHIFT 32

+#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT)

+#define ICH_LR_PRIORITY_SHIFT 48

+#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT)

+

+/* ICH_HCR_EL2 bit definitions */

+#define ICH_HCR_EN (1 << 0)

+#define ICH_HCR_UIE (1 << 1)

+#define ICH_HCR_NPIE (1 << 3)

+#define ICH_HCR_TC (1 << 10)

+#define ICH_HCR_TALL0 (1 << 11)

+#define ICH_HCR_TALL1 (1 << 12)

+#define ICH_HCR_EOIcount_SHIFT 27

+#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT)

+

+/* ICH_VMCR_EL2 bit definitions */

+#define ICH_VMCR_ACK_CTL_SHIFT 2

+#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT)

+#define ICH_VMCR_FIQ_EN_SHIFT 3

+#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT)

+#define ICH_VMCR_CBPR_SHIFT 4

+#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT)

+#define ICH_VMCR_EOIM_SHIFT 9

+#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT)

+#define ICH_VMCR_BPR1_SHIFT 18

+#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT)

+#define ICH_VMCR_BPR0_SHIFT 21

+#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT)

+#define ICH_VMCR_PMR_SHIFT 24

+#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)

+#define ICH_VMCR_ENG0_SHIFT 0

+#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)

+#define ICH_VMCR_ENG1_SHIFT 1

+#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)

+

+/* ICH_VTR_EL2 bit definitions */

+#define ICH_VTR_PRI_BITS_SHIFT 29

+#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT)

+#define ICH_VTR_ID_BITS_SHIFT 23

+#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT)

+#define ICH_VTR_SEIS_SHIFT 22

+#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)

+#define ICH_VTR_A3V_SHIFT 21

+#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)

+

#ifdef __ASSEMBLY__

.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30

@@ -409,6 +409,10 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size)

#define ioremap_uc(X,Y) ioremap((X),(Y))

#define ioremap_wc(X,Y) ioremap((X),(Y))

#define ioremap_wt(X,Y) ioremap((X),(Y))

+static inline void __iomem *ioremap_np(unsigned long offset, unsigned long size)

+{

+ return NULL;

+}

static inline void iounmap(volatile void __iomem *addr)

{

@@ -64,6 +64,14 @@ static u32 arch_timer_rate __ro_after_init;

u32 arch_timer_rate1 __ro_after_init;

static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init;

+static const char *arch_timer_ppi_names[ARCH_TIMER_MAX_TIMER_PPI] = {

+ [ARCH_TIMER_PHYS_SECURE_PPI] = "sec-phys",

+ [ARCH_TIMER_PHYS_NONSECURE_PPI] = "phys",

+ [ARCH_TIMER_VIRT_PPI] = "virt",

+ [ARCH_TIMER_HYP_PPI] = "hyp-phys",

+ [ARCH_TIMER_HYP_VIRT_PPI] = "hyp-virt",

+};

+

static struct clock_event_device __percpu *arch_timer_evt;

static enum arch_timer_ppi_nr arch_timer_uses_ppi __ro_after_init = ARCH_TIMER_VIRT_PPI;

@@ -1281,8 +1289,9 @@ static void __init arch_timer_populate_kvm_info(void)

static int __init arch_timer_of_init(struct device_node *np)

{

- int i, ret;

+ int i, irq, ret;

u32 rate;

+ bool has_names;

if (arch_timers_present & ARCH_TIMER_TYPE_CP15) {

pr_warn("multiple nodes in dt, skipping\n");

@@ -1290,8 +1299,17 @@ static int __init arch_timer_of_init(struct device_node *np)

}

arch_timers_present |= ARCH_TIMER_TYPE_CP15;

- for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++)

- arch_timer_ppi[i] = irq_of_parse_and_map(np, i);

+

+ has_names = of_property_read_bool(np, "interrupt-names");

+

+ for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++) {

+ if (has_names)

+ irq = of_irq_get_byname(np, arch_timer_ppi_names[i]);

+ else

+ irq = of_irq_get(np, i);

+ if (irq > 0)

+ arch_timer_ppi[i] = irq;

+ }

arch_timer_populate_kvm_info();

@@ -593,4 +593,12 @@ config IRQ_IDT3243X

select GENERIC_IRQ_CHIP

select IRQ_DOMAIN

+config APPLE_AIC

+ bool "Apple Interrupt Controller (AIC)"

+ depends on ARM64

+ default ARCH_APPLE

+ help

+ Support for the Apple Interrupt Controller found on Apple Silicon SoCs,

+ such as the M1.

+

endmenu

@@ -115,3 +115,4 @@ obj-$(CONFIG_SL28CPLD_INTC) += irq-sl28cpld.o

obj-$(CONFIG_MACH_REALTEK_RTL) += irq-realtek-rtl.o

obj-$(CONFIG_WPCM450_AIC) += irq-wpcm450-aic.o

obj-$(CONFIG_IRQ_IDT3243X) += irq-idt3243x.o

+obj-$(CONFIG_APPLE_AIC) += irq-apple-aic.o

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c

new file mode 100644

index 0000000000000..c179e27062fd5

--- /dev/null

+++ b/drivers/irqchip/irq-apple-aic.c

@@ -0,0 +1,852 @@

+// SPDX-License-Identifier: GPL-2.0-or-later

+/*

+ * Copyright The Asahi Linux Contributors

+ *

+ * Based on irq-lpc32xx:

+ * Copyright 2015-2016 Vladimir Zapolskiy <vz@mleia.com>

+ * Based on irq-bcm2836:

+ * Copyright 2015 Broadcom

+ */

+

+/*

+ * AIC is a fairly simple interrupt controller with the following features:

+ *

+ * - 896 level-triggered hardware IRQs

+ * - Single mask bit per IRQ

+ * - Per-IRQ affinity setting

+ * - Automatic masking on event delivery (auto-ack)

+ * - Software triggering (ORed with hw line)

+ * - 2 per-CPU IPIs (meant as "self" and "other", but they are

+ * interchangeable if not symmetric)

+ * - Automatic prioritization (single event/ack register per CPU, lower IRQs =

+ * higher priority)

+ * - Automatic masking on ack

+ * - Default "this CPU" register view and explicit per-CPU views

+ *

+ * In addition, this driver also handles FIQs, as these are routed to the same

+ * IRQ vector. These are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and

+ * performance counters (TODO).

+ *

+ * Implementation notes:

+ *

+ * - This driver creates two IRQ domains, one for HW IRQs and internal FIQs,

+ * and one for IPIs.

+ * - Since Linux needs more than 2 IPIs, we implement a software IRQ controller

+ * and funnel all IPIs into one per-CPU IPI (the second "self" IPI is unused).

+ * - FIQ hwirq numbers are assigned after true hwirqs, and are per-cpu.

+ * - DT bindings use 3-cell form (like GIC):

+ * - <0 nr flags> - hwirq #nr

+ * - <1 nr flags> - FIQ #nr

+ * - nr=0 Physical HV timer

+ * - nr=1 Virtual HV timer

+ * - nr=2 Physical guest timer

+ * - nr=3 Virtual guest timer

+ */

+

+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

+

+#include <linux/bits.h>

+#include <linux/bitfield.h>

+#include <linux/cpuhotplug.h>

+#include <linux/io.h>

+#include <linux/irqchip.h>

+#include <linux/irqdomain.h>

+#include <linux/limits.h>

+#include <linux/of_address.h>

+#include <linux/slab.h>

+#include <asm/exception.h>

+#include <asm/sysreg.h>

+#include <asm/virt.h>

+

+#include <dt-bindings/interrupt-controller/apple-aic.h>

+

+/*

+ * AIC registers (MMIO)

+ */

+

+#define AIC_INFO 0x0004

+#define AIC_INFO_NR_HW GENMASK(15, 0)

+

+#define AIC_CONFIG 0x0010

+

+#define AIC_WHOAMI 0x2000

+#define AIC_EVENT 0x2004

+#define AIC_EVENT_TYPE GENMASK(31, 16)

+#define AIC_EVENT_NUM GENMASK(15, 0)

+

+#define AIC_EVENT_TYPE_HW 1

+#define AIC_EVENT_TYPE_IPI 4

+#define AIC_EVENT_IPI_OTHER 1

+#define AIC_EVENT_IPI_SELF 2

+

+#define AIC_IPI_SEND 0x2008

+#define AIC_IPI_ACK 0x200c

+#define AIC_IPI_MASK_SET 0x2024

+#define AIC_IPI_MASK_CLR 0x2028

+

+#define AIC_IPI_SEND_CPU(cpu) BIT(cpu)

+

+#define AIC_IPI_OTHER BIT(0)

+#define AIC_IPI_SELF BIT(31)

+

+#define AIC_TARGET_CPU 0x3000

+#define AIC_SW_SET 0x4000

+#define AIC_SW_CLR 0x4080

+#define AIC_MASK_SET 0x4100

+#define AIC_MASK_CLR 0x4180

+

+#define AIC_CPU_IPI_SET(cpu) (0x5008 + ((cpu) << 7))

+#define AIC_CPU_IPI_CLR(cpu) (0x500c + ((cpu) << 7))

+#define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7))

+#define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7))

+
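+/* Byte offset of the 32-bit mask word covering hwirq x, and the bit within it */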

+#define MASK_REG(x) (4 * ((x) >> 5))

+#define MASK_BIT(x) BIT((x) & GENMASK(4, 0))

+

+/*

+ * IMP-DEF sysregs that control FIQ sources

+ * Note: sysreg-based IPIs are not supported yet.

+ */

+

+/* Core PMC control register */

+#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0)

+#define PMCR0_IMODE GENMASK(10, 8)

+#define PMCR0_IMODE_OFF 0

+#define PMCR0_IMODE_PMI 1

+#define PMCR0_IMODE_AIC 2

+#define PMCR0_IMODE_HALT 3

+#define PMCR0_IMODE_FIQ 4

+#define PMCR0_IACT BIT(11)

+

+/* IPI request registers */

+#define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0)

+#define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1)

+#define IPI_RR_CPU GENMASK(7, 0)

+/* Cluster only used for the GLOBAL register */

+#define IPI_RR_CLUSTER GENMASK(23, 16)

+#define IPI_RR_TYPE GENMASK(29, 28)

+#define IPI_RR_IMMEDIATE 0

+#define IPI_RR_RETRACT 1

+#define IPI_RR_DEFERRED 2

+#define IPI_RR_NOWAKE 3

+

+/* IPI status register */

+#define SYS_IMP_APL_IPI_SR_EL1 sys_reg(3, 5, 15, 1, 1)

+#define IPI_SR_PENDING BIT(0)

+

+/* Guest timer FIQ enable register */

+#define SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2 sys_reg(3, 5, 15, 1, 3)

+#define VM_TMR_FIQ_ENABLE_V BIT(0)

+#define VM_TMR_FIQ_ENABLE_P BIT(1)

+

+/* Deferred IPI countdown register */

+#define SYS_IMP_APL_IPI_CR_EL1 sys_reg(3, 5, 15, 3, 1)

+

+/* Uncore PMC control register */

+#define SYS_IMP_APL_UPMCR0_EL1 sys_reg(3, 7, 15, 0, 4)

+#define UPMCR0_IMODE GENMASK(18, 16)

+#define UPMCR0_IMODE_OFF 0

+#define UPMCR0_IMODE_AIC 2

+#define UPMCR0_IMODE_HALT 3

+#define UPMCR0_IMODE_FIQ 4

+

+/* Uncore PMC status register */

+#define SYS_IMP_APL_UPMSR_EL1 sys_reg(3, 7, 15, 6, 4)

+#define UPMSR_IACT BIT(0)

+

+#define AIC_NR_FIQ 4

+#define AIC_NR_SWIPI 32

+

+/*

+ * FIQ hwirq index definitions: FIQ sources use the DT binding defines

+ * directly, except that timers are special. At the irqchip level, the

+ * two timer types are represented by their access method: _EL0 registers

+ * or _EL02 registers. In the DT binding, the timers are represented

+ * by their purpose (HV or guest). This mapping is for when the kernel is

+ * running at EL2 (with VHE). When the kernel is running at EL1, the

+ * mapping differs and aic_irq_domain_translate() performs the remapping.

+ */

+

+#define AIC_TMR_EL0_PHYS AIC_TMR_HV_PHYS

+#define AIC_TMR_EL0_VIRT AIC_TMR_HV_VIRT

+#define AIC_TMR_EL02_PHYS AIC_TMR_GUEST_PHYS

+#define AIC_TMR_EL02_VIRT AIC_TMR_GUEST_VIRT

+

+struct aic_irq_chip {

+ void __iomem *base;

+ struct irq_domain *hw_domain;

+ struct irq_domain *ipi_domain;

+ int nr_hw;

+ int ipi_hwirq;

+};

+

+static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked);

+

+static DEFINE_PER_CPU(atomic_t, aic_vipi_flag);

+static DEFINE_PER_CPU(atomic_t, aic_vipi_enable);

+

+static struct aic_irq_chip *aic_irqc;

+

+static void aic_handle_ipi(struct pt_regs *regs);

+

+static u32 aic_ic_read(struct aic_irq_chip *ic, u32 reg)

+{

+ return readl_relaxed(ic->base + reg);

+}

+

+static void aic_ic_write(struct aic_irq_chip *ic, u32 reg, u32 val)

+{

+ writel_relaxed(val, ic->base + reg);

+}

+

+/*

+ * IRQ irqchip

+ */

+

+static void aic_irq_mask(struct irq_data *d)

+{

+ struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);

+

+ aic_ic_write(ic, AIC_MASK_SET + MASK_REG(irqd_to_hwirq(d)),

+ MASK_BIT(irqd_to_hwirq(d)));

+}

+

+static void aic_irq_unmask(struct irq_data *d)

+{

+ struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);

+

+ aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(d->hwirq),

+ MASK_BIT(irqd_to_hwirq(d)));

+}

+

+static void aic_irq_eoi(struct irq_data *d)

+{

+ /*

+ * Reading the interrupt reason automatically acknowledges and masks

+ * the IRQ, so we just unmask it here if needed.

+ */

+ if (!irqd_irq_disabled(d) && !irqd_irq_masked(d))

+ aic_irq_unmask(d);

+}

+

+static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)

+{

+ struct aic_irq_chip *ic = aic_irqc;

+ u32 event, type, irq;

+

+ do {

+ /*

+ * We cannot use a relaxed read here, as reads from DMA buffers

+ * need to be ordered after the IRQ fires.

+ */

+ event = readl(ic->base + AIC_EVENT);

+ type = FIELD_GET(AIC_EVENT_TYPE, event);

+ irq = FIELD_GET(AIC_EVENT_NUM, event);

+

+ if (type == AIC_EVENT_TYPE_HW)

+ handle_domain_irq(aic_irqc->hw_domain, irq, regs);

+ else if (type == AIC_EVENT_TYPE_IPI && irq == 1)

+ aic_handle_ipi(regs);

+ else if (event != 0)

+ pr_err_ratelimited("Unknown IRQ event %d, %d\n", type, irq);

+ } while (event);

+

+ /*

+ * vGIC maintenance interrupts end up here too, so we need to check

+ * for them separately. This should never trigger if KVM is working

+ * properly, because it will have already taken care of clearing it

+ * on guest exit before this handler runs.

+ */

+ if (is_kernel_in_hyp_mode() && (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) &&

+ read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {

+ pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n");

+ sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);

+ }

+}

+

+static int aic_irq_set_affinity(struct irq_data *d,

+ const struct cpumask *mask_val, bool force)

+{

+ irq_hw_number_t hwirq = irqd_to_hwirq(d);

+ struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);

+ int cpu;

+

+ if (force)

+ cpu = cpumask_first(mask_val);

+ else

+ cpu = cpumask_any_and(mask_val, cpu_online_mask);

+

+ aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu));

+ irq_data_update_effective_affinity(d, cpumask_of(cpu));

+

+ return IRQ_SET_MASK_OK;

+}

+

+static int aic_irq_set_type(struct irq_data *d, unsigned int type)

+{

+ /*

+ * Some IRQs (e.g. MSIs) implicitly have edge semantics, and we don't

+ * have a way to find out the type of any given IRQ, so just allow both.

+ */

+ return (type == IRQ_TYPE_LEVEL_HIGH || type == IRQ_TYPE_EDGE_RISING) ? 0 : -EINVAL;

+}

+

+static struct irq_chip aic_chip = {

+ .name = "AIC",

+ .irq_mask = aic_irq_mask,

+ .irq_unmask = aic_irq_unmask,

+ .irq_eoi = aic_irq_eoi,

+ .irq_set_affinity = aic_irq_set_affinity,

+ .irq_set_type = aic_irq_set_type,

+};

+

+/*

+ * FIQ irqchip

+ */

+

+static unsigned long aic_fiq_get_idx(struct irq_data *d)

+{

+ struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);

+

+ return irqd_to_hwirq(d) - ic->nr_hw;

+}

+

+static void aic_fiq_set_mask(struct irq_data *d)

+{

+ /* Only the guest timers have real mask bits, unfortunately. */

+ switch (aic_fiq_get_idx(d)) {

+ case AIC_TMR_EL02_PHYS:

+ sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENABLE_P, 0);

+ isb();

+ break;

+ case AIC_TMR_EL02_VIRT:

+ sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, VM_TMR_FIQ_ENABLE_V, 0);

+ isb();

+ break;

+ default:

+ break;

+ }

+}

+

+static void aic_fiq_clear_mask(struct irq_data *d)

+{

+ switch (aic_fiq_get_idx(d)) {

+ case AIC_TMR_EL02_PHYS:

+ sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, 0, VM_TMR_FIQ_ENABLE_P);

+ isb();

+ break;

+ case AIC_TMR_EL02_VIRT:

+ sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2, 0, VM_TMR_FIQ_ENABLE_V);

+ isb();

+ break;

+ default:

+ break;

+ }

+}

+

+static void aic_fiq_mask(struct irq_data *d)

+{

+ aic_fiq_set_mask(d);

+ __this_cpu_and(aic_fiq_unmasked, ~BIT(aic_fiq_get_idx(d)));

+}

+

+static void aic_fiq_unmask(struct irq_data *d)

+{

+ aic_fiq_clear_mask(d);

+ __this_cpu_or(aic_fiq_unmasked, BIT(aic_fiq_get_idx(d)));

+}

+

+static void aic_fiq_eoi(struct irq_data *d)

+{

+ /* We mask to ack (where we can), so we need to unmask at EOI. */

+ if (__this_cpu_read(aic_fiq_unmasked) & BIT(aic_fiq_get_idx(d)))

+ aic_fiq_clear_mask(d);

+}

+

+#define TIMER_FIRING(x) \

+ (((x) & (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_MASK | \

+ ARCH_TIMER_CTRL_IT_STAT)) == \

+ (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))

+

+static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)

+{

+ /*

+ * It would be really nice if we had a system register that lets us get

+ * the FIQ source state without having to peek down into sources...

+ * but such a register does not seem to exist.

+ *

+ * So, we have these potential sources to test for:

+ * - Fast IPIs (not yet used)

+ * - The 4 timers (CNTP, CNTV for each of HV and guest)

+ * - Per-core PMCs (not yet supported)

+ * - Per-cluster uncore PMCs (not yet supported)

+ *

+ * Since not dealing with any of these results in a FIQ storm,

+ * we check for everything here, even things we don't support yet.

+ */

+

+ if (read_sysreg_s(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) {

+ pr_err_ratelimited("Fast IPI fired. Acking.\n");

+ write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);

+ }

+

+ if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))

+ handle_domain_irq(aic_irqc->hw_domain,

+ aic_irqc->nr_hw + AIC_TMR_EL0_PHYS, regs);

+

+ if (TIMER_FIRING(read_sysreg(cntv_ctl_el0)))

+ handle_domain_irq(aic_irqc->hw_domain,

+ aic_irqc->nr_hw + AIC_TMR_EL0_VIRT, regs);

+

+ if (is_kernel_in_hyp_mode()) {

+ uint64_t enabled = read_sysreg_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2);

+

+ if ((enabled & VM_TMR_FIQ_ENABLE_P) &&

+ TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02)))

+ handle_domain_irq(aic_irqc->hw_domain,

+ aic_irqc->nr_hw + AIC_TMR_EL02_PHYS, regs);

+

+ if ((enabled & VM_TMR_FIQ_ENABLE_V) &&

+ TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02)))

+ handle_domain_irq(aic_irqc->hw_domain,

+ aic_irqc->nr_hw + AIC_TMR_EL02_VIRT, regs);

+ }

+

+ if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==

+ (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {

+ /*

+ * Not supported yet, let's figure out how to handle this when

+ * we implement these proprietary performance counters. For now,

+ * just mask it and move on.

+ */

+ pr_err_ratelimited("PMC FIQ fired. Masking.\n");

+ sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT,

+ FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));

+ }

+

+ if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ &&

+ (read_sysreg_s(SYS_IMP_APL_UPMSR_EL1) & UPMSR_IACT)) {

+ /* Same story with uncore PMCs */

+ pr_err_ratelimited("Uncore PMC FIQ fired. Masking.\n");

+ sysreg_clear_set_s(SYS_IMP_APL_UPMCR0_EL1, UPMCR0_IMODE,

+ FIELD_PREP(UPMCR0_IMODE, UPMCR0_IMODE_OFF));

+ }

+}

+

+static int aic_fiq_set_type(struct irq_data *d, unsigned int type)

+{

+ return (type == IRQ_TYPE_LEVEL_HIGH) ? 0 : -EINVAL;

+}

+

+static struct irq_chip fiq_chip = {

+ .name = "AIC-FIQ",

+ .irq_mask = aic_fiq_mask,

+ .irq_unmask = aic_fiq_unmask,

+ .irq_ack = aic_fiq_set_mask,

+ .irq_eoi = aic_fiq_eoi,

+ .irq_set_type = aic_fiq_set_type,

+};

+

+/*

+ * Main IRQ domain

+ */

+

+static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,

+ irq_hw_number_t hw)

+{

+ struct aic_irq_chip *ic = id->host_data;

+

+ if (hw < ic->nr_hw) {

+ irq_domain_set_info(id, irq, hw, &aic_chip, id->host_data,

+ handle_fasteoi_irq, NULL, NULL);

+ irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));

+ } else {

+ irq_set_percpu_devid(irq);

+ irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data,

+ handle_percpu_devid_irq, NULL, NULL);

+ }

+

+ return 0;

+}

+

+static int aic_irq_domain_translate(struct irq_domain *id,

+ struct irq_fwspec *fwspec,

+ unsigned long *hwirq,

+ unsigned int *type)

+{

+ struct aic_irq_chip *ic = id->host_data;

+

+ if (fwspec->param_count != 3 || !is_of_node(fwspec->fwnode))

+ return -EINVAL;

+

+ switch (fwspec->param[0]) {

+ case AIC_IRQ:

+ if (fwspec->param[1] >= ic->nr_hw)

+ return -EINVAL;

+ *hwirq = fwspec->param[1];

+ break;

+ case AIC_FIQ:

+ if (fwspec->param[1] >= AIC_NR_FIQ)

+ return -EINVAL;

+ *hwirq = ic->nr_hw + fwspec->param[1];

+

+ /*

+ * In EL1 the non-redirected registers are the guest's,

+ * not EL2's, so remap the hwirqs to match.

+ */

+ if (!is_kernel_in_hyp_mode()) {

+ switch (fwspec->param[1]) {

+ case AIC_TMR_GUEST_PHYS:

+ *hwirq = ic->nr_hw + AIC_TMR_EL0_PHYS;

+ break;

+ case AIC_TMR_GUEST_VIRT:

+ *hwirq = ic->nr_hw + AIC_TMR_EL0_VIRT;

+ break;

+ case AIC_TMR_HV_PHYS:

+ case AIC_TMR_HV_VIRT:

+ return -ENOENT;

+ default:

+ break;

+ }

+ }

+ break;

+ default:

+ return -EINVAL;

+ }

+

+ *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;

+

+ return 0;

+}

+

+static int aic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,

+ unsigned int nr_irqs, void *arg)

+{

+ unsigned int type = IRQ_TYPE_NONE;

+ struct irq_fwspec *fwspec = arg;

+ irq_hw_number_t hwirq;

+ int i, ret;

+

+ ret = aic_irq_domain_translate(domain, fwspec, &hwirq, &type);

+ if (ret)

+ return ret;

+

+ for (i = 0; i < nr_irqs; i++) {

+ ret = aic_irq_domain_map(domain, virq + i, hwirq + i);

+ if (ret)

+ return ret;

+ }

+

+ return 0;

+}

+

+static void aic_irq_domain_free(struct irq_domain *domain, unsigned int virq,

+ unsigned int nr_irqs)

+{

+ int i;

+

+ for (i = 0; i < nr_irqs; i++) {

+ struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);

+

+ irq_set_handler(virq + i, NULL);

+ irq_domain_reset_irq_data(d);

+ }

+}

+

+static const struct irq_domain_ops aic_irq_domain_ops = {

+ .translate = aic_irq_domain_translate,

+ .alloc = aic_irq_domain_alloc,

+ .free = aic_irq_domain_free,

+};

+

+/*

+ * IPI irqchip

+ */

+

+static void aic_ipi_mask(struct irq_data *d)

+{

+ u32 irq_bit = BIT(irqd_to_hwirq(d));

+

+ /* No specific ordering requirements needed here. */

+ atomic_andnot(irq_bit, this_cpu_ptr(&aic_vipi_enable));

+}

+

+static void aic_ipi_unmask(struct irq_data *d)

+{

+ struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);

+ u32 irq_bit = BIT(irqd_to_hwirq(d));

+

+ atomic_or(irq_bit, this_cpu_ptr(&aic_vipi_enable));

+

+ /*

+ * The atomic_or() above must complete before the atomic_read()

+ * below to avoid racing aic_ipi_send_mask().

+ */

+ smp_mb__after_atomic();

+

+ /*

+ * If a pending vIPI was unmasked, raise a HW IPI to ourselves.

+ * No barriers needed here since this is a self-IPI.

+ */

+ if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit)

+ aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));

+}

+

+static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)

+{

+ struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);

+ u32 irq_bit = BIT(irqd_to_hwirq(d));

+ u32 send = 0;

+ int cpu;

+ unsigned long pending;

+

+ for_each_cpu(cpu, mask) {

+ /*

+ * This sequence is the mirror of the one in aic_ipi_unmask();

+ * see the comment there. Additionally, release semantics

+ * ensure that the vIPI flag set is ordered after any shared

+ * memory accesses that precede it. This therefore also pairs

+ * with the atomic_fetch_andnot in aic_handle_ipi().

+ */

+ pending = atomic_fetch_or_release(irq_bit, per_cpu_ptr(&aic_vipi_flag, cpu));

+

+ /*

+ * The atomic_fetch_or_release() above must complete before the

+ * atomic_read() below to avoid racing aic_ipi_unmask().

+ */

+ smp_mb__after_atomic();

+

+ if (!(pending & irq_bit) &&

+ (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit))

+ send |= AIC_IPI_SEND_CPU(cpu);

+ }

+

+ /*

+ * The flag writes must complete before the physical IPI is issued

+ * to another CPU. This is implied by the control dependency on

+ * the result of atomic_read_acquire() above, which is itself

+ * already ordered after the vIPI flag write.

+ */

+ if (send)

+ aic_ic_write(ic, AIC_IPI_SEND, send);

+}

+

+static struct irq_chip ipi_chip = {

+ .name = "AIC-IPI",

+ .irq_mask = aic_ipi_mask,

+ .irq_unmask = aic_ipi_unmask,

+ .ipi_send_mask = aic_ipi_send_mask,

+};

+

+/*

+ * IPI IRQ domain

+ */

+

+static void aic_handle_ipi(struct pt_regs *regs)

+{

+ int i;

+ unsigned long enabled, firing;

+

+ /*

+ * Ack the IPI. We need to order this after the AIC event read, but

+ * that is enforced by normal MMIO ordering guarantees.

+ */

+ aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);

+

+ /*

+ * The mask read does not need to be ordered. Only we can change

+ * our own mask anyway, so no races are possible here, as long as

+ * we are properly in the interrupt handler (which is covered by

+ * the barrier that is part of the top-level AIC handler's readl()).

+ */

+ enabled = atomic_read(this_cpu_ptr(&aic_vipi_enable));

+

+ /*

+ * Clear the IPIs we are about to handle. This pairs with the

+ * atomic_fetch_or_release() in aic_ipi_send_mask(), and needs to be

+ * ordered after the aic_ic_write() above (to avoid dropping vIPIs) and

+ * before IPI handling code (to avoid races handling vIPIs before they

+ * are signaled). The former is taken care of by the release semantics

+ * of the write portion, while the latter is taken care of by the

+ * acquire semantics of the read portion.

+ */

+ firing = atomic_fetch_andnot(enabled, this_cpu_ptr(&aic_vipi_flag)) & enabled;

+

+ for_each_set_bit(i, &firing, AIC_NR_SWIPI)

+ handle_domain_irq(aic_irqc->ipi_domain, i, regs);

+

+ /*

+ * No ordering needed here; at worst this just changes the timing of

+ * when the next IPI will be delivered.

+ */

+ aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);

+}

+

+static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq,

+ unsigned int nr_irqs, void *args)

+{

+ int i;

+

+ for (i = 0; i < nr_irqs; i++) {

+ irq_set_percpu_devid(virq + i);

+ irq_domain_set_info(d, virq + i, i, &ipi_chip, d->host_data,

+ handle_percpu_devid_irq, NULL, NULL);

+ }

+

+ return 0;

+}

+

+static void aic_ipi_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)

+{

+ /* Not freeing IPIs */

+}

+

+static const struct irq_domain_ops aic_ipi_domain_ops = {

+ .alloc = aic_ipi_alloc,

+ .free = aic_ipi_free,

+};

+

+static int aic_init_smp(struct aic_irq_chip *irqc, struct device_node *node)

+{

+ struct irq_domain *ipi_domain;

+ int base_ipi;

+

+ ipi_domain = irq_domain_create_linear(irqc->hw_domain->fwnode, AIC_NR_SWIPI,

+ &aic_ipi_domain_ops, irqc);

+ if (WARN_ON(!ipi_domain))

+ return -ENODEV;

+

+ ipi_domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE;

+ irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);

+

+ base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, AIC_NR_SWIPI,

+ NUMA_NO_NODE, NULL, false, NULL);

+

+ if (WARN_ON(base_ipi <= 0)) {

+ irq_domain_remove(ipi_domain);

+ return -ENODEV;

+ }

+

+ set_smp_ipi_range(base_ipi, AIC_NR_SWIPI);

+

+ irqc->ipi_domain = ipi_domain;

+

+ return 0;

+}

+

+static int aic_init_cpu(unsigned int cpu)

+{

+ /* Mask all hard-wired per-CPU IRQ/FIQ sources */

+

+ /* Pending Fast IPI FIQs */

+ write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1);

+

+ /* Timer FIQs */

+ sysreg_clear_set(cntp_ctl_el0, 0, ARCH_TIMER_CTRL_IT_MASK);

+ sysreg_clear_set(cntv_ctl_el0, 0, ARCH_TIMER_CTRL_IT_MASK);

+

+ /* EL2-only (VHE mode) IRQ sources */

+ if (is_kernel_in_hyp_mode()) {

+ /* Guest timers */

+ sysreg_clear_set_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2,

+ VM_TMR_FIQ_ENABLE_V | VM_TMR_FIQ_ENABLE_P, 0);

+

+ /* vGIC maintenance IRQ */

+ sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0);

+ }

+

+ /* PMC FIQ */

+ sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT,

+ FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));

+

+ /* Uncore PMC FIQ */

+ sysreg_clear_set_s(SYS_IMP_APL_UPMCR0_EL1, UPMCR0_IMODE,

+ FIELD_PREP(UPMCR0_IMODE, UPMCR0_IMODE_OFF));

+

+ /* Commit all of the above */

+ isb();

+

+ /*

+ * Make sure the kernel's idea of logical CPU order is the same as AIC's.

+ * If we ever end up with a mismatch here, we will have to introduce

+ * a mapping table similar to what other irqchip drivers do.

+ */

+ WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id());

+

+ /*

+ * Always keep IPIs unmasked at the hardware level (except auto-masking

+ * by AIC during processing). We manage masks at the vIPI level.

+ */

+ aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER);

+ aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF);

+ aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);

+

+ /* Initialize the local mask state */

+ __this_cpu_write(aic_fiq_unmasked, 0);

+

+ return 0;

+}

+

+static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)

+{

+ int i;

+ void __iomem *regs;

+ u32 info;

+ struct aic_irq_chip *irqc;

+

+ regs = of_iomap(node, 0);

+ if (WARN_ON(!regs))

+ return -EIO;

+

+ irqc = kzalloc(sizeof(*irqc), GFP_KERNEL);

+ if (!irqc) {

+ iounmap(regs);

+ return -ENOMEM;

+ }

+

+ aic_irqc = irqc;

+ irqc->base = regs;

+

+ info = aic_ic_read(irqc, AIC_INFO);

+ irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info);

+

+ irqc->hw_domain = irq_domain_create_linear(of_node_to_fwnode(node),

+ irqc->nr_hw + AIC_NR_FIQ,

+ &aic_irq_domain_ops, irqc);

+ if (WARN_ON(!irqc->hw_domain)) {

+ iounmap(irqc->base);

+ kfree(irqc);

+ return -ENODEV;

+ }

+

+ irq_domain_update_bus_token(irqc->hw_domain, DOMAIN_BUS_WIRED);

+

+ if (aic_init_smp(irqc, node)) {

+ irq_domain_remove(irqc->hw_domain);

+ iounmap(irqc->base);

+ kfree(irqc);

+ return -ENODEV;

+ }

+

+ set_handle_irq(aic_handle_irq);

+ set_handle_fiq(aic_handle_fiq);

+

+ for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)

+ aic_ic_write(irqc, AIC_MASK_SET + i * 4, U32_MAX);

+ for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++)

+ aic_ic_write(irqc, AIC_SW_CLR + i * 4, U32_MAX);

+ for (i = 0; i < irqc->nr_hw; i++)

+ aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1);

+

+ if (!is_kernel_in_hyp_mode())

+ pr_info("Kernel running in EL1, mapping interrupts");

+

+ cpuhp_setup_state(CPUHP_AP_IRQ_APPLE_AIC_STARTING,

+ "irqchip/apple-aic/ipi:starting",

+ aic_init_cpu, NULL);

+

+ pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",

+ irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);

+

+ return 0;

+}

+

+IRQCHIP_DECLARE(apple_m1_aic, "apple,aic", aic_of_ic_init);
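
The lazy-masking scheme above never touches the hardware IPI mask in the hot path: senders set a per-CPU flag bit, and a physical IPI is raised only if the target had not already been flagged and currently has that vIPI unmasked. The pairing of atomic_fetch_or_release() on the send side with atomic_fetch_andnot() on the receive side can be modeled outside the kernel with C11 atomics. A minimal stand-alone sketch (hypothetical, not part of the patch; the kernel's atomic_t helpers are replaced by their stdatomic equivalents):

    /* Stand-alone model of the vIPI flag/enable protocol. */
    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned vipi_flag;   /* pending vIPIs, one bit each */
    static _Atomic unsigned vipi_enable; /* unmasked vIPIs */

    /* Send side (cf. aic_ipi_send_mask): latch the pending bit, then
     * decide whether a hardware IPI is actually needed. */
    static int send_vipi(unsigned bit)
    {
        unsigned pending = atomic_fetch_or_explicit(&vipi_flag, bit,
                                                    memory_order_release);
        /* stands in for smp_mb__after_atomic() */
        atomic_thread_fence(memory_order_seq_cst);
        return !(pending & bit) &&
               (atomic_load_explicit(&vipi_enable,
                                     memory_order_relaxed) & bit);
    }

    /* Receive side (cf. aic_handle_ipi): claim all enabled pending
     * vIPIs in one shot; masked ones stay latched until unmask. */
    static unsigned claim_vipis(void)
    {
        unsigned enabled = atomic_load_explicit(&vipi_enable,
                                                memory_order_relaxed);
        return atomic_fetch_and_explicit(&vipi_flag, ~enabled,
                                         memory_order_acq_rel) & enabled;
    }

    int main(void)
    {
        atomic_store(&vipi_enable, 0x3);
        printf("raise hw IPI: %d\n", send_vipi(0x1)); /* 1: new */
        printf("raise hw IPI: %d\n", send_vipi(0x1)); /* 0: latched */
        printf("firing: %#x\n", claim_vipis());       /* 0x1 */
        return 0;
    }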

@@ -26,6 +26,7 @@ static struct of_bus *of_match_bus(struct device_node *np);

static int __of_address_to_resource(struct device_node *dev,

const __be32 *addrp, u64 size, unsigned int flags,

const char *name, struct resource *r);

+static bool of_mmio_is_nonposted(struct device_node *np);

/* Debug utility */

#ifdef DEBUG

@@ -847,6 +848,9 @@ static int __of_address_to_resource(struct device_node *dev,

return -EINVAL;

memset(r, 0, sizeof(struct resource));

+ if (of_mmio_is_nonposted(dev))

+ flags |= IORESOURCE_MEM_NONPOSTED;

+

r->start = taddr;

r->end = taddr + size - 1;

r->flags = flags;

@@ -896,7 +900,10 @@ void __iomem *of_iomap(struct device_node *np, int index)

if (of_address_to_resource(np, index, &res))

return NULL;

- return ioremap(res.start, resource_size(&res));

+ if (res.flags & IORESOURCE_MEM_NONPOSTED)

+ return ioremap_np(res.start, resource_size(&res));

+ else

+ return ioremap(res.start, resource_size(&res));

}

EXPORT_SYMBOL(of_iomap);
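
With the hunk above, a driver that maps registers through of_iomap() picks up non-posted semantics automatically whenever its bus carries the "nonposted-mmio" property; nothing changes on the caller's side. A hypothetical caller, for illustration:

    /* np is the driver's struct device_node; on an Apple SoC bus this
     * now returns an ioremap_np() mapping with no driver changes */
    void __iomem *base = of_iomap(np, 0);
    if (!base)
        return -ENOMEM;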

@@ -928,7 +935,11 @@ void __iomem *of_io_request_and_map(struct device_node *np, int index,

if (!request_mem_region(res.start, resource_size(&res), name))

return IOMEM_ERR_PTR(-EBUSY);

- mem = ioremap(res.start, resource_size(&res));

+ if (res.flags & IORESOURCE_MEM_NONPOSTED)

+ mem = ioremap_np(res.start, resource_size(&res));

+ else

+ mem = ioremap(res.start, resource_size(&res));

+

if (!mem) {

release_mem_region(res.start, resource_size(&res));

return IOMEM_ERR_PTR(-ENOMEM);

@@ -1094,3 +1105,31 @@ bool of_dma_is_coherent(struct device_node *np)

return false;

}

EXPORT_SYMBOL_GPL(of_dma_is_coherent);

+

+/**

+ * of_mmio_is_nonposted - Check if device uses non-posted MMIO

+ * @np: device node

+ *

+ * Returns true if the "nonposted-mmio" property was found for

+ * the device's bus.

+ *

+ * This is currently only enabled on builds that support Apple ARM devices, as

+ * an optimization.

+ */

+static bool of_mmio_is_nonposted(struct device_node *np)

+{

+ struct device_node *parent;

+ bool nonposted;

+

+ if (!IS_ENABLED(CONFIG_ARCH_APPLE))

+ return false;

+

+ parent = of_get_parent(np);

+ if (!parent)

+ return false;

+

+ nonposted = of_property_read_bool(parent, "nonposted-mmio");

+

+ of_node_put(parent);

+ return nonposted;

+}
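
Because __of_address_to_resource() now sets IORESOURCE_MEM_NONPOSTED, the property is also visible to code that inspects resources directly, not just to the of_iomap()/of_io_request_and_map() helpers. A hedged sketch:

    /* illustrative only: check whether a node's first MMIO region
     * was flagged as requiring non-posted accesses */
    struct resource res;

    if (!of_address_to_resource(np, 0, &res) &&
        (res.flags & IORESOURCE_MEM_NONPOSTED))
        pr_debug("%pOF requires non-posted MMIO\n", np);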

@@ -942,7 +942,9 @@ static inline void *phys_to_virt(unsigned long address)

*

* ioremap_wc() and ioremap_wt() can provide more relaxed caching attributes

* for specific drivers if the architecture chooses to implement them. If they

- * are not implemented we fall back to plain ioremap.

+ * are not implemented we fall back to plain ioremap. Conversely, ioremap_np()

+ * can provide stricter non-posted write semantics if the architecture

+ * implements them.

*/

#ifndef CONFIG_MMU

#ifndef ioremap

@@ -995,6 +997,23 @@ static inline void __iomem *ioremap_uc(phys_addr_t offset, size_t size)

}

#endif

+/*

+ * ioremap_np needs an explicit architecture implementation, as it

+ * requests stronger semantics than regular ioremap(). Portable drivers

+ * should instead use one of the higher-level abstractions, like

+ * devm_ioremap_resource(), to choose the correct variant for any given

+ * device and bus. Portable drivers with a good reason to want non-posted

+ * write semantics should always provide an ioremap() fallback in case

+ * ioremap_np() is not available.

+ */

+#ifndef ioremap_np

+#define ioremap_np ioremap_np

+static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size)

+{

+ return NULL;

+}

+#endif

+

#ifdef CONFIG_HAS_IOPORT_MAP

#ifndef CONFIG_GENERIC_IOMAP

#ifndef ioport_map

@@ -101,6 +101,15 @@ extern void ioport_unmap(void __iomem *);

#define ioremap_wt ioremap

#endif

+#ifndef ARCH_HAS_IOREMAP_NP

+/* See the comment in asm-generic/io.h about ioremap_np(). */

+#define ioremap_np ioremap_np

+static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size)

+{

+ return NULL;

+}

+#endif

+

#ifdef CONFIG_PCI

/* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */

struct pci_dev;

@@ -32,6 +32,7 @@ enum arch_timer_ppi_nr {

ARCH_TIMER_PHYS_NONSECURE_PPI,

ARCH_TIMER_VIRT_PPI,

ARCH_TIMER_HYP_PPI,

+ ARCH_TIMER_HYP_VIRT_PPI,

ARCH_TIMER_MAX_TIMER_PPI

};

@@ -0,0 +1,15 @@

+/* SPDX-License-Identifier: GPL-2.0+ OR MIT */

+#ifndef _DT_BINDINGS_INTERRUPT_CONTROLLER_APPLE_AIC_H

+#define _DT_BINDINGS_INTERRUPT_CONTROLLER_APPLE_AIC_H

+

+#include <dt-bindings/interrupt-controller/irq.h>

+

+#define AIC_IRQ 0

+#define AIC_FIQ 1

+

+#define AIC_TMR_HV_PHYS 0

+#define AIC_TMR_HV_VIRT 1

+#define AIC_TMR_GUEST_PHYS 2

+#define AIC_TMR_GUEST_VIRT 3

+

+#endif

@@ -100,6 +100,7 @@ enum cpuhp_state {

CPUHP_AP_CPU_PM_STARTING,

CPUHP_AP_IRQ_GIC_STARTING,

CPUHP_AP_IRQ_HIP04_STARTING,

+ CPUHP_AP_IRQ_APPLE_AIC_STARTING,

CPUHP_AP_IRQ_ARMADA_XP_STARTING,

CPUHP_AP_IRQ_BCM2836_STARTING,

CPUHP_AP_IRQ_MIPS_GIC_STARTING,

@@ -68,6 +68,8 @@ void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset,

resource_size_t size);

void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,

resource_size_t size);

+void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset,

+ resource_size_t size);

void devm_iounmap(struct device *dev, void __iomem *addr);

int check_signature(const volatile void __iomem *io_addr,

const unsigned char *signature, int length);

@@ -80,20 +82,20 @@ void devm_memunmap(struct device *dev, void *addr);

#ifdef CONFIG_PCI

/*

* The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and

- * Posting") mandate non-posted configuration transactions. There is

- * no ioremap API in the kernel that can guarantee non-posted write

- * semantics across arches so provide a default implementation for

- * mapping PCI config space that defaults to ioremap(); arches

- * should override it if they have memory mapping implementations that

- * guarantee non-posted writes semantics to make the memory mapping

- * compliant with the PCI specification.

+ * Posting") mandate non-posted configuration transactions. This default

+ * implementation attempts to use the ioremap_np() API to provide this

+ * on arches that support it, and falls back to ioremap() on those that

+ * don't. Overriding this function is deprecated; arches that properly

+ * support non-posted accesses should implement ioremap_np() instead, which

+ * this default implementation can then use to return mappings compliant with

+ * the PCI specification.

*/

#ifndef pci_remap_cfgspace

#define pci_remap_cfgspace pci_remap_cfgspace

static inline void __iomem *pci_remap_cfgspace(phys_addr_t offset,

size_t size)

{

- return ioremap(offset, size);

+ return ioremap_np(offset, size) ?: ioremap(offset, size);

}

#endif

#endif
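
The `x ?: y` form is the GNU "elvis" extension: x is evaluated once and used if non-NULL, otherwise y is used. Config space mappings therefore prefer ioremap_np() and degrade to plain ioremap() on architectures where the stub returns NULL. Portable drivers that want non-posted writes can spell out the same fallback; a sketch under the same assumptions:

    /* try the strict mapping first, fall back to plain ioremap() */
    base = ioremap_np(res->start, resource_size(res));
    if (!base)
        base = ioremap(res->start, resource_size(res));
    if (!base)
        return -ENOMEM;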

@@ -108,6 +108,7 @@ struct resource {

#define IORESOURCE_MEM_32BIT (3<<3)

#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */

#define IORESOURCE_MEM_EXPANSIONROM (1<<6)

+#define IORESOURCE_MEM_NONPOSTED (1<<7)

/* PnP I/O specific bits (IORESOURCE_BITS) */

#define IORESOURCE_IO_16BIT_ADDR (1<<0)

@@ -575,67 +575,11 @@

#define ICC_SRE_EL1_DFB (1U << 1)

#define ICC_SRE_EL1_SRE (1U << 0)

-/*

- * Hypervisor interface registers (SRE only)

- */

-#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1)

-

-#define ICH_LR_EOI (1ULL << 41)

-#define ICH_LR_GROUP (1ULL << 60)

-#define ICH_LR_HW (1ULL << 61)

-#define ICH_LR_STATE (3ULL << 62)

-#define ICH_LR_PENDING_BIT (1ULL << 62)

-#define ICH_LR_ACTIVE_BIT (1ULL << 63)

-#define ICH_LR_PHYS_ID_SHIFT 32

-#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT)

-#define ICH_LR_PRIORITY_SHIFT 48

-#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT)

-

/* These are for GICv2 emulation only */

#define GICH_LR_VIRTUALID (0x3ffUL << 0)

#define GICH_LR_PHYSID_CPUID_SHIFT (10)

#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)

-#define ICH_MISR_EOI (1 << 0)

-#define ICH_MISR_U (1 << 1)

-

-#define ICH_HCR_EN (1 << 0)

-#define ICH_HCR_UIE (1 << 1)

-#define ICH_HCR_NPIE (1 << 3)

-#define ICH_HCR_TC (1 << 10)

-#define ICH_HCR_TALL0 (1 << 11)

-#define ICH_HCR_TALL1 (1 << 12)

-#define ICH_HCR_EOIcount_SHIFT 27

-#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT)

-

-#define ICH_VMCR_ACK_CTL_SHIFT 2

-#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT)

-#define ICH_VMCR_FIQ_EN_SHIFT 3

-#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT)

-#define ICH_VMCR_CBPR_SHIFT 4

-#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT)

-#define ICH_VMCR_EOIM_SHIFT 9

-#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT)

-#define ICH_VMCR_BPR1_SHIFT 18

-#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT)

-#define ICH_VMCR_BPR0_SHIFT 21

-#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT)

-#define ICH_VMCR_PMR_SHIFT 24

-#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)

-#define ICH_VMCR_ENG0_SHIFT 0

-#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)

-#define ICH_VMCR_ENG1_SHIFT 1

-#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)

-

-#define ICH_VTR_PRI_BITS_SHIFT 29

-#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT)

-#define ICH_VTR_ID_BITS_SHIFT 23

-#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT)

-#define ICH_VTR_SEIS_SHIFT 22

-#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)

-#define ICH_VTR_A3V_SHIFT 21

-#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)

-

#define ICC_IAR1_EL1_SPURIOUS 0x3ff

#define ICC_SRE_EL2_SRE (1 << 0)

@@ -10,6 +10,7 @@ enum devm_ioremap_type {

DEVM_IOREMAP = 0,

DEVM_IOREMAP_UC,

DEVM_IOREMAP_WC,

+ DEVM_IOREMAP_NP,

};

void devm_ioremap_release(struct device *dev, void *res)

@@ -42,6 +43,9 @@ static void __iomem *__devm_ioremap(struct device *dev, resource_size_t offset,

case DEVM_IOREMAP_WC:

addr = ioremap_wc(offset, size);

break;

+ case DEVM_IOREMAP_NP:

+ addr = ioremap_np(offset, size);

+ break;

}

if (addr) {

@@ -99,6 +103,21 @@ void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,

EXPORT_SYMBOL(devm_ioremap_wc);

/**

+ * devm_ioremap_np - Managed ioremap_np()

+ * @dev: Generic device to remap IO address for

+ * @offset: Resource address to map

+ * @size: Size of map

+ *

+ * Managed ioremap_np(). Map is automatically unmapped on driver detach.

+ */

+void __iomem *devm_ioremap_np(struct device *dev, resource_size_t offset,

+ resource_size_t size)

+{

+ return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_NP);

+}

+EXPORT_SYMBOL(devm_ioremap_np);

+

+/**

* devm_iounmap - Managed iounmap()

* @dev: Generic device to unmap for

* @addr: Address to unmap

@@ -128,6 +147,9 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res,

return IOMEM_ERR_PTR(-EINVAL);

}

+ if (type == DEVM_IOREMAP && res->flags & IORESOURCE_MEM_NONPOSTED)

+ type = DEVM_IOREMAP_NP;

+

size = resource_size(res);

if (res->name)



from Hacker News https://ift.tt/3eAn5pb
