[Feature]add MT2731_MP2_MR2_SVN388 baseline version

Change-Id: Ief04314834b31e27effab435d3ca8ba33b499059
diff --git a/src/kernel/linux/v4.14/drivers/perf/Kconfig b/src/kernel/linux/v4.14/drivers/perf/Kconfig
new file mode 100644
index 0000000..e5197ff
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/perf/Kconfig
@@ -0,0 +1,46 @@
+#
+# Performance Monitor Drivers
+#
+
+menu "Performance monitor support"
+	depends on PERF_EVENTS
+
+config ARM_PMU
+	depends on ARM || ARM64
+	bool "ARM PMU framework"
+	default y
+	help
+	  Say y if you want to use CPU performance monitors on ARM-based
+	  systems.
+
+config ARM_PMU_ACPI
+	depends on ARM_PMU && ACPI
+	def_bool y
+
+config QCOM_L2_PMU
+	bool "Qualcomm Technologies L2-cache PMU"
+	depends on ARCH_QCOM && ARM64 && ACPI
+	  help
+	  Provides support for the L2 cache performance monitor unit (PMU)
+	  in Qualcomm Technologies processors.
+	  Adds the L2 cache PMU into the perf events subsystem for
+	  monitoring L2 cache events.
+
+config QCOM_L3_PMU
+	bool "Qualcomm Technologies L3-cache PMU"
+	depends on ARCH_QCOM && ARM64 && ACPI
+	select QCOM_IRQ_COMBINER
+	help
+	   Provides support for the L3 cache performance monitor unit (PMU)
+	   in Qualcomm Technologies processors.
+	   Adds the L3 cache PMU into the perf events subsystem for
+	   monitoring L3 cache events.
+
+config XGENE_PMU
+        depends on ARCH_XGENE
+        bool "APM X-Gene SoC PMU"
+        default n
+        help
+          Say y if you want to use APM X-Gene SoC performance monitors.
+
+endmenu
diff --git a/src/kernel/linux/v4.14/drivers/perf/Makefile b/src/kernel/linux/v4.14/drivers/perf/Makefile
new file mode 100644
index 0000000..9402dc8
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/perf/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
+obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
+obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
+obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
diff --git a/src/kernel/linux/v4.14/drivers/perf/arm_pmu.c b/src/kernel/linux/v4.14/drivers/perf/arm_pmu.c
new file mode 100644
index 0000000..22e365f
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/perf/arm_pmu.c
@@ -0,0 +1,910 @@
+#undef DEBUG
+
+/*
+ * ARM performance counter support.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based on the sparc64 perf event code, which is in turn based
+ * on the x86 code.
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include <linux/bitmap.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/sched/clock.h>
+#include <linux/spinlock.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+
+#include <asm/irq_regs.h>
+
+static int
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result, ret;
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	if (!cache_map)
+		return -ENOENT;
+
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	return ret;
+}
+
+static int
+armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
+{
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	if (!event_map)
+		return -ENOENT;
+
+	mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+	return (int)(config & raw_event_mask);
+}
+
+int
+armpmu_map_event(struct perf_event *event,
+		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+		 const unsigned (*cache_map)
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX],
+		 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+	int type = event->attr.type;
+
+	if (type == event->pmu->type)
+		return armpmu_map_raw_event(raw_event_mask, config);
+
+	switch (type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_hw_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int armpmu_event_set_period(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
+
+	local64_set(&hwc->prev_count, (u64)-left);
+
+	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+u64 armpmu_event_update(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = armpmu->read_counter(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static void
+armpmu_read(struct perf_event *event)
+{
+	armpmu_event_update(event);
+}
+
+static void
+armpmu_stop(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in armpmu_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		armpmu->disable(event);
+		armpmu_event_update(event);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static void armpmu_start(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * ARM pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	/*
+	 * Set the period again. Some counters can't be stopped, so when we
+	 * were stopped we simply disabled the IRQ source and the counter
+	 * may have been left counting. If we don't do this step then we may
+	 * get an interrupt too soon or *way* too late if the overflow has
+	 * happened since disabling.
+	 */
+	armpmu_event_set_period(event);
+	armpmu->enable(event);
+}
+
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	armpmu_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
+	if (armpmu->clear_event_idx)
+		armpmu->clear_event_idx(hw_events, event);
+
+	perf_event_update_userpage(event);
+}
+
+static int
+armpmu_add(struct perf_event *event, int flags)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+
+	/* An event following a process won't be stopped earlier */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return -ENOENT;
+
+	/* If we don't have a space for the counter then finish early. */
+	idx = armpmu->get_event_idx(hw_events, event);
+	if (idx < 0)
+		return idx;
+
+	/*
+	 * If there is an event in the counter we are going to use then make
+	 * sure it is disabled.
+	 */
+	event->hw.idx = idx;
+	armpmu->disable(event);
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		armpmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+			       struct perf_event *event)
+{
+	struct arm_pmu *armpmu;
+
+	if (is_software_event(event))
+		return 1;
+
+	/*
+	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+	 * core perf code won't check that the pmu->ctx == leader->ctx
+	 * until after pmu->event_init(event).
+	 */
+	if (event->pmu != pmu)
+		return 0;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	armpmu = to_arm_pmu(event->pmu);
+	return armpmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int
+validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct pmu_hw_events fake_pmu;
+
+	/*
+	 * Initialise the fake PMU. We only need to populate the
+	 * used_mask for the purposes of validation.
+	 */
+	memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
+
+	if (!validate_event(event->pmu, &fake_pmu, leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
+			return -EINVAL;
+	}
+
+	if (!validate_event(event->pmu, &fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct arm_pmu_platdata *armpmu_get_platdata(struct arm_pmu *armpmu)
+{
+	struct platform_device *pdev = armpmu->plat_device;
+
+	return pdev ? dev_get_platdata(&pdev->dev) : NULL;
+}
+
+static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
+{
+	struct arm_pmu *armpmu;
+	struct arm_pmu_platdata *plat;
+	int ret;
+	u64 start_clock, finish_clock;
+
+	/*
+	 * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+	 * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+	 * do any necessary shifting, we just need to perform the first
+	 * dereference.
+	 */
+	armpmu = *(void **)dev;
+
+	plat = armpmu_get_platdata(armpmu);
+
+	start_clock = sched_clock();
+	if (plat && plat->handle_irq)
+		ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
+	else
+		ret = armpmu->handle_irq(irq, armpmu);
+	finish_clock = sched_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+	return ret;
+}
+
+static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
+}
+
+static int
+__hw_perf_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
+
+	mapping = armpmu->map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
+		pr_debug("ARM performance counters do not support "
+			 "mode exclusion\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base	    |= (unsigned long)mapping;
+
+	if (!is_sampling_event(event)) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period  = armpmu->max_period >> 1;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	if (event->group_leader != event) {
+		if (validate_group(event) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int armpmu_event_init(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+
+	/*
+	 * Reject CPU-affine events for CPUs that are of a different class to
+	 * that which this PMU handles. Process-following events (where
+	 * event->cpu == -1) can be migrated between CPUs, and thus we have to
+	 * reject them later (in armpmu_add) if they're scheduled on a
+	 * different class of CPU.
+	 */
+	if (event->cpu != -1 &&
+		!cpumask_test_cpu(event->cpu, &armpmu->supported_cpus))
+		return -ENOENT;
+
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	if (armpmu->map_event(event) == -ENOENT)
+		return -ENOENT;
+
+	return __hw_perf_event_init(event);
+}
+
+static void armpmu_enable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
+	if (enabled)
+		armpmu->start(armpmu);
+}
+
+static void armpmu_disable(struct pmu *pmu)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+
+	/* For task-bound events we may be called on other CPUs */
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return;
+
+	armpmu->stop(armpmu);
+}
+
+/*
+ * In heterogeneous systems, events are specific to a particular
+ * microarchitecture, and aren't suitable for another. Thus, only match CPUs of
+ * the same microarchitecture.
+ */
+static int armpmu_filter_match(struct perf_event *event)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	unsigned int cpu = smp_processor_id();
+	int ret;
+
+	ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+	if (ret && armpmu->filter_match)
+		return armpmu->filter_match(event);
+
+	return ret;
+}
+
+static ssize_t armpmu_cpumask_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
+	return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
+}
+
+static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL);
+
+static struct attribute *armpmu_common_attrs[] = {
+	&dev_attr_cpus.attr,
+	NULL,
+};
+
+static struct attribute_group armpmu_common_attr_group = {
+	.attrs = armpmu_common_attrs,
+};
+
+/* Set at runtime when we know what CPU type we are. */
+static struct arm_pmu *__oprofile_cpu_pmu;
+
+/*
+ * Despite the names, these two functions are CPU-specific and are used
+ * by the OProfile/perf code.
+ */
+const char *perf_pmu_name(void)
+{
+	if (!__oprofile_cpu_pmu)
+		return NULL;
+
+	return __oprofile_cpu_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+	int max_events = 0;
+
+	if (__oprofile_cpu_pmu != NULL)
+		max_events = __oprofile_cpu_pmu->num_events;
+
+	return max_events;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
+{
+	struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+	int irq = per_cpu(hw_events->irq, cpu);
+
+	if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
+		return;
+
+	if (irq_is_percpu(irq)) {
+		free_percpu_irq(irq, &hw_events->percpu_pmu);
+		cpumask_clear(&armpmu->active_irqs);
+		return;
+	}
+
+	free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+}
+
+void armpmu_free_irqs(struct arm_pmu *armpmu)
+{
+	int cpu;
+
+	for_each_cpu(cpu, &armpmu->supported_cpus)
+		armpmu_free_irq(armpmu, cpu);
+}
+
+int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
+{
+	int err = 0;
+	struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
+	const irq_handler_t handler = armpmu_dispatch_irq;
+	int irq = per_cpu(hw_events->irq, cpu);
+	if (!irq)
+		return 0;
+
+	if (irq_is_percpu(irq) && cpumask_empty(&armpmu->active_irqs)) {
+		err = request_percpu_irq(irq, handler, "arm-pmu",
+					 &hw_events->percpu_pmu);
+	} else if (irq_is_percpu(irq)) {
+		int other_cpu = cpumask_first(&armpmu->active_irqs);
+		int other_irq = per_cpu(hw_events->irq, other_cpu);
+
+		if (irq != other_irq) {
+			pr_warn("mismatched PPIs detected.\n");
+			err = -EINVAL;
+			goto err_out;
+		}
+	} else {
+		struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu);
+		unsigned long irq_flags;
+
+		err = irq_force_affinity(irq, cpumask_of(cpu));
+
+		if (err && num_possible_cpus() > 1) {
+			pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				irq, cpu);
+			goto err_out;
+		}
+
+		if (platdata && platdata->irq_flags) {
+			irq_flags = platdata->irq_flags;
+		} else {
+			irq_flags = IRQF_PERCPU |
+				    IRQF_NOBALANCING |
+				    IRQF_NO_THREAD;
+		}
+
+		err = request_irq(irq, handler, irq_flags, "arm-pmu",
+				  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
+	}
+
+	if (err)
+		goto err_out;
+
+	cpumask_set_cpu(cpu, &armpmu->active_irqs);
+	return 0;
+
+err_out:
+	pr_err("unable to request IRQ%d for ARM PMU counters\n", irq);
+	return err;
+}
+
+int armpmu_request_irqs(struct arm_pmu *armpmu)
+{
+	int cpu, err;
+
+	for_each_cpu(cpu, &armpmu->supported_cpus) {
+		err = armpmu_request_irq(armpmu, cpu);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
+{
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+	return per_cpu(hw_events->irq, cpu);
+}
+
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */
+static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
+	int irq;
+
+	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		return 0;
+	if (pmu->reset)
+		pmu->reset(pmu);
+
+	irq = armpmu_get_cpu_irq(pmu, cpu);
+	if (irq) {
+		if (irq_is_percpu(irq)) {
+			enable_percpu_irq(irq, IRQ_TYPE_NONE);
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
+	int irq;
+
+	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		return 0;
+
+	irq = armpmu_get_cpu_irq(pmu, cpu);
+	if (irq && irq_is_percpu(irq))
+		disable_percpu_irq(irq);
+
+	return 0;
+}
+
+#ifdef CONFIG_CPU_PM
+static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
+{
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct perf_event *event;
+	int idx;
+
+	for (idx = 0; idx < armpmu->num_events; idx++) {
+		/*
+		 * If the counter is not used skip it, there is no
+		 * need of stopping/restarting it.
+		 */
+		if (!test_bit(idx, hw_events->used_mask))
+			continue;
+
+		event = hw_events->events[idx];
+
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			/*
+			 * Stop and update the counter
+			 */
+			armpmu_stop(event, PERF_EF_UPDATE);
+			break;
+		case CPU_PM_EXIT:
+		case CPU_PM_ENTER_FAILED:
+			 /*
+			  * Restore and enable the counter.
+			  * armpmu_start() indirectly calls
+			  *
+			  * perf_event_update_userpage()
+			  *
+			  * that requires RCU read locking to be functional,
+			  * wrap the call within RCU_NONIDLE to make the
+			  * RCU subsystem aware this cpu is not idle from
+			  * an RCU perspective for the armpmu_start() call
+			  * duration.
+			  */
+			RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD));
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
+			     void *v)
+{
+	struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return NOTIFY_DONE;
+
+	/*
+	 * Always reset the PMU registers on power-up even if
+	 * there are no events running.
+	 */
+	if (cmd == CPU_PM_EXIT && armpmu->reset)
+		armpmu->reset(armpmu);
+
+	if (!enabled)
+		return NOTIFY_OK;
+
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		armpmu->stop(armpmu);
+		cpu_pm_pmu_setup(armpmu, cmd);
+		break;
+	case CPU_PM_EXIT:
+	case CPU_PM_ENTER_FAILED:
+		cpu_pm_pmu_setup(armpmu, cmd);
+		armpmu->start(armpmu);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
+	return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
+}
+
+static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
+{
+	cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
+}
+#else
+static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
+static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
+#endif
+
+static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	int err;
+
+	err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_STARTING,
+				       &cpu_pmu->node);
+	if (err)
+		goto out;
+
+	err = cpu_pm_pmu_register(cpu_pmu);
+	if (err)
+		goto out_unregister;
+
+	return 0;
+
+out_unregister:
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+					    &cpu_pmu->node);
+out:
+	return err;
+}
+
+static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
+{
+	cpu_pm_pmu_unregister(cpu_pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+					    &cpu_pmu->node);
+}
+
+struct arm_pmu *armpmu_alloc(void)
+{
+	struct arm_pmu *pmu;
+	int cpu;
+
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+	if (!pmu) {
+		pr_info("failed to allocate PMU device!\n");
+		goto out;
+	}
+
+	pmu->hw_events = alloc_percpu(struct pmu_hw_events);
+	if (!pmu->hw_events) {
+		pr_info("failed to allocate per-cpu PMU data.\n");
+		goto out_free_pmu;
+	}
+
+	pmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+		.filter_match	= armpmu_filter_match,
+		.attr_groups	= pmu->attr_groups,
+		/*
+		 * This is a CPU PMU potentially in a heterogeneous
+		 * configuration (e.g. big.LITTLE). This is not an uncore PMU,
+		 * and we have taken ctx sharing into account (e.g. with our
+		 * pmu::filter_match callback and pmu::event_init group
+		 * validation).
+		 */
+		.capabilities	= PERF_PMU_CAP_HETEROGENEOUS_CPUS,
+	};
+
+	pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
+		&armpmu_common_attr_group;
+
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events;
+
+		events = per_cpu_ptr(pmu->hw_events, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+		events->percpu_pmu = pmu;
+	}
+
+	return pmu;
+
+out_free_pmu:
+	kfree(pmu);
+out:
+	return NULL;
+}
+
+void armpmu_free(struct arm_pmu *pmu)
+{
+	free_percpu(pmu->hw_events);
+	kfree(pmu);
+}
+
+int armpmu_register(struct arm_pmu *pmu)
+{
+	int ret;
+
+	ret = cpu_pmu_init(pmu);
+	if (ret)
+		return ret;
+
+	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+	if (ret)
+		goto out_destroy;
+
+	if (!__oprofile_cpu_pmu)
+		__oprofile_cpu_pmu = pmu;
+
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+		pmu->name, pmu->num_events);
+
+	return 0;
+
+out_destroy:
+	cpu_pmu_destroy(pmu);
+	return ret;
+}
+
+static int arm_pmu_hp_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
+				      "perf/arm/pmu:starting",
+				      arm_perf_starting_cpu,
+				      arm_perf_teardown_cpu);
+	if (ret)
+		pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
+		       ret);
+	return ret;
+}
+subsys_initcall(arm_pmu_hp_init);
diff --git a/src/kernel/linux/v4.14/drivers/perf/arm_pmu_acpi.c b/src/kernel/linux/v4.14/drivers/perf/arm_pmu_acpi.c
new file mode 100644
index 0000000..604e549
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/perf/arm_pmu_acpi.c
@@ -0,0 +1,265 @@
+/*
+ * ACPI probing code for ARM performance counters.
+ *
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/perf/arm_pmu.h>
+
+#include <asm/cputype.h>
+
+static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
+static DEFINE_PER_CPU(int, pmu_irqs);
+
+static int arm_pmu_acpi_register_irq(int cpu)
+{
+	struct acpi_madt_generic_interrupt *gicc;
+	int gsi, trigger;
+
+	gicc = acpi_cpu_get_madt_gicc(cpu);
+
+	gsi = gicc->performance_interrupt;
+
+	/*
+	 * Per the ACPI spec, the MADT cannot describe a PMU that doesn't
+	 * have an interrupt. QEMU advertises this by using a GSI of zero,
+	 * which is not known to be valid on any hardware despite being
+	 * valid per the spec. Take the pragmatic approach and reject a
+	 * GSI of zero for now.
+	 */
+	if (!gsi)
+		return 0;
+
+	if (gicc->flags & ACPI_MADT_PERFORMANCE_IRQ_MODE)
+		trigger = ACPI_EDGE_SENSITIVE;
+	else
+		trigger = ACPI_LEVEL_SENSITIVE;
+
+	/*
+	 * Helpfully, the MADT GICC doesn't have a polarity flag for the
+	 * "performance interrupt". Luckily, on compliant GICs the polarity is
+	 * a fixed value in HW (for both SPIs and PPIs) that we cannot change
+	 * from SW.
+	 *
+	 * Here we pass in ACPI_ACTIVE_HIGH to keep the core code happy. This
+	 * may not match the real polarity, but that should not matter.
+	 *
+	 * Other interrupt controllers are not supported with ACPI.
+	 */
+	return acpi_register_gsi(NULL, gsi, trigger, ACPI_ACTIVE_HIGH);
+}
+
+static void arm_pmu_acpi_unregister_irq(int cpu)
+{
+	struct acpi_madt_generic_interrupt *gicc;
+	int gsi;
+
+	gicc = acpi_cpu_get_madt_gicc(cpu);
+
+	gsi = gicc->performance_interrupt;
+	if (gsi)
+		acpi_unregister_gsi(gsi);
+}
+
+static int arm_pmu_acpi_parse_irqs(void)
+{
+	int irq, cpu, irq_cpu, err;
+
+	for_each_possible_cpu(cpu) {
+		irq = arm_pmu_acpi_register_irq(cpu);
+		if (irq < 0) {
+			err = irq;
+			pr_warn("Unable to parse ACPI PMU IRQ for CPU%d: %d\n",
+				cpu, err);
+			goto out_err;
+		} else if (irq == 0) {
+			pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu);
+		}
+
+		per_cpu(pmu_irqs, cpu) = irq;
+	}
+
+	return 0;
+
+out_err:
+	for_each_possible_cpu(cpu) {
+		irq = per_cpu(pmu_irqs, cpu);
+		if (!irq)
+			continue;
+
+		arm_pmu_acpi_unregister_irq(cpu);
+
+		/*
+		 * Blat all copies of the IRQ so that we only unregister the
+		 * corresponding GSI once (e.g. when we have PPIs).
+		 */
+		for_each_possible_cpu(irq_cpu) {
+			if (per_cpu(pmu_irqs, irq_cpu) == irq)
+				per_cpu(pmu_irqs, irq_cpu) = 0;
+		}
+	}
+
+	return err;
+}
+
+static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
+{
+	unsigned long cpuid = read_cpuid_id();
+	struct arm_pmu *pmu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		pmu = per_cpu(probed_pmus, cpu);
+		if (!pmu || pmu->acpi_cpuid != cpuid)
+			continue;
+
+		return pmu;
+	}
+
+	pmu = armpmu_alloc();
+	if (!pmu) {
+		pr_warn("Unable to allocate PMU for CPU%d\n",
+			smp_processor_id());
+		return NULL;
+	}
+
+	pmu->acpi_cpuid = cpuid;
+
+	return pmu;
+}
+
+/*
+ * This must run before the common arm_pmu hotplug logic, so that we can
+ * associate a CPU and its interrupt before the common code tries to manage the
+ * affinity and so on.
+ *
+ * Note that hotplug events are serialized, so we cannot race with another CPU
+ * coming up. The perf core won't open events while a hotplug event is in
+ * progress.
+ */
+static int arm_pmu_acpi_cpu_starting(unsigned int cpu)
+{
+	struct arm_pmu *pmu;
+	struct pmu_hw_events __percpu *hw_events;
+	int irq;
+
+	/* If we've already probed this CPU, we have nothing to do */
+	if (per_cpu(probed_pmus, cpu))
+		return 0;
+
+	irq = per_cpu(pmu_irqs, cpu);
+
+	pmu = arm_pmu_acpi_find_alloc_pmu();
+	if (!pmu)
+		return -ENOMEM;
+
+	cpumask_set_cpu(cpu, &pmu->supported_cpus);
+
+	per_cpu(probed_pmus, cpu) = pmu;
+
+	/*
+	 * Log and request the IRQ so the core arm_pmu code can manage it.  In
+	 * some situations (e.g. mismatched PPIs), we may fail to request the
+	 * IRQ. However, it may be too late for us to do anything about it.
+	 * The common ARM PMU code will log a warning in this case.
+	 */
+	hw_events = pmu->hw_events;
+	per_cpu(hw_events->irq, cpu) = irq;
+	armpmu_request_irq(pmu, cpu);
+
+	/*
+	 * Ideally, we'd probe the PMU here when we find the first matching
+	 * CPU. We can't do that for several reasons; see the comment in
+	 * arm_pmu_acpi_init().
+	 *
+	 * So for the time being, we're done.
+	 */
+	return 0;
+}
+
+int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
+{
+	int pmu_idx = 0;
+	int cpu, ret;
+
+	if (acpi_disabled)
+		return 0;
+
+	/*
+	 * Initialise and register the set of PMUs which we know about right
+	 * now. Ideally we'd do this in arm_pmu_acpi_cpu_starting() so that we
+	 * could handle late hotplug, but this may lead to deadlock since we
+	 * might try to register a hotplug notifier instance from within a
+	 * hotplug notifier.
+	 *
+	 * There's also the problem of having access to the right init_fn,
+	 * without tying this too deeply into the "real" PMU driver.
+	 *
+	 * For the moment, as with the platform/DT case, we need at least one
+	 * of a PMU's CPUs to be online at probe time.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct arm_pmu *pmu = per_cpu(probed_pmus, cpu);
+		char *base_name;
+
+		if (!pmu || pmu->name)
+			continue;
+
+		ret = init_fn(pmu);
+		if (ret == -ENODEV) {
+			/* PMU not handled by this driver, or not present */
+			continue;
+		} else if (ret) {
+			pr_warn("Unable to initialise PMU for CPU%d\n", cpu);
+			return ret;
+		}
+
+		base_name = pmu->name;
+		pmu->name = kasprintf(GFP_KERNEL, "%s_%d", base_name, pmu_idx++);
+		if (!pmu->name) {
+			pr_warn("Unable to allocate PMU name for CPU%d\n", cpu);
+			return -ENOMEM;
+		}
+
+		ret = armpmu_register(pmu);
+		if (ret) {
+			pr_warn("Failed to register PMU for CPU%d\n", cpu);
+			kfree(pmu->name);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int arm_pmu_acpi_init(void)
+{
+	int ret;
+
+	if (acpi_disabled)
+		return 0;
+
+	/*
+	 * We can't request IRQs yet, since we don't know the cookie value
+	 * until we know which CPUs share the same logical PMU. We'll handle
+	 * that in arm_pmu_acpi_cpu_starting().
+	 */
+	ret = arm_pmu_acpi_parse_irqs();
+	if (ret)
+		return ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
+				"perf/arm/pmu_acpi:starting",
+				arm_pmu_acpi_cpu_starting, NULL);
+
+	return ret;
+}
+subsys_initcall(arm_pmu_acpi_init)
diff --git a/src/kernel/linux/v4.14/drivers/perf/arm_pmu_platform.c b/src/kernel/linux/v4.14/drivers/perf/arm_pmu_platform.c
new file mode 100644
index 0000000..4428852
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/perf/arm_pmu_platform.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * platform_device probing code for ARM performance counters.
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ */
+#define pr_fmt(fmt) "hw perfevents: " fmt
+
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/kconfig.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/percpu.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+static int probe_current_pmu(struct arm_pmu *pmu,
+			     const struct pmu_probe_info *info)
+{
+	int cpu = get_cpu();
+	unsigned int cpuid = read_cpuid_id();
+	int ret = -ENODEV;
+
+	pr_info("probing PMU on CPU %d\n", cpu);
+
+	for (; info->init != NULL; info++) {
+		if ((cpuid & info->mask) != info->cpuid)
+			continue;
+		ret = info->init(pmu);
+		break;
+	}
+
+	put_cpu();
+	return ret;
+}
+
+static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq)
+{
+	int cpu, ret;
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+
+	ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
+	if (ret)
+		return ret;
+
+	for_each_cpu(cpu, &pmu->supported_cpus)
+		per_cpu(hw_events->irq, cpu) = irq;
+
+	return 0;
+}
+
+static bool pmu_has_irq_affinity(struct device_node *node)
+{
+	return !!of_find_property(node, "interrupt-affinity", NULL);
+}
+
+static int pmu_parse_irq_affinity(struct device_node *node, int i)
+{
+	struct device_node *dn;
+	int cpu;
+
+	/*
+	 * If we don't have an interrupt-affinity property, we guess irq
+	 * affinity matches our logical CPU order, as we used to assume.
+	 * This is fragile, so we'll warn in pmu_parse_irqs().
+	 */
+	if (!pmu_has_irq_affinity(node))
+		return i;
+
+	dn = of_parse_phandle(node, "interrupt-affinity", i);
+	if (!dn) {
+		pr_warn("failed to parse interrupt-affinity[%d] for %s\n",
+			i, node->name);
+		return -EINVAL;
+	}
+
+	/* Now look up the logical CPU number */
+	for_each_possible_cpu(cpu) {
+		struct device_node *cpu_dn;
+
+		cpu_dn = of_cpu_device_node_get(cpu);
+		of_node_put(cpu_dn);
+
+		if (dn == cpu_dn)
+			break;
+	}
+
+	if (cpu >= nr_cpu_ids) {
+		pr_warn("failed to find logical CPU for %s\n", dn->name);
+	}
+
+	of_node_put(dn);
+
+	return cpu;
+}
+
+static int pmu_parse_irqs(struct arm_pmu *pmu)
+{
+	int i = 0, num_irqs;
+	struct platform_device *pdev = pmu->plat_device;
+	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+
+	num_irqs = platform_irq_count(pdev);
+	if (num_irqs < 0) {
+		pr_err("unable to count PMU IRQs\n");
+		return num_irqs;
+	}
+
+	/*
+	 * In this case we have no idea which CPUs are covered by the PMU.
+	 * To match our prior behaviour, we assume all CPUs in this case.
+	 */
+	if (num_irqs == 0) {
+		pr_warn("no irqs for PMU, sampling events not supported\n");
+		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+		cpumask_setall(&pmu->supported_cpus);
+		return 0;
+	}
+
+	if (num_irqs == 1) {
+		int irq = platform_get_irq(pdev, 0);
+		if (irq && irq_is_percpu(irq))
+			return pmu_parse_percpu_irq(pmu, irq);
+	}
+
+	if (!pmu_has_irq_affinity(pdev->dev.of_node)) {
+		pr_warn("no interrupt-affinity property for %pOF, guessing.\n",
+			pdev->dev.of_node);
+	}
+
+	/*
+	 * Some platforms have all PMU IRQs OR'd into a single IRQ, with a
+	 * special platdata function that attempts to demux them.
+	 */
+	if (dev_get_platdata(&pdev->dev))
+		cpumask_setall(&pmu->supported_cpus);
+
+	for (i = 0; i < num_irqs; i++) {
+		int cpu, irq;
+
+		irq = platform_get_irq(pdev, i);
+		if (WARN_ON(irq <= 0))
+			continue;
+
+		if (irq_is_percpu(irq)) {
+			pr_warn("multiple PPIs or mismatched SPI/PPI detected\n");
+			return -EINVAL;
+		}
+
+		cpu = pmu_parse_irq_affinity(pdev->dev.of_node, i);
+		if (cpu < 0)
+			return cpu;
+		if (cpu >= nr_cpu_ids)
+			continue;
+
+		if (per_cpu(hw_events->irq, cpu)) {
+			pr_warn("multiple PMU IRQs for the same CPU detected\n");
+			return -EINVAL;
+		}
+
+		per_cpu(hw_events->irq, cpu) = irq;
+		cpumask_set_cpu(cpu, &pmu->supported_cpus);
+	}
+
+	return 0;
+}
+
+int arm_pmu_device_probe(struct platform_device *pdev,
+			 const struct of_device_id *of_table,
+			 const struct pmu_probe_info *probe_table)
+{
+	const struct of_device_id *of_id;
+	armpmu_init_fn init_fn;
+	struct device_node *node = pdev->dev.of_node;
+	struct arm_pmu *pmu;
+	int ret = -ENODEV;
+
+	pmu = armpmu_alloc();
+	if (!pmu)
+		return -ENOMEM;
+
+	pmu->plat_device = pdev;
+
+	ret = pmu_parse_irqs(pmu);
+	if (ret)
+		goto out_free;
+
+	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
+		init_fn = of_id->data;
+
+		pmu->secure_access = of_property_read_bool(pdev->dev.of_node,
+							   "secure-reg-access");
+
+		/* arm64 systems boot only as non-secure */
+		if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
+			pr_warn("ignoring \"secure-reg-access\" property for arm64\n");
+			pmu->secure_access = false;
+		}
+
+		ret = init_fn(pmu);
+	} else if (probe_table) {
+		cpumask_setall(&pmu->supported_cpus);
+		ret = probe_current_pmu(pmu, probe_table);
+	}
+
+	if (ret) {
+		pr_info("%pOF: failed to probe PMU!\n", node);
+		goto out_free;
+	}
+
+	ret = armpmu_request_irqs(pmu);
+	if (ret)
+		goto out_free_irqs;
+
+	ret = armpmu_register(pmu);
+	if (ret)
+		goto out_free;
+
+	return 0;
+
+out_free_irqs:
+	armpmu_free_irqs(pmu);
+out_free:
+	pr_info("%pOF: failed to register PMU devices!\n", node);
+	armpmu_free(pmu);
+	return ret;
+}
diff --git a/src/kernel/linux/v4.14/drivers/perf/qcom_l2_pmu.c b/src/kernel/linux/v4.14/drivers/perf/qcom_l2_pmu.c
new file mode 100644
index 0000000..b242cce
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/perf/qcom_l2_pmu.c
@@ -0,0 +1,1015 @@
+/* Copyright (c) 2015-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+#include <asm/local64.h>
+#include <asm/sysreg.h>
+
+#define MAX_L2_CTRS             9
+
+#define L2PMCR_NUM_EV_SHIFT     11
+#define L2PMCR_NUM_EV_MASK      0x1F
+
+#define L2PMCR                  0x400
+#define L2PMCNTENCLR            0x403
+#define L2PMCNTENSET            0x404
+#define L2PMINTENCLR            0x405
+#define L2PMINTENSET            0x406
+#define L2PMOVSCLR              0x407
+#define L2PMOVSSET              0x408
+#define L2PMCCNTCR              0x409
+#define L2PMCCNTR               0x40A
+#define L2PMCCNTSR              0x40C
+#define L2PMRESR                0x410
+#define IA_L2PMXEVCNTCR_BASE    0x420
+#define IA_L2PMXEVCNTR_BASE     0x421
+#define IA_L2PMXEVFILTER_BASE   0x423
+#define IA_L2PMXEVTYPER_BASE    0x424
+
+#define IA_L2_REG_OFFSET        0x10
+
+#define L2PMXEVFILTER_SUFILTER_ALL      0x000E0000
+#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x00000004
+#define L2PMXEVFILTER_ORGFILTER_ALL     0x00000003
+
+#define L2EVTYPER_REG_SHIFT     3
+
+#define L2PMRESR_GROUP_BITS     8
+#define L2PMRESR_GROUP_MASK     GENMASK(7, 0)
+
+#define L2CYCLE_CTR_BIT         31
+#define L2CYCLE_CTR_RAW_CODE    0xFE
+
+#define L2PMCR_RESET_ALL        0x6
+#define L2PMCR_COUNTERS_ENABLE  0x1
+#define L2PMCR_COUNTERS_DISABLE 0x0
+
+#define L2PMRESR_EN             BIT_ULL(63)
+
+#define L2_EVT_MASK             0x00000FFF
+#define L2_EVT_CODE_MASK        0x00000FF0
+#define L2_EVT_GRP_MASK         0x0000000F
+#define L2_EVT_CODE_SHIFT       4
+#define L2_EVT_GRP_SHIFT        0
+
+#define L2_EVT_CODE(event)   (((event) & L2_EVT_CODE_MASK) >> L2_EVT_CODE_SHIFT)
+#define L2_EVT_GROUP(event)  (((event) & L2_EVT_GRP_MASK) >> L2_EVT_GRP_SHIFT)
+
+#define L2_EVT_GROUP_MAX        7
+
+#define L2_COUNTER_RELOAD       BIT_ULL(31)
+#define L2_CYCLE_COUNTER_RELOAD BIT_ULL(63)
+
+#define L2CPUSRSELR_EL1         sys_reg(3, 3, 15, 0, 6)
+#define L2CPUSRDR_EL1           sys_reg(3, 3, 15, 0, 7)
+
+#define reg_idx(reg, i)         (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
+
+static DEFINE_RAW_SPINLOCK(l2_access_lock);
+
+/**
+ * set_l2_indirect_reg: write value to an L2 register
+ * @reg: Address of L2 register.
+ * @value: Value to be written to register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+static void set_l2_indirect_reg(u64 reg, u64 val)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2_access_lock, flags);
+	write_sysreg_s(reg, L2CPUSRSELR_EL1);
+	isb();
+	write_sysreg_s(val, L2CPUSRDR_EL1);
+	isb();
+	raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+}
+
+/**
+ * get_l2_indirect_reg: read an L2 register value
+ * @reg: Address of L2 register.
+ *
+ * Use architecturally required barriers for ordering between system register
+ * accesses
+ */
+static u64 get_l2_indirect_reg(u64 reg)
+{
+	u64 val;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2_access_lock, flags);
+	write_sysreg_s(reg, L2CPUSRSELR_EL1);
+	isb();
+	val = read_sysreg_s(L2CPUSRDR_EL1);
+	raw_spin_unlock_irqrestore(&l2_access_lock, flags);
+
+	return val;
+}
+
+struct cluster_pmu;
+
+/*
+ * Aggregate PMU. Implements the core pmu functions and manages
+ * the hardware PMUs.
+ */
+struct l2cache_pmu {
+	struct hlist_node node;
+	u32 num_pmus;
+	struct pmu pmu;
+	int num_counters;
+	cpumask_t cpumask;
+	struct platform_device *pdev;
+	struct cluster_pmu * __percpu *pmu_cluster;
+	struct list_head clusters;
+};
+
+/*
+ * The cache is made up of one or more clusters, each cluster has its own PMU.
+ * Each cluster is associated with one or more CPUs.
+ * This structure represents one of the hardware PMUs.
+ *
+ * Events can be envisioned as a 2-dimensional array. Each column represents
+ * a group of events. There are 8 groups. Only one entry from each
+ * group can be in use at a time.
+ *
+ * Events are specified as 0xCCG, where CC is 2 hex digits specifying
+ * the code (array row) and G specifies the group (column).
+ *
+ * In addition there is a cycle counter event specified by L2CYCLE_CTR_RAW_CODE
+ * which is outside the above scheme.
+ */
+struct cluster_pmu {
+	struct list_head next;
+	struct perf_event *events[MAX_L2_CTRS];
+	struct l2cache_pmu *l2cache_pmu;
+	DECLARE_BITMAP(used_counters, MAX_L2_CTRS);
+	DECLARE_BITMAP(used_groups, L2_EVT_GROUP_MAX + 1);
+	int irq;
+	int cluster_id;
+	/* The CPU that is used for collecting events on this cluster */
+	int on_cpu;
+	/* All the CPUs associated with this cluster */
+	cpumask_t cluster_cpus;
+	spinlock_t pmu_lock;
+};
+
+#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
+
+static u32 l2_cycle_ctr_idx;
+static u32 l2_counter_present_mask;
+
+static inline u32 idx_to_reg_bit(u32 idx)
+{
+	if (idx == l2_cycle_ctr_idx)
+		return BIT(L2CYCLE_CTR_BIT);
+
+	return BIT(idx);
+}
+
+static inline struct cluster_pmu *get_cluster_pmu(
+	struct l2cache_pmu *l2cache_pmu, int cpu)
+{
+	return *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu);
+}
+
+static void cluster_pmu_reset(void)
+{
+	/* Reset all counters */
+	set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+	set_l2_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
+	set_l2_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
+	set_l2_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
+}
+
+static inline void cluster_pmu_enable(void)
+{
+	set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
+}
+
+static inline void cluster_pmu_disable(void)
+{
+	set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
+}
+
+static inline void cluster_pmu_counter_set_value(u32 idx, u64 value)
+{
+	if (idx == l2_cycle_ctr_idx)
+		set_l2_indirect_reg(L2PMCCNTR, value);
+	else
+		set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
+}
+
+static inline u64 cluster_pmu_counter_get_value(u32 idx)
+{
+	u64 value;
+
+	if (idx == l2_cycle_ctr_idx)
+		value = get_l2_indirect_reg(L2PMCCNTR);
+	else
+		value = get_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx));
+
+	return value;
+}
+
+static inline void cluster_pmu_counter_enable(u32 idx)
+{
+	set_l2_indirect_reg(L2PMCNTENSET, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_disable(u32 idx)
+{
+	set_l2_indirect_reg(L2PMCNTENCLR, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_enable_interrupt(u32 idx)
+{
+	set_l2_indirect_reg(L2PMINTENSET, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_counter_disable_interrupt(u32 idx)
+{
+	set_l2_indirect_reg(L2PMINTENCLR, idx_to_reg_bit(idx));
+}
+
+static inline void cluster_pmu_set_evccntcr(u32 val)
+{
+	set_l2_indirect_reg(L2PMCCNTCR, val);
+}
+
+static inline void cluster_pmu_set_evcntcr(u32 ctr, u32 val)
+{
+	set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTCR, ctr), val);
+}
+
+static inline void cluster_pmu_set_evtyper(u32 ctr, u32 val)
+{
+	set_l2_indirect_reg(reg_idx(IA_L2PMXEVTYPER, ctr), val);
+}
+
+static void cluster_pmu_set_resr(struct cluster_pmu *cluster,
+			       u32 event_group, u32 event_cc)
+{
+	u64 field;
+	u64 resr_val;
+	u32 shift;
+	unsigned long flags;
+
+	shift = L2PMRESR_GROUP_BITS * event_group;
+	field = ((u64)(event_cc & L2PMRESR_GROUP_MASK) << shift);
+
+	spin_lock_irqsave(&cluster->pmu_lock, flags);
+
+	resr_val = get_l2_indirect_reg(L2PMRESR);
+	resr_val &= ~(L2PMRESR_GROUP_MASK << shift);
+	resr_val |= field;
+	resr_val |= L2PMRESR_EN;
+	set_l2_indirect_reg(L2PMRESR, resr_val);
+
+	spin_unlock_irqrestore(&cluster->pmu_lock, flags);
+}
+
+/*
+ * Hardware allows filtering of events based on the originating
+ * CPU. Turn this off by setting filter bits to allow events from
+ * all CPUS, subunits and ID independent events in this cluster.
+ */
+static inline void cluster_pmu_set_evfilter_sys_mode(u32 ctr)
+{
+	u32 val =  L2PMXEVFILTER_SUFILTER_ALL |
+		   L2PMXEVFILTER_ORGFILTER_IDINDEP |
+		   L2PMXEVFILTER_ORGFILTER_ALL;
+
+	set_l2_indirect_reg(reg_idx(IA_L2PMXEVFILTER, ctr), val);
+}
+
+static inline u32 cluster_pmu_getreset_ovsr(void)
+{
+	u32 result = get_l2_indirect_reg(L2PMOVSSET);
+
+	set_l2_indirect_reg(L2PMOVSCLR, result);
+	return result;
+}
+
+static inline bool cluster_pmu_has_overflowed(u32 ovsr)
+{
+	return !!(ovsr & l2_counter_present_mask);
+}
+
+static inline bool cluster_pmu_counter_has_overflowed(u32 ovsr, u32 idx)
+{
+	return !!(ovsr & idx_to_reg_bit(idx));
+}
+
+static void l2_cache_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev, now;
+	u32 idx = hwc->idx;
+
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = cluster_pmu_counter_get_value(idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	/*
+	 * The cycle counter is 64-bit, but all other counters are
+	 * 32-bit, and we must handle 32-bit overflow explicitly.
+	 */
+	delta = now - prev;
+	if (idx != l2_cycle_ctr_idx)
+		delta &= 0xffffffff;
+
+	local64_add(delta, &event->count);
+}
+
+static void l2_cache_cluster_set_period(struct cluster_pmu *cluster,
+				       struct hw_perf_event *hwc)
+{
+	u32 idx = hwc->idx;
+	u64 new;
+
+	/*
+	 * We limit the max period to half the max counter value so
+	 * that even in the case of extreme interrupt latency the
+	 * counter will (hopefully) not wrap past its initial value.
+	 */
+	if (idx == l2_cycle_ctr_idx)
+		new = L2_CYCLE_COUNTER_RELOAD;
+	else
+		new = L2_COUNTER_RELOAD;
+
+	local64_set(&hwc->prev_count, new);
+	cluster_pmu_counter_set_value(idx, new);
+}
+
+static int l2_cache_get_event_idx(struct cluster_pmu *cluster,
+				   struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int num_ctrs = cluster->l2cache_pmu->num_counters - 1;
+	unsigned int group;
+
+	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
+		if (test_and_set_bit(l2_cycle_ctr_idx, cluster->used_counters))
+			return -EAGAIN;
+
+		return l2_cycle_ctr_idx;
+	}
+
+	idx = find_first_zero_bit(cluster->used_counters, num_ctrs);
+	if (idx == num_ctrs)
+		/* The counters are all in use. */
+		return -EAGAIN;
+
+	/*
+	 * Check for column exclusion: event column already in use by another
+	 * event. This is for events which are not in the same group.
+	 * Conflicting events in the same group are detected in event_init.
+	 */
+	group = L2_EVT_GROUP(hwc->config_base);
+	if (test_bit(group, cluster->used_groups))
+		return -EAGAIN;
+
+	set_bit(idx, cluster->used_counters);
+	set_bit(group, cluster->used_groups);
+
+	return idx;
+}
+
+static void l2_cache_clear_event_idx(struct cluster_pmu *cluster,
+				      struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	clear_bit(idx, cluster->used_counters);
+	if (hwc->config_base != L2CYCLE_CTR_RAW_CODE)
+		clear_bit(L2_EVT_GROUP(hwc->config_base), cluster->used_groups);
+}
+
+static irqreturn_t l2_cache_handle_irq(int irq_num, void *data)
+{
+	struct cluster_pmu *cluster = data;
+	int num_counters = cluster->l2cache_pmu->num_counters;
+	u32 ovsr;
+	int idx;
+
+	ovsr = cluster_pmu_getreset_ovsr();
+	if (!cluster_pmu_has_overflowed(ovsr))
+		return IRQ_NONE;
+
+	for_each_set_bit(idx, cluster->used_counters, num_counters) {
+		struct perf_event *event = cluster->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (WARN_ON_ONCE(!event))
+			continue;
+
+		if (!cluster_pmu_counter_has_overflowed(ovsr, idx))
+			continue;
+
+		l2_cache_event_update(event);
+		hwc = &event->hw;
+
+		l2_cache_cluster_set_period(cluster, hwc);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Implementation of abstract pmu functionality required by
+ * the core perf events code.
+ */
+
+static void l2_cache_pmu_enable(struct pmu *pmu)
+{
+	/*
+	 * Although there is only one PMU (per socket) controlling multiple
+	 * physical PMUs (per cluster), because we do not support per-task mode
+	 * each event is associated with a CPU. Each event has pmu_enable
+	 * called on its CPU, so here it is only necessary to enable the
+	 * counters for the current CPU.
+	 */
+
+	cluster_pmu_enable();
+}
+
+static void l2_cache_pmu_disable(struct pmu *pmu)
+{
+	cluster_pmu_disable();
+}
+
+static int l2_cache_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cluster_pmu *cluster;
+	struct perf_event *sibling;
+	struct l2cache_pmu *l2cache_pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	l2cache_pmu = to_l2cache_pmu(event->pmu);
+
+	if (hwc->sample_period) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Sampling not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->cpu < 0) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Per-task mode not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* We cannot filter accurately so we just don't allow it. */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_hv || event->attr.exclude_idle) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Can't exclude execution levels\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) ||
+	     ((event->attr.config & ~L2_EVT_MASK) != 0)) &&
+	    (event->attr.config != L2CYCLE_CTR_RAW_CODE)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				    "Invalid config %llx\n",
+				    event->attr.config);
+		return -EINVAL;
+	}
+
+	/* Don't allow groups with mixed PMUs, except for s/w events */
+	if (event->group_leader->pmu != event->pmu &&
+	    !is_software_event(event->group_leader)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Can't create mixed PMU group\n");
+		return -EINVAL;
+	}
+
+	list_for_each_entry(sibling, &event->group_leader->sibling_list,
+			    group_entry)
+		if (sibling->pmu != event->pmu &&
+		    !is_software_event(sibling)) {
+			dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+				 "Can't create mixed PMU group\n");
+			return -EINVAL;
+		}
+
+	cluster = get_cluster_pmu(l2cache_pmu, event->cpu);
+	if (!cluster) {
+		/* CPU has not been initialised */
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			"CPU%d not associated with L2 cluster\n", event->cpu);
+		return -EINVAL;
+	}
+
+	/* Ensure all events in a group are on the same cpu */
+	if ((event->group_leader != event) &&
+	    (cluster->on_cpu != event->group_leader->cpu)) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Can't create group on CPUs %d and %d",
+			 event->cpu, event->group_leader->cpu);
+		return -EINVAL;
+	}
+
+	if ((event != event->group_leader) &&
+	    !is_software_event(event->group_leader) &&
+	    (L2_EVT_GROUP(event->group_leader->attr.config) ==
+	     L2_EVT_GROUP(event->attr.config))) {
+		dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			 "Column exclusion: conflicting events %llx %llx\n",
+		       event->group_leader->attr.config,
+		       event->attr.config);
+		return -EINVAL;
+	}
+
+	list_for_each_entry(sibling, &event->group_leader->sibling_list,
+			    group_entry) {
+		if ((sibling != event) &&
+		    !is_software_event(sibling) &&
+		    (L2_EVT_GROUP(sibling->attr.config) ==
+		     L2_EVT_GROUP(event->attr.config))) {
+			dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
+			     "Column exclusion: conflicting events %llx %llx\n",
+					    sibling->attr.config,
+					    event->attr.config);
+			return -EINVAL;
+		}
+	}
+
+	hwc->idx = -1;
+	hwc->config_base = event->attr.config;
+
+	/*
+	 * Ensure all events are on the same cpu so all events are in the
+	 * same cpu context, to avoid races on pmu_enable etc.
+	 */
+	event->cpu = cluster->on_cpu;
+
+	return 0;
+}
+
+static void l2_cache_event_start(struct perf_event *event, int flags)
+{
+	struct cluster_pmu *cluster;
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u32 config;
+	u32 event_cc, event_group;
+
+	hwc->state = 0;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	l2_cache_cluster_set_period(cluster, hwc);
+
+	if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
+		cluster_pmu_set_evccntcr(0);
+	} else {
+		config = hwc->config_base;
+		event_cc    = L2_EVT_CODE(config);
+		event_group = L2_EVT_GROUP(config);
+
+		cluster_pmu_set_evcntcr(idx, 0);
+		cluster_pmu_set_evtyper(idx, event_group);
+		cluster_pmu_set_resr(cluster, event_group, event_cc);
+		cluster_pmu_set_evfilter_sys_mode(idx);
+	}
+
+	cluster_pmu_counter_enable_interrupt(idx);
+	cluster_pmu_counter_enable(idx);
+}
+
+static void l2_cache_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	cluster_pmu_counter_disable_interrupt(idx);
+	cluster_pmu_counter_disable(idx);
+
+	if (flags & PERF_EF_UPDATE)
+		l2_cache_event_update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int l2_cache_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
+	struct cluster_pmu *cluster;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	idx = l2_cache_get_event_idx(cluster, event);
+	if (idx < 0)
+		return idx;
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	cluster->events[idx] = event;
+	local64_set(&hwc->prev_count, 0);
+
+	if (flags & PERF_EF_START)
+		l2_cache_event_start(event, flags);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return err;
+}
+
+static void l2_cache_event_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cluster_pmu *cluster;
+	int idx = hwc->idx;
+
+	cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);
+
+	l2_cache_event_stop(event, flags | PERF_EF_UPDATE);
+	cluster->events[idx] = NULL;
+	l2_cache_clear_event_idx(cluster, event);
+
+	perf_event_update_userpage(event);
+}
+
+static void l2_cache_event_read(struct perf_event *event)
+{
+	l2_cache_event_update(event);
+}
+
+static ssize_t l2_cache_pmu_cpumask_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &l2cache_pmu->cpumask);
+}
+
+static struct device_attribute l2_cache_pmu_cpumask_attr =
+		__ATTR(cpumask, S_IRUGO, l2_cache_pmu_cpumask_show, NULL);
+
+static struct attribute *l2_cache_pmu_cpumask_attrs[] = {
+	&l2_cache_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group l2_cache_pmu_cpumask_group = {
+	.attrs = l2_cache_pmu_cpumask_attrs,
+};
+
+/* CCG format for perf RAW codes. */
+PMU_FORMAT_ATTR(l2_code,   "config:4-11");
+PMU_FORMAT_ATTR(l2_group,  "config:0-3");
+static struct attribute *l2_cache_pmu_formats[] = {
+	&format_attr_l2_code.attr,
+	&format_attr_l2_group.attr,
+	NULL,
+};
+
+static struct attribute_group l2_cache_pmu_format_group = {
+	.name = "format",
+	.attrs = l2_cache_pmu_formats,
+};
+
+static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
+	&l2_cache_pmu_format_group,
+	&l2_cache_pmu_cpumask_group,
+	NULL,
+};
+
+/*
+ * Generic device handlers
+ */
+
+static const struct acpi_device_id l2_cache_pmu_acpi_match[] = {
+	{ "QCOM8130", },
+	{ }
+};
+
+static int get_num_counters(void)
+{
+	int val;
+
+	val = get_l2_indirect_reg(L2PMCR);
+
+	/*
+	 * Read number of counters from L2PMCR and add 1
+	 * for the cycle counter.
+	 */
+	return ((val >> L2PMCR_NUM_EV_SHIFT) & L2PMCR_NUM_EV_MASK) + 1;
+}
+
+static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
+	struct l2cache_pmu *l2cache_pmu, int cpu)
+{
+	u64 mpidr;
+	int cpu_cluster_id;
+	struct cluster_pmu *cluster = NULL;
+
+	/*
+	 * This assumes that the cluster_id is in MPIDR[aff1] for
+	 * single-threaded cores, and MPIDR[aff2] for multi-threaded
+	 * cores. This logic will have to be updated if this changes.
+	 */
+	mpidr = read_cpuid_mpidr();
+	if (mpidr & MPIDR_MT_BITMASK)
+		cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+	else
+		cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
+		if (cluster->cluster_id != cpu_cluster_id)
+			continue;
+
+		dev_info(&l2cache_pmu->pdev->dev,
+			 "CPU%d associated with cluster %d\n", cpu,
+			 cluster->cluster_id);
+		cpumask_set_cpu(cpu, &cluster->cluster_cpus);
+		*per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
+		break;
+	}
+
+	return cluster;
+}
+
+static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cluster_pmu *cluster;
+	struct l2cache_pmu *l2cache_pmu;
+
+	l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
+	cluster = get_cluster_pmu(l2cache_pmu, cpu);
+	if (!cluster) {
+		/* First time this CPU has come online */
+		cluster = l2_cache_associate_cpu_with_cluster(l2cache_pmu, cpu);
+		if (!cluster) {
+			/* Only if broken firmware doesn't list every cluster */
+			WARN_ONCE(1, "No L2 cache cluster for CPU%d\n", cpu);
+			return 0;
+		}
+	}
+
+	/* If another CPU is managing this cluster, we're done */
+	if (cluster->on_cpu != -1)
+		return 0;
+
+	/*
+	 * All CPUs on this cluster were down, use this one.
+	 * Reset to put it into sane state.
+	 */
+	cluster->on_cpu = cpu;
+	cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
+	cluster_pmu_reset();
+
+	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
+	enable_irq(cluster->irq);
+
+	return 0;
+}
+
+static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct cluster_pmu *cluster;
+	struct l2cache_pmu *l2cache_pmu;
+	cpumask_t cluster_online_cpus;
+	unsigned int target;
+
+	l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
+	cluster = get_cluster_pmu(l2cache_pmu, cpu);
+	if (!cluster)
+		return 0;
+
+	/* If this CPU is not managing the cluster, we're done */
+	if (cluster->on_cpu != cpu)
+		return 0;
+
+	/* Give up ownership of cluster */
+	cpumask_clear_cpu(cpu, &l2cache_pmu->cpumask);
+	cluster->on_cpu = -1;
+
+	/* Any other CPU for this cluster which is still online */
+	cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
+		    cpu_online_mask);
+	target = cpumask_any_but(&cluster_online_cpus, cpu);
+	if (target >= nr_cpu_ids) {
+		disable_irq(cluster->irq);
+		return 0;
+	}
+
+	perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
+	cluster->on_cpu = target;
+	cpumask_set_cpu(target, &l2cache_pmu->cpumask);
+	WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));
+
+	return 0;
+}
+
+static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev->parent);
+	struct platform_device *sdev = to_platform_device(dev);
+	struct l2cache_pmu *l2cache_pmu = data;
+	struct cluster_pmu *cluster;
+	struct acpi_device *device;
+	unsigned long fw_cluster_id;
+	int err;
+	int irq;
+
+	if (acpi_bus_get_device(ACPI_HANDLE(dev), &device))
+		return -ENODEV;
+
+	if (kstrtoul(device->pnp.unique_id, 10, &fw_cluster_id) < 0) {
+		dev_err(&pdev->dev, "unable to read ACPI uid\n");
+		return -ENODEV;
+	}
+
+	cluster = devm_kzalloc(&pdev->dev, sizeof(*cluster), GFP_KERNEL);
+	if (!cluster)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&cluster->next);
+	list_add(&cluster->next, &l2cache_pmu->clusters);
+	cluster->cluster_id = fw_cluster_id;
+
+	irq = platform_get_irq(sdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev,
+			"Failed to get valid irq for cluster %ld\n",
+			fw_cluster_id);
+		return irq;
+	}
+	irq_set_status_flags(irq, IRQ_NOAUTOEN);
+	cluster->irq = irq;
+
+	cluster->l2cache_pmu = l2cache_pmu;
+	cluster->on_cpu = -1;
+
+	err = devm_request_irq(&pdev->dev, irq, l2_cache_handle_irq,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       "l2-cache-pmu", cluster);
+	if (err) {
+		dev_err(&pdev->dev,
+			"Unable to request IRQ%d for L2 PMU counters\n", irq);
+		return err;
+	}
+
+	dev_info(&pdev->dev,
+		"Registered L2 cache PMU cluster %ld\n", fw_cluster_id);
+
+	spin_lock_init(&cluster->pmu_lock);
+
+	l2cache_pmu->num_pmus++;
+
+	return 0;
+}
+
+static int l2_cache_pmu_probe(struct platform_device *pdev)
+{
+	int err;
+	struct l2cache_pmu *l2cache_pmu;
+
+	l2cache_pmu =
+		devm_kzalloc(&pdev->dev, sizeof(*l2cache_pmu), GFP_KERNEL);
+	if (!l2cache_pmu)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&l2cache_pmu->clusters);
+
+	platform_set_drvdata(pdev, l2cache_pmu);
+	l2cache_pmu->pmu = (struct pmu) {
+		/* suffix is instance id for future use with multiple sockets */
+		.name		= "l2cache_0",
+		.task_ctx_nr    = perf_invalid_context,
+		.pmu_enable	= l2_cache_pmu_enable,
+		.pmu_disable	= l2_cache_pmu_disable,
+		.event_init	= l2_cache_event_init,
+		.add		= l2_cache_event_add,
+		.del		= l2_cache_event_del,
+		.start		= l2_cache_event_start,
+		.stop		= l2_cache_event_stop,
+		.read		= l2_cache_event_read,
+		.attr_groups	= l2_cache_pmu_attr_grps,
+	};
+
+	l2cache_pmu->num_counters = get_num_counters();
+	l2cache_pmu->pdev = pdev;
+	l2cache_pmu->pmu_cluster = devm_alloc_percpu(&pdev->dev,
+						     struct cluster_pmu *);
+	if (!l2cache_pmu->pmu_cluster)
+		return -ENOMEM;
+
+	l2_cycle_ctr_idx = l2cache_pmu->num_counters - 1;
+	l2_counter_present_mask = GENMASK(l2cache_pmu->num_counters - 2, 0) |
+		BIT(L2CYCLE_CTR_BIT);
+
+	cpumask_clear(&l2cache_pmu->cpumask);
+
+	/* Read cluster info and initialize each cluster */
+	err = device_for_each_child(&pdev->dev, l2cache_pmu,
+				    l2_cache_pmu_probe_cluster);
+	if (err)
+		return err;
+
+	if (l2cache_pmu->num_pmus == 0) {
+		dev_err(&pdev->dev, "No hardware L2 cache PMUs found\n");
+		return -ENODEV;
+	}
+
+	err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				       &l2cache_pmu->node);
+	if (err) {
+		dev_err(&pdev->dev, "Error %d registering hotplug", err);
+		return err;
+	}
+
+	err = perf_pmu_register(&l2cache_pmu->pmu, l2cache_pmu->pmu.name, -1);
+	if (err) {
+		dev_err(&pdev->dev, "Error %d registering L2 cache PMU\n", err);
+		goto out_unregister;
+	}
+
+	dev_info(&pdev->dev, "Registered L2 cache PMU using %d HW PMUs\n",
+		 l2cache_pmu->num_pmus);
+
+	return err;
+
+out_unregister:
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				    &l2cache_pmu->node);
+	return err;
+}
+
+static int l2_cache_pmu_remove(struct platform_device *pdev)
+{
+	struct l2cache_pmu *l2cache_pmu =
+		to_l2cache_pmu(platform_get_drvdata(pdev));
+
+	perf_pmu_unregister(&l2cache_pmu->pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				    &l2cache_pmu->node);
+	return 0;
+}
+
+static struct platform_driver l2_cache_pmu_driver = {
+	.driver = {
+		.name = "qcom-l2cache-pmu",
+		.acpi_match_table = ACPI_PTR(l2_cache_pmu_acpi_match),
+	},
+	.probe = l2_cache_pmu_probe,
+	.remove = l2_cache_pmu_remove,
+};
+
+static int __init register_l2_cache_pmu_driver(void)
+{
+	int err;
+
+	err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
+				      "AP_PERF_ARM_QCOM_L2_ONLINE",
+				      l2cache_pmu_online_cpu,
+				      l2cache_pmu_offline_cpu);
+	if (err)
+		return err;
+
+	return platform_driver_register(&l2_cache_pmu_driver);
+}
+device_initcall(register_l2_cache_pmu_driver);
diff --git a/src/kernel/linux/v4.14/drivers/perf/qcom_l3_pmu.c b/src/kernel/linux/v4.14/drivers/perf/qcom_l3_pmu.c
new file mode 100644
index 0000000..7f6b62b
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/perf/qcom_l3_pmu.c
@@ -0,0 +1,849 @@
+/*
+ * Driver for the L3 cache PMUs in Qualcomm Technologies chips.
+ *
+ * The driver supports a distributed cache architecture where the overall
+ * cache for a socket is comprised of multiple slices each with its own PMU.
+ * Access to each individual PMU is provided even though all CPUs share all
+ * the slices. User space needs to aggregate to individual counts to provide
+ * a global picture.
+ *
+ * See Documentation/perf/qcom_l3_pmu.txt for more details.
+ *
+ * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+/*
+ * General constants
+ */
+
+/* Number of counters on each PMU */
+#define L3_NUM_COUNTERS  8
+/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
+#define L3_EVTYPE_MASK   0xFF
+/*
+ * Bit position of the 'long counter' flag within perf_event_attr.config.
+ * Reserve some space between the event type and this flag to allow expansion
+ * in the event type field.
+ */
+#define L3_EVENT_LC_BIT  32
+
+/*
+ * Register offsets
+ */
+
+/* Perfmon registers */
+#define L3_HML3_PM_CR       0x000
+#define L3_HML3_PM_EVCNTR(__cntr) (0x420 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_CNTCTL(__cntr) (0x120 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_EVTYPE(__cntr) (0x220 + ((__cntr) & 0x7) * 8)
+#define L3_HML3_PM_FILTRA   0x300
+#define L3_HML3_PM_FILTRB   0x308
+#define L3_HML3_PM_FILTRC   0x310
+#define L3_HML3_PM_FILTRAM  0x304
+#define L3_HML3_PM_FILTRBM  0x30C
+#define L3_HML3_PM_FILTRCM  0x314
+
+/* Basic counter registers */
+#define L3_M_BC_CR         0x500
+#define L3_M_BC_SATROLL_CR 0x504
+#define L3_M_BC_CNTENSET   0x508
+#define L3_M_BC_CNTENCLR   0x50C
+#define L3_M_BC_INTENSET   0x510
+#define L3_M_BC_INTENCLR   0x514
+#define L3_M_BC_GANG       0x718
+#define L3_M_BC_OVSR       0x740
+#define L3_M_BC_IRQCTL     0x96C
+
+/*
+ * Bit field definitions
+ */
+
+/* L3_HML3_PM_CR */
+#define PM_CR_RESET           (0)
+
+/* L3_HML3_PM_XCNTCTL/L3_HML3_PM_CNTCTLx */
+#define PMCNT_RESET           (0)
+
+/* L3_HML3_PM_EVTYPEx */
+#define EVSEL(__val)          ((__val) & L3_EVTYPE_MASK)
+
+/* Reset value for all the filter registers */
+#define PM_FLTR_RESET         (0)
+
+/* L3_M_BC_CR */
+#define BC_RESET              (1UL << 1)
+#define BC_ENABLE             (1UL << 0)
+
+/* L3_M_BC_SATROLL_CR */
+#define BC_SATROLL_CR_RESET   (0)
+
+/* L3_M_BC_CNTENSET */
+#define PMCNTENSET(__cntr)    (1UL << ((__cntr) & 0x7))
+
+/* L3_M_BC_CNTENCLR */
+#define PMCNTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
+#define BC_CNTENCLR_RESET     (0xFF)
+
+/* L3_M_BC_INTENSET */
+#define PMINTENSET(__cntr)    (1UL << ((__cntr) & 0x7))
+
+/* L3_M_BC_INTENCLR */
+#define PMINTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
+#define BC_INTENCLR_RESET     (0xFF)
+
+/* L3_M_BC_GANG */
+#define GANG_EN(__cntr)       (1UL << ((__cntr) & 0x7))
+#define BC_GANG_RESET         (0)
+
+/* L3_M_BC_OVSR */
+#define PMOVSRCLR(__cntr)     (1UL << ((__cntr) & 0x7))
+#define PMOVSRCLR_RESET       (0xFF)
+
+/* L3_M_BC_IRQCTL */
+#define PMIRQONMSBEN(__cntr)  (1UL << ((__cntr) & 0x7))
+#define BC_IRQCTL_RESET       (0x0)
+
+/*
+ * Events
+ */
+
+#define L3_EVENT_CYCLES		0x01
+#define L3_EVENT_READ_HIT		0x20
+#define L3_EVENT_READ_MISS		0x21
+#define L3_EVENT_READ_HIT_D		0x22
+#define L3_EVENT_READ_MISS_D		0x23
+#define L3_EVENT_WRITE_HIT		0x24
+#define L3_EVENT_WRITE_MISS		0x25
+
+/*
+ * Decoding of settings from perf_event_attr
+ *
+ * The config format for perf events is:
+ * - config: bits 0-7: event type
+ *           bit  32:  HW counter size requested, 0: 32 bits, 1: 64 bits
+ */
+
+static inline u32 get_event_type(struct perf_event *event)
+{
+	return (event->attr.config) & L3_EVTYPE_MASK;
+}
+
+static inline bool event_uses_long_counter(struct perf_event *event)
+{
+	return !!(event->attr.config & BIT_ULL(L3_EVENT_LC_BIT));
+}
+
+static inline int event_num_counters(struct perf_event *event)
+{
+	return event_uses_long_counter(event) ? 2 : 1;
+}
+
+/*
+ * Main PMU, inherits from the core perf PMU type
+ */
+struct l3cache_pmu {
+	struct pmu		pmu;
+	struct hlist_node	node;
+	void __iomem		*regs;
+	struct perf_event	*events[L3_NUM_COUNTERS];
+	unsigned long		used_mask[BITS_TO_LONGS(L3_NUM_COUNTERS)];
+	cpumask_t		cpumask;
+};
+
+#define to_l3cache_pmu(p) (container_of(p, struct l3cache_pmu, pmu))
+
+/*
+ * Type used to group hardware counter operations
+ *
+ * Used to implement two types of hardware counters, standard (32bits) and
+ * long (64bits). The hardware supports counter chaining which we use to
+ * implement long counters. This support is exposed via the 'lc' flag field
+ * in perf_event_attr.config.
+ */
+struct l3cache_event_ops {
+	/* Called to start event monitoring */
+	void (*start)(struct perf_event *event);
+	/* Called to stop event monitoring */
+	void (*stop)(struct perf_event *event, int flags);
+	/* Called to update the perf_event */
+	void (*update)(struct perf_event *event);
+};
+
+/*
+ * Implementation of long counter operations
+ *
+ * 64bit counters are implemented by chaining two of the 32bit physical
+ * counters. The PMU only supports chaining of adjacent even/odd pairs
+ * and for simplicity the driver always configures the odd counter to
+ * count the overflows of the lower-numbered even counter. Note that since
+ * the resulting hardware counter is 64bits no IRQs are required to maintain
+ * the software counter which is also 64bits.
+ */
+
+static void qcom_l3_cache__64bit_counter_start(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 evsel = get_event_type(event);
+	u32 gang;
+
+	/* Set the odd counter to count the overflows of the even counter */
+	gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
+	gang |= GANG_EN(idx + 1);
+	writel_relaxed(gang, l3pmu->regs + L3_M_BC_GANG);
+
+	/* Initialize the hardware counters and reset prev_count*/
+	local64_set(&event->hw.prev_count, 0);
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+
+	/*
+	 * Set the event types, the upper half must use zero and the lower
+	 * half the actual event type
+	 */
+	writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(idx + 1));
+	writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));
+
+	/* Finally, enable the counters */
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx + 1));
+	writel_relaxed(PMCNTENSET(idx + 1), l3pmu->regs + L3_M_BC_CNTENSET);
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
+	writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
+}
+
+static void qcom_l3_cache__64bit_counter_stop(struct perf_event *event,
+					      int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
+
+	/* Disable the counters */
+	writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
+	writel_relaxed(PMCNTENCLR(idx + 1), l3pmu->regs + L3_M_BC_CNTENCLR);
+
+	/* Disable chaining */
+	writel_relaxed(gang & ~GANG_EN(idx + 1), l3pmu->regs + L3_M_BC_GANG);
+}
+
+static void qcom_l3_cache__64bit_counter_update(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 hi, lo;
+	u64 prev, new;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		do {
+			hi = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
+			lo = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+		} while (hi != readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1)));
+		new = ((u64)hi << 32) | lo;
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	local64_add(new - prev, &event->count);
+}
+
+static const struct l3cache_event_ops event_ops_long = {
+	.start = qcom_l3_cache__64bit_counter_start,
+	.stop = qcom_l3_cache__64bit_counter_stop,
+	.update = qcom_l3_cache__64bit_counter_update,
+};
+
+/*
+ * Implementation of standard counter operations
+ *
+ * 32bit counters use a single physical counter and a hardware feature that
+ * asserts the overflow IRQ on the toggling of the most significant bit in
+ * the counter. This feature allows the counters to be left free-running
+ * without needing the usual reprogramming required to properly handle races
+ * during concurrent calls to update.
+ */
+
+static void qcom_l3_cache__32bit_counter_start(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 evsel = get_event_type(event);
+	u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Set the counter to assert the overflow IRQ on MSB toggling */
+	writel_relaxed(irqctl | PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Initialize the hardware counter and reset prev_count*/
+	local64_set(&event->hw.prev_count, 0);
+	writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+
+	/* Set the event type */
+	writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));
+
+	/* Enable interrupt generation by this counter */
+	writel_relaxed(PMINTENSET(idx), l3pmu->regs + L3_M_BC_INTENSET);
+
+	/* Finally, enable the counter */
+	writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
+	writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
+}
+
+static void qcom_l3_cache__32bit_counter_stop(struct perf_event *event,
+					      int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);
+
+	/* Disable the counter */
+	writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
+
+	/* Disable interrupt generation by this counter */
+	writel_relaxed(PMINTENCLR(idx), l3pmu->regs + L3_M_BC_INTENCLR);
+
+	/* Set the counter to not assert the overflow IRQ on MSB toggling */
+	writel_relaxed(irqctl & ~PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
+}
+
+static void qcom_l3_cache__32bit_counter_update(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	int idx = event->hw.idx;
+	u32 prev, new;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		new = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	local64_add(new - prev, &event->count);
+}
+
+static const struct l3cache_event_ops event_ops_std = {
+	.start = qcom_l3_cache__32bit_counter_start,
+	.stop = qcom_l3_cache__32bit_counter_stop,
+	.update = qcom_l3_cache__32bit_counter_update,
+};
+
+/* Retrieve the appropriate operations for the given event */
+static
+const struct l3cache_event_ops *l3cache_event_get_ops(struct perf_event *event)
+{
+	if (event_uses_long_counter(event))
+		return &event_ops_long;
+	else
+		return &event_ops_std;
+}
+
+/*
+ * Top level PMU functions.
+ */
+
+static inline void qcom_l3_cache__init(struct l3cache_pmu *l3pmu)
+{
+	int i;
+
+	writel_relaxed(BC_RESET, l3pmu->regs + L3_M_BC_CR);
+
+	/*
+	 * Use writel for the first programming command to ensure the basic
+	 * counter unit is stopped before proceeding
+	 */
+	writel(BC_SATROLL_CR_RESET, l3pmu->regs + L3_M_BC_SATROLL_CR);
+
+	writel_relaxed(BC_CNTENCLR_RESET, l3pmu->regs + L3_M_BC_CNTENCLR);
+	writel_relaxed(BC_INTENCLR_RESET, l3pmu->regs + L3_M_BC_INTENCLR);
+	writel_relaxed(PMOVSRCLR_RESET, l3pmu->regs + L3_M_BC_OVSR);
+	writel_relaxed(BC_GANG_RESET, l3pmu->regs + L3_M_BC_GANG);
+	writel_relaxed(BC_IRQCTL_RESET, l3pmu->regs + L3_M_BC_IRQCTL);
+	writel_relaxed(PM_CR_RESET, l3pmu->regs + L3_HML3_PM_CR);
+
+	for (i = 0; i < L3_NUM_COUNTERS; ++i) {
+		writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(i));
+		writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(i));
+	}
+
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRA);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRAM);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRB);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRBM);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRC);
+	writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRCM);
+
+	/*
+	 * Use writel here to ensure all programming commands are done
+	 *  before proceeding
+	 */
+	writel(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
+}
+
+static irqreturn_t qcom_l3_cache__handle_irq(int irq_num, void *data)
+{
+	struct l3cache_pmu *l3pmu = data;
+	/* Read the overflow status register */
+	long status = readl_relaxed(l3pmu->regs + L3_M_BC_OVSR);
+	int idx;
+
+	if (status == 0)
+		return IRQ_NONE;
+
+	/* Clear the bits we read on the overflow status register */
+	writel_relaxed(status, l3pmu->regs + L3_M_BC_OVSR);
+
+	for_each_set_bit(idx, &status, L3_NUM_COUNTERS) {
+		struct perf_event *event;
+		const struct l3cache_event_ops *ops;
+
+		event = l3pmu->events[idx];
+		if (!event)
+			continue;
+
+		/*
+		 * Since the IRQ is not enabled for events using long counters
+		 * we should never see one of those here, however, be consistent
+		 * and use the ops indirections like in the other operations.
+		 */
+
+		ops = l3cache_event_get_ops(event);
+		ops->update(event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Implementation of abstract pmu functionality required by
+ * the core perf events code.
+ */
+
+static void qcom_l3_cache__pmu_enable(struct pmu *pmu)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);
+
+	/* Ensure the other programming commands are observed before enabling */
+	wmb();
+
+	writel_relaxed(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
+}
+
+static void qcom_l3_cache__pmu_disable(struct pmu *pmu)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);
+
+	writel_relaxed(0, l3pmu->regs + L3_M_BC_CR);
+
+	/* Ensure the basic counter unit is stopped before proceeding */
+	wmb();
+}
+
+/*
+ * We must NOT create groups containing events from multiple hardware PMUs,
+ * although mixing different software and hardware PMUs is allowed.
+ */
+static bool qcom_l3_cache__validate_event_group(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *sibling;
+	int counters = 0;
+
+	if (leader->pmu != event->pmu && !is_software_event(leader))
+		return false;
+
+	counters = event_num_counters(event);
+	counters += event_num_counters(leader);
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (is_software_event(sibling))
+			continue;
+		if (sibling->pmu != event->pmu)
+			return false;
+		counters += event_num_counters(sibling);
+	}
+
+	/*
+	 * If the group requires more counters than the HW has, it
+	 * cannot ever be scheduled.
+	 */
+	return counters <= L3_NUM_COUNTERS;
+}
+
+static int qcom_l3_cache__event_init(struct perf_event *event)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * Is the event for this PMU?
+	 */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * There are no per-counter mode filters in the PMU.
+	 */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_hv || event->attr.exclude_idle)
+		return -EINVAL;
+
+	/*
+	 * Sampling not supported since these events are not core-attributable.
+	 */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * Task mode not available, we run the counters as socket counters,
+	 * not attributable to any CPU and therefore cannot attribute per-task.
+	 */
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* Validate the group */
+	if (!qcom_l3_cache__validate_event_group(event))
+		return -EINVAL;
+
+	hwc->idx = -1;
+
+	/*
+	 * Many perf core operations (eg. events rotation) operate on a
+	 * single CPU context. This is obvious for CPU PMUs, where one
+	 * expects the same sets of events being observed on all CPUs,
+	 * but can lead to issues for off-core PMUs, like this one, where
+	 * each event could be theoretically assigned to a different CPU.
+	 * To mitigate this, we enforce CPU assignment to one designated
+	 * processor (the one described in the "cpumask" attribute exported
+	 * by the PMU device). perf user space tools honor this and avoid
+	 * opening more than one copy of the events.
+	 */
+	event->cpu = cpumask_first(&l3pmu->cpumask);
+
+	return 0;
+}
+
+static void qcom_l3_cache__event_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	hwc->state = 0;
+	ops->start(event);
+}
+
+static void qcom_l3_cache__event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	ops->stop(event, flags);
+	if (flags & PERF_EF_UPDATE)
+		ops->update(event);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int qcom_l3_cache__event_add(struct perf_event *event, int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int order = event_uses_long_counter(event) ? 1 : 0;
+	int idx;
+
+	/*
+	 * Try to allocate a counter.
+	 */
+	idx = bitmap_find_free_region(l3pmu->used_mask, L3_NUM_COUNTERS, order);
+	if (idx < 0)
+		/* The counters are all in use. */
+		return -EAGAIN;
+
+	hwc->idx = idx;
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	l3pmu->events[idx] = event;
+
+	if (flags & PERF_EF_START)
+		qcom_l3_cache__event_start(event, 0);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void qcom_l3_cache__event_del(struct perf_event *event, int flags)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int order = event_uses_long_counter(event) ? 1 : 0;
+
+	/* Stop and clean up */
+	qcom_l3_cache__event_stop(event,  flags | PERF_EF_UPDATE);
+	l3pmu->events[hwc->idx] = NULL;
+	bitmap_release_region(l3pmu->used_mask, hwc->idx, order);
+
+	/* Propagate changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+}
+
+static void qcom_l3_cache__event_read(struct perf_event *event)
+{
+	const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);
+
+	ops->update(event);
+}
+
+/*
+ * Add sysfs attributes
+ *
+ * We export:
+ * - formats, used by perf user space and other tools to configure events
+ * - events, used by perf user space and other tools to create events
+ *   symbolically, e.g.:
+ *     perf stat -a -e l3cache_0_0/event=read-miss/ ls
+ *     perf stat -a -e l3cache_0_0/event=0x21/ ls
+ * - cpumask, used by perf user space and other tools to know on which CPUs
+ *   to open the events
+ */
+
+/* formats */
+
+static ssize_t l3cache_pmu_format_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return sprintf(buf, "%s\n", (char *) eattr->var);
+}
+
+#define L3CACHE_PMU_FORMAT_ATTR(_name, _config)				      \
+	(&((struct dev_ext_attribute[]) {				      \
+		{ .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \
+		  .var = (void *) _config, }				      \
+	})[0].attr.attr)
+
+static struct attribute *qcom_l3_cache_pmu_formats[] = {
+	L3CACHE_PMU_FORMAT_ATTR(event, "config:0-7"),
+	L3CACHE_PMU_FORMAT_ATTR(lc, "config:" __stringify(L3_EVENT_LC_BIT)),
+	NULL,
+};
+
+static struct attribute_group qcom_l3_cache_pmu_format_group = {
+	.name = "format",
+	.attrs = qcom_l3_cache_pmu_formats,
+};
+
+/* events */
+
+static ssize_t l3cache_pmu_event_show(struct device *dev,
+				     struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define L3CACHE_EVENT_ATTR(_name, _id)					     \
+	(&((struct perf_pmu_events_attr[]) {				     \
+		{ .attr = __ATTR(_name, 0444, l3cache_pmu_event_show, NULL), \
+		  .id = _id, }						     \
+	})[0].attr.attr)
+
+static struct attribute *qcom_l3_cache_pmu_events[] = {
+	L3CACHE_EVENT_ATTR(cycles, L3_EVENT_CYCLES),
+	L3CACHE_EVENT_ATTR(read-hit, L3_EVENT_READ_HIT),
+	L3CACHE_EVENT_ATTR(read-miss, L3_EVENT_READ_MISS),
+	L3CACHE_EVENT_ATTR(read-hit-d-side, L3_EVENT_READ_HIT_D),
+	L3CACHE_EVENT_ATTR(read-miss-d-side, L3_EVENT_READ_MISS_D),
+	L3CACHE_EVENT_ATTR(write-hit, L3_EVENT_WRITE_HIT),
+	L3CACHE_EVENT_ATTR(write-miss, L3_EVENT_WRITE_MISS),
+	NULL
+};
+
+static struct attribute_group qcom_l3_cache_pmu_events_group = {
+	.name = "events",
+	.attrs = qcom_l3_cache_pmu_events,
+};
+
+/* cpumask */
+
+static ssize_t qcom_l3_cache_pmu_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct l3cache_pmu *l3pmu = to_l3cache_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &l3pmu->cpumask);
+}
+
+static DEVICE_ATTR(cpumask, 0444, qcom_l3_cache_pmu_cpumask_show, NULL);
+
+static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
+	.attrs = qcom_l3_cache_pmu_cpumask_attrs,
+};
+
+/*
+ * Per PMU device attribute groups
+ */
+static const struct attribute_group *qcom_l3_cache_pmu_attr_grps[] = {
+	&qcom_l3_cache_pmu_format_group,
+	&qcom_l3_cache_pmu_events_group,
+	&qcom_l3_cache_pmu_cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * Probing functions and data.
+ */
+
+static int qcom_l3_cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);
+
+	/* If there is not a CPU/PMU association pick this CPU */
+	if (cpumask_empty(&l3pmu->cpumask))
+		cpumask_set_cpu(cpu, &l3pmu->cpumask);
+
+	return 0;
+}
+
+static int qcom_l3_cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &l3pmu->cpumask))
+		return 0;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+	perf_pmu_migrate_context(&l3pmu->pmu, cpu, target);
+	cpumask_set_cpu(target, &l3pmu->cpumask);
+	return 0;
+}
+
+static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
+{
+	struct l3cache_pmu *l3pmu;
+	struct acpi_device *acpi_dev;
+	struct resource *memrc;
+	int ret;
+	char *name;
+
+	/* Initialize the PMU data structures */
+
+	acpi_dev = ACPI_COMPANION(&pdev->dev);
+	if (!acpi_dev)
+		return -ENODEV;
+
+	l3pmu = devm_kzalloc(&pdev->dev, sizeof(*l3pmu), GFP_KERNEL);
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "l3cache_%s_%s",
+		      acpi_dev->parent->pnp.unique_id, acpi_dev->pnp.unique_id);
+	if (!l3pmu || !name)
+		return -ENOMEM;
+
+	l3pmu->pmu = (struct pmu) {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.pmu_enable	= qcom_l3_cache__pmu_enable,
+		.pmu_disable	= qcom_l3_cache__pmu_disable,
+		.event_init	= qcom_l3_cache__event_init,
+		.add		= qcom_l3_cache__event_add,
+		.del		= qcom_l3_cache__event_del,
+		.start		= qcom_l3_cache__event_start,
+		.stop		= qcom_l3_cache__event_stop,
+		.read		= qcom_l3_cache__event_read,
+
+		.attr_groups	= qcom_l3_cache_pmu_attr_grps,
+	};
+
+	memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc);
+	if (IS_ERR(l3pmu->regs)) {
+		dev_err(&pdev->dev, "Can't map PMU @%pa\n", &memrc->start);
+		return PTR_ERR(l3pmu->regs);
+	}
+
+	qcom_l3_cache__init(l3pmu);
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret <= 0)
+		return ret;
+
+	ret = devm_request_irq(&pdev->dev, ret, qcom_l3_cache__handle_irq, 0,
+			       name, l3pmu);
+	if (ret) {
+		dev_err(&pdev->dev, "Request for IRQ failed for slice @%pa\n",
+			&memrc->start);
+		return ret;
+	}
+
+	/* Add this instance to the list used by the offline callback */
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, &l3pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug", ret);
+		return ret;
+	}
+
+	ret = perf_pmu_register(&l3pmu->pmu, name, -1);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register L3 cache PMU (%d)\n", ret);
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "Registered %s, type: %d\n", name, l3pmu->pmu.type);
+
+	return 0;
+}
+
+static const struct acpi_device_id qcom_l3_cache_pmu_acpi_match[] = {
+	{ "QCOM8081", },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, qcom_l3_cache_pmu_acpi_match);
+
+static struct platform_driver qcom_l3_cache_pmu_driver = {
+	.driver = {
+		.name = "qcom-l3cache-pmu",
+		.acpi_match_table = ACPI_PTR(qcom_l3_cache_pmu_acpi_match),
+	},
+	.probe = qcom_l3_cache_pmu_probe,
+};
+
+static int __init register_qcom_l3_cache_pmu_driver(void)
+{
+	int ret;
+
+	/* Install a hook to update the reader CPU in case it goes offline */
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
+				      "perf/qcom/l3cache:online",
+				      qcom_l3_cache_pmu_online_cpu,
+				      qcom_l3_cache_pmu_offline_cpu);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&qcom_l3_cache_pmu_driver);
+}
+device_initcall(register_qcom_l3_cache_pmu_driver);
diff --git a/src/kernel/linux/v4.14/drivers/perf/xgene_pmu.c b/src/kernel/linux/v4.14/drivers/perf/xgene_pmu.c
new file mode 100644
index 0000000..8b79c2f
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/perf/xgene_pmu.c
@@ -0,0 +1,1941 @@
+/*
+ * APM X-Gene SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2016, Applied Micro Circuits Corporation
+ * Author: Hoan Tran <hotran@apm.com>
+ *         Tai Nguyen <ttnguyen@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/acpi.h>
+#include <linux/clk.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define CSW_CSWCR                       0x0000
+#define  CSW_CSWCR_DUALMCB_MASK         BIT(0)
+#define  CSW_CSWCR_MCB0_ROUTING(x)	(((x) & 0x0C) >> 2)
+#define  CSW_CSWCR_MCB1_ROUTING(x)	(((x) & 0x30) >> 4)
+#define MCBADDRMR                       0x0000
+#define  MCBADDRMR_DUALMCU_MODE_MASK    BIT(2)
+
+#define PCPPMU_INTSTATUS_REG	0x000
+#define PCPPMU_INTMASK_REG	0x004
+#define  PCPPMU_INTMASK		0x0000000F
+#define  PCPPMU_INTENMASK	0xFFFFFFFF
+#define  PCPPMU_INTCLRMASK	0xFFFFFFF0
+#define  PCPPMU_INT_MCU		BIT(0)
+#define  PCPPMU_INT_MCB		BIT(1)
+#define  PCPPMU_INT_L3C		BIT(2)
+#define  PCPPMU_INT_IOB		BIT(3)
+
+#define  PCPPMU_V3_INTMASK	0x00FF33FF
+#define  PCPPMU_V3_INTENMASK	0xFFFFFFFF
+#define  PCPPMU_V3_INTCLRMASK	0xFF00CC00
+#define  PCPPMU_V3_INT_MCU	0x000000FF
+#define  PCPPMU_V3_INT_MCB	0x00000300
+#define  PCPPMU_V3_INT_L3C	0x00FF0000
+#define  PCPPMU_V3_INT_IOB	0x00003000
+
+#define PMU_MAX_COUNTERS	4
+#define PMU_CNT_MAX_PERIOD	0xFFFFFFFFULL
+#define PMU_V3_CNT_MAX_PERIOD	0xFFFFFFFFFFFFFFFFULL
+#define PMU_OVERFLOW_MASK	0xF
+#define PMU_PMCR_E		BIT(0)
+#define PMU_PMCR_P		BIT(1)
+
+#define PMU_PMEVCNTR0		0x000
+#define PMU_PMEVCNTR1		0x004
+#define PMU_PMEVCNTR2		0x008
+#define PMU_PMEVCNTR3		0x00C
+#define PMU_PMEVTYPER0		0x400
+#define PMU_PMEVTYPER1		0x404
+#define PMU_PMEVTYPER2		0x408
+#define PMU_PMEVTYPER3		0x40C
+#define PMU_PMAMR0		0xA00
+#define PMU_PMAMR1		0xA04
+#define PMU_PMCNTENSET		0xC00
+#define PMU_PMCNTENCLR		0xC20
+#define PMU_PMINTENSET		0xC40
+#define PMU_PMINTENCLR		0xC60
+#define PMU_PMOVSR		0xC80
+#define PMU_PMCR		0xE04
+
+/* PMU registers for V3 */
+#define PMU_PMOVSCLR		0xC80
+#define PMU_PMOVSSET		0xCC0
+
+#define to_pmu_dev(p)     container_of(p, struct xgene_pmu_dev, pmu)
+#define GET_CNTR(ev)      (ev->hw.idx)
+#define GET_EVENTID(ev)   (ev->hw.config & 0xFFULL)
+#define GET_AGENTID(ev)   (ev->hw.config_base & 0xFFFFFFFFUL)
+#define GET_AGENT1ID(ev)  ((ev->hw.config_base >> 32) & 0xFFFFFFFFUL)
+
+struct hw_pmu_info {
+	u32 type;
+	u32 enable_mask;
+	void __iomem *csr;
+};
+
+struct xgene_pmu_dev {
+	struct hw_pmu_info *inf;
+	struct xgene_pmu *parent;
+	struct pmu pmu;
+	u8 max_counters;
+	DECLARE_BITMAP(cntr_assign_mask, PMU_MAX_COUNTERS);
+	u64 max_period;
+	const struct attribute_group **attr_groups;
+	struct perf_event *pmu_counter_event[PMU_MAX_COUNTERS];
+};
+
+struct xgene_pmu_ops {
+	void (*mask_int)(struct xgene_pmu *pmu);
+	void (*unmask_int)(struct xgene_pmu *pmu);
+	u64 (*read_counter)(struct xgene_pmu_dev *pmu, int idx);
+	void (*write_counter)(struct xgene_pmu_dev *pmu, int idx, u64 val);
+	void (*write_evttype)(struct xgene_pmu_dev *pmu_dev, int idx, u32 val);
+	void (*write_agentmsk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+	void (*write_agent1msk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+	void (*enable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*disable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*enable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*disable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+	void (*reset_counters)(struct xgene_pmu_dev *pmu_dev);
+	void (*start_counters)(struct xgene_pmu_dev *pmu_dev);
+	void (*stop_counters)(struct xgene_pmu_dev *pmu_dev);
+};
+
+struct xgene_pmu {
+	struct device *dev;
+	int version;
+	void __iomem *pcppmu_csr;
+	u32 mcb_active_mask;
+	u32 mc_active_mask;
+	u32 l3c_active_mask;
+	cpumask_t cpu;
+	raw_spinlock_t lock;
+	const struct xgene_pmu_ops *ops;
+	struct list_head l3cpmus;
+	struct list_head iobpmus;
+	struct list_head mcbpmus;
+	struct list_head mcpmus;
+};
+
+struct xgene_pmu_dev_ctx {
+	char *name;
+	struct list_head next;
+	struct xgene_pmu_dev *pmu_dev;
+	struct hw_pmu_info inf;
+};
+
+struct xgene_pmu_data {
+	int id;
+	u32 data;
+};
+
+enum xgene_pmu_version {
+	PCP_PMU_V1 = 1,
+	PCP_PMU_V2,
+	PCP_PMU_V3,
+};
+
+enum xgene_pmu_dev_type {
+	PMU_TYPE_L3C = 0,
+	PMU_TYPE_IOB,
+	PMU_TYPE_IOB_SLOW,
+	PMU_TYPE_MCB,
+	PMU_TYPE_MC,
+};
+
+/*
+ * sysfs format attributes
+ */
+static ssize_t xgene_pmu_format_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return sprintf(buf, "%s\n", (char *) eattr->var);
+}
+
+#define XGENE_PMU_FORMAT_ATTR(_name, _config)		\
+	(&((struct dev_ext_attribute[]) {		\
+		{ .attr = __ATTR(_name, S_IRUGO, xgene_pmu_format_show, NULL), \
+		  .var = (void *) _config, }		\
+	})[0].attr.attr)
+
+static struct attribute *l3c_pmu_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-7"),
+	XGENE_PMU_FORMAT_ATTR(l3c_agentid, "config1:0-9"),
+	NULL,
+};
+
+static struct attribute *iob_pmu_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-7"),
+	XGENE_PMU_FORMAT_ATTR(iob_agentid, "config1:0-63"),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-5"),
+	XGENE_PMU_FORMAT_ATTR(mcb_agentid, "config1:0-9"),
+	NULL,
+};
+
+static struct attribute *mc_pmu_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-28"),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = l3c_pmu_format_attrs,
+};
+
+static const struct attribute_group iob_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = iob_pmu_format_attrs,
+};
+
+static const struct attribute_group mcb_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = mcb_pmu_format_attrs,
+};
+
+static const struct attribute_group mc_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = mc_pmu_format_attrs,
+};
+
+static struct attribute *l3c_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-39"),
+	NULL,
+};
+
+static struct attribute *iob_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-47"),
+	NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(iob_slow_eventid, "config:0-16"),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-35"),
+	NULL,
+};
+
+static struct attribute *mc_pmu_v3_format_attrs[] = {
+	XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-44"),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = l3c_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = iob_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = iob_slow_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = mcb_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_format_attr_group = {
+	.name = "format",
+	.attrs = mc_pmu_v3_format_attrs,
+};
+
+/*
+ * sysfs event attributes
+ */
+static ssize_t xgene_pmu_event_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return sprintf(buf, "config=0x%lx\n", (unsigned long) eattr->var);
+}
+
+#define XGENE_PMU_EVENT_ATTR(_name, _config)		\
+	(&((struct dev_ext_attribute[]) {		\
+		{ .attr = __ATTR(_name, S_IRUGO, xgene_pmu_event_show, NULL), \
+		  .var = (void *) _config, }		\
+	 })[0].attr.attr)
+
+static struct attribute *l3c_pmu_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(cycle-count-div-64,		0x01),
+	XGENE_PMU_EVENT_ATTR(read-hit,				0x02),
+	XGENE_PMU_EVENT_ATTR(read-miss,				0x03),
+	XGENE_PMU_EVENT_ATTR(write-need-replacement,		0x06),
+	XGENE_PMU_EVENT_ATTR(write-not-need-replacement,	0x07),
+	XGENE_PMU_EVENT_ATTR(tq-full,				0x08),
+	XGENE_PMU_EVENT_ATTR(ackq-full,				0x09),
+	XGENE_PMU_EVENT_ATTR(wdb-full,				0x0a),
+	XGENE_PMU_EVENT_ATTR(bank-fifo-full,			0x0b),
+	XGENE_PMU_EVENT_ATTR(odb-full,				0x0c),
+	XGENE_PMU_EVENT_ATTR(wbq-full,				0x0d),
+	XGENE_PMU_EVENT_ATTR(bank-conflict-fifo-issue,		0x0e),
+	XGENE_PMU_EVENT_ATTR(bank-fifo-issue,			0x0f),
+	NULL,
+};
+
+static struct attribute *iob_pmu_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(cycle-count-div-64,		0x01),
+	XGENE_PMU_EVENT_ATTR(axi0-read,				0x02),
+	XGENE_PMU_EVENT_ATTR(axi0-read-partial,			0x03),
+	XGENE_PMU_EVENT_ATTR(axi1-read,				0x04),
+	XGENE_PMU_EVENT_ATTR(axi1-read-partial,			0x05),
+	XGENE_PMU_EVENT_ATTR(csw-read-block,			0x06),
+	XGENE_PMU_EVENT_ATTR(csw-read-partial,			0x07),
+	XGENE_PMU_EVENT_ATTR(axi0-write,			0x10),
+	XGENE_PMU_EVENT_ATTR(axi0-write-partial,		0x11),
+	XGENE_PMU_EVENT_ATTR(axi1-write,			0x13),
+	XGENE_PMU_EVENT_ATTR(axi1-write-partial,		0x14),
+	XGENE_PMU_EVENT_ATTR(csw-inbound-dirty,			0x16),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(cycle-count-div-64,		0x01),
+	XGENE_PMU_EVENT_ATTR(csw-read,				0x02),
+	XGENE_PMU_EVENT_ATTR(csw-write-request,			0x03),
+	XGENE_PMU_EVENT_ATTR(mcb-csw-stall,			0x04),
+	XGENE_PMU_EVENT_ATTR(cancel-read-gack,			0x05),
+	NULL,
+};
+
+static struct attribute *mc_pmu_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(cycle-count-div-64,		0x01),
+	XGENE_PMU_EVENT_ATTR(act-cmd-sent,			0x02),
+	XGENE_PMU_EVENT_ATTR(pre-cmd-sent,			0x03),
+	XGENE_PMU_EVENT_ATTR(rd-cmd-sent,			0x04),
+	XGENE_PMU_EVENT_ATTR(rda-cmd-sent,			0x05),
+	XGENE_PMU_EVENT_ATTR(wr-cmd-sent,			0x06),
+	XGENE_PMU_EVENT_ATTR(wra-cmd-sent,			0x07),
+	XGENE_PMU_EVENT_ATTR(pde-cmd-sent,			0x08),
+	XGENE_PMU_EVENT_ATTR(sre-cmd-sent,			0x09),
+	XGENE_PMU_EVENT_ATTR(prea-cmd-sent,			0x0a),
+	XGENE_PMU_EVENT_ATTR(ref-cmd-sent,			0x0b),
+	XGENE_PMU_EVENT_ATTR(rd-rda-cmd-sent,			0x0c),
+	XGENE_PMU_EVENT_ATTR(wr-wra-cmd-sent,			0x0d),
+	XGENE_PMU_EVENT_ATTR(in-rd-collision,			0x0e),
+	XGENE_PMU_EVENT_ATTR(in-wr-collision,			0x0f),
+	XGENE_PMU_EVENT_ATTR(collision-queue-not-empty,		0x10),
+	XGENE_PMU_EVENT_ATTR(collision-queue-full,		0x11),
+	XGENE_PMU_EVENT_ATTR(mcu-request,			0x12),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-request,			0x13),
+	XGENE_PMU_EVENT_ATTR(mcu-hp-rd-request,			0x14),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-request,			0x15),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-proceed-all,		0x16),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-proceed-cancel,		0x17),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-response,			0x18),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-proceed-speculative-all,	0x19),
+	XGENE_PMU_EVENT_ATTR(mcu-rd-proceed-speculative-cancel,	0x1a),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-proceed-all,		0x1b),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-proceed-cancel,		0x1c),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = l3c_pmu_events_attrs,
+};
+
+static const struct attribute_group iob_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = iob_pmu_events_attrs,
+};
+
+static const struct attribute_group mcb_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = mcb_pmu_events_attrs,
+};
+
+static const struct attribute_group mc_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = mc_pmu_events_attrs,
+};
+
+static struct attribute *l3c_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(read-hit,				0x01),
+	XGENE_PMU_EVENT_ATTR(read-miss,				0x02),
+	XGENE_PMU_EVENT_ATTR(index-flush-eviction,		0x03),
+	XGENE_PMU_EVENT_ATTR(write-caused-replacement,		0x04),
+	XGENE_PMU_EVENT_ATTR(write-not-caused-replacement,	0x05),
+	XGENE_PMU_EVENT_ATTR(clean-eviction,			0x06),
+	XGENE_PMU_EVENT_ATTR(dirty-eviction,			0x07),
+	XGENE_PMU_EVENT_ATTR(read,				0x08),
+	XGENE_PMU_EVENT_ATTR(write,				0x09),
+	XGENE_PMU_EVENT_ATTR(request,				0x0a),
+	XGENE_PMU_EVENT_ATTR(tq-bank-conflict-issue-stall,	0x0b),
+	XGENE_PMU_EVENT_ATTR(tq-full,				0x0c),
+	XGENE_PMU_EVENT_ATTR(ackq-full,				0x0d),
+	XGENE_PMU_EVENT_ATTR(wdb-full,				0x0e),
+	XGENE_PMU_EVENT_ATTR(odb-full,				0x10),
+	XGENE_PMU_EVENT_ATTR(wbq-full,				0x11),
+	XGENE_PMU_EVENT_ATTR(input-req-async-fifo-stall,	0x12),
+	XGENE_PMU_EVENT_ATTR(output-req-async-fifo-stall,	0x13),
+	XGENE_PMU_EVENT_ATTR(output-data-async-fifo-stall,	0x14),
+	XGENE_PMU_EVENT_ATTR(total-insertion,			0x15),
+	XGENE_PMU_EVENT_ATTR(sip-insertions-r-set,		0x16),
+	XGENE_PMU_EVENT_ATTR(sip-insertions-r-clear,		0x17),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-r-set,		0x18),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-r-clear,		0x19),
+	XGENE_PMU_EVENT_ATTR(dip-insertions-force-r-set,	0x1a),
+	XGENE_PMU_EVENT_ATTR(egression,				0x1b),
+	XGENE_PMU_EVENT_ATTR(replacement,			0x1c),
+	XGENE_PMU_EVENT_ATTR(old-replacement,			0x1d),
+	XGENE_PMU_EVENT_ATTR(young-replacement,			0x1e),
+	XGENE_PMU_EVENT_ATTR(r-set-replacement,			0x1f),
+	XGENE_PMU_EVENT_ATTR(r-clear-replacement,		0x20),
+	XGENE_PMU_EVENT_ATTR(old-r-replacement,			0x21),
+	XGENE_PMU_EVENT_ATTR(old-nr-replacement,		0x22),
+	XGENE_PMU_EVENT_ATTR(young-r-replacement,		0x23),
+	XGENE_PMU_EVENT_ATTR(young-nr-replacement,		0x24),
+	XGENE_PMU_EVENT_ATTR(bloomfilter-clearing,		0x25),
+	XGENE_PMU_EVENT_ATTR(generation-flip,			0x26),
+	XGENE_PMU_EVENT_ATTR(vcc-droop-detected,		0x27),
+	NULL,
+};
+
+static struct attribute *iob_fast_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-all,		0x01),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-rd,		0x02),
+	XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-wr,		0x03),
+	XGENE_PMU_EVENT_ATTR(pa-all-cp-req,			0x04),
+	XGENE_PMU_EVENT_ATTR(pa-cp-blk-req,			0x05),
+	XGENE_PMU_EVENT_ATTR(pa-cp-ptl-req,			0x06),
+	XGENE_PMU_EVENT_ATTR(pa-cp-rd-req,			0x07),
+	XGENE_PMU_EVENT_ATTR(pa-cp-wr-req,			0x08),
+	XGENE_PMU_EVENT_ATTR(ba-all-req,			0x09),
+	XGENE_PMU_EVENT_ATTR(ba-rd-req,				0x0a),
+	XGENE_PMU_EVENT_ATTR(ba-wr-req,				0x0b),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-req-issued,		0x10),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-req-issued,	0x11),
+	XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-stashable, 0x12),
+	XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-nonstashable, 0x13),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-stashable,	0x14),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-nonstashable, 0x15),
+	XGENE_PMU_EVENT_ATTR(pa-ptl-wr-req,			0x16),
+	XGENE_PMU_EVENT_ATTR(pa-ptl-rd-req,			0x17),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-clean-data,		0x18),
+	XGENE_PMU_EVENT_ATTR(pa-wr-back-cancelled-on-SS,	0x1b),
+	XGENE_PMU_EVENT_ATTR(pa-barrier-occurrence,		0x1c),
+	XGENE_PMU_EVENT_ATTR(pa-barrier-cycles,			0x1d),
+	XGENE_PMU_EVENT_ATTR(pa-total-cp-snoops,		0x20),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop,		0x21),
+	XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop-hit,		0x22),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop,		0x23),
+	XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop-hit,		0x24),
+	XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop,		0x25),
+	XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop-hit,	0x26),
+	XGENE_PMU_EVENT_ATTR(pa-req-buffer-full,		0x28),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-req-fifo-full,	0x29),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-snoop-fifo-backpressure, 0x2a),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-lack-fifo-full,	0x2b),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-gack-fifo-backpressure, 0x2c),
+	XGENE_PMU_EVENT_ATTR(cswlf-outbound-data-fifo-full,	0x2d),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-data-fifo-backpressure, 0x2e),
+	XGENE_PMU_EVENT_ATTR(cswlf-inbound-req-backpressure,	0x2f),
+	NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(pa-axi0-rd-req,			0x01),
+	XGENE_PMU_EVENT_ATTR(pa-axi0-wr-req,			0x02),
+	XGENE_PMU_EVENT_ATTR(pa-axi1-rd-req,			0x03),
+	XGENE_PMU_EVENT_ATTR(pa-axi1-wr-req,			0x04),
+	XGENE_PMU_EVENT_ATTR(ba-all-axi-req,			0x07),
+	XGENE_PMU_EVENT_ATTR(ba-axi-rd-req,			0x08),
+	XGENE_PMU_EVENT_ATTR(ba-axi-wr-req,			0x09),
+	XGENE_PMU_EVENT_ATTR(ba-free-list-empty,		0x10),
+	NULL,
+};
+
+static struct attribute *mcb_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(req-receive,			0x01),
+	XGENE_PMU_EVENT_ATTR(rd-req-recv,			0x02),
+	XGENE_PMU_EVENT_ATTR(rd-req-recv-2,			0x03),
+	XGENE_PMU_EVENT_ATTR(wr-req-recv,			0x04),
+	XGENE_PMU_EVENT_ATTR(wr-req-recv-2,			0x05),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu,		0x06),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu-2,		0x07),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu,		0x08),
+	XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu-2,		0x09),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-recv-for-rd-sent-to-spec-mcu, 0x0a),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-for-rd-sent-to-spec-mcu, 0x0b),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-nogo-recv-for-rd-sent-to-spec-mcu, 0x0c),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req,	0x0d),
+	XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req-2,	0x0e),
+	XGENE_PMU_EVENT_ATTR(wr-req-sent-to-mcu,		0x0f),
+	XGENE_PMU_EVENT_ATTR(gack-recv,				0x10),
+	XGENE_PMU_EVENT_ATTR(rd-gack-recv,			0x11),
+	XGENE_PMU_EVENT_ATTR(wr-gack-recv,			0x12),
+	XGENE_PMU_EVENT_ATTR(cancel-rd-gack,			0x13),
+	XGENE_PMU_EVENT_ATTR(cancel-wr-gack,			0x14),
+	XGENE_PMU_EVENT_ATTR(mcb-csw-req-stall,			0x15),
+	XGENE_PMU_EVENT_ATTR(mcu-req-intf-blocked,		0x16),
+	XGENE_PMU_EVENT_ATTR(mcb-mcu-rd-intf-stall,		0x17),
+	XGENE_PMU_EVENT_ATTR(csw-rd-intf-blocked,		0x18),
+	XGENE_PMU_EVENT_ATTR(csw-local-ack-intf-blocked,	0x19),
+	XGENE_PMU_EVENT_ATTR(mcu-req-table-full,		0x1a),
+	XGENE_PMU_EVENT_ATTR(mcu-stat-table-full,		0x1b),
+	XGENE_PMU_EVENT_ATTR(mcu-wr-table-full,			0x1c),
+	XGENE_PMU_EVENT_ATTR(mcu-rdreceipt-resp,		0x1d),
+	XGENE_PMU_EVENT_ATTR(mcu-wrcomplete-resp,		0x1e),
+	XGENE_PMU_EVENT_ATTR(mcu-retryack-resp,			0x1f),
+	XGENE_PMU_EVENT_ATTR(mcu-pcrdgrant-resp,		0x20),
+	XGENE_PMU_EVENT_ATTR(mcu-req-from-lastload,		0x21),
+	XGENE_PMU_EVENT_ATTR(mcu-req-from-bypass,		0x22),
+	XGENE_PMU_EVENT_ATTR(volt-droop-detect,			0x23),
+	NULL,
+};
+
+static struct attribute *mc_pmu_v3_events_attrs[] = {
+	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
+	XGENE_PMU_EVENT_ATTR(act-sent,				0x01),
+	XGENE_PMU_EVENT_ATTR(pre-sent,				0x02),
+	XGENE_PMU_EVENT_ATTR(rd-sent,				0x03),
+	XGENE_PMU_EVENT_ATTR(rda-sent,				0x04),
+	XGENE_PMU_EVENT_ATTR(wr-sent,				0x05),
+	XGENE_PMU_EVENT_ATTR(wra-sent,				0x06),
+	XGENE_PMU_EVENT_ATTR(pd-entry-vld,			0x07),
+	XGENE_PMU_EVENT_ATTR(sref-entry-vld,			0x08),
+	XGENE_PMU_EVENT_ATTR(prea-sent,				0x09),
+	XGENE_PMU_EVENT_ATTR(ref-sent,				0x0a),
+	XGENE_PMU_EVENT_ATTR(rd-rda-sent,			0x0b),
+	XGENE_PMU_EVENT_ATTR(wr-wra-sent,			0x0c),
+	XGENE_PMU_EVENT_ATTR(raw-hazard,			0x0d),
+	XGENE_PMU_EVENT_ATTR(war-hazard,			0x0e),
+	XGENE_PMU_EVENT_ATTR(waw-hazard,			0x0f),
+	XGENE_PMU_EVENT_ATTR(rar-hazard,			0x10),
+	XGENE_PMU_EVENT_ATTR(raw-war-waw-hazard,		0x11),
+	XGENE_PMU_EVENT_ATTR(hprd-lprd-wr-req-vld,		0x12),
+	XGENE_PMU_EVENT_ATTR(lprd-req-vld,			0x13),
+	XGENE_PMU_EVENT_ATTR(hprd-req-vld,			0x14),
+	XGENE_PMU_EVENT_ATTR(hprd-lprd-req-vld,			0x15),
+	XGENE_PMU_EVENT_ATTR(wr-req-vld,			0x16),
+	XGENE_PMU_EVENT_ATTR(partial-wr-req-vld,		0x17),
+	XGENE_PMU_EVENT_ATTR(rd-retry,				0x18),
+	XGENE_PMU_EVENT_ATTR(wr-retry,				0x19),
+	XGENE_PMU_EVENT_ATTR(retry-gnt,				0x1a),
+	XGENE_PMU_EVENT_ATTR(rank-change,			0x1b),
+	XGENE_PMU_EVENT_ATTR(dir-change,			0x1c),
+	XGENE_PMU_EVENT_ATTR(rank-dir-change,			0x1d),
+	XGENE_PMU_EVENT_ATTR(rank-active,			0x1e),
+	XGENE_PMU_EVENT_ATTR(rank-idle,				0x1f),
+	XGENE_PMU_EVENT_ATTR(rank-pd,				0x20),
+	XGENE_PMU_EVENT_ATTR(rank-sref,				0x21),
+	XGENE_PMU_EVENT_ATTR(queue-fill-gt-thresh,		0x22),
+	XGENE_PMU_EVENT_ATTR(queue-rds-gt-thresh,		0x23),
+	XGENE_PMU_EVENT_ATTR(queue-wrs-gt-thresh,		0x24),
+	XGENE_PMU_EVENT_ATTR(phy-updt-complt,			0x25),
+	XGENE_PMU_EVENT_ATTR(tz-fail,				0x26),
+	XGENE_PMU_EVENT_ATTR(dram-errc,				0x27),
+	XGENE_PMU_EVENT_ATTR(dram-errd,				0x28),
+	XGENE_PMU_EVENT_ATTR(rd-enq,				0x29),
+	XGENE_PMU_EVENT_ATTR(wr-enq,				0x2a),
+	XGENE_PMU_EVENT_ATTR(tmac-limit-reached,		0x2b),
+	XGENE_PMU_EVENT_ATTR(tmaw-tracker-full,			0x2c),
+	NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = l3c_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_fast_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = iob_fast_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = iob_slow_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = mcb_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_events_attr_group = {
+	.name = "events",
+	.attrs = mc_pmu_v3_events_attrs,
+};
+
+/*
+ * sysfs cpumask attributes
+ */
+static ssize_t xgene_pmu_cpumask_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, &pmu_dev->parent->cpu);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, xgene_pmu_cpumask_show, NULL);
+
+static struct attribute *xgene_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static const struct attribute_group pmu_cpumask_attr_group = {
+	.attrs = xgene_pmu_cpumask_attrs,
+};
+
+/*
+ * Per PMU device attribute groups of PMU v1 and v2
+ */
+static const struct attribute_group *l3c_pmu_attr_groups[] = {
+	&l3c_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&l3c_pmu_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *iob_pmu_attr_groups[] = {
+	&iob_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&iob_pmu_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mcb_pmu_attr_groups[] = {
+	&mcb_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mcb_pmu_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mc_pmu_attr_groups[] = {
+	&mc_pmu_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mc_pmu_events_attr_group,
+	NULL
+};
+
+/*
+ * Per PMU device attribute groups of PMU v3
+ */
+static const struct attribute_group *l3c_pmu_v3_attr_groups[] = {
+	&l3c_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&l3c_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *iob_fast_pmu_v3_attr_groups[] = {
+	&iob_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&iob_fast_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *iob_slow_pmu_v3_attr_groups[] = {
+	&iob_slow_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&iob_slow_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mcb_pmu_v3_attr_groups[] = {
+	&mcb_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mcb_pmu_v3_events_attr_group,
+	NULL
+};
+
+static const struct attribute_group *mc_pmu_v3_attr_groups[] = {
+	&mc_pmu_v3_format_attr_group,
+	&pmu_cpumask_attr_group,
+	&mc_pmu_v3_events_attr_group,
+	NULL
+};
+
+static int get_next_avail_cntr(struct xgene_pmu_dev *pmu_dev)
+{
+	int cntr;
+
+	cntr = find_first_zero_bit(pmu_dev->cntr_assign_mask,
+				pmu_dev->max_counters);
+	if (cntr == pmu_dev->max_counters)
+		return -ENOSPC;
+	set_bit(cntr, pmu_dev->cntr_assign_mask);
+
+	return cntr;
+}
+
+static void clear_avail_cntr(struct xgene_pmu_dev *pmu_dev, int cntr)
+{
+	clear_bit(cntr, pmu_dev->cntr_assign_mask);
+}
+
+static inline void xgene_pmu_mask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline void xgene_pmu_v3_mask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_V3_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline void xgene_pmu_v3_unmask_int(struct xgene_pmu *xgene_pmu)
+{
+	writel(PCPPMU_V3_INTCLRMASK,
+	       xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
+					   int idx)
+{
+	return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
+}
+
+static inline u64 xgene_pmu_read_counter64(struct xgene_pmu_dev *pmu_dev,
+					   int idx)
+{
+	u32 lo, hi;
+
+	/*
+	 * v3 has 64-bit counter registers composed by 2 32-bit registers
+	 * This can be a problem if the counter increases and carries
+	 * out of bit [31] between 2 reads. The extra reads would help
+	 * to prevent this issue.
+	 */
+	do {
+		hi = xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1);
+		lo = xgene_pmu_read_counter32(pmu_dev, 2 * idx);
+	} while (hi != xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1));
+
+	return (((u64)hi << 32) | lo);
+}
+
+static inline void
+xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
+{
+	writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
+}
+
+static inline void
+xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
+{
+	u32 cnt_lo, cnt_hi;
+
+	cnt_hi = upper_32_bits(val);
+	cnt_lo = lower_32_bits(val);
+
+	/* v3 has 64-bit counter registers composed by 2 32-bit registers */
+	xgene_pmu_write_counter32(pmu_dev, 2 * idx, cnt_lo);
+	xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, cnt_hi);
+}
+
+static inline void
+xgene_pmu_write_evttype(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
+{
+	writel(val, pmu_dev->inf->csr + PMU_PMEVTYPER0 + (4 * idx));
+}
+
+static inline void
+xgene_pmu_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val)
+{
+	writel(val, pmu_dev->inf->csr + PMU_PMAMR0);
+}
+
+static inline void
+xgene_pmu_v3_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
+static inline void
+xgene_pmu_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val)
+{
+	writel(val, pmu_dev->inf->csr + PMU_PMAMR1);
+}
+
+static inline void
+xgene_pmu_v3_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
+static inline void
+xgene_pmu_enable_counter(struct xgene_pmu_dev *pmu_dev, int idx)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCNTENSET);
+	val |= 1 << idx;
+	writel(val, pmu_dev->inf->csr + PMU_PMCNTENSET);
+}
+
+static inline void
+xgene_pmu_disable_counter(struct xgene_pmu_dev *pmu_dev, int idx)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCNTENCLR);
+	val |= 1 << idx;
+	writel(val, pmu_dev->inf->csr + PMU_PMCNTENCLR);
+}
+
+static inline void
+xgene_pmu_enable_counter_int(struct xgene_pmu_dev *pmu_dev, int idx)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMINTENSET);
+	val |= 1 << idx;
+	writel(val, pmu_dev->inf->csr + PMU_PMINTENSET);
+}
+
+static inline void
+xgene_pmu_disable_counter_int(struct xgene_pmu_dev *pmu_dev, int idx)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMINTENCLR);
+	val |= 1 << idx;
+	writel(val, pmu_dev->inf->csr + PMU_PMINTENCLR);
+}
+
+static inline void xgene_pmu_reset_counters(struct xgene_pmu_dev *pmu_dev)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCR);
+	val |= PMU_PMCR_P;
+	writel(val, pmu_dev->inf->csr + PMU_PMCR);
+}
+
+static inline void xgene_pmu_start_counters(struct xgene_pmu_dev *pmu_dev)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCR);
+	val |= PMU_PMCR_E;
+	writel(val, pmu_dev->inf->csr + PMU_PMCR);
+}
+
+static inline void xgene_pmu_stop_counters(struct xgene_pmu_dev *pmu_dev)
+{
+	u32 val;
+
+	val = readl(pmu_dev->inf->csr + PMU_PMCR);
+	val &= ~PMU_PMCR_E;
+	writel(val, pmu_dev->inf->csr + PMU_PMCR);
+}
+
+static void xgene_perf_pmu_enable(struct pmu *pmu)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	int enabled = bitmap_weight(pmu_dev->cntr_assign_mask,
+			pmu_dev->max_counters);
+
+	if (!enabled)
+		return;
+
+	xgene_pmu->ops->start_counters(pmu_dev);
+}
+
+static void xgene_perf_pmu_disable(struct pmu *pmu)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+
+	xgene_pmu->ops->stop_counters(pmu_dev);
+}
+
+static int xgene_perf_event_init(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	struct perf_event *sibling;
+
+	/* Test the event attr type check for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * SOC PMU counters are shared across all cores.
+	 * Therefore, it does not support per-process mode.
+	 * Also, it does not support event sampling mode.
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	/* SOC counters do not have usr/os/guest/host bits */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_host || event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+	/*
+	 * Many perf core operations (eg. events rotation) operate on a
+	 * single CPU context. This is obvious for CPU PMUs, where one
+	 * expects the same sets of events being observed on all CPUs,
+	 * but can lead to issues for off-core PMUs, where each
+	 * event could be theoretically assigned to a different CPU. To
+	 * mitigate this, we enforce CPU assignment to one, selected
+	 * processor (the one described in the "cpumask" attribute).
+	 */
+	event->cpu = cpumask_first(&pmu_dev->parent->cpu);
+
+	hw->config = event->attr.config;
+	/*
+	 * Each bit of the config1 field represents an agent from which the
+	 * request of the event come. The event is counted only if it's caused
+	 * by a request of an agent has the bit cleared.
+	 * By default, the event is counted for all agents.
+	 */
+	hw->config_base = event->attr.config1;
+
+	/*
+	 * We must NOT create groups containing mixed PMUs, although software
+	 * events are acceptable
+	 */
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &event->group_leader->sibling_list,
+			group_entry)
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
+
+	return 0;
+}
+
+static void xgene_perf_enable_event(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+
+	xgene_pmu->ops->write_evttype(pmu_dev, GET_CNTR(event),
+				      GET_EVENTID(event));
+	xgene_pmu->ops->write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
+	if (pmu_dev->inf->type == PMU_TYPE_IOB)
+		xgene_pmu->ops->write_agent1msk(pmu_dev,
+						~((u32)GET_AGENT1ID(event)));
+
+	xgene_pmu->ops->enable_counter(pmu_dev, GET_CNTR(event));
+	xgene_pmu->ops->enable_counter_int(pmu_dev, GET_CNTR(event));
+}
+
+static void xgene_perf_disable_event(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+
+	xgene_pmu->ops->disable_counter(pmu_dev, GET_CNTR(event));
+	xgene_pmu->ops->disable_counter_int(pmu_dev, GET_CNTR(event));
+}
+
+static void xgene_perf_event_set_period(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	struct hw_perf_event *hw = &event->hw;
+	/*
+	 * For 32 bit counter, it has a period of 2^32. To account for the
+	 * possibility of extreme interrupt latency we program for a period of
+	 * half that. Hopefully, we can handle the interrupt before another 2^31
+	 * events occur and the counter overtakes its previous value.
+	 * For 64 bit counter, we don't expect it overflow.
+	 */
+	u64 val = 1ULL << 31;
+
+	local64_set(&hw->prev_count, val);
+	xgene_pmu->ops->write_counter(pmu_dev, hw->idx, val);
+}
+
+static void xgene_perf_event_update(struct perf_event *event)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	struct hw_perf_event *hw = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hw->prev_count);
+	new_raw_count = xgene_pmu->ops->read_counter(pmu_dev, GET_CNTR(event));
+
+	if (local64_cmpxchg(&hw->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count - prev_raw_count) & pmu_dev->max_period;
+
+	local64_add(delta, &event->count);
+}
+
+static void xgene_perf_read(struct perf_event *event)
+{
+	xgene_perf_event_update(event);
+}
+
+static void xgene_perf_start(struct perf_event *event, int flags)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
+	hw->state = 0;
+
+	xgene_perf_event_set_period(event);
+
+	if (flags & PERF_EF_RELOAD) {
+		u64 prev_raw_count =  local64_read(&hw->prev_count);
+
+		xgene_pmu->ops->write_counter(pmu_dev, GET_CNTR(event),
+					      prev_raw_count);
+	}
+
+	xgene_perf_enable_event(event);
+	perf_event_update_userpage(event);
+}
+
+static void xgene_perf_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+	u64 config;
+
+	if (hw->state & PERF_HES_UPTODATE)
+		return;
+
+	xgene_perf_disable_event(event);
+	WARN_ON_ONCE(hw->state & PERF_HES_STOPPED);
+	hw->state |= PERF_HES_STOPPED;
+
+	if (hw->state & PERF_HES_UPTODATE)
+		return;
+
+	config = hw->config;
+	xgene_perf_read(event);
+	hw->state |= PERF_HES_UPTODATE;
+}
+
+static int xgene_perf_add(struct perf_event *event, int flags)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+
+	hw->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* Allocate an event counter */
+	hw->idx = get_next_avail_cntr(pmu_dev);
+	if (hw->idx < 0)
+		return -EAGAIN;
+
+	/* Update counter event pointer for Interrupt handler */
+	pmu_dev->pmu_counter_event[hw->idx] = event;
+
+	if (flags & PERF_EF_START)
+		xgene_perf_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void xgene_perf_del(struct perf_event *event, int flags)
+{
+	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+
+	xgene_perf_stop(event, PERF_EF_UPDATE);
+
+	/* clear the assigned counter */
+	clear_avail_cntr(pmu_dev, GET_CNTR(event));
+
+	perf_event_update_userpage(event);
+	pmu_dev->pmu_counter_event[hw->idx] = NULL;
+}
+
+static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
+{
+	struct xgene_pmu *xgene_pmu;
+
+	if (pmu_dev->parent->version == PCP_PMU_V3)
+		pmu_dev->max_period = PMU_V3_CNT_MAX_PERIOD;
+	else
+		pmu_dev->max_period = PMU_CNT_MAX_PERIOD;
+	/* First version PMU supports only single event counter */
+	xgene_pmu = pmu_dev->parent;
+	if (xgene_pmu->version == PCP_PMU_V1)
+		pmu_dev->max_counters = 1;
+	else
+		pmu_dev->max_counters = PMU_MAX_COUNTERS;
+
+	/* Perf driver registration */
+	pmu_dev->pmu = (struct pmu) {
+		.attr_groups	= pmu_dev->attr_groups,
+		.task_ctx_nr	= perf_invalid_context,
+		.pmu_enable	= xgene_perf_pmu_enable,
+		.pmu_disable	= xgene_perf_pmu_disable,
+		.event_init	= xgene_perf_event_init,
+		.add		= xgene_perf_add,
+		.del		= xgene_perf_del,
+		.start		= xgene_perf_start,
+		.stop		= xgene_perf_stop,
+		.read		= xgene_perf_read,
+	};
+
+	/* Hardware counter init */
+	xgene_pmu->ops->stop_counters(pmu_dev);
+	xgene_pmu->ops->reset_counters(pmu_dev);
+
+	return perf_pmu_register(&pmu_dev->pmu, name, -1);
+}
+
+static int
+xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
+{
+	struct device *dev = xgene_pmu->dev;
+	struct xgene_pmu_dev *pmu;
+
+	pmu = devm_kzalloc(dev, sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+	pmu->parent = xgene_pmu;
+	pmu->inf = &ctx->inf;
+	ctx->pmu_dev = pmu;
+
+	switch (pmu->inf->type) {
+	case PMU_TYPE_L3C:
+		if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask))
+			return -ENODEV;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = l3c_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = l3c_pmu_attr_groups;
+		break;
+	case PMU_TYPE_IOB:
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = iob_fast_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = iob_pmu_attr_groups;
+		break;
+	case PMU_TYPE_IOB_SLOW:
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = iob_slow_pmu_v3_attr_groups;
+		break;
+	case PMU_TYPE_MCB:
+		if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
+			return -ENODEV;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = mcb_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = mcb_pmu_attr_groups;
+		break;
+	case PMU_TYPE_MC:
+		if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
+			return -ENODEV;
+		if (xgene_pmu->version == PCP_PMU_V3)
+			pmu->attr_groups = mc_pmu_v3_attr_groups;
+		else
+			pmu->attr_groups = mc_pmu_attr_groups;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (xgene_init_perf(pmu, ctx->name)) {
+		dev_err(dev, "%s PMU: Failed to init perf driver\n", ctx->name);
+		return -ENODEV;
+	}
+
+	dev_info(dev, "%s PMU registered\n", ctx->name);
+
+	return 0;
+}
+
+static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
+{
+	struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+	void __iomem *csr = pmu_dev->inf->csr;
+	u32 pmovsr;
+	int idx;
+
+	xgene_pmu->ops->stop_counters(pmu_dev);
+
+	if (xgene_pmu->version == PCP_PMU_V3)
+		pmovsr = readl(csr + PMU_PMOVSSET) & PMU_OVERFLOW_MASK;
+	else
+		pmovsr = readl(csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
+
+	if (!pmovsr)
+		goto out;
+
+	/* Clear interrupt flag */
+	if (xgene_pmu->version == PCP_PMU_V1)
+		writel(0x0, csr + PMU_PMOVSR);
+	else if (xgene_pmu->version == PCP_PMU_V2)
+		writel(pmovsr, csr + PMU_PMOVSR);
+	else
+		writel(pmovsr, csr + PMU_PMOVSCLR);
+
+	for (idx = 0; idx < PMU_MAX_COUNTERS; idx++) {
+		struct perf_event *event = pmu_dev->pmu_counter_event[idx];
+		int overflowed = pmovsr & BIT(idx);
+
+		/* Ignore if we don't have an event. */
+		if (!event || !overflowed)
+			continue;
+		xgene_perf_event_update(event);
+		xgene_perf_event_set_period(event);
+	}
+
+out:
+	xgene_pmu->ops->start_counters(pmu_dev);
+}
+
+static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
+{
+	u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
+	struct xgene_pmu_dev_ctx *ctx;
+	struct xgene_pmu *xgene_pmu = dev_id;
+	unsigned long flags;
+	u32 val;
+
+	raw_spin_lock_irqsave(&xgene_pmu->lock, flags);
+
+	/* Get Interrupt PMU source */
+	val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
+	if (xgene_pmu->version == PCP_PMU_V3) {
+		intr_mcu = PCPPMU_V3_INT_MCU;
+		intr_mcb = PCPPMU_V3_INT_MCB;
+		intr_l3c = PCPPMU_V3_INT_L3C;
+		intr_iob = PCPPMU_V3_INT_IOB;
+	} else {
+		intr_mcu = PCPPMU_INT_MCU;
+		intr_mcb = PCPPMU_INT_MCB;
+		intr_l3c = PCPPMU_INT_L3C;
+		intr_iob = PCPPMU_INT_IOB;
+	}
+	if (val & intr_mcu) {
+		list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) {
+			_xgene_pmu_isr(irq, ctx->pmu_dev);
+		}
+	}
+	if (val & intr_mcb) {
+		list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) {
+			_xgene_pmu_isr(irq, ctx->pmu_dev);
+		}
+	}
+	if (val & intr_l3c) {
+		list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) {
+			_xgene_pmu_isr(irq, ctx->pmu_dev);
+		}
+	}
+	if (val & intr_iob) {
+		list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) {
+			_xgene_pmu_isr(irq, ctx->pmu_dev);
+		}
+	}
+
+	raw_spin_unlock_irqrestore(&xgene_pmu->lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					     struct platform_device *pdev)
+{
+	void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
+	struct resource *res;
+	unsigned int reg;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	csw_csr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(csw_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
+		return PTR_ERR(csw_csr);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	mcba_csr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mcba_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
+		return PTR_ERR(mcba_csr);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+	mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mcbb_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
+		return PTR_ERR(mcbb_csr);
+	}
+
+	xgene_pmu->l3c_active_mask = 0x1;
+
+	reg = readl(csw_csr + CSW_CSWCR);
+	if (reg & CSW_CSWCR_DUALMCB_MASK) {
+		/* Dual MCB active */
+		xgene_pmu->mcb_active_mask = 0x3;
+		/* Probe all active MC(s) */
+		reg = readl(mcbb_csr + CSW_CSWCR);
+		xgene_pmu->mc_active_mask =
+			(reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
+	} else {
+		/* Single MCB active */
+		xgene_pmu->mcb_active_mask = 0x1;
+		/* Probe all active MC(s) */
+		reg = readl(mcba_csr + CSW_CSWCR);
+		xgene_pmu->mc_active_mask =
+			(reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
+	}
+
+	return 0;
+}
+
+static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+						struct platform_device *pdev)
+{
+	void __iomem *csw_csr;
+	struct resource *res;
+	unsigned int reg;
+	u32 mcb0routing;
+	u32 mcb1routing;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	csw_csr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(csw_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
+		return PTR_ERR(csw_csr);
+	}
+
+	reg = readl(csw_csr + CSW_CSWCR);
+	mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg);
+	mcb1routing = CSW_CSWCR_MCB1_ROUTING(reg);
+	if (reg & CSW_CSWCR_DUALMCB_MASK) {
+		/* Dual MCB active */
+		xgene_pmu->mcb_active_mask = 0x3;
+		/* Probe all active L3C(s), maximum is 8 */
+		xgene_pmu->l3c_active_mask = 0xFF;
+		/* Probe all active MC(s), maximum is 8 */
+		if ((mcb0routing == 0x2) && (mcb1routing == 0x2))
+			xgene_pmu->mc_active_mask = 0xFF;
+		else if ((mcb0routing == 0x1) && (mcb1routing == 0x1))
+			xgene_pmu->mc_active_mask =  0x33;
+		else
+			xgene_pmu->mc_active_mask =  0x11;
+	} else {
+		/* Single MCB active */
+		xgene_pmu->mcb_active_mask = 0x1;
+		/* Probe all active L3C(s), maximum is 4 */
+		xgene_pmu->l3c_active_mask = 0x0F;
+		/* Probe all active MC(s), maximum is 4 */
+		if (mcb0routing == 0x2)
+			xgene_pmu->mc_active_mask = 0x0F;
+		else if (mcb0routing == 0x1)
+			xgene_pmu->mc_active_mask =  0x03;
+		else
+			xgene_pmu->mc_active_mask =  0x01;
+	}
+
+	return 0;
+}
+
+static int fdt_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					    struct platform_device *pdev)
+{
+	struct regmap *csw_map, *mcba_map, *mcbb_map;
+	struct device_node *np = pdev->dev.of_node;
+	unsigned int reg;
+
+	csw_map = syscon_regmap_lookup_by_phandle(np, "regmap-csw");
+	if (IS_ERR(csw_map)) {
+		dev_err(&pdev->dev, "unable to get syscon regmap csw\n");
+		return PTR_ERR(csw_map);
+	}
+
+	mcba_map = syscon_regmap_lookup_by_phandle(np, "regmap-mcba");
+	if (IS_ERR(mcba_map)) {
+		dev_err(&pdev->dev, "unable to get syscon regmap mcba\n");
+		return PTR_ERR(mcba_map);
+	}
+
+	mcbb_map = syscon_regmap_lookup_by_phandle(np, "regmap-mcbb");
+	if (IS_ERR(mcbb_map)) {
+		dev_err(&pdev->dev, "unable to get syscon regmap mcbb\n");
+		return PTR_ERR(mcbb_map);
+	}
+
+	xgene_pmu->l3c_active_mask = 0x1;
+	if (regmap_read(csw_map, CSW_CSWCR, &reg))
+		return -EINVAL;
+
+	if (reg & CSW_CSWCR_DUALMCB_MASK) {
+		/* Dual MCB active */
+		xgene_pmu->mcb_active_mask = 0x3;
+		/* Probe all active MC(s) */
+		if (regmap_read(mcbb_map, MCBADDRMR, &reg))
+			return 0;
+		xgene_pmu->mc_active_mask =
+			(reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
+	} else {
+		/* Single MCB active */
+		xgene_pmu->mcb_active_mask = 0x1;
+		/* Probe all active MC(s) */
+		if (regmap_read(mcba_map, MCBADDRMR, &reg))
+			return 0;
+		xgene_pmu->mc_active_mask =
+			(reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
+	}
+
+	return 0;
+}
+
+static int xgene_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+					      struct platform_device *pdev)
+{
+	if (has_acpi_companion(&pdev->dev)) {
+		if (xgene_pmu->version == PCP_PMU_V3)
+			return acpi_pmu_v3_probe_active_mcb_mcu_l3c(xgene_pmu,
+								    pdev);
+		else
+			return acpi_pmu_probe_active_mcb_mcu_l3c(xgene_pmu,
+								 pdev);
+	}
+	return fdt_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
+}
+
+static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
+{
+	switch (type) {
+	case PMU_TYPE_L3C:
+		return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id);
+	case PMU_TYPE_IOB:
+		return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id);
+	case PMU_TYPE_IOB_SLOW:
+		return devm_kasprintf(dev, GFP_KERNEL, "iob_slow%d", id);
+	case PMU_TYPE_MCB:
+		return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id);
+	case PMU_TYPE_MC:
+		return devm_kasprintf(dev, GFP_KERNEL, "mc%d", id);
+	default:
+		return devm_kasprintf(dev, GFP_KERNEL, "unknown");
+	}
+}
+
+#if defined(CONFIG_ACPI)
+static int acpi_pmu_dev_add_resource(struct acpi_resource *ares, void *data)
+{
+	struct resource *res = data;
+
+	if (ares->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32)
+		acpi_dev_resource_memory(ares, res);
+
+	/* Always tell the ACPI core to skip this resource */
+	return 1;
+}
+
+static struct
+xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
+				       struct acpi_device *adev, u32 type)
+{
+	struct device *dev = xgene_pmu->dev;
+	struct list_head resource_list;
+	struct xgene_pmu_dev_ctx *ctx;
+	const union acpi_object *obj;
+	struct hw_pmu_info *inf;
+	void __iomem *dev_csr;
+	struct resource res;
+	int enable_bit;
+	int rc;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	INIT_LIST_HEAD(&resource_list);
+	rc = acpi_dev_get_resources(adev, &resource_list,
+				    acpi_pmu_dev_add_resource, &res);
+	acpi_dev_free_resource_list(&resource_list);
+	if (rc < 0) {
+		dev_err(dev, "PMU type %d: No resource address found\n", type);
+		return NULL;
+	}
+
+	dev_csr = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(dev_csr)) {
+		dev_err(dev, "PMU type %d: Fail to map resource\n", type);
+		return NULL;
+	}
+
+	/* A PMU device node without enable-bit-index is always enabled */
+	rc = acpi_dev_get_property(adev, "enable-bit-index",
+				   ACPI_TYPE_INTEGER, &obj);
+	if (rc < 0)
+		enable_bit = 0;
+	else
+		enable_bit = (int) obj->integer.value;
+
+	ctx->name = xgene_pmu_dev_name(dev, type, enable_bit);
+	if (!ctx->name) {
+		dev_err(dev, "PMU type %d: Fail to get device name\n", type);
+		return NULL;
+	}
+	inf = &ctx->inf;
+	inf->type = type;
+	inf->csr = dev_csr;
+	inf->enable_mask = 1 << enable_bit;
+
+	return ctx;
+}
+
+static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
+	{"APMC0D5D", PMU_TYPE_L3C},
+	{"APMC0D5E", PMU_TYPE_IOB},
+	{"APMC0D5F", PMU_TYPE_MCB},
+	{"APMC0D60", PMU_TYPE_MC},
+	{"APMC0D84", PMU_TYPE_L3C},
+	{"APMC0D85", PMU_TYPE_IOB},
+	{"APMC0D86", PMU_TYPE_IOB_SLOW},
+	{"APMC0D87", PMU_TYPE_MCB},
+	{"APMC0D88", PMU_TYPE_MC},
+	{},
+};
+
+static const struct acpi_device_id *xgene_pmu_acpi_match_type(
+					const struct acpi_device_id *ids,
+					struct acpi_device *adev)
+{
+	const struct acpi_device_id *match_id = NULL;
+	const struct acpi_device_id *id;
+
+	for (id = ids; id->id[0] || id->cls; id++) {
+		if (!acpi_match_device_ids(adev, id))
+			match_id = id;
+		else if (match_id)
+			break;
+	}
+
+	return match_id;
+}
+
+static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
+				    void *data, void **return_value)
+{
+	const struct acpi_device_id *acpi_id;
+	struct xgene_pmu *xgene_pmu = data;
+	struct xgene_pmu_dev_ctx *ctx;
+	struct acpi_device *adev;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+	if (acpi_bus_get_status(adev) || !adev->status.present)
+		return AE_OK;
+
+	acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
+	if (!acpi_id)
+		return AE_OK;
+
+	ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data);
+	if (!ctx)
+		return AE_OK;
+
+	if (xgene_pmu_dev_add(xgene_pmu, ctx)) {
+		/* Can't add the PMU device, skip it */
+		devm_kfree(xgene_pmu->dev, ctx);
+		return AE_OK;
+	}
+
+	switch (ctx->inf.type) {
+	case PMU_TYPE_L3C:
+		list_add(&ctx->next, &xgene_pmu->l3cpmus);
+		break;
+	case PMU_TYPE_IOB:
+		list_add(&ctx->next, &xgene_pmu->iobpmus);
+		break;
+	case PMU_TYPE_IOB_SLOW:
+		list_add(&ctx->next, &xgene_pmu->iobpmus);
+		break;
+	case PMU_TYPE_MCB:
+		list_add(&ctx->next, &xgene_pmu->mcbpmus);
+		break;
+	case PMU_TYPE_MC:
+		list_add(&ctx->next, &xgene_pmu->mcpmus);
+		break;
+	}
+	return AE_OK;
+}
+
+static int acpi_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
+				  struct platform_device *pdev)
+{
+	struct device *dev = xgene_pmu->dev;
+	acpi_handle handle;
+	acpi_status status;
+
+	handle = ACPI_HANDLE(dev);
+	if (!handle)
+		return -EINVAL;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				     acpi_pmu_dev_add, NULL, xgene_pmu, NULL);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to probe PMU devices\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+#else
+static int acpi_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
+				  struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
+static struct
+xgene_pmu_dev_ctx *fdt_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
+				      struct device_node *np, u32 type)
+{
+	struct device *dev = xgene_pmu->dev;
+	struct xgene_pmu_dev_ctx *ctx;
+	struct hw_pmu_info *inf;
+	void __iomem *dev_csr;
+	struct resource res;
+	int enable_bit;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	if (of_address_to_resource(np, 0, &res) < 0) {
+		dev_err(dev, "PMU type %d: No resource address found\n", type);
+		return NULL;
+	}
+
+	dev_csr = devm_ioremap_resource(dev, &res);
+	if (IS_ERR(dev_csr)) {
+		dev_err(dev, "PMU type %d: Fail to map resource\n", type);
+		return NULL;
+	}
+
+	/* A PMU device node without enable-bit-index is always enabled */
+	if (of_property_read_u32(np, "enable-bit-index", &enable_bit))
+		enable_bit = 0;
+
+	ctx->name = xgene_pmu_dev_name(dev, type, enable_bit);
+	if (!ctx->name) {
+		dev_err(dev, "PMU type %d: Fail to get device name\n", type);
+		return NULL;
+	}
+
+	inf = &ctx->inf;
+	inf->type = type;
+	inf->csr = dev_csr;
+	inf->enable_mask = 1 << enable_bit;
+
+	return ctx;
+}
+
+static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
+				 struct platform_device *pdev)
+{
+	struct xgene_pmu_dev_ctx *ctx;
+	struct device_node *np;
+
+	for_each_child_of_node(pdev->dev.of_node, np) {
+		if (!of_device_is_available(np))
+			continue;
+
+		if (of_device_is_compatible(np, "apm,xgene-pmu-l3c"))
+			ctx = fdt_get_pmu_hw_inf(xgene_pmu, np, PMU_TYPE_L3C);
+		else if (of_device_is_compatible(np, "apm,xgene-pmu-iob"))
+			ctx = fdt_get_pmu_hw_inf(xgene_pmu, np, PMU_TYPE_IOB);
+		else if (of_device_is_compatible(np, "apm,xgene-pmu-mcb"))
+			ctx = fdt_get_pmu_hw_inf(xgene_pmu, np, PMU_TYPE_MCB);
+		else if (of_device_is_compatible(np, "apm,xgene-pmu-mc"))
+			ctx = fdt_get_pmu_hw_inf(xgene_pmu, np, PMU_TYPE_MC);
+		else
+			ctx = NULL;
+
+		if (!ctx)
+			continue;
+
+		if (xgene_pmu_dev_add(xgene_pmu, ctx)) {
+			/* Can't add the PMU device, skip it */
+			devm_kfree(xgene_pmu->dev, ctx);
+			continue;
+		}
+
+		switch (ctx->inf.type) {
+		case PMU_TYPE_L3C:
+			list_add(&ctx->next, &xgene_pmu->l3cpmus);
+			break;
+		case PMU_TYPE_IOB:
+			list_add(&ctx->next, &xgene_pmu->iobpmus);
+			break;
+		case PMU_TYPE_IOB_SLOW:
+			list_add(&ctx->next, &xgene_pmu->iobpmus);
+			break;
+		case PMU_TYPE_MCB:
+			list_add(&ctx->next, &xgene_pmu->mcbpmus);
+			break;
+		case PMU_TYPE_MC:
+			list_add(&ctx->next, &xgene_pmu->mcpmus);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int xgene_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
+				   struct platform_device *pdev)
+{
+	if (has_acpi_companion(&pdev->dev))
+		return acpi_pmu_probe_pmu_dev(xgene_pmu, pdev);
+	return fdt_pmu_probe_pmu_dev(xgene_pmu, pdev);
+}
+
+static const struct xgene_pmu_data xgene_pmu_data = {
+	.id   = PCP_PMU_V1,
+};
+
+static const struct xgene_pmu_data xgene_pmu_v2_data = {
+	.id   = PCP_PMU_V2,
+};
+
+static const struct xgene_pmu_ops xgene_pmu_ops = {
+	.mask_int = xgene_pmu_mask_int,
+	.unmask_int = xgene_pmu_unmask_int,
+	.read_counter = xgene_pmu_read_counter32,
+	.write_counter = xgene_pmu_write_counter32,
+	.write_evttype = xgene_pmu_write_evttype,
+	.write_agentmsk = xgene_pmu_write_agentmsk,
+	.write_agent1msk = xgene_pmu_write_agent1msk,
+	.enable_counter = xgene_pmu_enable_counter,
+	.disable_counter = xgene_pmu_disable_counter,
+	.enable_counter_int = xgene_pmu_enable_counter_int,
+	.disable_counter_int = xgene_pmu_disable_counter_int,
+	.reset_counters = xgene_pmu_reset_counters,
+	.start_counters = xgene_pmu_start_counters,
+	.stop_counters = xgene_pmu_stop_counters,
+};
+
+static const struct xgene_pmu_ops xgene_pmu_v3_ops = {
+	.mask_int = xgene_pmu_v3_mask_int,
+	.unmask_int = xgene_pmu_v3_unmask_int,
+	.read_counter = xgene_pmu_read_counter64,
+	.write_counter = xgene_pmu_write_counter64,
+	.write_evttype = xgene_pmu_write_evttype,
+	.write_agentmsk = xgene_pmu_v3_write_agentmsk,
+	.write_agent1msk = xgene_pmu_v3_write_agent1msk,
+	.enable_counter = xgene_pmu_enable_counter,
+	.disable_counter = xgene_pmu_disable_counter,
+	.enable_counter_int = xgene_pmu_enable_counter_int,
+	.disable_counter_int = xgene_pmu_disable_counter_int,
+	.reset_counters = xgene_pmu_reset_counters,
+	.start_counters = xgene_pmu_start_counters,
+	.stop_counters = xgene_pmu_stop_counters,
+};
+
+static const struct of_device_id xgene_pmu_of_match[] = {
+	{ .compatible	= "apm,xgene-pmu",	.data = &xgene_pmu_data },
+	{ .compatible	= "apm,xgene-pmu-v2",	.data = &xgene_pmu_v2_data },
+	{},
+};
+MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgene_pmu_acpi_match[] = {
+	{"APMC0D5B", PCP_PMU_V1},
+	{"APMC0D5C", PCP_PMU_V2},
+	{"APMC0D83", PCP_PMU_V3},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
+#endif
+
+static int xgene_pmu_probe(struct platform_device *pdev)
+{
+	const struct xgene_pmu_data *dev_data;
+	const struct of_device_id *of_id;
+	struct xgene_pmu *xgene_pmu;
+	struct resource *res;
+	int irq, rc;
+	int version;
+
+	xgene_pmu = devm_kzalloc(&pdev->dev, sizeof(*xgene_pmu), GFP_KERNEL);
+	if (!xgene_pmu)
+		return -ENOMEM;
+	xgene_pmu->dev = &pdev->dev;
+	platform_set_drvdata(pdev, xgene_pmu);
+
+	version = -EINVAL;
+	of_id = of_match_device(xgene_pmu_of_match, &pdev->dev);
+	if (of_id) {
+		dev_data = (const struct xgene_pmu_data *) of_id->data;
+		version = dev_data->id;
+	}
+
+#ifdef CONFIG_ACPI
+	if (ACPI_COMPANION(&pdev->dev)) {
+		const struct acpi_device_id *acpi_id;
+
+		acpi_id = acpi_match_device(xgene_pmu_acpi_match, &pdev->dev);
+		if (acpi_id)
+			version = (int) acpi_id->driver_data;
+	}
+#endif
+	if (version < 0)
+		return -ENODEV;
+
+	if (version == PCP_PMU_V3)
+		xgene_pmu->ops = &xgene_pmu_v3_ops;
+	else
+		xgene_pmu->ops = &xgene_pmu_ops;
+
+	INIT_LIST_HEAD(&xgene_pmu->l3cpmus);
+	INIT_LIST_HEAD(&xgene_pmu->iobpmus);
+	INIT_LIST_HEAD(&xgene_pmu->mcbpmus);
+	INIT_LIST_HEAD(&xgene_pmu->mcpmus);
+
+	xgene_pmu->version = version;
+	dev_info(&pdev->dev, "X-Gene PMU version %d\n", xgene_pmu->version);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(xgene_pmu->pcppmu_csr)) {
+		dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n");
+		return PTR_ERR(xgene_pmu->pcppmu_csr);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "No IRQ resource\n");
+		return -EINVAL;
+	}
+	rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr,
+				IRQF_NOBALANCING | IRQF_NO_THREAD,
+				dev_name(&pdev->dev), xgene_pmu);
+	if (rc) {
+		dev_err(&pdev->dev, "Could not request IRQ %d\n", irq);
+		return rc;
+	}
+
+	raw_spin_lock_init(&xgene_pmu->lock);
+
+	/* Check for active MCBs and MCUs */
+	rc = xgene_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
+	if (rc) {
+		dev_warn(&pdev->dev, "Unknown MCB/MCU active status\n");
+		xgene_pmu->mcb_active_mask = 0x1;
+		xgene_pmu->mc_active_mask = 0x1;
+	}
+
+	/* Pick one core to use for cpumask attributes */
+	cpumask_set_cpu(smp_processor_id(), &xgene_pmu->cpu);
+
+	/* Make sure that the overflow interrupt is handled by this CPU */
+	rc = irq_set_affinity(irq, &xgene_pmu->cpu);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to set interrupt affinity!\n");
+		return rc;
+	}
+
+	/* Walk through the tree for all PMU perf devices */
+	rc = xgene_pmu_probe_pmu_dev(xgene_pmu, pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "No PMU perf devices found!\n");
+		return rc;
+	}
+
+	/* Enable interrupt */
+	xgene_pmu->ops->unmask_int(xgene_pmu);
+
+	return 0;
+}
+
+static void
+xgene_pmu_dev_cleanup(struct xgene_pmu *xgene_pmu, struct list_head *pmus)
+{
+	struct xgene_pmu_dev_ctx *ctx;
+
+	list_for_each_entry(ctx, pmus, next) {
+		perf_pmu_unregister(&ctx->pmu_dev->pmu);
+	}
+}
+
+static int xgene_pmu_remove(struct platform_device *pdev)
+{
+	struct xgene_pmu *xgene_pmu = dev_get_drvdata(&pdev->dev);
+
+	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->l3cpmus);
+	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->iobpmus);
+	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcbpmus);
+	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus);
+
+	return 0;
+}
+
+static struct platform_driver xgene_pmu_driver = {
+	.probe = xgene_pmu_probe,
+	.remove = xgene_pmu_remove,
+	.driver = {
+		.name		= "xgene-pmu",
+		.of_match_table = xgene_pmu_of_match,
+		.acpi_match_table = ACPI_PTR(xgene_pmu_acpi_match),
+	},
+};
+
+builtin_platform_driver(xgene_pmu_driver);