Merge android13-gs-pixel-5.10-24Q3 into android14-gs-pixel-5.15-24Q3

Bring along a few bug fixes:

Bug: 344695868
Bug: 343237846
Bug: 341513150

Test: Local build & test
Change-Id: Ib50a730c815aacdd4798f55ba4f2d63a3480fca5
Signed-off-by: Vamsidhar reddy Gaddam <gvamsi@google.com>
diff --git a/common/BUILD.bazel b/common/BUILD.bazel
index b7506a1..be83682 100644
--- a/common/BUILD.bazel
+++ b/common/BUILD.bazel
@@ -1,10 +1,4 @@
-# NOTE: THIS FILE IS EXPERIMENTAL FOR THE BAZEL MIGRATION AND NOT USED FOR
-# YOUR BUILDS CURRENTLY.
-#
-# It is not yet the source of truth for your build. If you're looking to modify
-# the build file, modify the Android.bp file instead. Do *not* modify this file
-# unless you have coordinated with the team managing the Soong to Bazel
-# migration.
+# SPDX-License-Identifier: GPL-2.0
 
 filegroup(
     name = "headers",
@@ -12,6 +6,7 @@
         "**/*.h",
     ]),
     visibility = [
+        "//private/devices/google:__subpackages__",
         "//private/google-modules/gpu:__subpackages__",
     ],
 )
diff --git a/mali_kbase/BUILD.bazel b/mali_kbase/BUILD.bazel
index 54dd437..d0714c5 100644
--- a/mali_kbase/BUILD.bazel
+++ b/mali_kbase/BUILD.bazel
@@ -12,7 +12,7 @@
 # along with this program; if not, you can access it online at
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
-#
+# SPDX-License-Identifier: GPL-2.0
 
 load(
     "//build/kernel/kleaf:kernel.bzl",
@@ -26,26 +26,25 @@
 ]
 
 kernel_module(
-    name = "mali_kbase.cloudripper",
+    name = "mali_kbase",
     srcs = glob([
         "**/*.c",
         "**/*.h",
         "**/*Kbuild",
         "**/*Makefile",
     ]) + [
-        "//common:kernel_headers",
-        "//common-modules/mali:headers",
-        "//common-modules/mali/drivers/gpu/arm/arbitration",
-        "//common-modules/mali/drivers/xen/arm:xen",
         "//private/google-modules/gpu/common:headers",
+        "//private/google-modules/soc/gs:gs_soc_headers",
     ],
     outs = _midgard_modules,
-    kernel_build = "//private/gs-google:cloudripper",
+    kernel_build = "//private/google-modules/soc/gs:gs_kernel_build",
     visibility = [
-        "//private/gs-google:__pkg__",
+        "//private/devices/google:__subpackages__",
+        "//private/google-modules/soc/gs:__pkg__",
     ],
     deps = [
         "//private/google-modules/gpu/mali_pixel",
+        "//private/google-modules/soc/gs:gs_soc_module",
     ],
 )
 
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 92b4976..0071336 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -98,6 +98,13 @@
     MALI_JIT_PRESSURE_LIMIT_BASE ?= 1
     MALI_USE_CSF ?= 0
 endif
+ifeq ($(CONFIG_SOC_ZUMA),y)
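+# ZUMA-only GPU power features: S2MPU runtime power control and GPU sleep.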
+    ccflags-y += -DCONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL
+    ccflags-y += -DCONFIG_MALI_PIXEL_GPU_SLEEP
+# Flag to enable the Fmax cap.
+# Comment it out to enable all possible OPPs in the DVFS table.
+    ccflags-y += -DCONFIG_MALI_PIXEL_GPU_HARD_FMAX
+endif
 
 
 ifneq ($(CONFIG_MALI_KUTF), n)
@@ -137,8 +144,6 @@
 endif
 
 ccflags-y += \
-    -I$(srctree)/include/linux \
-    -I$(srctree)/drivers/staging/android \
     -I$(src) \
     -I$(src)/platform/$(MALI_PLATFORM_DIR) \
     -I$(src)/../../../base \
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index 1062c07..3a5239f 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -45,6 +45,7 @@
 CONFIG_MALI_PIXEL_GPU_BTS ?= y
 CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING ?= y
 CONFIG_MALI_PIXEL_GPU_THERMAL ?= y
+CONFIG_MALI_PIXEL_GPU_PM ?= y
 CONFIG_MALI_PIXEL_GPU_SLC ?= y
 
 #
@@ -211,6 +212,7 @@
         CONFIG_MALI_PIXEL_GPU_BTS \
         CONFIG_MALI_PIXEL_GPU_THERMAL \
         CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING \
+        CONFIG_MALI_PIXEL_GPU_PM \
         CONFIG_MALI_PIXEL_GPU_SLC
 
 
@@ -249,10 +251,12 @@
     EXTRA_CFLAGS += -DCONFIG_MALI_NO_MALI_DEFAULT_GPU='\"$(CONFIG_MALI_NO_MALI_DEFAULT_GPU)\"'
 endif
 
+include $(KDIR)/../private/google-modules/soc/gs/Makefile.include
+
 #
 # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
 #
-BASE_SYMBOLS = $(OUT_DIR)/../google-modules/gpu/mali_pixel/Module.symvers
+BASE_SYMBOLS = $(OUT_DIR)/../private/google-modules/gpu/mali_pixel/Module.symvers
 
 EXTRA_SYMBOLS += \
     $(BASE_SYMBOLS)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index 06efeaf..2337f6b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -46,6 +46,7 @@
 #include <mali_kbase_dummy_job_wa.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
 
+#include <trace/hooks/systrace.h>
 
 static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
 static void kbase_pm_hwcnt_disable_worker(struct work_struct *data);
@@ -909,12 +910,16 @@
 
 void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
 {
+	ATRACE_BEGIN(__func__);
 	kbase_pm_update_active(kbdev);
+	ATRACE_END();
 }
 
 void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
 {
+	ATRACE_BEGIN(__func__);
 	kbase_pm_update_active(kbdev);
+	ATRACE_END();
 }
 
 int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index edec761..2157de9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -127,6 +127,8 @@
  *              time_period_start timestamp, measured in units of 256ns.
  *  @time_in_protm: The amount of time the GPU has spent in protected mode since
  *                  the time_period_start timestamp, measured in units of 256ns.
+ *  @busy_mcu: The amount of time the MCU was busy, measured in units of 256ns.
+ *  @idle_mcu: The amount of time the MCU was idle, measured in units of 256ns.
  *  @busy_cl: the amount of time the GPU was busy executing CL jobs. Note that
  *           if two CL jobs were active for 256ns, this value would be updated
  *           with 2 (2x256ns).
@@ -139,6 +141,8 @@
 	u32 time_idle;
 #if MALI_USE_CSF
 	u32 time_in_protm;
+	u32 busy_mcu;
+	u32 idle_mcu;
 #else
 	u32 busy_cl[2];
 	u32 busy_gl;
@@ -249,6 +253,8 @@
  *                                .state is populated.
  * @KBASE_PM_LOG_EVENT_CORES: a transition of core availability.
  *                            .cores is populated.
+ * @KBASE_PM_LOG_EVENT_DVFS_CHANGE: a transition of DVFS frequency.
+ *                                  .dvfs is populated.
  *
  * Each event log event has a type which determines the data it carries.
  */
@@ -257,7 +263,8 @@
 	KBASE_PM_LOG_EVENT_SHADERS_STATE,
 	KBASE_PM_LOG_EVENT_L2_STATE,
 	KBASE_PM_LOG_EVENT_MCU_STATE,
-	KBASE_PM_LOG_EVENT_CORES
+	KBASE_PM_LOG_EVENT_CORES,
+	KBASE_PM_LOG_EVENT_DVFS_CHANGE,
 };
 
 /**
@@ -275,6 +282,11 @@
 			u8 prev;
 		} state;
 		struct {
+			u64 domain;
+			u64 next;
+			u64 prev;
+		} dvfs;
+		struct {
 			u64 l2;
 			u64 shader;
 			u64 tiler;
@@ -286,7 +298,7 @@
 #define EVENT_LOG_MAX (PAGE_SIZE / sizeof(struct kbase_pm_event_log_event))
 
 struct kbase_pm_event_log {
-	u32 last_event;
+	atomic_t last_event;
 	struct kbase_pm_event_log_event events[EVENT_LOG_MAX];
 };
 
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index bcae6ef..9928e69 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -58,6 +58,8 @@
 
 #include <linux/of.h>
 
+#include <trace/hooks/systrace.h>
+
 #ifdef CONFIG_MALI_CORESTACK
 bool corestack_driver_control = true;
 #else
@@ -1265,6 +1267,7 @@
 			dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n",
 				kbase_mcu_state_to_string(prev_state),
 				kbase_mcu_state_to_string(backend->mcu_state));
+			trace_mali_pm_mcu_state(prev_state, backend->mcu_state);
 			kbase_ktrace_log_mcu_state(kbdev, backend->mcu_state);
 		}
 
@@ -1519,8 +1522,10 @@
 		case KBASE_L2_OFF:
 			if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) {
 #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+#if IS_ENABLED(CONFIG_SOC_GS201)
 				// Workaround: give a short pause here before starting L2 transition.
 				udelay(200);
+#endif
 				/* Enable HW timer of IPA control before
 				 * L2 cache is powered-up.
 				 */
@@ -1819,6 +1824,13 @@
 			dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n",
 				kbase_l2_core_state_to_string(prev_state),
 				kbase_l2_core_state_to_string(backend->l2_state));
+			trace_mali_pm_l2_state(prev_state, backend->l2_state);
+#if IS_ENABLED(CONFIG_SOC_GS201)
+			if (!kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off &&
+				backend->l2_state == KBASE_L2_OFF) {
+				dev_warn(kbdev->dev, "transition to l2 off without waking waiter");
+			}
+#endif
 			kbase_ktrace_log_l2_core_state(kbdev, backend->l2_state);
 		}
 
@@ -2337,17 +2349,25 @@
 	enum kbase_mcu_state prev_mcu_state = kbdev->pm.backend.mcu_state;
 #endif
 
+	ATRACE_BEGIN(__func__);
+
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	if (!kbdev->pm.backend.gpu_ready)
+	if (!kbdev->pm.backend.gpu_ready) {
+		ATRACE_END();
 		return; /* Do nothing if the GPU is not ready */
+	}
 
-	if (kbase_pm_l2_update_state(kbdev))
+	if (kbase_pm_l2_update_state(kbdev)) {
+		ATRACE_END();
 		return;
+	}
 
 #if !MALI_USE_CSF
-	if (kbase_pm_shaders_update_state(kbdev))
+	if (kbase_pm_shaders_update_state(kbdev)) {
+		ATRACE_END();
 		return;
+	}
 
 	/* If the shaders just turned off, re-invoke the L2 state machine, in
 	 * case it was waiting for the shaders to turn off before powering down
@@ -2355,17 +2375,23 @@
 	 */
 	if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF &&
 	    kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF) {
-		if (kbase_pm_l2_update_state(kbdev))
+		if (kbase_pm_l2_update_state(kbdev)) {
+			ATRACE_END();
 			return;
+		}
 	}
 #else
-	if (kbase_pm_mcu_update_state(kbdev))
+	if (kbase_pm_mcu_update_state(kbdev)) {
+		ATRACE_END();
 		return;
+	}
 
 	if (!kbase_pm_is_mcu_inactive(kbdev, prev_mcu_state) &&
 	    kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state)) {
-		if (kbase_pm_l2_update_state(kbdev))
+		if (kbase_pm_l2_update_state(kbdev)) {
+			ATRACE_END();
 			return;
+		}
 	}
 #endif
 
@@ -2379,6 +2405,8 @@
 		KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0);
 		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
 	}
+
+	ATRACE_END();
 }
 
 static enum hrtimer_restart shader_tick_timer_callback(struct hrtimer *timer)
@@ -2522,7 +2550,7 @@
  * Shader and L2 state. If the time spent waiting has exceeded this threshold
  * then there is most likely a hardware issue.
  */
-#define PM_TIMEOUT_MS (5000) /* 5s */
+#define PM_TIMEOUT_MS (5000 * KBASE_TIMEOUT_MULTIPLIER) /* 5s */
 #endif
 
 void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *timeout_msg)
@@ -2839,6 +2867,7 @@
 {
 	unsigned long flags;
 
+	ATRACE_BEGIN(__func__);
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	/*
 	 * Clear all interrupts,
@@ -2859,7 +2888,7 @@
 #else
 	kbase_reg_write32(kbdev, MMU_CONTROL_ENUM(IRQ_MASK), 0xFFFFFFFF);
 #endif
-
+	ATRACE_END();
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
@@ -2908,6 +2937,7 @@
 {
 	struct kbase_context *kctx, *n;
 
+	ATRACE_BEGIN(__func__);
 	lockdep_assert_held(&kbdev->pm.lock);
 
 	mutex_lock(&kbdev->csf.reg_lock);
@@ -2923,6 +2953,7 @@
 			kctx->id);
 	}
 	mutex_unlock(&kbdev->csf.reg_lock);
+	ATRACE_END();
 }
 #endif
 
@@ -2938,6 +2969,7 @@
 	bool reset_required = is_resume;
 	unsigned long flags;
 
+	ATRACE_BEGIN(__func__);
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 #if !MALI_USE_CSF
 	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
@@ -2947,6 +2979,7 @@
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) {
 		dev_err(kbdev->dev, "%s: Cannot power up while GPU lost", __func__);
+		ATRACE_END();
 		return;
 	}
 #endif
@@ -2963,6 +2996,7 @@
 			kbase_pm_enable_interrupts(kbdev);
 		kbdev->poweroff_pending = false;
 		KBASE_DEBUG_ASSERT(!is_resume);
+		ATRACE_END();
 		return;
 	}
 
@@ -2972,6 +3006,7 @@
 
 	if (is_resume && backend->callback_power_resume) {
 		backend->callback_power_resume(kbdev);
+		ATRACE_END();
 		return;
 	} else if (backend->callback_power_on) {
 		reset_required = backend->callback_power_on(kbdev);
@@ -3059,6 +3094,8 @@
 		backend->gpu_idled = false;
 	}
 #endif
+
+	ATRACE_END();
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
@@ -3591,6 +3628,7 @@
 	unsigned long irq_flags;
 	int err = 0;
 
+	ATRACE_BEGIN(__func__);
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	lockdep_assert_held(&kbdev->pm.lock);
 
@@ -3689,6 +3727,8 @@
 	}
 #endif
 
+	ATRACE_END();
+
 	return err;
 }
 
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_event_log.c b/mali_kbase/backend/gpu/mali_kbase_pm_event_log.c
index b752af8..0ed6115 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_event_log.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_event_log.c
@@ -24,11 +24,8 @@
 static inline u32 kbase_pm_next_log_event(
 	struct kbase_pm_event_log *log)
 {
-	u32 ret = log->last_event;
-	++ret;
-	ret %= EVENT_LOG_MAX;
-	log->last_event = ret;
-	return ret;
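+	/* last_event is atomic so concurrent callers each claim a distinct
+	 * slot without needing to hold the hwaccess_lock.
+	 */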
+	u32 ret = atomic_inc_return(&log->last_event);
+	return ret % EVENT_LOG_MAX;
 }
 
 struct kbase_pm_event_log_event *kbase_pm_add_log_event(
@@ -37,7 +34,6 @@
 	struct kbase_pm_event_log *log = &kbdev->pm.backend.event_log;
 	struct kbase_pm_event_log_event *ret = NULL;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
 	ret = &log->events[kbase_pm_next_log_event(log)];
 
 	memset(ret, 0, sizeof(*ret));
@@ -70,7 +66,7 @@
 {
 	struct kbase_pm_event_log_metadata *md =
 			&global_event_log_metadata;
-	kbdev->pm.backend.event_log.last_event = -1;
+	atomic_set(&kbdev->pm.backend.event_log.last_event, -1);
 	md->magic[0] = 'k';
 	md->magic[1] = 'p';
 	md->magic[2] = 'e';
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index c2d7bdb..a886b4d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -620,6 +620,8 @@
  * Return:         Returns 0 on failure and non zero on success.
  */
 int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
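+
+/**
+ * kbase_platform_dvfs_event_mcu - Report GPU and MCU utilisation to DVFS code
+ * @kbdev:           The kbase device structure for the device (must be a
+ *                   valid pointer)
+ * @utilisation:     The current calculated utilisation by the metrics system
+ * @mcu_utilisation: The current calculated MCU utilisation
+ *
+ * Return:           Returns 0 on failure and non zero on success.
+ */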
+int kbase_platform_dvfs_event_mcu(struct kbase_device *kbdev, u32 utilisation,
+				  u32 mcu_utilisation);
 #else
 /**
  * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
index e89b188..55de531 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
@@ -47,8 +47,8 @@
 #endif
 
 #if MALI_USE_CSF
-/* To get the GPU_ACTIVE value in nano seconds unit */
-#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9)
+/* To get the GPU_ITER_ACTIVE value in nanoseconds */
+#define GPU_ITER_ACTIVE_SCALING_FACTOR ((u64)1E9)
 #endif
 
 /*
@@ -102,28 +102,37 @@
 int kbasep_pm_metrics_init(struct kbase_device *kbdev)
 {
 #if MALI_USE_CSF
-	struct kbase_ipa_control_perf_counter perf_counter;
+	struct kbase_ipa_control_perf_counter
+		perf_counters[IPA_NUM_PERF_COUNTERS];
 	int err;
 
-	/* One counter group */
-	const size_t NUM_PERF_COUNTERS = 1;
-
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	kbdev->pm.backend.metrics.kbdev = kbdev;
 	kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();
 
-	perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR;
+	perf_counters[ITER_ACTIVE_IDX].scaling_factor =
+		GPU_ITER_ACTIVE_SCALING_FACTOR;
 
 	/* Normalize values by GPU frequency */
-	perf_counter.gpu_norm = true;
+	perf_counters[ITER_ACTIVE_IDX].gpu_norm = true;
 
-	/* We need the GPU_ACTIVE counter, which is in the CSHW group */
-	perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW;
+	/* We need the GPU_ITER_ACTIVE counter, which is in the CSHW group */
+	perf_counters[ITER_ACTIVE_IDX].type = KBASE_IPA_CORE_TYPE_CSHW;
 
-	/* We need the GPU_ACTIVE counter */
-	perf_counter.idx = GPU_ACTIVE_CNT_IDX;
+	/* We need the GPU_ITER_ACTIVE counter */
+	perf_counters[ITER_ACTIVE_IDX].idx = IPA_GPU_ITER_ACTIVE_CNT_IDX;
 
-	err = kbase_ipa_control_register(kbdev, &perf_counter, NUM_PERF_COUNTERS,
+	/* Register the MCU_ACTIVE counter in the same way */
+	perf_counters[MCU_ACTIVE_IDX].scaling_factor =
+		GPU_ITER_ACTIVE_SCALING_FACTOR;
+
+	perf_counters[MCU_ACTIVE_IDX].gpu_norm = true;
+
+	perf_counters[MCU_ACTIVE_IDX].type = KBASE_IPA_CORE_TYPE_CSHW;
+
+	perf_counters[MCU_ACTIVE_IDX].idx = IPA_MCU_ACTIVE_CNT_IDX;
+
+	err = kbase_ipa_control_register(kbdev, perf_counters, IPA_NUM_PERF_COUNTERS,
 					 &kbdev->pm.backend.metrics.ipa_control_client);
 	if (err) {
 		dev_err(kbdev->dev, "Failed to register IPA with kbase_ipa_control: err=%d", err);
@@ -184,7 +193,10 @@
 static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
 {
 	int err;
-	u64 gpu_active_counter;
+	struct pixel_context *pc = kbdev->platform_context;
+
+	u64 gpu_iter_active_counter, mcu_active_counter,
+		counters[IPA_NUM_PERF_COUNTERS];
 	u64 protected_time;
 	ktime_t now;
 
@@ -193,19 +205,23 @@
 	/* Query IPA_CONTROL for the latest GPU-active and protected-time
 	 * info.
 	 */
-	err = kbase_ipa_control_query(kbdev, kbdev->pm.backend.metrics.ipa_control_client,
-				      &gpu_active_counter, 1, &protected_time);
+	err = kbase_ipa_control_query(
+		kbdev, kbdev->pm.backend.metrics.ipa_control_client, counters,
+		IPA_NUM_PERF_COUNTERS, &protected_time);
+	gpu_iter_active_counter = counters[ITER_ACTIVE_IDX];
+	mcu_active_counter = counters[MCU_ACTIVE_IDX];
 
-	/* Read the timestamp after reading the GPU_ACTIVE counter value.
+	/* Read the timestamp after reading the GPU_ITER_ACTIVE counter value.
 	 * This ensures the time gap between the 2 reads is consistent for
-	 * a meaningful comparison between the increment of GPU_ACTIVE and
+	 * a meaningful comparison between the increment of GPU_ITER_ACTIVE and
 	 * elapsed time. The lock taken inside kbase_ipa_control_query()
 	 * function can cause lot of variation.
 	 */
 	now = ktime_get_raw();
 
 	if (err) {
-		dev_err(kbdev->dev, "Failed to query the increment of GPU_ACTIVE counter: err=%d",
+		dev_err(kbdev->dev,
+			"Failed to query the increment of GPU_ITER_ACTIVE counter: err=%d",
 			err);
 	} else {
 		u64 diff_ns;
@@ -227,20 +243,20 @@
 		 * (IPA_CONTROL_TIMER_DEFAULT_VALUE_MS). Ideally, we should check the counter
 		 * update timestamp in the GPU internal register to ensure accurate updates.
 		 */
-		if (gpu_active_counter == 0 &&
+		if (gpu_iter_active_counter == 0 && mcu_active_counter == 0 &&
 			diff_ns_signed < IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2)
 			return false;
 
 		diff_ns = (u64)diff_ns_signed;
 
 #if !IS_ENABLED(CONFIG_MALI_NO_MALI)
-		/* The GPU_ACTIVE counter shouldn't clock-up more time than has
+		/* The GPU_ITER_ACTIVE counter shouldn't clock-up more time than has
 		 * actually elapsed - but still some margin needs to be given
 		 * when doing the comparison. There could be some drift between
 		 * the CPU and GPU clock.
 		 *
 		 * Can do the check only in a real driver build, as an arbitrary
-		 * value for GPU_ACTIVE can be fed into dummy model in no_mali
+		 * value for GPU_ITER_ACTIVE can be fed into dummy model in no_mali
 		 * configuration which may not correspond to the real elapsed
 		 * time.
 		 */
@@ -252,11 +268,19 @@
 			u64 const MARGIN_NS =
 				IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2;
 
-			if (gpu_active_counter > (diff_ns + MARGIN_NS)) {
+			if (gpu_iter_active_counter > (diff_ns + MARGIN_NS)) {
 				dev_info(
 					kbdev->dev,
 					"GPU activity takes longer than time interval: %llu ns > %llu ns",
-					(unsigned long long)gpu_active_counter,
+					(unsigned long long)gpu_iter_active_counter,
+					(unsigned long long)diff_ns);
+			}
+			if (mcu_active_counter > (diff_ns + MARGIN_NS)) {
+				dev_info(
+					kbdev->dev,
+					"MCU activity takes longer than time interval: %llu ns > %llu ns",
+					(unsigned long long)mcu_active_counter,
 					(unsigned long long)diff_ns);
 			}
 		}
@@ -264,7 +288,7 @@
 		/* Calculate time difference in units of 256ns */
 		ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT);
 
-		/* Add protected_time to gpu_active_counter so that time in
+		/* Add protected_time to gpu_iter_active_counter so that time in
 		 * protected mode is included in the apparent GPU active time,
 		 * then convert it from units of 1ns to units of 256ns, to
 		 * match what JM GPUs use. The assumption is made here that the
@@ -276,22 +300,33 @@
 		 * the chances of overflows.
 		 */
 		protected_time >>= KBASE_PM_TIME_SHIFT;
-		gpu_active_counter >>= KBASE_PM_TIME_SHIFT;
-		gpu_active_counter += protected_time;
+		gpu_iter_active_counter >>= KBASE_PM_TIME_SHIFT;
+		gpu_iter_active_counter += protected_time;
+		mcu_active_counter >>= KBASE_PM_TIME_SHIFT;
+
+#if MALI_USE_CSF
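+		/* The MCU counter is not available while the GPU is in
+		 * protected mode, so approximate MCU busy time there with the
+		 * tunable mcu_protm_scale_num/mcu_protm_scale_den ratio.
+		 */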
+		mcu_active_counter += ((protected_time /
+					pc->dvfs.tunable.mcu_protm_scale_den) *
+				       pc->dvfs.tunable.mcu_protm_scale_num);
+#endif
 
 		/* Ensure the following equations don't go wrong if ns_time is
-		 * slightly larger than gpu_active_counter somehow
+		 * slightly larger than gpu_iter_active_counter somehow
 		 */
-		gpu_active_counter = MIN(gpu_active_counter, ns_time);
+		gpu_iter_active_counter = MIN(gpu_iter_active_counter, ns_time);
+		mcu_active_counter = MIN(mcu_active_counter, ns_time);
 
-		kbdev->pm.backend.metrics.values.time_busy += gpu_active_counter;
+		kbdev->pm.backend.metrics.values.time_busy += gpu_iter_active_counter;
 
-		kbdev->pm.backend.metrics.values.time_idle += ns_time - gpu_active_counter;
+		kbdev->pm.backend.metrics.values.time_idle += ns_time - gpu_iter_active_counter;
 
 		/* Also make time in protected mode available explicitly,
 		 * so users of this data have this info, too.
 		 */
 		kbdev->pm.backend.metrics.values.time_in_protm += protected_time;
+		kbdev->pm.backend.metrics.values.busy_mcu += mcu_active_counter;
+		kbdev->pm.backend.metrics.values.idle_mcu +=
+			(ns_time - mcu_active_counter);
 	}
 
 	kbdev->pm.backend.metrics.time_period_start = now;
@@ -356,6 +391,8 @@
 
 #if MALI_USE_CSF
 	diff->time_in_protm = cur->time_in_protm - last->time_in_protm;
+	diff->busy_mcu = cur->busy_mcu - last->busy_mcu;
+	diff->idle_mcu = cur->idle_mcu - last->idle_mcu;
 #else
 	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
 	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
@@ -372,7 +409,7 @@
 #ifdef CONFIG_MALI_MIDGARD_DVFS
 void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
 {
-	int utilisation;
+	int utilisation, mcu_utilisation;
 	struct kbasep_pm_metrics *diff;
 #if !MALI_USE_CSF
 	int busy;
@@ -388,6 +425,9 @@
 
 	utilisation = (100 * diff->time_busy) / max(diff->time_busy + diff->time_idle, 1u);
 
+	mcu_utilisation = (100 * diff->busy_mcu) /
+			  max(diff->busy_mcu + diff->idle_mcu, 1u);
+
 #if !MALI_USE_CSF
 	busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
 
@@ -403,7 +443,7 @@
 	 * protected mode is already added to busy-time at this point, though,
 	 * so we should be good.
 	 */
-	kbase_platform_dvfs_event(kbdev, utilisation);
+	kbase_platform_dvfs_event_mcu(kbdev, utilisation, mcu_utilisation);
 #endif
 }
 
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index 070fd27..5f0f728 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -35,6 +35,8 @@
 
 #include <linux/of.h>
 
+#include <trace/hooks/systrace.h>
+
 static const struct kbase_pm_policy *const all_policy_list[] = {
 #if IS_ENABLED(CONFIG_MALI_NO_MALI)
 	&kbase_pm_always_on_policy_ops,
@@ -225,11 +227,13 @@
 {
 	unsigned long flags;
 
+	ATRACE_BEGIN(__func__);
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	kbase_pm_update_cores_state_nolock(kbdev);
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	ATRACE_END();
 }
 
 size_t kbase_pm_list_policies(struct kbase_device *kbdev,
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index bee5bad..40a8a16 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -244,6 +244,7 @@
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
 	err = kbase_insert_kctx_to_process(kctx);
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
+
 	if (err) {
 		dev_err(kctx->kbdev->dev, "(err:%d) failed to insert kctx to kbase_process", err);
 		if (likely(kctx->kfile)) {
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 59c82aa..f7692db 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -1870,8 +1870,6 @@
 	kbase_csf_kcpu_queue_context_term(kctx);
 	kbase_csf_scheduler_context_term(kctx);
 	kbase_csf_event_term(kctx);
-
-	rt_mutex_destroy(&kctx->csf.lock);
 }
 
 /**
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index 74f1142..44b6a82 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -27,6 +27,7 @@
 #include "mali_kbase_csf_firmware.h"
 #include "mali_kbase_csf_protected_memory.h"
 #include "mali_kbase_hwaccess_time.h"
+#include <mali_kbase_defs.h>
 
 /* Indicate invalid CS h/w interface
  */
@@ -48,12 +49,6 @@
  */
 #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
 
-/* 60ms optimizes power while minimizing latency impact for UI test cases. */
-#define FIRMWARE_IDLE_HYSTERESIS_TIME_NS (60 * 1000 * 1000) /* Default 60 milliseconds */
-
-/* Idle hysteresis time can be scaled down when GPU sleep feature is used */
-#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5)
-
 /**
  * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space.
  *
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index 1878646..60cc85a 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -1240,8 +1240,17 @@
  */
 #define DISABLE_GLB_PWROFF_TIMER (0)
 
-/* Index of the GPU_ACTIVE counter within the CSHW counter block */
-#define GPU_ACTIVE_CNT_IDX (4)
+/* Perf counter indices registered with IPA control; IPA_NUM_PERF_COUNTERS is the total */
+enum kbase_ipa_perf_counters {
+	ITER_ACTIVE_IDX = 0,
+	MCU_ACTIVE_IDX,
+	/* Append new perf counters above this line */
+	IPA_NUM_PERF_COUNTERS
+};
+/* Index of the MCU_ACTIVE counter within the CSHW counter block */
+#define IPA_MCU_ACTIVE_CNT_IDX (5)
+/* Index of the GPU_ITER_ACTIVE counter within the CSHW counter block */
+#define IPA_GPU_ITER_ACTIVE_CNT_IDX (6)
 
 /*
  * Maximum number of sessions that can be managed by the IPA Control component.
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 481b3f4..98887ab 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -62,7 +62,7 @@
 MODULE_PARM_DESC(fw_name, "firmware image");
 
 /* The waiting time for firmware to boot */
-static unsigned int csf_firmware_boot_timeout_ms;
+static unsigned int csf_firmware_boot_timeout_ms = (500 * KBASE_TIMEOUT_MULTIPLIER);
 module_param(csf_firmware_boot_timeout_ms, uint, 0444);
 MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, "Maximum time to wait for firmware to boot.");
 
@@ -264,6 +264,8 @@
 
 	if (!remaining)
 		dev_err(kbdev->dev, "Timed out waiting for fw boot completion");
+	else
+		dev_info(kbdev->dev, "Firmware boot completed");
 
 	kbdev->csf.interrupt_received = false;
 }
@@ -1789,6 +1791,12 @@
 
 	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
 
+	/* The GPU idle timer is always enabled for simplicity. Checks will be
+	 * done before scheduling the GPU idle worker to see if it is
+	 * appropriate for the current power policy.
+	 */
+	enable_gpu_idle_timer(kbdev);
+
 	/* Unmask the interrupts */
 	kbase_csf_firmware_global_input(global_iface, GLB_ACK_IRQ_MASK, ack_irq_mask);
 
@@ -2075,9 +2083,6 @@
 	 * gets serialized.
 	 */
 	mutex_lock(&kbdev->csf.reg_lock);
-	/* The firmware only reads the new idle timer value when the timer is
-	 * disabled.
-	 */
 
 	/* The firmware only reads the new idle timer value when the timer is
 	 * disabled.
@@ -2322,12 +2327,6 @@
 {
 	u32 no_modifier = 0;
 
-	kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS;
-
-#ifdef KBASE_PM_RUNTIME
-	if (kbase_pm_gpu_sleep_allowed(kbdev))
-		kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
-#endif
 	WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns);
 
 	kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
@@ -2551,12 +2550,6 @@
 	if (ret != 0)
 		goto err_out;
 
-	ret = kbase_csf_firmware_log_init(kbdev);
-	if (ret != 0) {
-		dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret);
-		goto err_out;
-	}
-
 	ret = kbase_csf_firmware_cfg_init(kbdev);
 	if (ret != 0)
 		goto err_out;
@@ -2565,6 +2558,15 @@
 	if (ret != 0)
 		goto err_out;
 
+	ret = kbase_csf_firmware_log_init(kbdev);
+	if (ret != 0) {
+		dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret);
+		goto err_out;
+	}
+
+	/* Firmware loaded successfully */
+	dev_info(kbdev->dev, "Firmware load successful");
+
 	if (kbdev->csf.fw_core_dump.available)
 		kbase_csf_firmware_core_dump_init(kbdev);
 
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 28554cd..9efe116 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1136,11 +1136,6 @@
 {
 	u32 no_modifier = 0;
 
-	kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS;
-#ifdef KBASE_PM_RUNTIME
-	if (kbase_pm_gpu_sleep_allowed(kbdev))
-		kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
-#endif
 	WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns);
 	kbdev->csf.gpu_idle_dur_count =
 		convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &no_modifier);
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index f1ddae4..6d80735 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -653,7 +653,7 @@
 
 int kbase_reset_gpu_init(struct kbase_device *kbdev)
 {
-	kbdev->csf.reset.workq = alloc_workqueue("Mali reset workqueue", 0, 1, WQ_HIGHPRI);
+	kbdev->csf.reset.workq = alloc_workqueue("Mali reset workqueue", WQ_HIGHPRI, 1);
 	if (kbdev->csf.reset.workq == NULL)
 		return -ENOMEM;
 
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index f1ff8bf..44d447f 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -4296,8 +4296,6 @@
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 	const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
 	const u32 available_csg_slots = scheduler->num_csg_slots_for_tick;
-	u32 suspend_cnt = 0;
-	u32 remain_cnt = 0;
 	u32 resident_cnt = 0;
 	struct kbase_queue_group *group;
 	u32 i;
@@ -4312,9 +4310,7 @@
 			resident_cnt++;
 			if (group->prepared_seq_num >= available_csg_slots) {
 				suspend_queue_group(group);
-				suspend_cnt++;
-			} else
-				remain_cnt++;
+			}
 		}
 	}
 
@@ -6550,8 +6546,12 @@
 	/* If scheduler is in sleep or suspended state, re-activate it
 	 * to serve on-slot CSGs blocked on CQS which has been signaled.
 	 */
-	if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
+	if (!sync_updated && (scheduler->state == SCHED_SLEEPING)) {
+		/* Wait for sleep transition to complete to ensure the
+		 * CS_STATUS_WAIT registers are updated by the MCU.
+		 */
 		check_sync_update_in_sleep_mode(kbdev);
+	}
 
 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
 
@@ -7053,7 +7053,7 @@
 			dev_warn(kbdev->dev, "failed to suspend active groups");
 			goto exit;
 		} else {
-			dev_dbg(kbdev->dev, "Scheduler PM suspend");
+			dev_info(kbdev->dev, "Scheduler PM suspend");
 			scheduler_suspend(kbdev);
 			cancel_tick_work(scheduler);
 		}
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 77f356e..0fae70a 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -340,6 +340,8 @@
 	{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
 	  "GPU property population failed" },
 	{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
+	{ kbase_pm_apc_init, kbase_pm_apc_term,
+	  "Asynchronous power control initialization failed" },
 #if IS_ENABLED(CONFIG_MALI_CORESIGHT)
 	{ kbase_debug_coresight_csf_init, kbase_debug_coresight_csf_term,
 	  "Coresight initialization failed" },
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 9d36394..1b9d579 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -281,6 +281,8 @@
 	  "GPU property population failed" },
 	{ NULL, kbase_dummy_job_wa_cleanup, NULL },
 	{ kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
+	{ kbase_pm_apc_init, kbase_pm_apc_term,
+	  "Asynchronous power control initialization failed" },
 };
 
 static void kbase_device_term_partial(struct kbase_device *kbdev, unsigned int i)
@@ -293,7 +295,6 @@
 
 void kbase_device_term(struct kbase_device *kbdev)
 {
-	kbase_pm_apc_term(kbdev);
 	kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init));
 	kbasep_js_devdata_halt(kbdev);
 	kbase_mem_halt(kbdev);
@@ -329,10 +330,6 @@
 	if (err)
 		return err;
 
-	err = kbase_pm_apc_init(kbdev);
-	if (err)
-		return err;
-
 	kthread_init_worker(&kbdev->event_worker);
 	kbdev->event_worker.task =
 		kthread_run(kthread_worker_fn, &kbdev->event_worker, "mali_event_thread");
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 802011a..5f9df52 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -535,6 +535,14 @@
 	if (err)
 		goto gpuprops_term;
 
+	/* Optional DT overrides for the RT thread uclamp range; default to
+	 * the full 0..1024 range when the properties are absent.
+	 */
+	if (of_property_read_u32(kbdev->dev->of_node, "kbase-rt-uclamp-min", &kbdev->uclamp_rt.min))
+		kbdev->uclamp_rt.min = 0;
+
+	if (of_property_read_u32(kbdev->dev->of_node, "kbase-rt-uclamp-max", &kbdev->uclamp_rt.max))
+		kbdev->uclamp_rt.max = 1024;
+
 	/* We're done accessing the GPU registers for now. */
 	kbase_pm_register_access_disable(kbdev);
 
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c
index 323dd3b..1c1a1e8 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c
@@ -24,7 +24,7 @@
 
 #define DEFAULT_SCALING_FACTOR 5
 
-/* If the value of GPU_ACTIVE is below this, use the simple model
+/* If the value of GPU_ITER_ACTIVE is below this, use the simple model
  * instead, to avoid extrapolating small amounts of counter data across
  * large sample periods.
  */
@@ -89,11 +89,17 @@
 	int err;
 	size_t i;
 
-	/* Value for GPU_ACTIVE counter also needs to be queried. It is required
-	 * for the normalization of top-level and shader core counters.
+	/* Value for GPU_ITER_ACTIVE counter also needs to be queried. It is
+	 * required for the normalization of top-level and shader core
+	 * counters.
+	 *
+	 * The MCU_ACTIVE counter is used alongside GPU_ITER_ACTIVE in this
+	 * function and all modifications pertaining to it are applicable for
+	 * MCU_ACTIVE as well.
 	 */
-	model_data->num_counters =
-		1 + model_data->num_top_level_cntrs + model_data->num_shader_cores_cntrs;
+	model_data->num_counters = IPA_NUM_PERF_COUNTERS +
+				   model_data->num_top_level_cntrs +
+				   model_data->num_shader_cores_cntrs;
 
 	perf_counters = kcalloc(model_data->num_counters, sizeof(*perf_counters), GFP_KERNEL);
 
@@ -102,12 +108,18 @@
 		return -ENOMEM;
 	}
 
-	/* Fill in the description for GPU_ACTIVE counter which is always
+	/* Fill in the description for GPU_ITER_ACTIVE counter which is always
 	 * needed, as mentioned above, regardless of the energy model used
 	 * by the CSF GPUs.
 	 */
 	perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW;
-	perf_counters[cnt_idx].idx = GPU_ACTIVE_CNT_IDX;
+	perf_counters[cnt_idx].idx = IPA_GPU_ITER_ACTIVE_CNT_IDX;
+	perf_counters[cnt_idx].gpu_norm = false;
+	perf_counters[cnt_idx].scaling_factor = 1;
+	cnt_idx++;
+
+	perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW;
+	perf_counters[cnt_idx].idx = IPA_MCU_ACTIVE_CNT_IDX;
 	perf_counters[cnt_idx].gpu_norm = false;
 	perf_counters[cnt_idx].scaling_factor = 1;
 	cnt_idx++;
@@ -289,7 +301,7 @@
 	 * 0 <= active_cycles < 2^31
 	 */
 	if (*cnt_values_p > U32_MAX) {
-		dev_warn(kbdev->dev, "Increment in GPU_ACTIVE counter more than expected");
+		dev_warn(kbdev->dev, "Increment in GPU_ITER_ACTIVE counter more than expected");
 		return -ERANGE;
 	}
 
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 6a22826..6393c58 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -188,7 +188,7 @@
  * Based on 75000ms timeout at nominal 100MHz, as is required for Android - based
  * on scaling from a 50MHz GPU system.
  */
-#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000ull)
+#define CSF_FIRMWARE_TIMEOUT_CYCLES (((u64)7500000000) * KBASE_TIMEOUT_MULTIPLIER)
 
 /* Timeout in clock cycles for GPU Power Management to reach the desired
  * Shader, L2 and MCU state.
@@ -274,7 +274,7 @@
 /* Default number of milliseconds given for other jobs on the GPU to be
  * soft-stopped when the GPU needs to be reset.
  */
-#define JM_DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+#define JM_DEFAULT_RESET_TIMEOUT_MS (3000 * KBASE_TIMEOUT_MULTIPLIER) /* 3s */
 
 /* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT
  * is updated on HW side so a Job Slot is considered free.
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index d757e8a..3f8d8ca 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -124,6 +124,8 @@
 
 #include <mali_kbase_caps.h>
 
+#include <linux/sched/types.h>
+
 #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
 
 /**
@@ -208,23 +210,20 @@
 
 static void kbase_set_sched_rt(struct kbase_device *kbdev, struct task_struct *task, char *thread_name)
 {
-	unsigned int i;
-	static const struct sched_param param = {
+	static struct sched_attr attr = {
+		.sched_policy = SCHED_FIFO,
+		/* Required for sched_util_min/sched_util_max to be honoured */
+		.sched_flags = SCHED_FLAG_UTIL_CLAMP,
+		.sched_priority = KBASE_RT_THREAD_PRIO,
+	};
 
-	cpumask_t mask = { CPU_BITS_NONE };
-	for (i = KBASE_RT_THREAD_CPUMASK_MIN; i <= KBASE_RT_THREAD_CPUMASK_MAX ; i++)
-		cpumask_set_cpu(i, &mask);
-	kthread_bind_mask(task, &mask);
+	attr.sched_util_min = kbdev->uclamp_rt.min;
+	attr.sched_util_max = kbdev->uclamp_rt.max;
 
 	wake_up_process(task);
-
-	if (sched_setscheduler_nocheck(task, SCHED_FIFO, &param))
-		dev_warn(kbdev->dev, "%s not set to RT prio", thread_name);
-	else
-		dev_dbg(kbdev->dev, "%s set to RT prio: %i",
-			thread_name, param.sched_priority);
+	if (sched_setattr_nocheck(task, &attr))
+		dev_warn(kbdev->dev, "%s attributes weren't set", thread_name);
+	else
+		dev_dbg(kbdev->dev, "%s set to RT prio: %i", thread_name, attr.sched_priority);
 }
 
 struct task_struct *kbase_kthread_run_rt(struct kbase_device *kbdev,
@@ -6199,6 +6198,11 @@
 	struct kbase_device *kbdev;
 	int err = 0;
 
+#if IS_ENABLED(CONFIG_GOOGLE_BCL)
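+	/* Defer probing until the battery current limiter driver is ready */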
+	if (!google_retrieve_bcl_handle())
+		return -EPROBE_DEFER;
+#endif
+
 	mali_kbase_print_cs_experimental();
 
 	kbdev = kbase_device_alloc();
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index 8c3d52e..5a55deb 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -188,6 +188,7 @@
 	if (new_ref_count == 0) {
-		kbasep_platform_context_idle(kctx);
 		if (likely((kctx->as_nr >= 0) && (kctx->as_nr < BASE_MAX_NR_AS))) {
+			kbasep_platform_context_idle(kctx);
 			kbdev->as_free |= (1u << kctx->as_nr);
 			if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
 				KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id);
diff --git a/mali_kbase/mali_kbase_debug.h b/mali_kbase/mali_kbase_debug.h
index 1bda7b7..f507086 100644
--- a/mali_kbase/mali_kbase_debug.h
+++ b/mali_kbase/mali_kbase_debug.h
@@ -90,7 +90,7 @@
 #ifdef CONFIG_MALI_DEBUG
 #define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
 #else
-#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#define KBASE_CALL_ASSERT_HOOK()
 #endif
 
 /**
@@ -104,7 +104,7 @@
 #define KBASE_DEBUG_ASSERT(expr) KBASE_DEBUG_ASSERT_MSG(expr, #expr)
 
 #if KBASE_DEBUG_DISABLE_ASSERTS
-#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) do { CSTD_UNUSED(expr); CSTD_NOP(__VA_ARGS__); } while (0)
 #else
 /**
  * KBASE_DEBUG_ASSERT_MSG() - Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the
@@ -133,7 +133,7 @@
 #ifdef CONFIG_MALI_DEBUG
 #define KBASE_DEBUG_CODE(X) X
 #else
-#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#define KBASE_DEBUG_CODE(X) CSTD_NOP(X)
 #endif /* CONFIG_MALI_DEBUG */
 
 /** @} */
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 81e67bf..11441d3 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -76,8 +76,17 @@
 #include <linux/regulator/consumer.h>
 #include <linux/rtmutex.h>
 
+/*
+ * KBASE_TIMEOUT_MULTIPLIER - Multiplier on kernel timeout constants, useful
+ * when running on slow emulation platforms.
+ */
+#ifdef CONFIG_BOARD_EMULATOR
+#define KBASE_TIMEOUT_MULTIPLIER (1000)
+#else
+#define KBASE_TIMEOUT_MULTIPLIER (1)
+#endif
+
 /** Number of milliseconds before we time out on a GPU soft/hard reset */
-#define RESET_TIMEOUT 500
+#define RESET_TIMEOUT (500 * KBASE_TIMEOUT_MULTIPLIER)
 
 /**
  * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware.
@@ -118,17 +127,6 @@
  */
 #define KBASE_RT_THREAD_PRIO (2)
 
-/* TODO(b/181145264) get the following two numbers from device tree */
-/**
- * First CPU in the contiguous CPU mask used for realtime worker threads.
- */
-#define KBASE_RT_THREAD_CPUMASK_MIN (0)
-
-/**
- * Last CPU in the contiguous CPU mask used for realtime worker threads.
- */
-#define KBASE_RT_THREAD_CPUMASK_MAX (3)
-
 /**
  * Minimum allowed wake duration in usec for apc request.
  */
@@ -788,6 +786,7 @@
  * @reg_size:              Size of the region containing GPU registers
  * @reg:                   Kernel virtual address of the region containing GPU
  *                         registers, using which Driver will access the registers.
+ * @uclamp_rt:             UClamp min and max values applied to RT threads spawned by the driver
  * @regmap:                Top level structure for hw_access regmaps, containing
  *                         the size of the regmap, pointers to Look-Up Tables (LUT).
  * @regmap.regs:           Pointer to regmap LUT of precomputed iomem pointers from
@@ -1127,6 +1126,11 @@
 	size_t reg_size;
 	void __iomem *reg;
 	struct {
+		u32 min;
+		u32 max;
+	} uclamp_rt;
+
+	struct {
 		void __iomem **regs;
 		u32 *flags;
 		size_t size;
@@ -1383,12 +1387,18 @@
 
 	struct {
 		struct kthread_worker worker;
+#if !MALI_USE_CSF
+		// APC ioctl for core domain
 		struct kthread_work power_on_work;
 		struct kthread_work power_off_work;
 		ktime_t end_ts;
 		struct hrtimer timer;
 		bool pending;
 		struct mutex lock;
+#else
+		// sysfs power hint for CSF scheduler
+		struct kthread_work wakeup_csf_scheduler_work;
+#endif
 	} apc;
 
 	struct rb_root process_root;
@@ -1417,6 +1427,9 @@
 
 	struct kobject *proc_sysfs_node;
 
+#ifdef CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL
+	struct device *s2mpu_dev;
+#endif /* CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL */
 	struct kbase_mem_migrate mem_migrate;
 
 #if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index e6c31d4..2b59054 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -203,10 +203,14 @@
 	struct kbasep_gpuprops_regdump *regdump;
 
 	int i, err;
+	u64 gpu_features_mask = 0;
 
 	if (WARN_ON(kbdev == NULL) || WARN_ON(kbdev->gpu_props.priv_data == NULL))
 		return -EINVAL;
 
+	if (WARN_ON(kbdev->dev == NULL) || WARN_ON(kbdev->dev->of_node == NULL))
+		return -EINVAL;
+
 	gpu_props = &kbdev->gpu_props;
 	regdump = &PRIV_DATA_REGDUMP(kbdev);
 
@@ -242,8 +246,13 @@
 	gpu_props->impl_tech = KBASE_UBFX32(regdump->thread_features, 30U, 2);
 #endif /* MALI_USE_CSF */
 
+	err = of_property_read_u64(kbdev->dev->of_node, "gpu_features_mask", &gpu_features_mask);
+	// In case of error, just accept all the features reported by the HW.
+	if (err)
+		gpu_features_mask = 0;
+
 	/* Features */
-	kbase_gpuprops_parse_gpu_features(&gpu_props->gpu_features, regdump->gpu_features);
+	kbase_gpuprops_parse_gpu_features(&gpu_props->gpu_features, regdump->gpu_features & ~gpu_features_mask);
 
 	gpu_props->coherency_info.coherent_core_group = KBASE_UBFX64(regdump->mem_features, 0U, 1);
 	gpu_props->coherency_info.coherent_super_group = KBASE_UBFX64(regdump->mem_features, 1U, 1);
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index 36e5431..ab72cb0 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -28,6 +28,10 @@
 #include <mali_kbase_kinstr_prfcnt.h>
 #include <hwcnt/mali_kbase_hwcnt_context.h>
 
+#if MALI_USE_CSF
+#include <csf/mali_kbase_csf_scheduler.h>
+#endif
+
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
@@ -37,6 +41,8 @@
 
 #include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
 
+#include <trace/hooks/systrace.h>
+
 int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
 {
 	return kbase_hwaccess_pm_powerup(kbdev, flags);
@@ -57,6 +63,7 @@
 {
 	int c;
 
+	ATRACE_BEGIN(__func__);
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, suspend_handler,
 		current->pid);
@@ -65,6 +72,7 @@
 #ifdef CONFIG_MALI_ARBITER_SUPPORT
 	if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) {
 		kbase_pm_unlock(kbdev);
+		ATRACE_END();
 		return 1;
 	}
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
@@ -77,6 +85,7 @@
 			fallthrough;
 		case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
 			kbase_pm_unlock(kbdev);
+			ATRACE_END();
 			return 1;
 
 		case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
@@ -102,6 +111,7 @@
 
 	kbase_pm_unlock(kbdev);
 	dev_dbg(kbdev->dev, "%s %d\n", __func__, kbdev->pm.active_count);
+	ATRACE_END();
 
 	return 0;
 }
@@ -112,6 +122,7 @@
 {
 	int c;
 
+	ATRACE_BEGIN(__func__);
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
 	kbase_pm_lock(kbdev);
@@ -135,6 +146,7 @@
 
 	kbase_pm_unlock(kbdev);
 	dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, kbdev->pm.active_count, current->pid);
+	ATRACE_END();
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
@@ -369,6 +381,7 @@
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
 }
 
+#if !MALI_USE_CSF
 /**
  * kbase_pm_apc_power_off_worker - Power off worker running on mali_apc_thread
  * @data: A &struct kthread_work
@@ -516,6 +529,14 @@
 
 	return HRTIMER_NORESTART;
 }
+#else
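+/**
+ * kbase_pm_apc_wakeup_csf_scheduler_worker - Wake up the CSF scheduler on
+ *                                            mali_apc_thread
+ * @data: A &struct kthread_work
+ */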
+static void kbase_pm_apc_wakeup_csf_scheduler_worker(struct kthread_work *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device, apc.wakeup_csf_scheduler_work);
+
+	kbase_csf_scheduler_force_wakeup(kbdev);
+}
+#endif
 
 int kbase_pm_apc_init(struct kbase_device *kbdev)
 {
@@ -525,6 +546,7 @@
 	if (ret)
 		return ret;
 
+#if !MALI_USE_CSF
 	/*
 	 * We initialize power off and power on work on init as they will each
 	 * only operate on one worker.
@@ -536,12 +558,18 @@
 	kbdev->apc.timer.function = kbase_pm_apc_timer_callback;
 
 	mutex_init(&kbdev->apc.lock);
+#else
+	kthread_init_work(&kbdev->apc.wakeup_csf_scheduler_work, kbase_pm_apc_wakeup_csf_scheduler_worker);
+#endif
 
 	return 0;
 }
 
 void kbase_pm_apc_term(struct kbase_device *kbdev)
 {
+#if !MALI_USE_CSF
 	hrtimer_cancel(&kbdev->apc.timer);
+#endif
+
 	kbase_destroy_kworker_stack(&kbdev->apc.worker);
 }
diff --git a/mali_kbase/mali_kbase_pm.h b/mali_kbase/mali_kbase_pm.h
index 9bbd69b..fa03f5d 100644
--- a/mali_kbase/mali_kbase_pm.h
+++ b/mali_kbase/mali_kbase_pm.h
@@ -276,6 +276,7 @@
  */
 void kbase_pm_apc_term(struct kbase_device *kbdev);
 
+#if !MALI_USE_CSF
 /**
  * kbase_pm_apc_request - Handle APC power on request
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
@@ -293,6 +294,7 @@
  * and will result in no APC work being queued.
  */
 void kbase_pm_apc_request(struct kbase_device *kbdev, u32 dur_usec);
+#endif
 
 /**
  * Print debug message indicating power state of GPU
diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h
index 1a9aa29..43e8e72 100644
--- a/mali_kbase/mali_linux_trace.h
+++ b/mali_kbase/mali_linux_trace.h
@@ -46,6 +46,46 @@
 		      __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id));
 
 /**
+ * mali_pm_mcu_state - Reports changes to the MCU state machine
+ * @from: initial state
+ * @to: final state
+ */
+TRACE_EVENT(mali_pm_mcu_state,
+	TP_PROTO(u32 from, u32 to),
+	TP_ARGS(from, to),
+	TP_STRUCT__entry(
+		__field(u32, from)
+		__field(u32, to)
+	),
+	TP_fast_assign(
+		__entry->from = from;
+		__entry->to = to;
+	),
+	TP_printk("from=%u to=%u",
+		__entry->from, __entry->to)
+);
+
+/**
+ * mali_pm_l2_state - Reports changes to the L2 state machine
+ * @from: initial state
+ * @to: final state
+ */
+TRACE_EVENT(mali_pm_l2_state,
+	TP_PROTO(u32 from, u32 to),
+	TP_ARGS(from, to),
+	TP_STRUCT__entry(
+		__field(u32, from)
+		__field(u32, to)
+	),
+	TP_fast_assign(
+		__entry->from = from;
+		__entry->to = to;
+	),
+	TP_printk("from=%u to=%u",
+		__entry->from, __entry->to)
+);
+
+/**
  * mali_pm_status - Reports change of power management status.
  * @gpu_id:   Kbase device id
  * @event_id: Core type (shader, tiler, L2 cache)
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index e5b3706..b674c4d 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1933,6 +1933,8 @@
  * @level_high: The higher bound for the levels for which the PGD allocs are required
  * @new_pgds:   Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
  *              newly allocated PGD addresses to.
+ * @pool_grown: Set to true if the memory pool had to grow to allocate new PGDs,
+ *              false otherwise
  *
  * Numerically, level_low < level_high, not to be confused with top level and
  * bottom level concepts for MMU PGDs. They are only used as low and high bounds
@@ -1943,14 +1945,18 @@
  * * -ENOMEM - allocation failed for a PGD.
  */
 static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
-				 phys_addr_t *new_pgds, int level_low, int level_high)
+				 phys_addr_t *new_pgds, int level_low, int level_high,
+				 bool *pool_grown)
 {
 	int err = 0;
 	int i;
 
 	lockdep_assert_held(&mmut->mmu_lock);
 
+	*pool_grown = false;
 	for (i = level_low; i <= level_high; i++) {
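+		/* Skip levels already allocated by a previous attempt */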
+		if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
+			continue;
 		do {
 			new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
 			if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
@@ -1963,17 +1969,9 @@
 			if (err) {
 				dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
 					__func__, err);
-
-				/* Free all PGDs allocated in previous successful iterations
-				 * from (i-1) to level_low
-				 */
-				for (i = (i - 1); i >= level_low; i--) {
-					if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
-						kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
-				}
-
 				return err;
 			}
+			*pool_grown = true;
 		} while (1);
 	}
 
@@ -2003,6 +2001,8 @@
 	if (WARN_ON(kctx == NULL))
 		return -EINVAL;
 
+	lockdep_assert_held(&kctx->reg_lock);
+
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
 
@@ -2038,6 +2038,7 @@
 		struct page *p;
 		register unsigned int num_of_valid_entries;
 		bool newly_created_pgd = false;
+		bool pool_grown;
 
 		if (count > remain)
 			count = remain;
@@ -2045,6 +2046,10 @@
 		cur_level = MIDGARD_MMU_BOTTOMLEVEL;
 		insert_level = cur_level;
 
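+		/* Invalidate all candidate levels so a retry after growing
+		 * the memory pool only allocates the PGDs still missing.
+		 */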
+		for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++)
+			new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS;
+
+repeat_page_table_walk:
 		/*
 		 * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
 		 * suboptimal. We don't have to re-parse the whole tree
@@ -2059,7 +2064,7 @@
 		if (err) {
 			dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
 				__func__, err);
-			goto fail_unlock;
+			goto fail_unlock_free_pgds;
 		}
 
 		/* No valid pgd at cur_level */
@@ -2068,9 +2073,12 @@
 			 * down to the lowest valid pgd at insert_level
 			 */
 			err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
-						    cur_level);
+						    cur_level, &pool_grown);
 			if (err)
-				goto fail_unlock;
+				goto fail_unlock_free_pgds;
+
+			if (pool_grown)
+				goto repeat_page_table_walk;
 
 			newly_created_pgd = true;
 
@@ -2156,9 +2164,9 @@
 fail_unlock_free_pgds:
 	/* Free the pgds allocated by us from insert_level+1 to bottom level */
 	for (l = cur_level; l > insert_level; l--)
-		kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+		if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS)
+			kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
 
-fail_unlock:
 	if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) {
 		/* Invalidate the pages we have partially completed */
 		mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE,
@@ -2304,6 +2312,9 @@
 	int l, cur_level, insert_level;
 	struct tagged_addr *start_phys = phys;
 
+	if (mmut->kctx)
+		lockdep_assert_held(&mmut->kctx->reg_lock);
+
 	/* Note that 0 is a valid start_vpfn */
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
@@ -2326,6 +2337,7 @@
 		register unsigned int num_of_valid_entries;
 		bool newly_created_pgd = false;
 		enum kbase_mmu_op_type flush_op;
+		bool pool_grown;
 
 		if (count > remain)
 			count = remain;
@@ -2337,6 +2349,10 @@
 
 		insert_level = cur_level;
 
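+		/* As above, invalidate candidate levels before the walk */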
+		for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++)
+			new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS;
+
+repeat_page_table_walk:
 		/*
 		 * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
 		 * suboptimal. We don't have to re-parse the whole tree
@@ -2351,7 +2367,7 @@
 		if (err) {
 			dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
 				__func__, err);
-			goto fail_unlock;
+			goto fail_unlock_free_pgds;
 		}
 
 		/* No valid pgd at cur_level */
@@ -2360,9 +2376,12 @@
 			 * down to the lowest valid pgd at insert_level
 			 */
 			err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
-						    cur_level);
+						    cur_level, &pool_grown);
 			if (err)
-				goto fail_unlock;
+				goto fail_unlock_free_pgds;
+
+			if (pool_grown)
+				goto repeat_page_table_walk;
 
 			newly_created_pgd = true;
 
@@ -2476,9 +2495,9 @@
 fail_unlock_free_pgds:
 	/* Free the pgds allocated by us from insert_level+1 to bottom level */
 	for (l = cur_level; l > insert_level; l--)
-		kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+		if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS)
+			kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
 
-fail_unlock:
 	if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) {
 		/* Invalidate the pages we have partially completed */
 		mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE,
@@ -3884,8 +3903,6 @@
 		if (mmut->kctx)
 			KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
 	}
-
-	rt_mutex_destroy(&mmut->mmu_lock);
 }
 
 void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i)
diff --git a/mali_kbase/platform/pixel/Kbuild b/mali_kbase/platform/pixel/Kbuild
index 17692af..c35c0be 100644
--- a/mali_kbase/platform/pixel/Kbuild
+++ b/mali_kbase/platform/pixel/Kbuild
@@ -31,6 +31,9 @@
 	platform/$(MALI_PLATFORM_DIR)/pixel_gpu_dvfs_metrics.o \
 	platform/$(MALI_PLATFORM_DIR)/pixel_gpu_sysfs.o
 
+mali_kbase-$(CONFIG_MALI_PIXEL_GPU_PM) += \
+	platform/$(MALI_PLATFORM_DIR)/pixel_gpu_power.o
+
 mali_kbase-$(CONFIG_MALI_PIXEL_GPU_QOS) += \
 	platform/$(MALI_PLATFORM_DIR)/pixel_gpu_dvfs_qos.o
 
diff --git a/mali_kbase/platform/pixel/mali_kbase_config_platform.h b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
index f4ae8dc..4ad3318 100644
--- a/mali_kbase/platform/pixel/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
@@ -37,7 +37,12 @@
  * Attached value: pointer to @ref kbase_pm_callback_conf
  * Default value: See @ref kbase_pm_callback_conf
  */
+#ifdef CONFIG_MALI_PIXEL_GPU_PM
 #define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+extern struct kbase_pm_callback_conf pm_callbacks;
+#else
+#define POWER_MANAGEMENT_CALLBACKS (NULL)
+#endif
 
 /**
  * Clock Rate Trace configuration functions
@@ -58,7 +63,6 @@
  */
 #define PLATFORM_FUNCS (&platform_funcs)
 
-extern struct kbase_pm_callback_conf pm_callbacks;
 extern struct kbase_platform_funcs_conf platform_funcs;
 
 #ifdef CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING
@@ -108,7 +112,7 @@
 #include "pixel_gpu_uevent.h"
 
 /* All port specific fields go here */
-#define OF_DATA_NUM_MAX 140
+#define OF_DATA_NUM_MAX 200
 #define CPU_FREQ_MAX INT_MAX
 
 enum gpu_power_state {
@@ -175,6 +179,10 @@
  *                operating point.
  * @util_max:     The maximum threshold of utlization before the governor should consider moving to
  *                a higher operating point.
+ * @mcu_util_max: The MCU utilization threshold above which the governor
+ *                should consider moving to a higher OPP.
+ * @mcu_util_min: The MCU utilization threshold below which, if GPU
+ *                utilization is also low, DVFS should consider moving to a lower OPP.
  * @hysteresis:   A measure of how long the governor should keep the GPU at this operating point
  *                before moving to a lower one. For example, in the basic governor, this translates
  *                directly into &hr_timer ticks for the Mali DVFS utilization thread, but other
@@ -200,6 +208,8 @@
 	int util_min;
 	int util_max;
 	int hysteresis;
+	int mcu_util_max;
+	int mcu_util_min;
 
 	/* Metrics */
 	struct gpu_dvfs_opp_metrics metrics;
@@ -241,6 +251,13 @@
  * @pm.autosuspend_delay:       Delay (in ms) before PM runtime should trigger auto suspend on TOP
  *                              domain if use_autosuspend is true.
  * @pm.bcl_dev:                 Pointer to the Battery Current Limiter device.
+ * @pm.firmware_idle_hysteresis_time_ms: The duration of the GPU idle hysteresis in milliseconds. Set via DT.
+ * @pm.firmware_idle_hysteresis_gpu_sleep_scaler: Factor for calculating the GPU idle hysteresis
+ *                              when GPU sleep is enabled. &csf.gpu_idle_hysteresis_ns eventually becomes
+ *                              (firmware_idle_hysteresis_time_ms / firmware_idle_hysteresis_gpu_sleep_scaler).
+ *                              Set via DT.
+ * @pm.cores_suspend_hysteresis_time_ms: Hysteresis timeout for suspending the CORES domain. Set via DT.
+ * @pm.top_suspend_hysteresis_time_ms:   Hysteresis timeout for suspending the TOP domain. Set via DT.
  *
  * @tz_protection_enabled:      Storing the secure rendering state of the GPU. Access to this is
  *                              controlled by the HW access lock for the GPU associated with @kbdev.
@@ -252,6 +269,7 @@
  *                              incoming utilization data from the Mali driver into DVFS changes on
  *                              the GPU.
  * @dvfs.util:                  Stores incoming utilization metrics from the Mali driver.
+ * @dvfs.mcu_util:              Stores incoming MCU utilisation metrics.
  * @dvfs.util_gl:               Percentage of utilization from a non-OpenCL work
  * @dvfs.util_cl:               Percentage of utilization from a OpenCL work.
  * @dvfs.clockdown_wq:          Delayed workqueue for clocking down the GPU after it has been idle
@@ -329,8 +347,15 @@
 #endif /* CONFIG_MALI_MIDGARD_DVFS */
 #if IS_ENABLED(CONFIG_GOOGLE_BCL)
 		struct bcl_device *bcl_dev;
+		struct notifier_block qos_nb;
 #endif
 		struct pixel_rail_state_log *rail_state_log;
+		unsigned int firmware_idle_hysteresis_time_ms;
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+		unsigned int firmware_idle_hysteresis_gpu_sleep_scaler;
+		unsigned int cores_suspend_hysteresis_time_ms;
+		unsigned int top_suspend_hysteresis_time_ms;
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
 	} pm;
 
 #ifdef CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING
@@ -344,6 +369,7 @@
 		struct workqueue_struct *control_wq;
 		struct work_struct control_work;
 		atomic_t util;
+		atomic_t mcu_util;
 #if !MALI_USE_CSF
 		atomic_t util_gl;
 		atomic_t util_cl;
@@ -360,6 +386,9 @@
 		int table_size;
 		int step_up_val;
 		int level;
+#if MALI_USE_CSF
+		int level_before_headroom;
+#endif
 		int level_target;
 		int level_max;
 		int level_min;
@@ -367,6 +396,12 @@
 		int level_scaling_min;
 		int level_scaling_compute_min;
 		struct gpu_dvfs_level_lock level_locks[GPU_DVFS_LEVEL_LOCK_COUNT];
+#if MALI_USE_CSF
+		u32 capacity_headroom;
+		u32 capacity_history[8];
+		u8 capacity_history_depth;
+		u8 capacity_history_index;
+#endif
 
 		struct {
 			enum gpu_dvfs_governor_type curr;
@@ -386,6 +421,14 @@
 #endif /* !MALI_USE_CSF */
 			struct list_head uid_stats_list;
 		} metrics;
+#if MALI_USE_CSF
+		struct {
+			int mcu_protm_scale_num;
+			int mcu_protm_scale_den;
+			int mcu_down_util_scale_num;
+			int mcu_down_util_scale_den;
+		} tunable;
+#endif
 
 #ifdef CONFIG_MALI_PIXEL_GPU_QOS
 		struct {
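The mcu_util_min/mcu_util_max thresholds documented above drive the MCU-aware governors added later in this patch. A minimal standalone sketch of the up/down decision they imply, with illustrative threshold values (not taken from any real DVFS table):

    #include <stdio.h>

    /* Illustrative thresholds for one OPP row; real values come from DT. */
    struct opp { int util_min, util_max, mcu_util_min, mcu_util_max; };

    /* Returns +1 to clock up, -1 to clock down, 0 to hold, mirroring the
     * quickstep_use_mcu rule: up if either utilization exceeds its max,
     * down only if both fall below their min. */
    static int decide(const struct opp *o, int util, int mcu_util)
    {
        if (util > o->util_max || mcu_util > o->mcu_util_max)
            return 1;
        if (util < o->util_min && mcu_util < o->mcu_util_min)
            return -1;
        return 0;
    }

    int main(void)
    {
        struct opp o = { .util_min = 60, .util_max = 90,
                         .mcu_util_min = 10, .mcu_util_max = 40 };

        printf("%d\n", decide(&o, 95, 5));  /* 1: GPU busy, clock up */
        printf("%d\n", decide(&o, 50, 50)); /* 1: MCU busy, clock up */
        printf("%d\n", decide(&o, 50, 5));  /* -1: both idle, clock down */
        return 0;
    }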
diff --git a/mali_kbase/platform/pixel/pixel_gpu.c b/mali_kbase/platform/pixel/pixel_gpu.c
index 6e30053..1ae6db6 100644
--- a/mali_kbase/platform/pixel/pixel_gpu.c
+++ b/mali_kbase/platform/pixel/pixel_gpu.c
@@ -13,7 +13,7 @@
 
 /* SOC includes */
 #ifdef CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING
-#include <soc/samsung/exynos-smc.h>
+#include <linux/soc/samsung/exynos-smc.h>
 #endif /* CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING */
 
 /* Mali core includes */
@@ -193,7 +193,51 @@
 	kctx->platform_data = NULL;
 }
 
+#ifdef CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL
+/**
+ * gpu_s2mpu_init - Initialize S2MPU for G3D
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ *
+ * Return: On success, returns 0. On failure an error code is returned.
+ */
+static int gpu_s2mpu_init(struct kbase_device *kbdev)
+{
+	int ret = 0;
+	struct device_node *np;
+	struct platform_device *pdev;
+
+	/*
+	 * We expect the "s2mpus" entry in the device tree to point to the GPU S2MPU device
+	 */
+	np = of_parse_phandle(kbdev->dev->of_node, "s2mpus", 0);
+	if (!np) {
+		dev_err(kbdev->dev, "No 's2mpus' entry found in the device tree\n");
+		ret = -ENODEV;
+		goto done;
+	}
+
+	pdev = of_find_device_by_node(np);
+	of_node_put(np);
+	if (!pdev) {
+		dev_err(kbdev->dev, "No device specified in 's2mpus' device node\n");
+		ret = -ENODEV;
+		goto done;
+	}
+
+	kbdev->s2mpu_dev = &pdev->dev;
+	dev_info(kbdev->dev, "s2mpu device %s successfully configured\n",
+				dev_name(kbdev->s2mpu_dev));
+
+done:
+	return ret;
+}
+#endif /* CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL */
+
 static const struct kbase_device_init dev_init[] = {
+#ifdef CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL
+	{ gpu_s2mpu_init, NULL, "S2MPU init failed" },
+#endif /* CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL */
 	{ gpu_pm_init, gpu_pm_term, "PM init failed" },
 #ifdef CONFIG_MALI_MIDGARD_DVFS
 	{ gpu_dvfs_init, gpu_dvfs_term, "DVFS init failed" },
@@ -276,10 +320,12 @@
 struct kbase_platform_funcs_conf platform_funcs = {
 	.platform_init_func = &gpu_pixel_init,
 	.platform_term_func = &gpu_pixel_term,
+#ifdef CONFIG_MALI_MIDGARD_DVFS
 	.platform_handler_context_init_func = &gpu_pixel_kctx_init,
 	.platform_handler_context_term_func = &gpu_pixel_kctx_term,
 	.platform_handler_work_begin_func = &gpu_dvfs_metrics_work_begin,
 	.platform_handler_work_end_func = &gpu_dvfs_metrics_work_end,
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
 	.platform_handler_context_active = &gpu_slc_kctx_active,
 	.platform_handler_context_idle = &gpu_slc_kctx_idle,
 	.platform_handler_tick_tock = &gpu_slc_tick_tock,
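gpu_s2mpu_init() above slots into the dev_init[] table, which pairs each init step with its terminate callback so a failure can unwind everything done so far. A standalone sketch of that init/term table pattern, with hypothetical names rather than the driver's actual types:

    #include <stdio.h>

    struct dev { int dummy; };

    struct dev_init {
        int (*init)(struct dev *d);
        void (*term)(struct dev *d);
        const char *err_msg;
    };

    static int a_init(struct dev *d) { (void)d; return 0; }
    static void a_term(struct dev *d) { (void)d; }
    static int b_init(struct dev *d) { (void)d; return -1; /* simulate failure */ }

    static const struct dev_init inits[] = {
        { a_init, a_term, "A init failed" },
        { b_init, NULL,   "B init failed" },
    };

    int main(void)
    {
        struct dev d = { 0 };
        int i, n = sizeof(inits) / sizeof(inits[0]);

        for (i = 0; i < n; i++) {
            if (inits[i].init(&d)) {
                fprintf(stderr, "%s\n", inits[i].err_msg);
                /* Unwind in reverse order, skipping entries that
                 * have no term callback (like the S2MPU entry). */
                while (--i >= 0)
                    if (inits[i].term)
                        inits[i].term(&d);
                return 1;
            }
        }
        return 0;
    }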
diff --git a/mali_kbase/platform/pixel/pixel_gpu_control.h b/mali_kbase/platform/pixel/pixel_gpu_control.h
index 51b3063..d3420f9 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_control.h
+++ b/mali_kbase/platform/pixel/pixel_gpu_control.h
@@ -9,17 +9,26 @@
 #define _PIXEL_GPU_CONTROL_H_
 
 /* Power management */
+#ifdef CONFIG_MALI_PIXEL_GPU_PM
+bool gpu_pm_get_power_state_nolock(struct kbase_device *kbdev);
 bool gpu_pm_get_power_state(struct kbase_device *kbdev);
 int gpu_pm_init(struct kbase_device *kbdev);
 void gpu_pm_term(struct kbase_device *kbdev);
 void* gpu_pm_get_rail_state_log(struct kbase_device *kbdev);
 unsigned int gpu_pm_get_rail_state_log_size(struct kbase_device *kbdev);
+#else
+static bool __maybe_unused gpu_pm_get_power_state(struct kbase_device *kbdev) { return true; }
+static int __maybe_unused gpu_pm_init(struct kbase_device *kbdev) { return 0; }
+static void __maybe_unused gpu_pm_term(struct kbase_device *kbdev) {}
+static void* __maybe_unused gpu_pm_get_rail_state_log(struct kbase_device *kbdev) { return NULL; }
+static unsigned int __maybe_unused gpu_pm_get_rail_state_log_size(struct kbase_device *kbdev) { return 0; }
+#endif
 
 /* DVFS */
+#ifdef CONFIG_MALI_MIDGARD_DVFS
 void gpu_dvfs_event_power_on(struct kbase_device *kbdev);
 void gpu_dvfs_event_power_off(struct kbase_device *kbdev);
 
-#ifdef CONFIG_MALI_MIDGARD_DVFS
 int gpu_dvfs_init(struct kbase_device *kbdev);
 void gpu_dvfs_term(struct kbase_device *kbdev);
 void gpu_dvfs_disable_updates(struct kbase_device *kbdev);
diff --git a/mali_kbase/platform/pixel/pixel_gpu_dvfs.c b/mali_kbase/platform/pixel/pixel_gpu_dvfs.c
index f758867..c72ee67 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_dvfs.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_dvfs.c
@@ -15,9 +15,11 @@
 #if IS_ENABLED(CONFIG_CAL_IF)
 #include <soc/google/cal-if.h>
 #endif
+#include <soc/google/gs_tmu_v3.h>
 
 /* Mali core includes */
 #include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_pm_event_log.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
 /* Pixel integration includes */
@@ -26,6 +28,8 @@
 #include "pixel_gpu_dvfs.h"
 #include "pixel_gpu_trace.h"
 
+#include <trace/hooks/systrace.h>
+
 #define DVFS_TABLE_ROW_MAX (14)
 #define DVFS_TABLES_MAX (2)
 static struct gpu_dvfs_opp gpu_dvfs_table[DVFS_TABLE_ROW_MAX];
@@ -45,11 +49,23 @@
  */
 static int gpu_dvfs_set_freq(struct kbase_device *kbdev, enum gpu_dvfs_clk_index domain, int level)
 {
+#if IS_ENABLED(CONFIG_CAL_IF)
 	struct pixel_context *pc = kbdev->platform_context;
+	struct kbase_pm_event_log_event *event = NULL;
 
 	lockdep_assert_held(&pc->pm.domain->access_lock);
 
+	event = kbase_pm_add_log_event(kbdev);
+	event->type = KBASE_PM_LOG_EVENT_DVFS_CHANGE;
+	event->dvfs.domain = domain;
+	event->dvfs.prev = pc->dvfs.table[pc->dvfs.level].clk[domain];
+	event->dvfs.next = pc->dvfs.table[level].clk[domain];
+
 	return cal_dfs_set_rate(pc->dvfs.clks[domain].cal_id, pc->dvfs.table[level].clk[domain]);
+#else
+	(void)kbdev, (void)domain, (void)level;
+	return -1;
+#endif /* CONFIG_CAL_IF */
 }
 
 /**
@@ -73,7 +89,16 @@
 		gpu_dvfs_qos_set(kbdev, next_level);
 #endif /* CONFIG_MALI_PIXEL_GPU_QOS */
 
+#if IS_ENABLED(CONFIG_EXYNOS_PD)
 	mutex_lock(&pc->pm.domain->access_lock);
+#endif /* CONFIG_EXYNOS_PD */
+
+	if (!gpu_pm_get_power_state_nolock(kbdev)) {
+#if IS_ENABLED(CONFIG_EXYNOS_PD)
+		mutex_unlock(&pc->pm.domain->access_lock);
+#endif /* CONFIG_EXYNOS_PD */
+		return 0;
+	}
 
 	/* We must enforce the CLK_G3DL2 >= CLK_G3D constraint.
 	 * When clocking down we must set G3D CLK first to avoid violating the constraint.
@@ -86,8 +111,9 @@
 		gpu_dvfs_set_freq(kbdev, GPU_DVFS_CLK_SHADERS, next_level);
 	}
 
-
+#if IS_ENABLED(CONFIG_EXYNOS_PD)
 	mutex_unlock(&pc->pm.domain->access_lock);
+#endif /* CONFIG_EXYNOS_PD */
 
 	gpu_dvfs_metrics_update(kbdev, pc->dvfs.level, next_level, true);
 
@@ -223,6 +249,7 @@
 {
 	struct pixel_context *pc = kbdev->platform_context;
 
+	ATRACE_BEGIN(__func__);
 	mutex_lock(&pc->dvfs.lock);
 	if (pc->dvfs.level_target != pc->dvfs.level)
 		gpu_dvfs_select_level(kbdev);
@@ -233,6 +260,8 @@
 	mutex_unlock(&pc->dvfs.lock);
 
 	cancel_delayed_work(&pc->dvfs.clockdown_work);
+
+	ATRACE_END();
 }
 
 /**
@@ -334,7 +363,9 @@
 
 	if (pc->dvfs.updates_enabled && gpu_pm_get_power_state(kbdev)) {
 		util_stats.util = atomic_read(&pc->dvfs.util);
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+		util_stats.mcu_util = atomic_read(&pc->dvfs.mcu_util);
+#else
 		util_stats.util_gl = atomic_read(&pc->dvfs.util_gl);
 		util_stats.util_cl = atomic_read(&pc->dvfs.util_cl);
 #endif
@@ -372,7 +403,12 @@
 	struct pixel_context *pc = kbdev->platform_context;
 
 	mutex_lock(&pc->dvfs.lock);
-	pc->dvfs.updates_enabled = false;
+	/* TODO (289541794): guard all calls to gpu_dvfs_[en,dis]able_updates with PM state machine */
+	if (pc->dvfs.updates_enabled) {
+		pc->dvfs.updates_enabled = false;
+		if (set_acpm_tj_power_status(TZ_GPU, false))
+			dev_err(kbdev->dev, "Failed to set Tj power off status\n");
+	}
 	mutex_unlock(&pc->dvfs.lock);
 
 	flush_workqueue(pc->dvfs.control_wq);
@@ -390,7 +426,13 @@
 	struct pixel_context *pc = kbdev->platform_context;
 
 	mutex_lock(&pc->dvfs.lock);
-	pc->dvfs.updates_enabled = true;
+	if (!pc->dvfs.updates_enabled) {
+		pc->dvfs.updates_enabled = true;
+		ATRACE_BEGIN("set_acpm_tj_power_status");
+		if (set_acpm_tj_power_status(TZ_GPU, true))
+			dev_err(kbdev->dev, "Failed to set Tj power on status\n");
+		ATRACE_END();
+	}
 	mutex_unlock(&pc->dvfs.lock);
 }
 #endif
@@ -430,7 +472,6 @@
 	struct pixel_context *pc = kbdev->platform_context;
 	int proc = raw_smp_processor_id();
 
-	/* TODO (b/187175695): Report this data via a custom ftrace event instead */
 	trace_clock_set_rate("gpu_util", utilisation, proc);
 
 	atomic_set(&pc->dvfs.util, utilisation);
@@ -438,6 +479,38 @@
 
 	return 1;
 }
+
+/**
+ * kbase_platform_dvfs_event_mcu() - Callback from the Mali driver reporting updated MCU and total GPU utilization metrics.
+ *
+ * @kbdev:         The &struct kbase_device for the GPU.
+ * @utilisation:   The calculated GPU utilization as measured by the core Mali driver's metrics system.
+ * @mcu_utilisation: The calculated MCU utilization.
+ *
+ * This is the function that bridges the core Mali driver and the Pixel integration code. As this
+ * call is made in interrupt context, the data is swiftly handed off to a workqueue for further processing.
+ *
+ * Context: Interrupt context.
+ *
+ * Return: Returns 1 to signal success as specified in mali_kbase_pm_internal.h.
+ */
+int kbase_platform_dvfs_event_mcu(struct kbase_device *kbdev, u32 utilisation,
+				  u32 mcu_utilisation)
+{
+	struct pixel_context *pc = kbdev->platform_context;
+	int proc = raw_smp_processor_id();
+
+	trace_clock_set_rate("gpu_util", utilisation, proc);
+	trace_clock_set_rate("mcu_util", mcu_utilisation, proc);
+
+	atomic_set(&pc->dvfs.util, utilisation);
+	atomic_set(&pc->dvfs.mcu_util, mcu_utilisation);
+	queue_work(pc->dvfs.control_wq, &pc->dvfs.control_work);
+
+	return 1;
+}
+
 #else /* MALI_USE_CSF */
 /**
  * kbase_platform_dvfs_event() - Callback from Mali driver to report updated utilization metrics.
@@ -476,6 +549,7 @@
 
 /* Initialization code */
 
+#if IS_ENABLED(CONFIG_CAL_IF)
 /**
  * find_voltage_for_freq() - Retrieves voltage for a frequency from ECT.
  *
@@ -502,6 +576,7 @@
 
 	return -ENOENT;
 }
+#endif /* CONFIG_CAL_IF */
 
 /**
  * validate_and_parse_dvfs_table() - Validate and populate the GPU's DVFS table from DT.
@@ -524,20 +599,31 @@
 	char table_name[64];
 	char table_size_name[64];
 
-	int i, idx, c;
+	int i, idx;
 	int of_data_int_array[OF_DATA_NUM_MAX];
 	int dvfs_table_row_num = 0, dvfs_table_col_num = 0;
 	int dvfs_table_size = 0;
-	int scaling_level_max = -1, scaling_level_min = -1;
+
+#if IS_ENABLED(CONFIG_CAL_IF)
+	int c;
+	int level_count[GPU_DVFS_CLK_COUNT];
+	struct dvfs_rate_volt vf_map[GPU_DVFS_CLK_COUNT][16];
+	int scaling_level_max_ect = -1, scaling_level_min_ect = -1;
+	int scaling_freq_max_ect = INT_MAX;
+	int scaling_freq_min_ect = 0;
+#endif /* CONFIG_CAL_IF */
+
+	int scaling_level_max_devicetree = -1, scaling_level_min_devicetree = -1;
 	int scaling_freq_max_devicetree = INT_MAX;
 	int scaling_freq_min_devicetree = 0;
 	int scaling_freq_min_compute = 0;
-	int level_count[GPU_DVFS_CLK_COUNT];
-	struct dvfs_rate_volt vf_map[GPU_DVFS_CLK_COUNT][16];
+
+	int scaling_freq_hard_max = INT_MAX;
 
 	struct device_node *np = kbdev->dev->of_node;
 	struct pixel_context *pc = kbdev->platform_context;
 
+#if IS_ENABLED(CONFIG_CAL_IF)
 	/* Get frequency -> voltage mapping */
 	for (c = 0; c < GPU_DVFS_CLK_COUNT; c++) {
 		level_count[c] = cal_dfs_get_lv_num(pc->dvfs.clks[c].cal_id);
@@ -546,6 +632,7 @@
 			goto err;
 		}
 	}
+#endif /* CONFIG_CAL_IF */
 
 	sprintf(table_size_name, "gpu_dvfs_table_size_v%d", dvfs_table_num);
 	if (of_property_read_u32_array(np, table_size_name, of_data_int_array, 2))
@@ -572,12 +659,24 @@
 
 	of_property_read_u32(np, "gpu_dvfs_max_freq", &scaling_freq_max_devicetree);
 	of_property_read_u32(np, "gpu_dvfs_min_freq", &scaling_freq_min_devicetree);
-	of_property_read_u32(np, "gpu_dvfs_min_freq_compute", &scaling_freq_min_compute);
+	of_property_read_u32(np, "gpu_dvfs_min_freq_compute",
+			     &scaling_freq_min_compute);
+
+	scaling_freq_hard_max = scaling_freq_max_devicetree;
+
+#if IS_ENABLED(CONFIG_CAL_IF)
+	scaling_freq_max_ect = cal_dfs_get_max_freq(pc->dvfs.clks[GPU_DVFS_CLK_SHADERS].cal_id);
+	scaling_freq_min_ect = cal_dfs_get_min_freq(pc->dvfs.clks[GPU_DVFS_CLK_SHADERS].cal_id);
+
+	if (scaling_freq_hard_max == INT_MAX)
+		scaling_freq_hard_max = scaling_freq_max_ect;
+#endif /* CONFIG_CAL_IF */
 
 	/* Check if there is a voltage mapping for each frequency in the ECT table */
 	for (i = 0; i < dvfs_table_row_num; i++) {
 		idx = i * dvfs_table_col_num;
 
+#if IS_ENABLED(CONFIG_CAL_IF)
 		/* Get and validate voltages from cal-if */
 		for (c = 0; c < GPU_DVFS_CLK_COUNT; c++) {
 			if (find_voltage_for_freq(kbdev, of_data_int_array[idx + c],
@@ -588,20 +687,32 @@
 				goto err;
 			}
 		}
+#endif /* CONFIG_CAL_IF */
 	}
 
 	/* Process DVFS table data from device tree and store it in OPP table */
-	for (i = 0; i < dvfs_table_row_num; i++) {
-		idx = i * dvfs_table_col_num;
-
+	for (i = 0, idx = 0; i < dvfs_table_row_num; i++) {
+#ifdef CONFIG_MALI_PIXEL_GPU_HARD_FMAX
+		/* Skip storing any OPP above the scaling_freq_hard_max value,
+		 * decrementing the row count and loop index of the DVFS table.
+		 */
+		if (of_data_int_array[idx + 1] > scaling_freq_hard_max) {
+			idx += dvfs_table_col_num;
+			i--;
+			dvfs_table_row_num--;
+			continue;
+		}
+#endif /* CONFIG_MALI_PIXEL_GPU_HARD_FMAX */
 		/* Read raw data from device tree table */
 		gpu_dvfs_table[i].clk[GPU_DVFS_CLK_TOP_LEVEL] = of_data_int_array[idx + 0];
 		gpu_dvfs_table[i].clk[GPU_DVFS_CLK_SHADERS]   = of_data_int_array[idx + 1];
 
+#if IS_ENABLED(CONFIG_CAL_IF)
 		for (c = 0; c < GPU_DVFS_CLK_COUNT; c++) {
 			find_voltage_for_freq(kbdev, gpu_dvfs_table[i].clk[c],
 				&(gpu_dvfs_table[i].vol[c]), vf_map[c], level_count[c]);
 		}
+#endif /* CONFIG_CAL_IF */
 
 		gpu_dvfs_table[i].util_min     = of_data_int_array[idx + 2];
 		gpu_dvfs_table[i].util_max     = of_data_int_array[idx + 3];
@@ -612,6 +723,10 @@
 		gpu_dvfs_table[i].qos.cpu0_min = of_data_int_array[idx + 7];
 		gpu_dvfs_table[i].qos.cpu1_min = of_data_int_array[idx + 8];
 		gpu_dvfs_table[i].qos.cpu2_max = of_data_int_array[idx + 9];
+#if MALI_USE_CSF
+		gpu_dvfs_table[i].mcu_util_min = of_data_int_array[idx + 10];
+		gpu_dvfs_table[i].mcu_util_max = of_data_int_array[idx + 11];
+#endif
 
 		/* Handle case where CPU cluster 2 has no limit set */
 		if (!gpu_dvfs_table[i].qos.cpu2_max)
@@ -619,20 +734,36 @@
 
 		/* Update level locks */
 		if (gpu_dvfs_table[i].clk[GPU_DVFS_CLK_SHADERS] <= scaling_freq_max_devicetree)
-			if (scaling_level_max == -1)
-				scaling_level_max = i;
+			if (scaling_level_max_devicetree == -1)
+				scaling_level_max_devicetree = i;
 
 		if (gpu_dvfs_table[i].clk[GPU_DVFS_CLK_SHADERS] >= scaling_freq_min_devicetree)
-			scaling_level_min = i;
+			scaling_level_min_devicetree = i;
 
 		if (gpu_dvfs_table[i].clk[GPU_DVFS_CLK_SHADERS] >= scaling_freq_min_compute)
 			pc->dvfs.level_scaling_compute_min = i;
+
+#if IS_ENABLED(CONFIG_CAL_IF)
+		if (gpu_dvfs_table[i].clk[GPU_DVFS_CLK_SHADERS] <= scaling_freq_max_ect)
+			if (scaling_level_max_ect == -1)
+				scaling_level_max_ect = i;
+
+		if (gpu_dvfs_table[i].clk[GPU_DVFS_CLK_SHADERS] >= scaling_freq_min_ect)
+			scaling_level_min_ect = i;
+#endif /* CONFIG_CAL_IF */
+
+		idx += dvfs_table_col_num;
 	}
 
 	pc->dvfs.level_max = 0;
 	pc->dvfs.level_min = dvfs_table_row_num - 1;
 	gpu_dvfs_update_level_lock(kbdev, GPU_DVFS_LEVEL_LOCK_DEVICETREE,
-		scaling_level_min, scaling_level_max);
+		scaling_level_min_devicetree, scaling_level_max_devicetree);
+
+#if IS_ENABLED(CONFIG_CAL_IF)
+	gpu_dvfs_update_level_lock(kbdev, GPU_DVFS_LEVEL_LOCK_ECT,
+		scaling_level_min_ect, scaling_level_max_ect);
+#endif /* CONFIG_CAL_IF */
 
 	return dvfs_table_row_num;
 
@@ -673,6 +804,36 @@
 	return dvfs_table_row_num;
 }
 
+#if MALI_USE_CSF
+static void gpu_dvfs_initialize_capacity_headroom(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	struct pixel_context *pc = kbdev->platform_context;
+
+	pc->dvfs.capacity_headroom = 0;
+	of_property_read_u32(np, "gpu_dvfs_capacity_headroom", &pc->dvfs.capacity_headroom);
+
+#define CAPACITY_HISTORY_DEFAULT_DEPTH 4
+	pc->dvfs.capacity_history_depth = CAPACITY_HISTORY_DEFAULT_DEPTH;
+	of_property_read_u8(np, "gpu_dvfs_capacity_history_depth", &pc->dvfs.capacity_history_depth);
+
+	if (pc->dvfs.capacity_history_depth == 0) {
+		dev_err(kbdev->dev,
+			"DVFS capacity history depth cannot be zero");
+		pc->dvfs.capacity_history_depth = CAPACITY_HISTORY_DEFAULT_DEPTH;
+	}
+#undef CAPACITY_HISTORY_DEFAULT_DEPTH
+
+	if (pc->dvfs.capacity_history_depth > ARRAY_SIZE(pc->dvfs.capacity_history)) {
+		dev_err(kbdev->dev,
+			"DVFS capacity history depth %u exceeds maximum depth %u",
+			(unsigned int)pc->dvfs.capacity_history_depth,
+			(unsigned int)ARRAY_SIZE(pc->dvfs.capacity_history));
+		pc->dvfs.capacity_history_depth = ARRAY_SIZE(pc->dvfs.capacity_history);
+	}
+}
+#endif
+
 /**
  * gpu_dvfs_set_initial_level() - Set the initial GPU clocks
  *
@@ -685,7 +846,11 @@
  */
 static int gpu_dvfs_set_initial_level(struct kbase_device *kbdev)
 {
-	int level, ret, c;
+	int level, ret = -1;
+#if IS_ENABLED(CONFIG_CAL_IF)
+	int c;
+#endif /* CONFIG_CAL_IF */
+
 	struct pixel_context *pc = kbdev->platform_context;
 
 	level = pc->dvfs.level_min;
@@ -695,8 +860,11 @@
 		pc->dvfs.table[level].clk[GPU_DVFS_CLK_TOP_LEVEL],
 		pc->dvfs.table[level].clk[GPU_DVFS_CLK_SHADERS]);
 
+#if IS_ENABLED(CONFIG_EXYNOS_PD)
 	mutex_lock(&pc->pm.domain->access_lock);
+#endif /* CONFIG_EXYNOS_PD */
 
+#if IS_ENABLED(CONFIG_CAL_IF)
 	for (c = 0; c < GPU_DVFS_CLK_COUNT; c++) {
 		ret = gpu_dvfs_set_freq(kbdev, c, level);
 		if (ret) {
@@ -706,12 +874,40 @@
 			break;
 		}
 	}
+#endif /* CONFIG_CAL_IF */
 
+#if IS_ENABLED(CONFIG_EXYNOS_PD)
 	mutex_unlock(&pc->pm.domain->access_lock);
+#endif /* CONFIG_EXYNOS_PD */
 
 	return ret;
 }
 
+#if IS_ENABLED(CONFIG_GOOGLE_BCL)
+static int google_bcl_callback(struct notifier_block *nb, unsigned long max_clk, void *data) {
+	struct pixel_context *pc = container_of(nb, struct pixel_context, pm.qos_nb);
+	struct kbase_device *kbdev = pc->kbdev;
+	int max_level = -1;
+	int level;
+
+	CSTD_UNUSED(data);
+
+	// Find the throttling level that satisfies the requested maximum clock frequency.
+	for (level = 0; level < pc->dvfs.table_size; level++) {
+		max_level = level;
+		if (pc->dvfs.table[level].clk[GPU_DVFS_CLK_SHADERS] <= max_clk)
+			break;
+	}
+
+	mutex_lock(&pc->dvfs.lock);
+	gpu_dvfs_update_level_lock(kbdev, GPU_DVFS_LEVEL_LOCK_BCL, -1, max_level);
+	gpu_dvfs_select_level(kbdev);
+	mutex_unlock(&pc->dvfs.lock);
+
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_GOOGLE_BCL */
+
 /**
  * gpu_dvfs_init() - Initializes the Pixel GPU DVFS system.
  *
@@ -725,6 +921,9 @@
 int gpu_dvfs_init(struct kbase_device *kbdev)
 {
 	int i, ret = 0;
+#if MALI_USE_CSF
+	u32 of_data_int_array[2];
+#endif /* MALI_USE_CSF */
 	struct pixel_context *pc = kbdev->platform_context;
 	struct device_node *np = kbdev->dev->of_node;
 
@@ -736,9 +935,9 @@
 		pc->dvfs.level_locks[i].level_min = -1;
 		pc->dvfs.level_locks[i].level_max = -1;
 	}
-
 	pc->dvfs.updates_enabled = true;
 
+#if IS_ENABLED(CONFIG_CAL_IF)
 	/* Get data from DT */
 	if (of_property_read_u32(np, "gpu0_cmu_cal_id",
 		&pc->dvfs.clks[GPU_DVFS_CLK_TOP_LEVEL].cal_id) ||
@@ -747,6 +946,7 @@
 		ret = -EINVAL;
 		goto done;
 	}
+#endif /* IS_ENABLED(CONFIG_CAL_IF) */
 
 	/* Get the ASV table */
 	mutex_lock(&pc->dvfs.lock);
@@ -766,6 +966,24 @@
 		ret = -EINVAL;
 		goto done;
 	}
+#if MALI_USE_CSF
+	/* Set up DVFS perf tuning variables */
+	if (of_property_read_u32_array(np, "mcu_protm_scale", of_data_int_array,
+				       2)) {
+		ret = -EINVAL;
+		goto done;
+	}
+	pc->dvfs.tunable.mcu_protm_scale_num = of_data_int_array[0];
+	pc->dvfs.tunable.mcu_protm_scale_den = of_data_int_array[1];
+
+	if (of_property_read_u32_array(np, "mcu_down_util_scale",
+				       of_data_int_array, 2)) {
+		ret = -EINVAL;
+		goto done;
+	}
+	pc->dvfs.tunable.mcu_down_util_scale_num = of_data_int_array[0];
+	pc->dvfs.tunable.mcu_down_util_scale_den = of_data_int_array[1];
+#endif /* MALI_USE_CSF */
 
 	/* Setup dvfs step up value */
 	if (of_property_read_u32(np, "gpu_dvfs_step_up_val", &pc->dvfs.step_up_val)) {
@@ -781,6 +999,11 @@
 		goto done;
 	}
 	atomic_set(&pc->dvfs.util, 0);
+	atomic_set(&pc->dvfs.mcu_util, 0);
+
+#if MALI_USE_CSF
+	gpu_dvfs_initialize_capacity_headroom(kbdev);
+#endif /* MALI_USE_CSF */
 
 	/* Initialize DVFS governors */
 	ret = gpu_dvfs_governor_init(kbdev);
@@ -821,6 +1044,13 @@
 	pc->dvfs.clockdown_wq = create_singlethread_workqueue("gpu-dvfs-clockdown");
 	INIT_DELAYED_WORK(&pc->dvfs.clockdown_work, gpu_dvfs_clockdown_worker);
 
+#if IS_ENABLED(CONFIG_GOOGLE_BCL)
+	pc->pm.bcl_dev = google_retrieve_bcl_handle();
+	if (pc->pm.bcl_dev) {
+		pc->pm.qos_nb.notifier_call = google_bcl_callback;
+		exynos_pm_qos_add_notifier(PM_QOS_GPU_FREQ_MAX, &pc->pm.qos_nb);
+	}
+#endif
 	/* Initialization was successful */
 	goto done;
 
@@ -857,7 +1087,11 @@
 
 	cancel_work_sync(&pc->dvfs.control_work);
 	destroy_workqueue(pc->dvfs.control_wq);
-
+#if IS_ENABLED(CONFIG_GOOGLE_BCL)
+	if (pc->pm.bcl_dev) {
+		exynos_pm_qos_remove_notifier(PM_QOS_GPU_FREQ_MAX, &pc->pm.qos_nb);
+	}
+#endif
 #ifdef CONFIG_MALI_PIXEL_GPU_THERMAL
 	gpu_tmu_term(kbdev);
 #endif /* CONFIG_MALI_PIXEL_GPU_THERMAL */
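The CONFIG_MALI_PIXEL_GPU_HARD_FMAX row-skipping in validate_and_parse_dvfs_table() is easy to misread, since it decrements both the loop counter and the row count while advancing the flattened-array index. A standalone sketch of the same filtering pattern, with an illustrative two-column table:

    #include <stdio.h>

    #define COLS 2 /* illustrative column count; the real table has more */

    int main(void)
    {
        /* Flattened DT-style table: { top_clk, shader_clk } per row,
         * highest frequency first. */
        int raw[] = { 800, 900,  700, 800,  600, 700,  500, 550 };
        int rows = 4, hard_fmax = 800;
        int out[4][COLS];
        int i, idx;

        for (i = 0, idx = 0; i < rows; i++) {
            /* Skip OPPs whose shader clock exceeds the hard Fmax:
             * advance the source index but shrink the output table. */
            if (raw[idx + 1] > hard_fmax) {
                idx += COLS;
                i--;
                rows--;
                continue;
            }
            out[i][0] = raw[idx + 0];
            out[i][1] = raw[idx + 1];
            idx += COLS;
        }

        for (i = 0; i < rows; i++) /* prints the 3 surviving rows */
            printf("%d %d\n", out[i][0], out[i][1]);
        return 0;
    }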
diff --git a/mali_kbase/platform/pixel/pixel_gpu_dvfs.h b/mali_kbase/platform/pixel/pixel_gpu_dvfs.h
index c1f1587..df9530a 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_dvfs.h
+++ b/mali_kbase/platform/pixel/pixel_gpu_dvfs.h
@@ -45,7 +45,9 @@
  */
 struct gpu_dvfs_clk {
 	enum gpu_dvfs_clk_index index;
+#if IS_ENABLED(CONFIG_CAL_IF)
 	int cal_id;
+#endif /* IS_ENABLED(CONFIG_CAL_IF) */
 	struct blocking_notifier_head notifier;
 };
 
@@ -54,12 +56,16 @@
 /**
  * struct gpu_dvfs_utlization - Stores utilization statistics
  *
- * @util:    Overall utilization of the GPU
- * @util_gl: The share of utilization due to non-OpenCL work
- * @util_cl: The share of utilization due ot OpenCL work
+ * @util:     Overall utilization of the GPU
+ * @mcu_util: Utilization of the MCU
+ * @util_gl:  The share of utilization due to non-OpenCL work
+ * @util_cl:  The share of utilization due to OpenCL work
  */
 struct gpu_dvfs_utlization {
 	int util;
+#if MALI_USE_CSF
+	int mcu_util;
+#endif
 	int util_gl;
 	int util_cl;
 };
@@ -106,6 +112,10 @@
 	 */
 	GPU_DVFS_GOVERNOR_BASIC = 0,
 	GPU_DVFS_GOVERNOR_QUICKSTEP,
+#if MALI_USE_CSF
+	GPU_DVFS_GOVERNOR_QUICKSTEP_USE_MCU,
+	GPU_DVFS_GOVERNOR_CAPACITY_USE_MCU,
+#endif
 	/* Insert new governors here */
 	GPU_DVFS_GOVERNOR_COUNT,
 	GPU_DVFS_GOVERNOR_INVALID,
@@ -298,12 +308,20 @@
  * restrictive than an earlier one, the value from the later lock is selected.
  */
 enum gpu_dvfs_level_lock_type {
+#if IS_ENABLED(CONFIG_CAL_IF)
+	/**
+	 * &GPU_DVFS_LEVEL_LOCK_ECT: ECT lock
+	 *
+	 * This lock is based on Fmax and Fmin obtained from ECT table of the chip.
+	 */
+	GPU_DVFS_LEVEL_LOCK_ECT,
+#endif /* CONFIG_CAL_IF */
 	/**
 	 * &GPU_DVFS_LEVEL_LOCK_DEVICETREE: Devicetree lock
 	 *
 	 * This lock is used to enforce scaling limits set as part of the GPU device tree entry.
 	 */
-	GPU_DVFS_LEVEL_LOCK_DEVICETREE = 0,
+	GPU_DVFS_LEVEL_LOCK_DEVICETREE,
 	/**
 	 * &GPU_DVFS_LEVEL_LOCK_COMPUTE: Compute lock
 	 *
@@ -334,6 +352,16 @@
 	 */
 	GPU_DVFS_LEVEL_LOCK_THERMAL,
 #endif /* CONFIG_MALI_PIXEL_GPU_THERMAL */
+#if IS_ENABLED(CONFIG_GOOGLE_BCL)
+	/**
+	 * &GPU_DVFS_LEVEL_LOCK_BCL: Battery current limitation mitigation lock
+	 *
+	 * This lock is set when the system is in a current-limited situation and the GPU
+	 * frequency needs to be controlled to stay within the maximum current the battery
+	 * can deliver.
+	 */
+	GPU_DVFS_LEVEL_LOCK_BCL,
+#endif /* CONFIG_GOOGLE_BCL */
 	/* Insert new level locks here */
 	GPU_DVFS_LEVEL_LOCK_COUNT,
 };
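The new ECT and BCL entries extend the level-lock list, where the most restrictive vote is meant to win. A standalone sketch of one plausible combination rule, assuming (as in the driver's tables) that a lower level index means a higher frequency; the actual combining logic lives in gpu_dvfs_update_level_lock(), which is not part of this diff:

    #include <stdio.h>

    #define LOCK_COUNT 3
    #define LEVEL_MAX_DEFAULT 0 /* lowest index = highest frequency */
    #define LEVEL_MIN_DEFAULT 7 /* highest index = lowest frequency */

    struct level_lock { int level_min, level_max; }; /* -1 = no vote */

    /* Combine per-source locks so the most restrictive bound wins:
     * the effective max level is the largest (slowest) "max" vote and
     * the effective min level is the smallest (fastest) "min" vote. */
    static void effective_bounds(const struct level_lock *locks,
                                 int *eff_min, int *eff_max)
    {
        int i;

        *eff_max = LEVEL_MAX_DEFAULT;
        *eff_min = LEVEL_MIN_DEFAULT;
        for (i = 0; i < LOCK_COUNT; i++) {
            if (locks[i].level_max >= 0 && locks[i].level_max > *eff_max)
                *eff_max = locks[i].level_max;
            if (locks[i].level_min >= 0 && locks[i].level_min < *eff_min)
                *eff_min = locks[i].level_min;
        }
    }

    int main(void)
    {
        /* ect caps at level 1, devicetree at level 2, bcl has no vote */
        struct level_lock locks[LOCK_COUNT] = {
            { .level_min = 6, .level_max = 1 },
            { .level_min = 7, .level_max = 2 },
            { .level_min = -1, .level_max = -1 },
        };
        int lo, hi;

        effective_bounds(locks, &lo, &hi);
        printf("levels %d..%d\n", hi, lo); /* prints "levels 2..6" */
        return 0;
    }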
diff --git a/mali_kbase/platform/pixel/pixel_gpu_dvfs_governor.c b/mali_kbase/platform/pixel/pixel_gpu_dvfs_governor.c
index 28d4073..9086042 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_dvfs_governor.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_dvfs_governor.c
@@ -65,6 +65,219 @@
 }
 
 /**
+ * level_for_capacity() - Find the lowest level satisfying a given needed capacity.
+ *
+ * @capacity:  The desired capacity, in the same units as the level clock rates
+ * @tbl:       The DVFS operating points to choose from
+ * @dev:       The device node, for debug logs.
+ * @level_min: The index of the lowest allowable operating point
+ * @level_max: The index of the highest allowable operating point
+ *
+ * Return: The index of the operating point found, or level_max if no operating
+ *         point has enough capacity.
+ */
+static int
+level_for_capacity(u32 capacity, struct gpu_dvfs_opp *tbl, struct device *dev,
+		   int level_min, int level_max)
+{
+	int l;
+
+	for (l = level_min; l >= level_max; --l) {
+		if ((u32)tbl[l].clk[1] >= capacity) {
+			dev_dbg(dev,
+				"DVFS needs capacity %u. "
+				"Setting max freq %u",
+				capacity,
+				tbl[l].clk[1]);
+			return l;
+		}
+	}
+
+	dev_dbg(dev,
+		"DVFS measured use exceeded maximum capacity."
+		"Setting max freq %u",
+		tbl[level_max].clk[1]);
+
+	return level_max;
+}
+
+/**
+ * gpu_dvfs_governor_quickstep_use_mcu_util() - The evaluation function for &GPU_DVFS_GOVERNOR_QUICKSTEP_USE_MCU.
+ *
+ * @kbdev:      The &struct kbase_device for the GPU.
+ * @util_stats: The current GPU utilization statistics.
+ *
+ * Algorithm:
+ *   * If we are within the utilization bounds of the current level then
+ *     no change is made.
+ *
+ *   * If &util or &mcu_util is above the maximum for the current level, we calculate how far
+ *     above the maximum we are. If &util is closer to 100% than it is to
+ *     the maximum utilization for the current level, then we move up &step_up levels.
+ *     We also move up &step_up levels if &mcu_util exceeds the level's
+ *     &mcu_util_max scaled by the mcu_down_util_scale tunable.
+ *     Otherwise we move up just a single level. If we skip a level, we also
+ *     halve the hysteresis for the new level, so that we can swiftly correct
+ *     overshoots.
+ *
+ *   * If &util and &mcu_util are both lower than the minimum utilization for the current level,
+ *     then we decrement the hysteresis value. If this decrement results in
+ *     hysteresis being zero, then we drop a level.
+ *
+ *   * Adjust the target frequency for capacity_headroom.
+ *
+ * Return: The level that the GPU should run at next.
+ *
+ * Context: Process context. Expects the caller to hold the DVFS lock.
+ */
+static int
+gpu_dvfs_governor_quickstep_use_mcu_util(struct kbase_device *kbdev,
+					 struct gpu_dvfs_utlization *util_stats)
+{
+	struct pixel_context *pc = kbdev->platform_context;
+	struct gpu_dvfs_opp *tbl = pc->dvfs.table;
+	int level = pc->dvfs.level_before_headroom;
+	int level_max = pc->dvfs.level_max;
+	int level_min = pc->dvfs.level_min;
+	int util = util_stats->util;
+	int mcu_util = util_stats->mcu_util;
+	int step_up = pc->dvfs.step_up_val;
+	int mcu_scale_num = pc->dvfs.tunable.mcu_down_util_scale_num;
+	int mcu_scale_den = pc->dvfs.tunable.mcu_down_util_scale_den;
+
+	lockdep_assert_held(&pc->dvfs.lock);
+
+	if ((level > level_max) && (util > tbl[level].util_max ||
+				    mcu_util > tbl[level].mcu_util_max)) {
+		/* We need to clock up. */
+		if (level >= step_up &&
+		    ((util > (100 + tbl[level].util_max) / 2) ||
+		     mcu_util > (mcu_scale_num *
+				 (tbl[level].mcu_util_max / mcu_scale_den)))) {
+			dev_dbg(kbdev->dev,
+				"DVFS +%d: %d -> %d (util: %d / %d | mcu: %d / %d)\n",
+				step_up, level, level - step_up, util,
+				tbl[level].util_max, mcu_util,
+				tbl[level].mcu_util_max);
+			level -= step_up;
+			pc->dvfs.governor.delay = tbl[level].hysteresis / 2;
+		} else {
+			dev_dbg(kbdev->dev,
+				"DVFS +1: %d -> %d (util: %d / %d mcu: %d / %d) \n",
+				level, level - 1, util, tbl[level].util_max,
+				mcu_util, tbl[level].mcu_util_max);
+			level -= 1;
+			pc->dvfs.governor.delay = tbl[level].hysteresis;
+		}
+
+	} else if ((level < level_min) && (util < tbl[level].util_min) &&
+		   (mcu_util < tbl[level].mcu_util_min)) {
+		/* We are clocked too high */
+		pc->dvfs.governor.delay--;
+
+		/* Check if we've resisted downclocking long enough */
+		if (pc->dvfs.governor.delay <= 0) {
+			dev_dbg(kbdev->dev,
+				"DVFS -1: %d -> %d (util: %d / %d mcu: %d / %d)\n",
+				level, level + 1, util, tbl[level].util_min,
+				mcu_util, tbl[level].mcu_util_min);
+
+			/* Time to clock down */
+			level++;
+
+			/* Reset hysteresis */
+			pc->dvfs.governor.delay = tbl[level].hysteresis;
+		}
+	} else {
+		/* We are at the correct level, reset hysteresis */
+		pc->dvfs.governor.delay = tbl[level].hysteresis;
+	}
+
+	pc->dvfs.level_before_headroom = level;
+
+	if (pc->dvfs.capacity_headroom != 0) {
+		u32 capacity = tbl[level].clk[1];
+
+		capacity += pc->dvfs.capacity_headroom;
+		return level_for_capacity(capacity, tbl, kbdev->dev, level_min, level_max);
+	} else {
+		/*
+		 * It's conceivable that the governor might choose an operating
+		 * point with the same core clock rate but higher QoS votes, so
+		 * respect the exact level chosen rather than doing a lookup in
+		 * the table solely based on capacity.
+		 */
+		return level;
+	}
+}
+
+/**
+ * gpu_dvfs_governor_capacity_use_mcu_util() - The evaluation function for &GPU_DVFS_GOVERNOR_CAPACITY_USE_MCU.
+ *
+ * @kbdev:      The &struct kbase_device for the GPU.
+ * @util_stats: The current GPU utilization statistics.
+ *
+ * Algorithm:
+ *   * If we are above 95% capacity at the current level, move to the highest
+ *     operating point.
+ *   * Otherwise, find the maximum capacity used in the last
+ *     capacity_history_depth DVFS intervals.
+ *   * Add capacity_headroom.
+ *   * Choose the lowest operating point that has that capacity.
+ *
+ * Return: The level that the GPU should run at next.
+ *
+ * Context: Process context. Expects the caller to hold the DVFS lock.
+ */
+static int
+gpu_dvfs_governor_capacity_use_mcu_util(struct kbase_device *kbdev,
+					 struct gpu_dvfs_utlization *util_stats)
+{
+	struct pixel_context *pc = kbdev->platform_context;
+	struct gpu_dvfs_opp *tbl = pc->dvfs.table;
+	int level = pc->dvfs.level;
+	int level_max = pc->dvfs.level_max;
+	int level_min = pc->dvfs.level_min;
+	u32 capacity_target = 0;
+	u64 util = util_stats->util < 0 ? 0 : util_stats->util;
+	u64 mcu_util = util_stats->mcu_util < 0 ? 0 : util_stats->mcu_util;
+	u64 total_util = util + mcu_util;
+
+	{
+		u64 capacity_used = (u64)tbl[level].clk[1] * total_util / 100ul;
+		pc->dvfs.capacity_history[pc->dvfs.capacity_history_index] = (u32)capacity_used;
+		pc->dvfs.capacity_history_index++;
+		pc->dvfs.capacity_history_index %= pc->dvfs.capacity_history_depth;
+	}
+
+	lockdep_assert_held(&pc->dvfs.lock);
+
+	if (total_util > 95) {
+		dev_dbg(kbdev->dev,
+			"DVFS load exceeds measurable levels. "
+			"Setting max freq %u",
+			tbl[level_max].clk[1]);
+		return level_max;
+	}
+
+	{
+		int h;
+
+		for (h = 0; h < pc->dvfs.capacity_history_depth; ++h) {
+			if (capacity_target < pc->dvfs.capacity_history[h]) {
+				capacity_target = pc->dvfs.capacity_history[h];
+			}
+		}
+
+		capacity_target += pc->dvfs.capacity_headroom;
+	}
+
+	return level_for_capacity(capacity_target, tbl, kbdev->dev, level_min, level_max);
+}
+
+/**
  * gpu_dvfs_governor_quickstep() - The evaluation function for &GPU_DVFS_GOVERNOR_QUICKSTEP.
  *
  * @kbdev:      The &struct kbase_device for the GPU.
@@ -90,7 +303,7 @@
  * Context: Process context. Expects the caller to hold the DVFS lock.
  */
 static int gpu_dvfs_governor_quickstep(struct kbase_device *kbdev,
-	struct gpu_dvfs_utlization *util_stats)
+				       struct gpu_dvfs_utlization *util_stats)
 {
 	struct pixel_context *pc = kbdev->platform_context;
 	struct gpu_dvfs_opp *tbl = pc->dvfs.table;
@@ -147,7 +360,17 @@
 	{
 		"quickstep",
 		gpu_dvfs_governor_quickstep,
-	}
+	},
+#if MALI_USE_CSF
+	{
+		"quickstep_use_mcu",
+		gpu_dvfs_governor_quickstep_use_mcu_util,
+	},
+	{
+		"capacity_use_mcu",
+		gpu_dvfs_governor_capacity_use_mcu_util,
+	},
+#endif
 };
 
 /**
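A worked example of the capacity arithmetic used by the capacity_use_mcu governor above, with illustrative clock and utilization values:

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative values: a level clocked at 700000, 55% GPU
         * utilization, 5% MCU utilization, 50000 of headroom. */
        unsigned long long clk = 700000, util = 55, mcu_util = 5;
        unsigned int headroom = 50000;

        /* capacity_used = clk * (util + mcu_util) / 100, as in the
         * governor's history update. */
        unsigned long long capacity_used = clk * (util + mcu_util) / 100ull;
        unsigned long long target = capacity_used + headroom;

        /* 420000 used; target 470000, so the lowest OPP whose clock is
         * >= 470000 would be chosen by level_for_capacity(). */
        printf("used=%llu target=%llu\n", capacity_used, target);
        return 0;
    }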
diff --git a/mali_kbase/platform/pixel/pixel_gpu_dvfs_metrics.c b/mali_kbase/platform/pixel/pixel_gpu_dvfs_metrics.c
index c7c2b81..029afc1 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_dvfs_metrics.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_dvfs_metrics.c
@@ -102,9 +102,6 @@
 		}
 
 	}
-
-	trace_gpu_frequency(clks[GPU_DVFS_CLK_TOP_LEVEL], 0);
-	trace_gpu_frequency(clks[GPU_DVFS_CLK_SHADERS], 1);
 }
 
 /**
@@ -381,14 +378,18 @@
 	struct pixel_platform_data *pd = kctx->platform_data;
 
 	struct task_struct *task;
+	struct pid *pid;
 	kuid_t uid;
 
 	struct gpu_dvfs_metrics_uid_stats *entry, *stats;
 	int ret = 0;
 
 	/* Get UID from task_struct */
-	task = get_pid_task(find_get_pid(kctx->kprcs->tgid), PIDTYPE_TGID);
+	pid = find_get_pid(kctx->kprcs->tgid);
+	task = get_pid_task(pid, PIDTYPE_TGID);
 	uid = task->cred->uid;
+	put_task_struct(task);
+	put_pid(pid);
 
 	mutex_lock(&kbdev->kctx_list_lock);
 
diff --git a/mali_kbase/platform/pixel/pixel_gpu_dvfs_qos.c b/mali_kbase/platform/pixel/pixel_gpu_dvfs_qos.c
index 7c7f84f..65b7da7 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_dvfs_qos.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_dvfs_qos.c
@@ -31,7 +31,7 @@
  */
 static inline void qos_reset(struct gpu_dvfs_qos_vote *vote) {
 	if (unlikely(vote->enabled)) {
-		exynos_pm_qos_update_request(&vote->req, EXYNOS_PM_QOS_DEFAULT_VALUE);
+		exynos_pm_qos_update_request_async(&vote->req, EXYNOS_PM_QOS_DEFAULT_VALUE);
 		vote->enabled = false;
 	}
 }
@@ -44,7 +44,7 @@
  */
 static inline void qos_set(struct gpu_dvfs_qos_vote *vote, int value) {
 	if (unlikely(value)) {
-		exynos_pm_qos_update_request(&vote->req, value);
+		exynos_pm_qos_update_request_async(&vote->req, value);
 		vote->enabled = true;
 	}
 	else {
@@ -182,7 +182,9 @@
 	dev_dbg(kbdev->dev, "GPU QOS initialized\n");
 	ret = 0;
 
+#ifdef CONFIG_MALI_PIXEL_GPU_BTS
 done:
+#endif /* CONFIG_MALI_PIXEL_GPU_BTS */
 	return ret;
 }
 
@@ -193,7 +195,9 @@
  */
 void gpu_dvfs_qos_term(struct kbase_device *kbdev)
 {
+#if IS_ENABLED(CONFIG_EXYNOS_PMU_IF) || defined(CONFIG_MALI_PIXEL_GPU_BTS)
 	struct pixel_context *pc = kbdev->platform_context;
+#endif
 
 	exynos_pm_qos_remove_request(&pc->dvfs.qos.int_min.req);
 	exynos_pm_qos_remove_request(&pc->dvfs.qos.mif_min.req);
diff --git a/mali_kbase/platform/pixel/pixel_gpu_power.c b/mali_kbase/platform/pixel/pixel_gpu_power.c
index bc9ce92..c4b5892 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_power.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_power.c
@@ -20,7 +20,8 @@
 #if IS_ENABLED(CONFIG_CAL_IF)
 #include <soc/google/cal-if.h>
 #endif
-#include <soc/samsung/exynos-smc.h>
+#include <linux/soc/samsung/exynos-smc.h>
+#include <linux/pm_runtime.h>
 
 /* Mali core includes */
 #include <mali_kbase.h>
@@ -30,6 +31,7 @@
 #include "pixel_gpu_control.h"
 #include "pixel_gpu_trace.h"
 #include <trace/events/power.h>
+#include <trace/hooks/systrace.h>
 
 /*
  * GPU_PM_DOMAIN_NAMES - names for GPU power domains.
@@ -252,8 +254,18 @@
 	int ret;
 	struct pixel_context *pc = kbdev->platform_context;
 
+	ATRACE_BEGIN(__func__);
+	ATRACE_BEGIN("pm_runtime_get_sync: top");
 	pm_runtime_get_sync(pc->pm.domain_devs[GPU_PM_DOMAIN_TOP]);
+	ATRACE_END();
+	ATRACE_BEGIN("pm_runtime_get_sync: cores");
 	pm_runtime_get_sync(pc->pm.domain_devs[GPU_PM_DOMAIN_CORES]);
+	ATRACE_END();
+#ifdef CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL
+	ATRACE_BEGIN("pm_runtime_get_sync: s2mpu");
+	pm_runtime_get_sync(kbdev->s2mpu_dev);
+	ATRACE_END();
+#endif /* CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL */
 	/*
 	 * We determine whether GPU state was lost by detecting whether the GPU state reached
 	 * GPU_POWER_LEVEL_OFF before we entered this function. The GPU state is set to be
@@ -268,6 +280,7 @@
 	ret = (pc->pm.state == GPU_POWER_LEVEL_OFF);
 
 	gpu_dvfs_enable_updates(kbdev);
+
 #ifdef CONFIG_MALI_MIDGARD_DVFS
 	kbase_pm_metrics_start(kbdev);
 	gpu_dvfs_event_power_on(kbdev);
@@ -279,14 +292,21 @@
 		google_init_gpu_ratio(pc->pm.bcl_dev);
 #endif
 
-#if !IS_ENABLED(CONFIG_SOC_GS101)
+#if !IS_ENABLED(CONFIG_SOC_GS101) && defined(CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING)
 	if (exynos_smc(SMC_PROTECTION_SET, 0, PROT_G3D, SMC_PROTECTION_ENABLE) != 0) {
 		dev_err(kbdev->dev, "Couldn't enable protected mode after GPU power-on");
 	}
 #endif
 
+#if IS_ENABLED(CONFIG_SOC_ZUMA) && defined(CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING)
+	if (exynos_smc(SMC_DRM_G3D_PPCFW_RESTORE, 0, 0, 0) != 0) {
+		dev_err(kbdev->dev, "Couldn't restore G3D PPCFW");
+	}
+#endif
 	pc->pm.state = GPU_POWER_LEVEL_STACKS;
 
+	ATRACE_END();
+
 	return ret;
 }
 
@@ -323,21 +343,33 @@
 {
 	struct pixel_context *pc = kbdev->platform_context;
 
+#if IS_ENABLED(CONFIG_SOC_ZUMA) && defined(CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING)
+	if (exynos_smc(SMC_DRM_G3D_PPCFW_OFF, 0, 0, 0) != 0) {
+		dev_err(kbdev->dev, "Couldn't disable G3D PPCFW");
+	}
+#endif
+
 	if (pc->pm.state == GPU_POWER_LEVEL_STACKS) {
+		gpu_dvfs_disable_updates(kbdev);
+#ifdef CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL
+		pm_runtime_put_sync(kbdev->s2mpu_dev);
+#endif /* CONFIG_MALI_PM_RUNTIME_S2MPU_CONTROL */
 		pm_runtime_put_sync(pc->pm.domain_devs[GPU_PM_DOMAIN_CORES]);
 		pc->pm.state = GPU_POWER_LEVEL_GLOBAL;
 	}
 
 	if (pc->pm.state == GPU_POWER_LEVEL_GLOBAL) {
-#if !IS_ENABLED(CONFIG_SOC_GS101)
+#if !IS_ENABLED(CONFIG_SOC_GS101) && defined(CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING)
 		if (exynos_smc(SMC_PROTECTION_SET, 0, PROT_G3D, SMC_PROTECTION_DISABLE) != 0) {
 			dev_err(kbdev->dev, "Couldn't disable protected mode before GPU power-off");
 		}
 #endif
 
-		gpu_dvfs_disable_updates(kbdev);
-
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+		if (pc->pm.top_suspend_hysteresis_time_ms != 0) {
+#else
 		if (pc->pm.use_autosuspend) {
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
 			pm_runtime_mark_last_busy(pc->pm.domain_devs[GPU_PM_DOMAIN_TOP]);
 			pm_runtime_put_autosuspend(pc->pm.domain_devs[GPU_PM_DOMAIN_TOP]);
 		} else {
@@ -349,7 +381,6 @@
 		gpu_dvfs_event_power_off(kbdev);
 		kbase_pm_metrics_stop(kbdev);
 #endif
-
 	}
 }
 
@@ -465,6 +496,13 @@
  * We enable autosuspend for the TOP domain so that after the autosuspend delay, the core Mali
  * driver knows to disable the collection of GPU utilization data used for DVFS purposes.
  *
+ * For GPU Sleep mode, set up an autosuspend delay for the Mali device. The timer is triggered
+ * from power_runtime_gpu_idle_callback; when it expires, power_off_callback is invoked.
+ * This autosuspend delay is set to pm.cores_suspend_hysteresis_time_ms, since only the CORES
+ * domain is powered off as soon as power_off_callback is called.
+ * The TOP domain is powered off after an additional pm.top_suspend_hysteresis_time_ms timer,
+ * armed from power_off_callback, expires.
+ *
  * Return: Returns 0 on success, or an error code on failure.
  */
 static int gpu_pm_callback_power_runtime_init(struct kbase_device *kbdev)
@@ -473,8 +511,25 @@
 
 	dev_dbg(kbdev->dev, "%s\n", __func__);
 
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+	if (pc->pm.cores_suspend_hysteresis_time_ms != 0) {
+		pm_runtime_set_autosuspend_delay(kbdev->dev, pc->pm.cores_suspend_hysteresis_time_ms);
+		pm_runtime_use_autosuspend(kbdev->dev);
+	}
+	pm_runtime_set_active(kbdev->dev);
+	pm_runtime_enable(kbdev->dev);
+	if (pc->pm.top_suspend_hysteresis_time_ms != 0) {
+		pm_runtime_set_autosuspend_delay(pc->pm.domain_devs[GPU_PM_DOMAIN_TOP],
+				pc->pm.top_suspend_hysteresis_time_ms);
+		pm_runtime_use_autosuspend(pc->pm.domain_devs[GPU_PM_DOMAIN_TOP]);
+	}
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
 	if (!pm_runtime_enabled(pc->pm.domain_devs[GPU_PM_DOMAIN_TOP]) ||
-		!pm_runtime_enabled(pc->pm.domain_devs[GPU_PM_DOMAIN_CORES])) {
+		!pm_runtime_enabled(pc->pm.domain_devs[GPU_PM_DOMAIN_CORES])
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+		|| !pm_runtime_enabled(kbdev->dev)
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
+			) {
 		dev_warn(kbdev->dev, "pm_runtime not enabled\n");
 		return -ENOSYS;
 	}
@@ -489,7 +544,7 @@
 }
 
 /**
- * kbase_device_runtime_term() - Initialize runtime power management.
+ * gpu_pm_callback_power_runtime_term() - Terminate runtime power management.
  *
  * @kbdev: The &struct kbase_device for the GPU.
  *
@@ -505,10 +560,60 @@
 
 	pm_runtime_disable(pc->pm.domain_devs[GPU_PM_DOMAIN_CORES]);
 	pm_runtime_disable(pc->pm.domain_devs[GPU_PM_DOMAIN_TOP]);
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+	pm_runtime_disable(kbdev->dev);
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
 }
 
-#endif /* IS_ENABLED(KBASE_PM_RUNTIME) */
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+/**
+ * gpu_pm_callback_power_runtime_idle() - Callback when Runtime PM is idle.
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ *
+ * This callback is made via the core Mali driver at the point where the GPU becomes idle and
+ * runtime power management may suspend it.
+ */
+static void gpu_pm_callback_power_runtime_idle(struct kbase_device *kbdev)
+{
+	struct pixel_context *pc = kbdev->platform_context;
 
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	ATRACE_BEGIN(__func__);
+	if (pc->pm.cores_suspend_hysteresis_time_ms != 0) {
+		pm_runtime_mark_last_busy(kbdev->dev);
+		pm_runtime_put_autosuspend(kbdev->dev);
+	} else {
+		pm_runtime_put_sync_suspend(kbdev->dev);
+	}
+	kbdev->pm.runtime_active = false;
+	ATRACE_END();
+}
+
+/**
+ * gpu_pm_callback_power_runtime_active() - Callback when Runtime PM is active.
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ *
+ * This callback is made via the core Mali driver at the point where the GPU becomes active
+ * again and a runtime PM reference is taken.
+ */
+static void gpu_pm_callback_power_runtime_active(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	ATRACE_BEGIN(__func__);
+	if (pm_runtime_status_suspended(kbdev->dev))
+		pm_runtime_get_sync(kbdev->dev);
+	else
+		pm_runtime_get(kbdev->dev);
+
+	kbdev->pm.runtime_active = true;
+	ATRACE_END();
+}
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
+#endif /* IS_ENABLED(KBASE_PM_RUNTIME) */
 
 static void gpu_pm_hw_reset(struct kbase_device *kbdev)
 {
@@ -575,9 +680,34 @@
 #endif /* KBASE_PM_RUNTIME */
 	.soft_reset_callback = NULL,
 	.hardware_reset_callback = gpu_pm_hw_reset,
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+	.power_runtime_gpu_idle_callback = gpu_pm_callback_power_runtime_idle,
+	.power_runtime_gpu_active_callback = gpu_pm_callback_power_runtime_active,
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
 };
 
 /**
+ * gpu_pm_get_power_state_nolock() - See gpu_pm_get_power_state
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ */
+bool gpu_pm_get_power_state_nolock(struct kbase_device *kbdev)
+{
+	bool ret = true;
+#if IS_ENABLED(CONFIG_EXYNOS_PMU_IF)
+	unsigned int val = 0;
+	struct pixel_context *pc = kbdev->platform_context;
+
+	lockdep_assert_held(&pc->pm.domain->access_lock);
+
+	exynos_pmu_read(pc->pm.status_reg_offset, &val);
+	ret = ((val & pc->pm.status_local_power_mask) == pc->pm.status_local_power_mask);
+#endif /* CONFIG_EXYNOS_PMU_IF */
+
+	return ret;
+}
+
+/**
  * gpu_pm_get_power_state() - Returns the current power state of the GPU.
  *
  * @kbdev: The &struct kbase_device for the GPU.
@@ -588,19 +718,18 @@
  */
 bool gpu_pm_get_power_state(struct kbase_device *kbdev)
 {
-	bool ret;
-	unsigned int val = 0;
+	bool ret = true;
+#if IS_ENABLED(CONFIG_EXYNOS_PMU_IF)
 	struct pixel_context *pc = kbdev->platform_context;
 
 	mutex_lock(&pc->pm.domain->access_lock);
-	exynos_pmu_read(pc->pm.status_reg_offset, &val);
-	ret = ((val & pc->pm.status_local_power_mask) == pc->pm.status_local_power_mask);
+	ret = gpu_pm_get_power_state_nolock(kbdev);
 	mutex_unlock(&pc->pm.domain->access_lock);
+#endif /* CONFIG_EXYNOS_PMU_IF */
 
 	return ret;
 }
 
-
 /**
  * gpu_pm_init() - Initializes power management control for a GPU.
  *
@@ -646,7 +775,7 @@
 		dev_set_drvdata(pc->pm.domain_devs[i], kbdev);
 
 		pc->pm.domain_links[i] = device_link_add(kbdev->dev,
-			pc->pm.domain_devs[i], DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
+			pc->pm.domain_devs[i], DL_FLAG_STATELESS);
 
 		if (!pc->pm.domain_links[i]) {
 			dev_err(kbdev->dev, "failed to link pm domain device");
@@ -683,17 +812,53 @@
 		goto error;
 	}
 
+#if MALI_USE_CSF
+	if (of_property_read_u32(np, "firmware_idle_hysteresis_time_ms",
+				&pc->pm.firmware_idle_hysteresis_time_ms)) {
+		dev_err(kbdev->dev, "firmware_idle_hysteresis_time_ms not set in DT\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+	if (of_property_read_u32(np, "firmware_idle_hysteresis_gpu_sleep_scaler",
+				&pc->pm.firmware_idle_hysteresis_gpu_sleep_scaler)) {
+		dev_err(kbdev->dev, "firmware_idle_hysteresis_gpu_sleep_scaler not set in DT\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	if (of_property_read_u32(np, "cores_suspend_hysteresis_time_ms",
+				&pc->pm.cores_suspend_hysteresis_time_ms)) {
+		dev_err(kbdev->dev, "cores_suspend_hysteresis_time_ms not set in DT\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	if (of_property_read_u32(np, "top_suspend_hysteresis_time_ms",
+				&pc->pm.top_suspend_hysteresis_time_ms)) {
+		dev_err(kbdev->dev, "top_suspend_hysteresis_time_ms not set in DT\n");
+		ret = -EINVAL;
+		goto error;
+	}
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
+
+#define NSECS_PER_MILLISEC (1000u * 1000u)
+	kbdev->csf.gpu_idle_hysteresis_ns = pc->pm.firmware_idle_hysteresis_time_ms * NSECS_PER_MILLISEC;
+#ifdef CONFIG_MALI_PIXEL_GPU_SLEEP
+	kbdev->csf.gpu_idle_hysteresis_ns /= pc->pm.firmware_idle_hysteresis_gpu_sleep_scaler;
+#endif /* CONFIG_MALI_PIXEL_GPU_SLEEP */
+#endif /* MALI_USE_CSF */
+
+#if IS_ENABLED(CONFIG_EXYNOS_PMU_IF)
 	pc->pm.domain = exynos_pd_lookup_name(g3d_power_domain_name);
+#endif /* CONFIG_EXYNOS_PMU_IF */
 	if (pc->pm.domain == NULL) {
 		dev_err(kbdev->dev, "Failed to find GPU power domain '%s'\n",
 			g3d_power_domain_name);
 		return -ENODEV;
 	}
 
-#if IS_ENABLED(CONFIG_GOOGLE_BCL)
-	pc->pm.bcl_dev = google_retrieve_bcl_handle();
-#endif
-
 	pc->pm.rail_state_log = gpu_pm_rail_state_log_init(kbdev);
 
 	return 0;
@@ -718,6 +883,7 @@
 
 	gpu_pm_rail_state_log_term(pc->pm.rail_state_log);
 
+
 	for (i = 0; i < GPU_PM_DOMAIN_COUNT; i++) {
 		if (pc->pm.domain_devs[i]) {
 			if (pc->pm.domain_links[i])
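The gpu_idle_hysteresis_ns derivation in gpu_pm_init() is plain unit arithmetic; a standalone sketch with assumed DT values (real values are board-specific and come from the device tree):

    #include <stdio.h>

    #define NSECS_PER_MILLISEC (1000u * 1000u)

    int main(void)
    {
        /* Assumed DT values, for illustration only. */
        unsigned int firmware_idle_hysteresis_time_ms = 1000;
        unsigned int firmware_idle_hysteresis_gpu_sleep_scaler = 4;

        unsigned long long gpu_idle_hysteresis_ns =
            (unsigned long long)firmware_idle_hysteresis_time_ms * NSECS_PER_MILLISEC;

        /* With GPU sleep enabled the hysteresis is shortened by the
         * scaler: 1000 ms / 4 = 250 ms = 250000000 ns. */
        gpu_idle_hysteresis_ns /= firmware_idle_hysteresis_gpu_sleep_scaler;

        printf("%llu ns\n", gpu_idle_hysteresis_ns);
        return 0;
    }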
diff --git a/mali_kbase/platform/pixel/pixel_gpu_sysfs.c b/mali_kbase/platform/pixel/pixel_gpu_sysfs.c
index a1c0e94..b7edf7e 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_sysfs.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_sysfs.c
@@ -16,6 +16,9 @@
 #include "pixel_gpu_sscd.h"
 
 static const char *gpu_dvfs_level_lock_names[GPU_DVFS_LEVEL_LOCK_COUNT] = {
+#if IS_ENABLED(CONFIG_CAL_IF)
+	"ect",
+#endif /* CONFIG_CAL_IF */
 	"devicetree",
 	"compute",
 	"hint",
@@ -23,6 +26,9 @@
 #ifdef CONFIG_MALI_PIXEL_GPU_THERMAL
 	"thermal",
 #endif /* CONFIG_MALI_PIXEL_GPU_THERMAL */
+#if IS_ENABLED(CONFIG_GOOGLE_BCL)
+        "bcl",
+#endif
 };
 
 /* Helper functions */
@@ -161,9 +167,9 @@
 		return -ENODEV;
 
 	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-		" gpu_0   gpu_0   gpu_1   gpu_1  util util hyste- int_clk  mif_clk cpu0_clk cpu1_clk cpu2_clk\n"
-		"  clk     vol     clk     vol   min  max  resis    min      min     min      min      limit\n"
-		"------- ------- ------- ------- ---- ---- ------ ------- -------- -------- -------- --------\n");
+		" gpu_0   gpu_0   gpu_1   gpu_1  util util hyste- int_clk  mif_clk cpu0_clk cpu1_clk cpu2_clk    mcu      mcu\n"
+		"  clk     vol     clk     vol   min  max  resis    min      min     min      min      limit  down_util up_util\n"
+		"------- ------- ------- ------- ---- ---- ------ ------- -------- -------- -------- -------- --------- -------\n");
 
 	for (i = pc->dvfs.level_max; i <= pc->dvfs.level_min; i++) {
 		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
@@ -181,10 +187,14 @@
 			pc->dvfs.table[i].qos.cpu1_min);
 
 		if (pc->dvfs.table[i].qos.cpu2_max == CPU_FREQ_MAX)
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%8s\n", "none");
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%8s", "none");
 		else
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%8d\n",
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%8d",
 				pc->dvfs.table[i].qos.cpu2_max);
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%9d %7d\n",
+			pc->dvfs.table[i].mcu_util_min,
+			pc->dvfs.table[i].mcu_util_max);
 	}
 
 	return ret;
@@ -695,6 +705,98 @@
 	return ret;
 }
 
+#if MALI_USE_CSF
+static ssize_t hint_power_on_store(struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int ret;
+	bool enabled;
+	struct kbase_device *kbdev = dev->driver_data;
+	struct pixel_context *pc = kbdev->platform_context;
+
+	if (!pc)
+		return -ENODEV;
+
+	ret = strtobool(buf, &enabled);
+	if (ret)
+		return -EINVAL;
+
+	if (enabled)
+		kthread_queue_work(&kbdev->apc.worker, &kbdev->apc.wakeup_csf_scheduler_work);
+
+	return count;
+}
+
+static ssize_t capacity_headroom_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev->driver_data;
+	struct pixel_context *pc = kbdev->platform_context;
+
+	if (!pc)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+		pc->dvfs.capacity_headroom);
+}
+
+static ssize_t capacity_headroom_store(struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	struct kbase_device *kbdev = dev->driver_data;
+	struct pixel_context *pc = kbdev->platform_context;
+	int capacity_headroom = 0;
+
+	if (!pc)
+		return -ENODEV;
+
+	if (kstrtoint(buf, 0, &capacity_headroom))
+		return -EINVAL;
+
+	mutex_lock(&pc->dvfs.lock);
+	pc->dvfs.capacity_headroom = capacity_headroom;
+	mutex_unlock(&pc->dvfs.lock);
+	trace_clock_set_rate("cap_headroom", capacity_headroom, raw_smp_processor_id());
+
+	return count;
+}
+
+static ssize_t capacity_history_depth_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct kbase_device *kbdev = dev->driver_data;
+	struct pixel_context *pc = kbdev->platform_context;
+
+	if (!pc)
+		return -ENODEV;
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n",
+		(unsigned int)pc->dvfs.capacity_history_depth);
+}
+
+static ssize_t capacity_history_depth_store(struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	struct kbase_device *kbdev = dev->driver_data;
+	struct pixel_context *pc = kbdev->platform_context;
+	unsigned int capacity_history_depth = 0;
+
+	if (!pc)
+		return -ENODEV;
+
+	if (kstrtouint(buf, 0, &capacity_history_depth))
+		return -EINVAL;
+
+	if (capacity_history_depth == 0 || capacity_history_depth > ARRAY_SIZE(pc->dvfs.capacity_history))
+		return -EINVAL;
+
+	mutex_lock(&pc->dvfs.lock);
+	pc->dvfs.capacity_history_depth = (u8)capacity_history_depth;
+	mutex_unlock(&pc->dvfs.lock);
+
+	return count;
+}
+#endif
+
 /* Define devfreq-like attributes */
 DEVICE_ATTR_RO(available_frequencies);
 DEVICE_ATTR_RO(cur_freq);
@@ -709,6 +811,13 @@
 DEVICE_ATTR_RO(trans_stat);
 DEVICE_ATTR_RO(available_governors);
 DEVICE_ATTR_RW(governor);
+#if MALI_USE_CSF
+DEVICE_ATTR_WO(hint_power_on);
+DEVICE_ATTR_RW(capacity_headroom);
+DEVICE_ATTR_RW(capacity_history_depth);
+#endif
 
 /* Initialization code */
 
@@ -742,6 +851,11 @@
 	{ "available_governors", &dev_attr_available_governors },
 	{ "governor", &dev_attr_governor },
 	{ "trigger_core_dump", &dev_attr_trigger_core_dump },
+#if MALI_USE_CSF
+	{ "capacity_headroom", &dev_attr_capacity_headroom },
+	{ "capacity_history_depth", &dev_attr_capacity_history_depth },
+	{ "hint_power_on", &dev_attr_hint_power_on },
+#endif
 };
 
 /**
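The new capacity_headroom and capacity_history_depth attributes are plain integer sysfs nodes, so they can be tuned from userspace. A sketch of writing one of them; the sysfs path is hypothetical and depends on where the Mali device registers its attributes:

    #include <stdio.h>

    /* Hypothetical sysfs path; the actual location depends on the device. */
    #define HEADROOM_NODE "/sys/devices/platform/1f000000.mali/capacity_headroom"

    int main(void)
    {
        FILE *f = fopen(HEADROOM_NODE, "w");

        if (!f) {
            perror("open capacity_headroom");
            return 1;
        }
        /* Reserve 50000 (same units as the shader clock) of extra
         * capacity on top of whatever level the governor selects. */
        fprintf(f, "%d\n", 50000);
        fclose(f);
        return 0;
    }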
diff --git a/mali_pixel/BUILD.bazel b/mali_pixel/BUILD.bazel
index 4b5357b..cd5eced 100644
--- a/mali_pixel/BUILD.bazel
+++ b/mali_pixel/BUILD.bazel
@@ -1,28 +1,27 @@
-# NOTE: THIS FILE IS EXPERIMENTAL FOR THE BAZEL MIGRATION AND NOT USED FOR
-# YOUR BUILDS CURRENTLY.
-#
-# It is not yet the source of truth for your build. If you're looking to modify
-# the build file, modify the Android.bp file instead. Do *not* modify this file
-# unless you have coordinated with the team managing the Soong to Bazel
-# migration.
+# SPDX-License-Identifier: GPL-2.0
 
-load("//build/kleaf:kernel.bzl", "kernel_module")
+load("//build/kernel/kleaf:kernel.bzl", "kernel_module")
 
 kernel_module(
-    name = "mali_pixel.cloudripper",
+    name = "mali_pixel",
     srcs = glob([
         "**/*.c",
         "**/*.h",
         "Kbuild",
     ]) + [
         "//private/google-modules/gpu/common:headers",
+        "//private/google-modules/soc/gs:gs_soc_headers",
     ],
     outs = [
         "mali_pixel.ko",
     ],
-    kernel_build = "//private/gs-google:cloudripper",
+    kernel_build = "//private/google-modules/soc/gs:gs_kernel_build",
     visibility = [
+        "//private/devices/google:__subpackages__",
         "//private/google-modules/gpu/mali_kbase:__pkg__",
-        "//private/gs-google:__pkg__",
+        "//private/google-modules/soc/gs:__pkg__",
+    ],
+    deps = [
+        "//private/google-modules/soc/gs:gs_soc_module",
     ],
 )
diff --git a/mali_pixel/Kbuild b/mali_pixel/Kbuild
index f19e708..4f65a95 100644
--- a/mali_pixel/Kbuild
+++ b/mali_pixel/Kbuild
@@ -29,9 +29,11 @@
 
 mali_pixel-objs :=
 
-ifeq ($(CONFIG_MALI_PIXEL_STATS),m)
-	DEFINES += -DCONFIG_MALI_PIXEL_STATS
-	mali_pixel-objs += mali_pixel_stats.o
+ifeq ($(CONFIG_PIXEL_STAT),m)
+	ifeq ($(CONFIG_MALI_PIXEL_STATS),m)
+		DEFINES += -DCONFIG_MALI_PIXEL_STATS
+		mali_pixel-objs += mali_pixel_stats.o
+	endif
 endif
 
 ifeq ($(CONFIG_MALI_MEMORY_GROUP_MANAGER),m)
diff --git a/mali_pixel/Makefile b/mali_pixel/Makefile
index 24a1890..7b09188 100644
--- a/mali_pixel/Makefile
+++ b/mali_pixel/Makefile
@@ -14,11 +14,10 @@
 
 KBUILD_OPTIONS += $(KBUILD_EXTRA) # Extra config if any
 
-modules:
-	$(MAKE) -C $(KERNEL_SRC) M=$(M) W=1 EXTRA_CFLAGS="-I$(M) -I$(M)/../common/include" $(KBUILD_OPTIONS) $(@)
+EXTRA_CFLAGS += -I$(M)
+EXTRA_CFLAGS += -I$(M)/../common/include
 
-modules_install:
-	$(MAKE) -C $(KERNEL_SRC) M=$(M) modules_install
+include $(KERNEL_SRC)/../private/google-modules/soc/gs/Makefile.include
 
-clean:
-	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
+modules modules_install clean:
+	$(MAKE) -C $(KERNEL_SRC) M=$(M) W=1 $(KBUILD_OPTIONS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" $(@)
diff --git a/mali_pixel/protected_memory_allocator.c b/mali_pixel/protected_memory_allocator.c
index 25b5bde..89276de 100644
--- a/mali_pixel/protected_memory_allocator.c
+++ b/mali_pixel/protected_memory_allocator.c
@@ -14,6 +14,8 @@
 #include <linux/protected_memory_allocator.h>
 #include <linux/slab.h>
 
+MODULE_IMPORT_NS(DMA_BUF);
+
 #define MALI_PMA_DMA_HEAP_NAME "vframe-secure"
 #define MALI_PMA_SLAB_SIZE (1 << 16)
 #define MALI_PMA_SLAB_BLOCK_SIZE (PAGE_SIZE)