arm64: Use FEAT_HAFDBS to track dirty pages when available

Some recent arm64 cores implement FEAT_HAFDBS, a facility that allows
the page table walker to track the dirty state of a page. This makes
cache maintenance operations (CMOs) by VA much more efficient, as we
only need to look at dirty pages.

Bug: 217161634
Signed-off-by: Marc Zyngier <maz@kernel.org>
Change-Id: Iba2b67ba8c3d153c53c09d86eb42e3b35e18483a
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index 1291b7a..98489d1 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -73,6 +73,8 @@
 
 	if (el == 1) {
 		tcr = TCR_EL1_RSVD | (ips << 32) | TCR_EPD1_DISABLE;
+		if (gd->arch.has_hafdbs)
+			tcr |= TCR_HA | TCR_HD;
 	} else if (el == 2) {
 		tcr = TCR_EL2_RSVD | (ips << 16);
 	} else {
@@ -180,6 +182,9 @@
 		    attrs != PTE_BLOCK_MEMTYPE(MT_NORMAL_NC))
 			continue;
 
+		if (gd->arch.has_hafdbs && (pte & (PTE_RDONLY | PTE_DBM)) != PTE_DBM)
+			continue;
+
 		end = va + BIT(level2shift(level)) - 1;
 
 		debug("Flush PTE %llx at level %d: %llx-%llx\n",
@@ -314,6 +319,8 @@
 	if (va_bits < 39)
 		level = 1;
 
+	if (gd->arch.has_hafdbs)
+		attrs |= PTE_DBM | PTE_RDONLY;
 
 	map_range(map->virt, map->phys, map->size, level,
 		  (u64*)gd->arch.tlb_addr, attrs);
@@ -366,7 +373,14 @@
 __weak u64 get_page_table_size(void)
 {
 	u64 one_pt = MAX_PTE_ENTRIES * sizeof(u64);
-	u64 size;
+	u64 size, mmfr1;
+
+	asm volatile("mrs %0, id_aa64mmfr1_el1" : "=r" (mmfr1));
+	if ((mmfr1 & 0xf) == 2) {
+		gd->arch.has_hafdbs = true;
+	} else {
+		gd->arch.has_hafdbs = false;
+	}
 
 	/* Account for all page tables we would need to cover our memory map */
 	size = one_pt * count_ranges();
diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h
index fc97c55..d3632d8 100644
--- a/arch/arm/include/asm/armv8/mmu.h
+++ b/arch/arm/include/asm/armv8/mmu.h
@@ -49,6 +49,9 @@
 #define PTE_TYPE_BLOCK		(1 << 0)
 #define PTE_TYPE_VALID		(1 << 0)
 
+#define PTE_RDONLY		(1 << 7)
+#define PTE_DBM			(1UL << 51)
+
 #define PTE_TABLE_PXN		(1UL << 59)
 #define PTE_TABLE_XN		(1UL << 60)
 #define PTE_TABLE_AP		(1UL << 61)
@@ -99,6 +102,9 @@
 #define TCR_TG0_16K		(2 << 14)
 #define TCR_EPD1_DISABLE	(1 << 23)
 
+#define TCR_HA			(1UL << 39)
+#define TCR_HD			(1UL << 40)
+
 #define TCR_EL1_RSVD		(1 << 31)
 #define TCR_EL2_RSVD		(1 << 31 | 1 << 23)
 #define TCR_EL3_RSVD		(1 << 31 | 1 << 23)
diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h
index 085e12b..a5ccdb6 100644
--- a/arch/arm/include/asm/global_data.h
+++ b/arch/arm/include/asm/global_data.h
@@ -52,6 +52,7 @@
 #if defined(CONFIG_ARM64)
 	unsigned long tlb_fillptr;
 	unsigned long tlb_emerg;
+	bool has_hafdbs;
 #endif
 #endif
 #ifdef CONFIG_SYS_MEM_RESERVE_SECURE