gh-109329: Count tier2 opcode misses (#110561)

This keeps a separate 'miss' counter for each micro-opcode (uop), incremented whenever a guard uop takes a deoptimization side exit.
diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
index 4988caa..294bf15 100644
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@@ -98,6 +98,7 @@ typedef struct _gc_stats {
 
 typedef struct _uop_stats {
     uint64_t execution_count;
+    uint64_t miss;
 } UOpStats;
 
 #define _Py_UOP_HIST_SIZE 32
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index d31d836..4f126ab 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -283,7 +283,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
     do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
 #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
 #define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
-#define UOP_EXE_INC(opname) do { if (_Py_stats) _Py_stats->optimization_stats.opcode[opname].execution_count++; } while (0)
+#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0)
 #define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
 #define OPT_HIST(length, name) \
     do { \
@@ -308,7 +308,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
 #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
 #define GC_STAT_ADD(gen, name, n) ((void)0)
 #define OPT_STAT_INC(name) ((void)0)
-#define UOP_EXE_INC(opname) ((void)0)
+#define UOP_STAT_INC(opname, name) ((void)0)
 #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0)
 #define OPT_HIST(length, name) ((void)0)
 #endif  // !Py_STATS
diff --git a/Python/executor.c b/Python/executor.c
index bfa7f7e..2884565 100644
--- a/Python/executor.c
+++ b/Python/executor.c
@@ -25,6 +25,7 @@
 #undef DEOPT_IF
 #define DEOPT_IF(COND, INSTNAME) \
     if ((COND)) {                \
+        UOP_STAT_INC(INSTNAME, miss); \
         goto deoptimize;         \
     }
 
@@ -93,7 +94,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject
                 (int)(stack_pointer - _PyFrame_Stackbase(frame)));
         pc++;
         OPT_STAT_INC(uops_executed);
-        UOP_EXE_INC(opcode);
+        UOP_STAT_INC(opcode, execution_count);
 #ifdef Py_STATS
         trace_uop_execution_counter++;
 #endif
diff --git a/Python/specialize.c b/Python/specialize.c
index d74c4c5..41e74c6 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -248,6 +248,9 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
         if (stats->opcode[i].execution_count) {
             fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count);
         }
+        if (stats->opcode[i].miss) {
+            fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", names[i], stats->opcode[i].miss);
+        }
     }
 
     for (int i = 0; i < 256; i++) {
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 165b9b4..0527d4be 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -1028,7 +1028,7 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None)
                 ],
             )
         yield Section(
-            "Uop stats",
+            "Uop execution stats",
             "",
             [
                 Table(