Implement GuestCodeRegion

GuestCodeRegion represents basic block topology of the guest code

Bug: http://b/363582841
Test: run berberis_host_unit_tests
Change-Id: Ib16896ff9c3d52fe265730d8561fff0bb533acb4
diff --git a/runtime_primitives/Android.bp b/runtime_primitives/Android.bp
index b9c8519..86820c1 100644
--- a/runtime_primitives/Android.bp
+++ b/runtime_primitives/Android.bp
@@ -128,6 +128,7 @@
     srcs: [
         "code_pool_test.cc",
         "exec_region_anonymous_test.cc",
+        "guest_code_region_test.cc",
         "signal_queue_test.cc",
         "table_of_tables_test.cc",
     ],
diff --git a/runtime_primitives/guest_code_region_test.cc b/runtime_primitives/guest_code_region_test.cc
new file mode 100644
index 0000000..367dc03
--- /dev/null
+++ b/runtime_primitives/guest_code_region_test.cc
@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "berberis/runtime_primitives/guest_code_region.h"
+
+namespace berberis {
+
+namespace {
+
+using testing::ElementsAre;
+
+TEST(GuestCodeRegion, Smoke) {
+  Arena arena;
+  GuestCodeRegion region(&arena);
+
+  EXPECT_TRUE(region.branch_targets().empty());
+
+  {
+    // 42 - 50 ->{8, 100}
+    auto* bb = region.NewBasicBlock(42, 8, ArenaVector<GuestAddr>({8, 100}, &arena));
+    EXPECT_EQ(bb->start_addr(), 42u);
+    EXPECT_EQ(bb->size(), 8u);
+    EXPECT_EQ(bb->end_addr(), 50u);
+    EXPECT_THAT(bb->out_edges(), ElementsAre(8, 100));
+    EXPECT_TRUE(bb->in_edges().empty());
+  }
+
+  EXPECT_THAT(region.branch_targets(), ElementsAre(8, 100));
+
+  {
+    // 56 - 60 -> {42, 120}
+    auto* bb = region.NewBasicBlock(56, 4, ArenaVector<GuestAddr>({42, 50}, &arena));
+    EXPECT_EQ(bb->start_addr(), 56u);
+    EXPECT_EQ(bb->size(), 4u);
+    EXPECT_EQ(bb->end_addr(), 60u);
+    EXPECT_THAT(bb->out_edges(), ElementsAre(42, 50));
+    EXPECT_TRUE(bb->in_edges().empty());
+  }
+
+  EXPECT_THAT(region.branch_targets(), ElementsAre(8, 42, 50, 100));
+
+  region.ResolveEdges();
+
+  auto& basic_blocks = region.basic_blocks();
+
+  EXPECT_EQ(basic_blocks.size(), 2u);
+
+  ASSERT_TRUE(basic_blocks.contains(42));
+  ASSERT_TRUE(basic_blocks.contains(56));
+
+  {
+    auto& bb = basic_blocks.at(42);
+    EXPECT_THAT(bb.in_edges(), ElementsAre(56));
+  }
+
+  {
+    auto& bb = basic_blocks.at(56);
+    EXPECT_TRUE(bb.in_edges().empty());
+  }
+}
+
+TEST(GuestCodeRegion, ResolveEdges) {
+  Arena arena;
+  GuestCodeRegion region(&arena);
+
+  // 42 - 54
+  region.NewBasicBlock(42, 12, ArenaVector<GuestAddr>({100, 150, 200}, &arena));
+
+  EXPECT_THAT(region.branch_targets(), ElementsAre(100, 150, 200));
+
+  // 100 - 120
+  region.NewBasicBlock(100, 20, ArenaVector<GuestAddr>({8, 200, 1000}, &arena));
+
+  EXPECT_THAT(region.branch_targets(), ElementsAre(8, 100, 150, 200, 1000));
+
+  // 200 - 240
+  region.NewBasicBlock(200, 40, ArenaVector<GuestAddr>({80, 120}, &arena));
+
+  EXPECT_THAT(region.branch_targets(), ElementsAre(8, 80, 100, 120, 150, 200, 1000));
+
+  region.ResolveEdges();
+
+  auto& basic_blocks = region.basic_blocks();
+  ASSERT_EQ(basic_blocks.size(), 3u);
+  ASSERT_TRUE(basic_blocks.contains(42));
+  ASSERT_TRUE(basic_blocks.contains(100));
+  ASSERT_TRUE(basic_blocks.contains(200));
+
+  {
+    auto bb = basic_blocks.at(42);
+    EXPECT_TRUE(bb.in_edges().empty());
+  }
+
+  {
+    auto bb = basic_blocks.at(100);
+    EXPECT_THAT(bb.in_edges(), ElementsAre(42));
+  }
+
+  {
+    auto bb = basic_blocks.at(200);
+    EXPECT_THAT(bb.in_edges(), ElementsAre(42, 100));
+  }
+}
+
+TEST(GuestCodeRegion, SplitBasicBlock) {
+  Arena arena;
+  GuestCodeRegion region(&arena);
+
+  // 42 - 54
+  region.NewBasicBlock(42, 12, ArenaVector<GuestAddr>({110, 150, 220}, &arena));
+
+  EXPECT_THAT(region.branch_targets(), ElementsAre(110, 150, 220));
+
+  // 100 - 120
+  region.NewBasicBlock(100, 20, ArenaVector<GuestAddr>({8, 50, 1000}, &arena));
+
+  EXPECT_THAT(region.branch_targets(), ElementsAre(8, 50, 110, 150, 220, 1000));
+
+  // 200 - 240
+  region.NewBasicBlock(200, 40, ArenaVector<GuestAddr>({80, 120, 240}, &arena));
+
+  EXPECT_THAT(region.branch_targets(), ElementsAre(8, 50, 80, 110, 120, 150, 220, 240, 1000));
+
+  // 240 - 250
+  region.NewBasicBlock(240, 50, ArenaVector<GuestAddr>({10, 210, 230}, &arena));
+
+  EXPECT_THAT(region.branch_targets(),
+              ElementsAre(8, 10, 50, 80, 110, 120, 150, 210, 220, 230, 240, 1000));
+
+  region.ResolveEdges();
+
+  auto& basic_blocks = region.basic_blocks();
+  ASSERT_EQ(basic_blocks.size(), 9u);
+  ASSERT_TRUE(basic_blocks.contains(42));
+  ASSERT_TRUE(basic_blocks.contains(50));
+  ASSERT_TRUE(basic_blocks.contains(100));
+  ASSERT_TRUE(basic_blocks.contains(110));
+  ASSERT_TRUE(basic_blocks.contains(200));
+  ASSERT_TRUE(basic_blocks.contains(210));
+  ASSERT_TRUE(basic_blocks.contains(220));
+  ASSERT_TRUE(basic_blocks.contains(230));
+  ASSERT_TRUE(basic_blocks.contains(240));
+
+  {
+    auto bb = basic_blocks.at(42);
+    EXPECT_EQ(bb.start_addr(), 42u);
+    EXPECT_EQ(bb.size(), 8u);
+    EXPECT_EQ(bb.end_addr(), 50u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(50));
+    EXPECT_TRUE(bb.in_edges().empty());
+  }
+
+  {
+    auto bb = basic_blocks.at(50);
+    EXPECT_EQ(bb.start_addr(), 50u);
+    EXPECT_EQ(bb.size(), 4u);
+    EXPECT_EQ(bb.end_addr(), 54u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(110, 150, 220));
+    EXPECT_THAT(bb.in_edges(), ElementsAre(42, 110));
+  }
+
+  {
+    auto bb = basic_blocks.at(100);
+    EXPECT_EQ(bb.start_addr(), 100u);
+    EXPECT_EQ(bb.size(), 10u);
+    EXPECT_EQ(bb.end_addr(), 110u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(110));
+    EXPECT_TRUE(bb.in_edges().empty());
+  }
+
+  {
+    auto bb = basic_blocks.at(110);
+    EXPECT_EQ(bb.start_addr(), 110u);
+    EXPECT_EQ(bb.size(), 10u);
+    EXPECT_EQ(bb.end_addr(), 120u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(8, 50, 1000));
+    EXPECT_THAT(bb.in_edges(), ElementsAre(50, 100));
+  }
+
+  {
+    auto bb = basic_blocks.at(200);
+    EXPECT_EQ(bb.start_addr(), 200u);
+    EXPECT_EQ(bb.size(), 10u);
+    EXPECT_EQ(bb.end_addr(), 210u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(210));
+    EXPECT_TRUE(bb.in_edges().empty());
+  }
+
+  {
+    auto bb = basic_blocks.at(210);
+    EXPECT_EQ(bb.start_addr(), 210u);
+    EXPECT_EQ(bb.size(), 10u);
+    EXPECT_EQ(bb.end_addr(), 220u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(220));
+    EXPECT_THAT(bb.in_edges(), ElementsAre(200, 240));
+  }
+
+  {
+    auto bb = basic_blocks.at(220);
+    EXPECT_EQ(bb.start_addr(), 220u);
+    EXPECT_EQ(bb.size(), 10u);
+    EXPECT_EQ(bb.end_addr(), 230u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(230));
+    EXPECT_THAT(bb.in_edges(), ElementsAre(50, 210));
+  }
+
+  {
+    auto bb = basic_blocks.at(230);
+    EXPECT_EQ(bb.start_addr(), 230u);
+    EXPECT_EQ(bb.size(), 10u);
+    EXPECT_EQ(bb.end_addr(), 240u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(80, 120, 240));
+    EXPECT_THAT(bb.in_edges(), ElementsAre(220, 240));
+  }
+
+  {
+    auto bb = basic_blocks.at(240);
+    EXPECT_EQ(bb.start_addr(), 240u);
+    EXPECT_EQ(bb.size(), 50u);
+    EXPECT_EQ(bb.end_addr(), 290u);
+    EXPECT_THAT(bb.out_edges(), ElementsAre(10, 210, 230));
+    EXPECT_THAT(bb.in_edges(), ElementsAre(230));
+  }
+}
+
+TEST(GuestCodeRegion, InvalidRegion) {
+  Arena arena;
+  GuestCodeRegion region(&arena);
+
+  // Overlapping code blocks are not allowed
+  region.NewBasicBlock(100, 60, ArenaVector<GuestAddr>({}, &arena));
+  region.NewBasicBlock(150, 50, ArenaVector<GuestAddr>({}, &arena));
+
+  EXPECT_DEATH(region.ResolveEdges(), "");
+}
+
+TEST(GuestCodeRegion, NoResolveEdgesTwice) {
+  Arena arena;
+  GuestCodeRegion region(&arena);
+
+  region.NewBasicBlock(100, 60, ArenaVector<GuestAddr>({}, &arena));
+
+  region.ResolveEdges();
+
+  EXPECT_DEATH(region.ResolveEdges(), "");
+}
+
+TEST(GuestCodeRegion, ResolveEdgesExpectsNoInEdges) {
+  Arena arena;
+  GuestCodeRegion region(&arena);
+
+  auto* bb = region.NewBasicBlock(100, 60, ArenaVector<GuestAddr>({}, &arena));
+  bb->AddInEdge(5);
+
+  EXPECT_DEATH(region.ResolveEdges(), "");
+}
+
+TEST(GuestCodeRegion, NoNewBasicBlockAfterResolveRegion) {
+  Arena arena;
+  GuestCodeRegion region(&arena);
+
+  region.NewBasicBlock(100, 60, ArenaVector<GuestAddr>({}, &arena));
+
+  region.ResolveEdges();
+
+  EXPECT_DEATH(region.NewBasicBlock(200, 20, ArenaVector<GuestAddr>({}, &arena)), "");
+}
+
+}  // namespace
+
+}  // namespace berberis
diff --git a/runtime_primitives/include/berberis/runtime_primitives/guest_code_region.h b/runtime_primitives/include/berberis/runtime_primitives/guest_code_region.h
new file mode 100644
index 0000000..c7d3d30
--- /dev/null
+++ b/runtime_primitives/include/berberis/runtime_primitives/guest_code_region.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_RUNTIME_PRIMITIVES_GUEST_CODE_REGION_H_
+#define BERBERIS_RUNTIME_PRIMITIVES_GUEST_CODE_REGION_H_
+
+#include "berberis/base/arena_alloc.h"
+#include "berberis/base/arena_map.h"
+#include "berberis/base/arena_set.h"
+#include "berberis/base/arena_vector.h"
+#include "berberis/guest_state/guest_addr.h"
+
+namespace berberis {
+
+class GuestCodeBasicBlock {
+ public:
+  explicit GuestCodeBasicBlock(Arena* arena,
+                               GuestAddr start_addr,
+                               size_t size,
+                               ArenaVector<GuestAddr> out_edges)
+      : start_addr_{start_addr}, size_{size}, in_edges_{arena}, out_edges_{std::move(out_edges)} {}
+
+  void SetOutEdges(ArenaVector<GuestAddr> out_edges) { out_edges_ = std::move(out_edges); }
+
+  void AddInEdge(GuestAddr source_addr) { in_edges_.push_back(source_addr); }
+
+  void SetSize(size_t size) { size_ = size; }
+
+  [[nodiscard]] GuestAddr start_addr() const { return start_addr_; }
+  [[nodiscard]] GuestAddr end_addr() const { return start_addr_ + size_; }
+  [[nodiscard]] size_t size() const { return size_; }
+  [[nodiscard]] const ArenaVector<GuestAddr>& out_edges() const { return out_edges_; }
+  [[nodiscard]] const ArenaVector<GuestAddr>& in_edges() const { return in_edges_; }
+
+ private:
+  const GuestAddr start_addr_;
+  size_t size_;
+  ArenaVector<GuestAddr> in_edges_;
+  ArenaVector<GuestAddr> out_edges_;
+};
+
+class GuestCodeRegion {
+ public:
+  explicit GuestCodeRegion(Arena* arena)
+      : arena_{arena}, basic_blocks_{arena}, branch_targets_{arena} {}
+
+  /* may_discard */ GuestCodeBasicBlock* NewBasicBlock(GuestAddr guest_addr,
+                                                       size_t size,
+                                                       const ArenaVector<GuestAddr>& out_edges) {
+    CHECK(!code_region_finalized_);
+    auto [it, inserted] =
+        basic_blocks_.try_emplace(guest_addr, arena_, guest_addr, size, out_edges);
+    CHECK(inserted);
+    branch_targets_.insert(out_edges.begin(), out_edges.end());
+    return &it->second;
+  }
+
+  // This method must be called only once.
+  void ResolveEdges() {
+    CHECK(!code_region_finalized_);
+    ValidateRegionBeforeFinalize();
+    SplitBasicBlocks();
+    ResolveInEdges();
+    code_region_finalized_ = true;
+  }
+
+  [[nodiscard]] const ArenaMap<GuestAddr, GuestCodeBasicBlock>& basic_blocks() const {
+    return basic_blocks_;
+  }
+
+  [[nodiscard]] const ArenaSet<GuestAddr>& branch_targets() const { return branch_targets_; }
+
+ private:
+  void SplitBasicBlocks() {
+    for (auto branch_target : branch_targets_) {
+      auto it = basic_blocks_.upper_bound(branch_target);
+      if (it == basic_blocks_.begin()) {
+        continue;
+      }
+
+      --it;
+      auto& [guest_addr, code_block] = *it;
+      if (branch_target <= guest_addr || branch_target >= code_block.end_addr()) {
+        // Nothing to split.
+        continue;
+      }
+
+      size_t updated_size = branch_target - code_block.start_addr();
+      size_t new_code_block_size = code_block.size() - updated_size;
+
+      NewBasicBlock(branch_target, new_code_block_size, code_block.out_edges());
+
+      code_block.SetSize(updated_size);
+      code_block.SetOutEdges(ArenaVector<GuestAddr>({branch_target}, arena_));
+    }
+  }
+
+  void ResolveInEdges() {
+    for (auto& [source_addr, basic_block] : basic_blocks_) {
+      for (auto target_addr : basic_block.out_edges()) {
+        auto it = basic_blocks_.find(target_addr);
+        if (it != basic_blocks_.end()) {
+          it->second.AddInEdge(source_addr);
+        }
+      }
+    }
+  }
+
+  void ValidateRegionBeforeFinalize() const {
+    GuestAddr last_seen_end_addr = kNullGuestAddr;
+    for (const auto& [start_addr, basic_block] : basic_blocks_) {
+      CHECK_GE(start_addr, last_seen_end_addr);
+      last_seen_end_addr = basic_block.end_addr();
+      CHECK(basic_block.in_edges().empty());
+    }
+  }
+
+  Arena* arena_;
+  ArenaMap<GuestAddr, GuestCodeBasicBlock> basic_blocks_;
+  ArenaSet<GuestAddr> branch_targets_;
+  bool code_region_finalized_{false};
+};
+
+}  // namespace berberis
+
+#endif  // BERBERIS_RUNTIME_PRIMITIVES_GUEST_CODE_REGION_H_