blob: 32111807f1a8de6fd3c02a5a8bfb8ce80ad5d570 [file] [log] [blame]
#!/usr/bin/python3 -B
# Copyright 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Read the EXPECTED_UPSTREAM and update the files from the upstream."""
import logging
# pylint: disable=g-importing-member
from pathlib import Path
import sys
from typing import List
# pylint: disable=g-multiple-import
from common_util import (
ExpectedUpstreamEntry,
ExpectedUpstreamFile,
has_file_in_tree,
LIBCORE_DIR,
)
from git import (
Blob,
IndexFile,
Repo,
)
# Enable INFO-level logging so that messages emitted by GitPython are shown.
logging.basicConfig(level=logging.INFO)
# SHA of an arbitrary existing commit whose tree is empty. It is used by
# merge_files_and_create_commit() to build an index with no staged files.
EMPTY_COMMIT_SHA = "d85bc16ba1cdcc20bec6fcbfe46dc90f9fcd2f78"
def validate_and_remove_updated_entries(
        entries: List[ExpectedUpstreamEntry],
        repo: Repo) -> List[ExpectedUpstreamEntry]:
    """Returns the entries whose destination file needs to be updated.

    Each entry is validated by resolving its git_ref and src_path in the git
    database. An entry is kept in the result when its dst_path is missing from
    the tree of HEAD, or when the blob at dst_path in HEAD differs from the
    blob at src_path in the entry's git_ref.

    Args:
      entries: the entries to be checked
      repo: the repository object

    Returns:
      The subset of entries, in the input order, that are out of date.

    Raises:
      Exception: whatever the git lookups raise for an invalid entry. The
        offending entry is printed to stderr before the exception is re-raised.
    """
    head_tree = repo.head.commit.tree
    result: List[ExpectedUpstreamEntry] = []
    for e in entries:
        try:
            # The following steps validate each entry by querying the git
            # database; a failed lookup surfaces as an exception.
            commit = repo.commit(e.git_ref)
            source_blob = commit.tree.join(e.src_path)
            if not has_file_in_tree(e.dst_path, head_tree):
                # Add the entry if the file is missing in the HEAD
                result.append(e)
                continue

            dst_blob = head_tree.join(e.dst_path)
            # Add the entry if the content is different. Git object ids are
            # content hashes, so comparing them is equivalent to comparing the
            # file contents without loading both files into memory.
            if source_blob.binsha != dst_blob.binsha:
                result.append(e)
        except Exception:
            # Only report real errors here; KeyboardInterrupt and SystemExit
            # are not problems with the entry and propagate untouched.
            print(f"ERROR: reading entry: {e}", file=sys.stderr)
            raise
    return result
def partition_entries_by_ref(
        entries: List[ExpectedUpstreamEntry]) -> List[List[ExpectedUpstreamEntry]]:
    """Groups the entries by their git_ref.

    Args:
      entries: the entries to be partitioned

    Returns:
      One list per distinct git_ref. The groups are ordered by the first
      occurrence of their ref, and entries keep their input order in a group.
    """
    groups = {}
    for entry in entries:
        # setdefault() creates the bucket the first time a ref is seen.
        groups.setdefault(entry.git_ref, []).append(entry)
    return list(groups.values())
# Path of this script relative to LIBCORE_DIR. It is embedded into the
# generated commit messages below.
THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

# Commit message templates. The {summary}, {ref} and {files} placeholders are
# filled in later with str.format(), while the tool path is interpolated
# immediately by the f-string fragments.
MSG_FIRST_COMMIT = ("Import {summary} from {ref}\n"
                    "\n"
                    "List of files:\n"
                    " {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}"
                    "\n"
                    "Test: N/A")

# The second fragment of the subject line must not start with a space: the
# first fragment already ends with one, and the subject would otherwise read
# "into the  expected_upstream branch".
MSG_SECOND_COMMIT = ("Merge {summary} from {ref} into the "
                     "expected_upstream branch\n"
                     "\n"
                     "List of files:\n"
                     " {files}\n"
                     "\n"
                     f"Generated by {THIS_TOOL_PATH}"
                     "\n"
                     "Test: N/A")
def merge_files_and_create_commit(entry_set: List[ExpectedUpstreamEntry],
                                  repo: Repo) -> None:
    r"""Create the commits importing the given files into the current branch.

    `--------<ref>---------------   aosp/upstream_openjdkXXX
               \
          <first_commit>
                \
    -------<second_commit>------   expected_upstream

    This function creates the 2 commits, i.e. first_commit and second_commit,
    in the diagram. The goal is to check out the subset of files specified in
    the entry_set, and merge them into the expected_upstream branch in order
    to keep the git-blame history of the individual files. first_commit is
    needed in order to move the files specified in the entry_set.

    In the implementation, first_commit isn't really modified from the ref,
    but created from an empty tree, and all files in entry_set will be added
    into the first_commit. second_commit is a merge commit modified from the
    parent in the expected_upstream branch, and any file contents in the first
    commit will override the file content in the second commit.

    You may reference the following git commands for understanding which
    should create the same commits, but the python implementation is cleaner,
    because it doesn't create a new branch.

    first_commit:
      git checkout -b temp_branch <entry.git_ref>
      rm -r * .jcheck/ .hgignore .hgtags # Remove hidden files
      git checkout <entry.git_ref> <entry.src_path>
      mkdir -p <entry.dst_path>.directory && git mv <entry.src_path>
      <entry.dst_path>
      git commit -a

    second_commit:
      git merge temp_branch
      git checkout HEAD -- ojluni/ # Force checkout to resolve merge conflict
      git checkout temp_branch -- <entry.dst_path>
      git commit

    Side effects: the destination files are written into the working tree of
    the repository, the active branch is moved to second_commit, and the
    current index is reset for the destination paths.

    Args:
      entry_set: a non-empty list of entries. The git_ref of the first entry
        is used as the upstream commit for every entry in the list.
      repo: the repository object

    Raises:
      IndexError: if entry_set is empty.
    """
    ref = entry_set[0].git_ref
    upstream_commit = repo.commit(ref)

    # We need an index empty initially, i.e. no staged files.
    # Note that the empty commit is not the parent. The parents are set when
    # the commit is created below.
    first_index = IndexFile.from_tree(repo, repo.commit(EMPTY_COMMIT_SHA))

    # IndexFile.add() resolves a relative path against the working tree of the
    # repository. Write the files there as well, instead of relative to the
    # current working directory, so that the tool behaves the same no matter
    # where it is invoked from.
    worktree_root = Path(repo.working_tree_dir)
    for entry in entry_set:
        src_blob = upstream_commit.tree[entry.src_path]
        # Write into the file system directly because GitPython provides no
        # API writing into the index in memory. IndexFile.move doesn't help
        # here, because the API requires the file on the working tree too.
        # However, it's fine, because we later reset the HEAD to the second
        # commit. The user expects the file showing in the file system, and
        # the file is not staged/untracked because the file is in the second
        # commit too.
        absolute_dst_path = worktree_root / entry.dst_path
        absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
        with absolute_dst_path.open("wb") as file:
            file.write(src_blob.data_stream.read())
        first_index.add(entry.dst_path)

    dst_paths = [e.dst_path for e in entry_set]
    str_dst_paths = "\n ".join(dst_paths)
    # A single file is summarized by its name without the extension.
    summary_msg = "files"
    if len(entry_set) == 1:
        summary_msg = Path(entry_set[0].dst_path).stem
    msg = MSG_FIRST_COMMIT.format(
        summary=summary_msg, ref=ref, files=str_dst_paths)

    # head=False: the first commit must not move the current branch.
    first_commit = first_index.commit(
        message=msg, parent_commits=[upstream_commit], head=False)

    # The second commit is a merge commit. It doesn't use the current index,
    # i.e. repo.index, to avoid affecting the current staged files.
    prev_head = repo.active_branch.commit
    second_index = IndexFile.from_tree(repo, prev_head)
    # Every blob of the first commit overrides the same path taken from the
    # previous HEAD.
    blob_filter = lambda obj, i: isinstance(obj, Blob)
    blobs = first_commit.tree.traverse(blob_filter)
    second_index.add(blobs)
    msg = MSG_SECOND_COMMIT.format(
        summary=summary_msg, ref=ref, files=str_dst_paths)
    second_commit = second_index.commit(
        message=msg, parent_commits=[prev_head, first_commit], head=True)

    # We updated the HEAD to the second commit. Thus, git-reset updates the
    # current index. Otherwise, the current index, aka, repo.index, shows that
    # the files are deleted.
    repo.index.reset(paths=dst_paths)

    print(f"New merge commit {second_commit} contains:")
    print(f" {str_dst_paths}")
def create_commits(repo: Repo) -> None:
    """Create the commits importing files according to the EXPECTED_UPSTREAM.

    Nothing is created unless the active branch tracks aosp/expected_upstream.
    Otherwise, the EXPECTED_UPSTREAM file is read, the entries whose files are
    out of date are grouped by git ref, and a pair of commits is created for
    each group.

    Args:
      repo: the repository object
    """
    current_tracking_branch = repo.active_branch.tracking_branch()
    # tracking_branch() returns None when the active branch tracks nothing.
    if (current_tracking_branch is None
            or current_tracking_branch.name != "aosp/expected_upstream"):
        print("This script should only run on aosp/expected_upstream branch. "
              f"Currently, this is on branch {repo.active_branch} "
              f"tracking {current_tracking_branch}")
        # Stop here; otherwise the merge commits would be created on top of
        # an unrelated branch.
        return

    print("Reading EXPECTED_UPSTREAM file...")
    expected_upstream_entries = ExpectedUpstreamFile().read_all_entries()

    outdated_entries = validate_and_remove_updated_entries(
        expected_upstream_entries, repo)

    if not outdated_entries:
        print("No need to update. All files are updated.")
        return

    print("The following entries will be updated from upstream")
    for e in outdated_entries:
        print(f" {e.dst_path}")

    # Entries sharing a git ref are imported together by one pair of commits.
    entry_sets_to_be_merged = partition_entries_by_ref(outdated_entries)

    for entry_set in entry_sets_to_be_merged:
        merge_files_and_create_commit(entry_set, repo)
def main():
    """Opens the repository at LIBCORE_DIR and creates the import commits."""
    libcore_repo_path = LIBCORE_DIR.as_posix()
    libcore_repo = Repo(libcore_repo_path)
    try:
        create_commits(libcore_repo)
    finally:
        # Close the repository object whether or not create_commits() raised.
        libcore_repo.close()
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()