feat: support pip_parse in Gazelle (#633)

* feat: support pip_parse in Gazelle

Signed-off-by: Thulio Ferraz Assis <3149049+f0rmiga@users.noreply.github.com>

* Apply suggestions from code review
diff --git a/.gitignore b/.gitignore
index 9df0fb8..a68c6f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,3 +47,6 @@
 # These otherwise match patterns above
 !go.mod
 !BUILD.out
+
+# Python cache
+**/__pycache__/
diff --git a/examples/build_file_generation/BUILD b/examples/build_file_generation/BUILD
index 3e6d44a..ef9e967 100644
--- a/examples/build_file_generation/BUILD
+++ b/examples/build_file_generation/BUILD
@@ -3,7 +3,7 @@
 load("@rules_python//gazelle:def.bzl", "GAZELLE_PYTHON_RUNTIME_DEPS")
 load("@rules_python//gazelle/manifest:defs.bzl", "gazelle_python_manifest")
 load("@rules_python//gazelle/modules_mapping:def.bzl", "modules_mapping")
-load("@rules_python//python:defs.bzl", "py_library")
+load("@rules_python//python:defs.bzl", "py_binary", "py_library")
 
 # This rule fetches the metadata for python packages we depend on. That data is
 # required for the gazelle_python_manifest rule to update our manifest file.
@@ -22,7 +22,8 @@
 gazelle_python_manifest(
     name = "gazelle_python_manifest",
     modules_mapping = ":modules_map",
-    pip_deps_repository_name = "pip",
+    pip_repository_incremental = True,
+    pip_repository_name = "pip",
     requirements = "//:requirements_lock.txt",
 )
 
@@ -43,4 +44,13 @@
     name = "build_file_generation",
     srcs = ["__init__.py"],
     visibility = ["//:__subpackages__"],
+    deps = ["@pip_requests//:pkg"],
+)
+
+py_binary(
+    name = "build_file_generation_bin",
+    srcs = ["__main__.py"],
+    main = "__main__.py",
+    visibility = ["//:__subpackages__"],
+    deps = [":build_file_generation"],
 )
diff --git a/examples/build_file_generation/WORKSPACE b/examples/build_file_generation/WORKSPACE
index c58b50f..51c923f 100644
--- a/examples/build_file_generation/WORKSPACE
+++ b/examples/build_file_generation/WORKSPACE
@@ -23,11 +23,11 @@
 # to include https://github.com/bazelbuild/bazel-gazelle/commit/2834ea4
 http_archive(
     name = "bazel_gazelle",
-    sha256 = "0bb8056ab9ed4cbcab5b74348d8530c0e0b939987b0cfe36c1ab53d35a99e4de",
-    strip_prefix = "bazel-gazelle-2834ea44b3ec6371c924baaf28704730ec9d4559",
+    sha256 = "fd8d852ebcb770b41c1c933fc3085b4a23e1426a1af4e791d39b67bb8d894eb7",
+    strip_prefix = "bazel-gazelle-41b542f9b0fefe916a95ca5460458abf916f5fe5",
     urls = [
         # No release since March, and we need subsequent fixes
-        "https://github.com/bazelbuild/bazel-gazelle/archive/2834ea44b3ec6371c924baaf28704730ec9d4559.zip",
+        "https://github.com/bazelbuild/bazel-gazelle/archive/41b542f9b0fefe916a95ca5460458abf916f5fe5.zip",
     ],
 )
 
@@ -48,13 +48,17 @@
     path = "../..",
 )
 
-load("@rules_python//python:pip.bzl", "pip_install")
+load("@rules_python//python:pip.bzl", "pip_parse")
 
-pip_install(
-    # Uses the default repository name "pip"
-    requirements = "//:requirements_lock.txt",
+pip_parse(
+    name = "pip",
+    requirements_lock = "//:requirements_lock.txt",
 )
 
+load("@pip//:requirements.bzl", "install_deps")
+
+install_deps()
+
 # The rules_python gazelle extension has some third-party go dependencies
 # which we need to fetch in order to compile it.
 load("@rules_python//gazelle:deps.bzl", _py_gazelle_deps = "gazelle_deps")
diff --git a/examples/build_file_generation/__init__.py b/examples/build_file_generation/__init__.py
index 11b15b1..851fefb 100644
--- a/examples/build_file_generation/__init__.py
+++ b/examples/build_file_generation/__init__.py
@@ -1 +1,5 @@
-print("hello")
+import requests
+
+def main(url):
+    r = requests.get(url)
+    print(r.text)
diff --git a/examples/build_file_generation/__main__.py b/examples/build_file_generation/__main__.py
new file mode 100644
index 0000000..8b6189c
--- /dev/null
+++ b/examples/build_file_generation/__main__.py
@@ -0,0 +1,5 @@
+from __init__ import main
+
+
+if __name__ == "__main__":
+    main("https://example.com")
diff --git a/examples/build_file_generation/gazelle_python.yaml b/examples/build_file_generation/gazelle_python.yaml
index 39eaccc..a005b43 100644
--- a/examples/build_file_generation/gazelle_python.yaml
+++ b/examples/build_file_generation/gazelle_python.yaml
@@ -126,5 +126,7 @@
     urllib3.util.timeout: urllib3
     urllib3.util.url: urllib3
     urllib3.util.wait: urllib3
-  pip_deps_repository_name: pip
-integrity: 575d259c512b4b80f9923d1623d2aae3038654b731a4e088bf268e01138b6411
+  pip_repository:
+    name: pip
+    incremental: true
+integrity: c47bf2ca0a185cf6b8815d4a61e26e7457564e931de76c70653277e4eccfadc8
diff --git a/gazelle/README.md b/gazelle/README.md
index 2058458..7d138e3 100644
--- a/gazelle/README.md
+++ b/gazelle/README.md
@@ -58,7 +58,9 @@
     modules_mapping = ":modules_map",
     # This is what we called our `pip_install` rule, where third-party
     # python libraries are loaded in BUILD files.
-    pip_deps_repository_name = "pip",
+    pip_repository_name = "pip",
+    # When using pip_parse instead of pip_install, set the following.
+    # pip_repository_incremental = True,
     # This should point to wherever we declare our python dependencies
     # (the same as what we passed to the modules_mapping rule in WORKSPACE)
     requirements = "//:requirements_lock.txt",
diff --git a/gazelle/manifest/defs.bzl b/gazelle/manifest/defs.bzl
index fd555db..a675e25 100644
--- a/gazelle/manifest/defs.bzl
+++ b/gazelle/manifest/defs.bzl
@@ -7,21 +7,52 @@
 def gazelle_python_manifest(
         name,
         requirements,
-        pip_deps_repository_name,
         modules_mapping,
+        pip_repository_name = "",
+        pip_repository_incremental = False,
+        pip_deps_repository_name = "",
         manifest = ":gazelle_python.yaml"):
     """A macro for defining the updating and testing targets for the Gazelle manifest file.
 
     Args:
         name: the name used as a base for the targets.
         requirements: the target for the requirements.txt file.
-        pip_deps_repository_name: the name of the pip_install repository target.
+        pip_repository_name: the name of the pip_install or pip_repository target.
+        pip_repository_incremental: whether the pip repository is incremental; set to True when using pip_parse instead of pip_install.
+        pip_deps_repository_name: deprecated - the old pip_install target name.
         modules_mapping: the target for the generated modules_mapping.json file.
         manifest: the target for the Gazelle manifest file.
     """
+    if pip_deps_repository_name != "":
+        # buildifier: disable=print
+        print("DEPRECATED pip_deps_repository_name in //{}:{}. Please use pip_repository_name instead.".format(
+            native.package_name(),
+            name,
+        ))
+        pip_repository_name = pip_deps_repository_name
+
+    if pip_repository_name == "":
+        # Temporary check: pip_repository_name is optional only while the deprecated pip_deps_repository_name is still accepted.
+        fail("pip_repository_name must be set in //{}:{}".format(native.package_name(), name))
+
     update_target = "{}.update".format(name)
     update_target_label = "//{}:{}".format(native.package_name(), update_target)
 
+    update_args = [
+        "--requirements",
+        "$(rootpath {})".format(requirements),
+        "--pip-repository-name",
+        pip_repository_name,
+        "--modules-mapping",
+        "$(rootpath {})".format(modules_mapping),
+        "--output",
+        "$(rootpath {})".format(manifest),
+        "--update-target",
+        update_target_label,
+    ]
+    if pip_repository_incremental:
+        update_args.append("--pip-repository-incremental")
+
     go_binary(
         name = update_target,
         embed = ["@rules_python//gazelle/manifest/generate:generate_lib"],
@@ -30,18 +61,7 @@
             modules_mapping,
             requirements,
         ],
-        args = [
-            "--requirements",
-            "$(rootpath {})".format(requirements),
-            "--pip-deps-repository-name",
-            pip_deps_repository_name,
-            "--modules-mapping",
-            "$(rootpath {})".format(modules_mapping),
-            "--output",
-            "$(rootpath {})".format(manifest),
-            "--update-target",
-            update_target_label,
-        ],
+        args = update_args,
         visibility = ["//visibility:private"],
         tags = ["manual"],
     )
diff --git a/gazelle/manifest/generate/generate.go b/gazelle/manifest/generate/generate.go
index 1ed91bf..04d7441 100644
--- a/gazelle/manifest/generate/generate.go
+++ b/gazelle/manifest/generate/generate.go
@@ -25,7 +25,8 @@
 
 func main() {
 	var requirementsPath string
-	var pipDepsRepositoryName string
+	var pipRepositoryName string
+	var pipRepositoryIncremental bool
 	var modulesMappingPath string
 	var outputPath string
 	var updateTarget string
@@ -35,10 +36,15 @@
 		"",
 		"The requirements.txt file.")
 	flag.StringVar(
-		&pipDepsRepositoryName,
-		"pip-deps-repository-name",
+		&pipRepositoryName,
+		"pip-repository-name",
 		"",
-		"The name of the pip_install repository target.")
+		"The name of the pip_install or pip_repository target.")
+	flag.BoolVar(
+		&pipRepositoryIncremental,
+		"pip-repository-incremental",
+		false,
+		"The value for the incremental option in pip_repository.")
 	flag.StringVar(
 		&modulesMappingPath,
 		"modules-mapping",
@@ -80,8 +86,11 @@
 	header := generateHeader(updateTarget)
 
 	manifestFile := manifest.NewFile(&manifest.Manifest{
-		ModulesMapping:        modulesMapping,
-		PipDepsRepositoryName: pipDepsRepositoryName,
+		ModulesMapping: modulesMapping,
+		PipRepository: &manifest.PipRepository{
+			Name:        pipRepositoryName,
+			Incremental: pipRepositoryIncremental,
+		},
 	})
 	if err := writeOutput(outputPath, header, manifestFile, requirementsPath); err != nil {
 		log.Fatalf("ERROR: %v\n", err)
@@ -142,4 +151,4 @@
 	}
 
 	return nil
-}
\ No newline at end of file
+}
diff --git a/gazelle/manifest/manifest.go b/gazelle/manifest/manifest.go
index 4d432da..b92706a 100644
--- a/gazelle/manifest/manifest.go
+++ b/gazelle/manifest/manifest.go
@@ -100,7 +100,18 @@
 	// wheel name provides these modules.
 	ModulesMapping map[string]string `yaml:"modules_mapping"`
 	// PipDepsRepositoryName is the name of the pip_install repository target.
-	PipDepsRepositoryName string `yaml:"pip_deps_repository_name"`
+	// Deprecated: use PipRepository instead.
+	PipDepsRepositoryName string `yaml:"pip_deps_repository_name,omitempty"`
+	// PipRepository contains the information for pip_install or pip_repository
+	// target.
+	PipRepository *PipRepository `yaml:"pip_repository,omitempty"`
+}
+
+type PipRepository struct {
+	// The name of the pip_install or pip_repository target.
+	Name string
+	// The incremental property of pip_repository.
+	Incremental bool
 }
 
 // sha256File calculates the checksum of a given file path.
diff --git a/gazelle/pythonconfig/BUILD.bazel b/gazelle/pythonconfig/BUILD.bazel
index 4fab8c9..cff75d9 100644
--- a/gazelle/pythonconfig/BUILD.bazel
+++ b/gazelle/pythonconfig/BUILD.bazel
@@ -10,6 +10,7 @@
     visibility = ["//visibility:public"],
     deps = [
         "//gazelle/manifest",
+        "@bazel_gazelle//label:go_default_library",
         "@com_github_emirpasic_gods//lists/singlylinkedlist",
     ],
 )
diff --git a/gazelle/pythonconfig/pythonconfig.go b/gazelle/pythonconfig/pythonconfig.go
index 550e66b..7e65fd9 100644
--- a/gazelle/pythonconfig/pythonconfig.go
+++ b/gazelle/pythonconfig/pythonconfig.go
@@ -7,6 +7,7 @@
 
 	"github.com/emirpasic/gods/lists/singlylinkedlist"
 
+	"github.com/bazelbuild/bazel-gazelle/label"
 	"github.com/bazelbuild/rules_python/gazelle/manifest"
 )
 
@@ -141,7 +142,6 @@
 		extensionEnabled:         c.extensionEnabled,
 		repoRoot:                 c.repoRoot,
 		pythonProjectRoot:        c.pythonProjectRoot,
-		gazelleManifest:          c.gazelleManifest,
 		excludedPatterns:         c.excludedPatterns,
 		ignoreFiles:              make(map[string]struct{}),
 		ignoreDependencies:       make(map[string]struct{}),
@@ -190,20 +190,37 @@
 	c.gazelleManifest = gazelleManifest
 }
 
-// PipRepository returns the pip repository name from the manifest.
-func (c *Config) PipRepository() string {
-	if c.gazelleManifest != nil {
-		return c.gazelleManifest.PipDepsRepositoryName
+// FindThirdPartyDependency scans the gazelle manifests of the current config
+// and of its parent configs, up to the root, for a mapping of the module name.
+// It returns the Bazel label of the providing distribution and true if found.
+func (c *Config) FindThirdPartyDependency(modName string) (string, bool) {
+	for currentCfg := c; currentCfg != nil; currentCfg = currentCfg.parent {
+		if currentCfg.gazelleManifest != nil {
+			gazelleManifest := currentCfg.gazelleManifest
+			if distributionName, ok := gazelleManifest.ModulesMapping[modName]; ok {
+				var distributionRepositoryName string
+				if gazelleManifest.PipDepsRepositoryName != "" {
+					distributionRepositoryName = gazelleManifest.PipDepsRepositoryName
+				} else if gazelleManifest.PipRepository != nil {
+					distributionRepositoryName = gazelleManifest.PipRepository.Name
+				}
+				sanitizedDistribution := strings.ToLower(distributionName)
+				sanitizedDistribution = strings.ReplaceAll(sanitizedDistribution, "-", "_")
+				var lbl label.Label
+				if gazelleManifest.PipRepository != nil && gazelleManifest.PipRepository.Incremental {
+					// @<repository_name>_<distribution_name>//:pkg
+					distributionRepositoryName = distributionRepositoryName + "_" + sanitizedDistribution
+					lbl = label.New(distributionRepositoryName, "", "pkg")
+				} else {
+					// @<repository_name>//pypi__<distribution_name>
+					distributionPackage := "pypi__" + sanitizedDistribution
+					lbl = label.New(distributionRepositoryName, distributionPackage, distributionPackage)
+				}
+				return lbl.String(), true
+			}
+		}
 	}
-	return ""
-}
-
-// ModulesMapping returns the modules mapping from the manifest.
-func (c *Config) ModulesMapping() map[string]string {
-	if c.gazelleManifest != nil {
-		return c.gazelleManifest.ModulesMapping
-	}
-	return map[string]string{}
+	return "", false
 }
 
 // AddIgnoreFile adds a file to the list of ignored files for a given package.
diff --git a/gazelle/resolve.go b/gazelle/resolve.go
index b3bdda1..8f68dfd 100644
--- a/gazelle/resolve.go
+++ b/gazelle/resolve.go
@@ -137,8 +137,6 @@
 		cfg := cfgs[from.Pkg]
 		pythonProjectRoot := cfg.PythonProjectRoot()
 		modules := modulesRaw.(*treeset.Set)
-		pipRepository := cfg.PipRepository()
-		modulesMapping := cfg.ModulesMapping()
 		it := modules.Iterator()
 		explainDependency := os.Getenv("EXPLAIN_DEPENDENCY")
 		hasFatalError := false
@@ -164,15 +162,13 @@
 					}
 				}
 			} else {
-				if distribution, ok := modulesMapping[mod.Name]; ok {
-					distributionPackage := rulesPythonDistributionPackage(distribution)
-					dep := label.New(pipRepository, distributionPackage, distributionPackage).String()
+				if dep, ok := cfg.FindThirdPartyDependency(mod.Name); ok {
 					deps.Add(dep)
 					if explainDependency == dep {
 						log.Printf("Explaining dependency (%s): "+
 							"in the target %q, the file %q imports %q at line %d, "+
 							"which resolves from the third-party module %q from the wheel %q.\n",
-							explainDependency, from.String(), mod.Filepath, mod.Name, mod.LineNumber, mod.Name, distribution)
+							explainDependency, from.String(), mod.Filepath, mod.Name, mod.LineNumber, mod.Name, dep)
 					}
 				} else {
 					matches := ix.FindRulesByImportWithConfig(c, imp, languageName)
@@ -255,17 +251,6 @@
 	}
 }
 
-// rulesPythonDistributionPackage builds a token that mimics how the
-// rules_python does it for the generated requirement function. By doing this,
-// we avoid having to generate the load statement for this function and the
-// third-party dependency becomes an explicit Bazel target.
-// https://github.com/bazelbuild/rules_python/blob/c639955c/packaging/piptool.py#L238-L245
-func rulesPythonDistributionPackage(distribution string) string {
-	sanitizedDistribution := strings.ToLower(distribution)
-	sanitizedDistribution = strings.ReplaceAll(sanitizedDistribution, "-", "_")
-	return "pypi__" + sanitizedDistribution
-}
-
 // targetListFromResults returns a string with the human-readable list of
 // targets contained in the given results.
 func targetListFromResults(results []resolve.FindResult) string {
diff --git a/gazelle/testdata/monorepo/coarse_grained/packages_mapping.json b/gazelle/testdata/monorepo/coarse_grained/packages_mapping.json
deleted file mode 100644
index fe89518..0000000
--- a/gazelle/testdata/monorepo/coarse_grained/packages_mapping.json
+++ /dev/null
@@ -1 +0,0 @@
-{ "boto3": "threeboto3" }
diff --git a/gazelle/testdata/monorepo/gazelle_python.yaml b/gazelle/testdata/monorepo/gazelle_python.yaml
index 527b6ea..5f7922f 100644
--- a/gazelle/testdata/monorepo/gazelle_python.yaml
+++ b/gazelle/testdata/monorepo/gazelle_python.yaml
@@ -1,4 +1,5 @@
 manifest:
   modules_mapping:
     boto3: rootboto3
+    boto4: rootboto4
   pip_deps_repository_name: root_pip_deps
diff --git a/gazelle/testdata/monorepo/three/BUILD.out b/gazelle/testdata/monorepo/three/BUILD.out
index bbb03b1..0da269d 100644
--- a/gazelle/testdata/monorepo/three/BUILD.out
+++ b/gazelle/testdata/monorepo/three/BUILD.out
@@ -15,6 +15,7 @@
         "//one/bar",
         "//one/bar/baz:modified_name_baz",
         "//one/foo",
-        "@three_pip_deps//pypi__threeboto3",
+        "@root_pip_deps//pypi__rootboto4",
+        "@three_pip_deps_threeboto3//:pkg",
     ],
 )
diff --git a/gazelle/testdata/monorepo/three/__init__.py b/gazelle/testdata/monorepo/three/__init__.py
index fe955f6..6f12bd8 100644
--- a/gazelle/testdata/monorepo/three/__init__.py
+++ b/gazelle/testdata/monorepo/three/__init__.py
@@ -2,12 +2,14 @@
 
 import bar.baz.hue as hue
 import boto3
+import boto4
 from bar import bar
 from bar.baz import baz
 from foo import foo
 
 _ = os
 _ = boto3
+_ = boto4
 _ = bar
 _ = baz
 _ = foo
diff --git a/gazelle/testdata/monorepo/three/gazelle_python.yaml b/gazelle/testdata/monorepo/three/gazelle_python.yaml
index 572216c..d46a88f 100644
--- a/gazelle/testdata/monorepo/three/gazelle_python.yaml
+++ b/gazelle/testdata/monorepo/three/gazelle_python.yaml
@@ -1,4 +1,6 @@
 manifest:
   modules_mapping:
     boto3: threeboto3
-  pip_deps_repository_name: three_pip_deps
+  pip_repository:
+    name: three_pip_deps
+    incremental: true