pulumi/sdk/go/common/util/gitutil/git.go

631 lines
20 KiB
Go

// Copyright 2016-2018, Pulumi Corporation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gitutil
import (
"errors"
"fmt"
"net/url"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strings"
"github.com/go-git/go-git/v5/plumbing/transport/http"
git "github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/config"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/storage/memory"
"github.com/pulumi/pulumi/sdk/v3/go/common/util/fsutil"
"github.com/pulumi/pulumi/sdk/v3/go/common/util/logging"
)
// VCSKind represents the hostname of a specific type of VCS,
// e.g. github.com, gitlab.com, etc.
//
// It is declared as an alias of string rather than as a distinct type, so
// VCSKind values and plain strings can be used interchangeably.
type VCSKind = string
// Constants related to detecting the right type of source control provider for git.
const (
// defaultGitCloudRepositorySuffix is the ".git" suffix that is trimmed from, or located in,
// repository paths while parsing remote URLs.
defaultGitCloudRepositorySuffix = ".git"
// GitLabHostName is the host name for GitLab.
GitLabHostName VCSKind = "gitlab.com"
// GitHubHostName is the host name for GitHub.
GitHubHostName VCSKind = "github.com"
// AzureDevOpsHostName is the host name for Azure DevOps.
AzureDevOpsHostName VCSKind = "dev.azure.com"
// BitbucketHostName is the host name for Bitbucket.
BitbucketHostName VCSKind = "bitbucket.org"
)
// The pre-compiled regexes used to extract the host, owner and repo name from git remote URLs.
// Note: If you are renaming any of the group names in a regex (the ?P<group_name> part) to something else,
// be sure to update its usage elsewhere in the code as well.
// The nolint directives prevent the linter from complaining about the length of the lines.
var (
// cloudSourceControlSSHRegex matches SSH remotes shaped like git@host:owner/repo.git (optionally
// followed by one trailing character). The host is captured as host_name and "owner/repo.git"
// as owner_and_repo; owner and repo may not themselves contain a "/".
cloudSourceControlSSHRegex = regexp.MustCompile(`git@(?P<host_name>[a-zA-Z.-]*\.[a-zA-Z]+):(?P<owner_and_repo>[^/]+/[^/]+\.git).?$`) //nolint
// azureSourceControlSSHRegex matches SSH remotes that may carry one extra leading host label and an
// optional single-digit "vN/" prefix after the colon. The host is captured as host_name and
// everything after the optional version prefix as owner_and_repo.
azureSourceControlSSHRegex = regexp.MustCompile(`git@([a-zA-Z]+\.)?(?P<host_name>([a-zA-Z]+\.)*[a-zA-Z]*\.[a-zA-Z]+):(v[0-9]{1}/)?(?P<owner_and_repo>.*)`) //nolint
// legacyAzureSourceControlRegex matches host names ending in "visualstudio.com" and captures the
// label in front of it as owner.
// NOTE(review): the dots in this pattern are unescaped, so they match any single character.
legacyAzureSourceControlRegex = regexp.MustCompile("(?P<owner>[a-zA-Z0-9-]*).visualstudio.com$")
)
// VCSInfo describes a cloud-hosted version control system.
// Cloud-hosted VCSs typically have an owner (which could be an organization)
// to whom the repo belongs.
type VCSInfo struct {
// Owner is the user or organization that the repository belongs to.
Owner string
// Repo is the repository name or path. TryGetVCSInfo populates it without a trailing ".git".
Repo string
// Kind is the host name of the VCS provider, e.g. github.com.
Kind VCSKind
}
// GetGitRepository opens the git repository that contains dir, locating it by walking up from
// dir until an entry named ".git" is found.
// When no repository can be located the result is (nil, nil).
func GetGitRepository(dir string) (*git.Repository, error) {
	isDotGit := func(candidate string) bool {
		return filepath.Base(candidate) == ".git"
	}
	dotGitPath, err := fsutil.WalkUp(dir, isDotGit, nil)
	if err != nil {
		return nil, fmt.Errorf("searching for git repository from %v: %w", dir, err)
	}
	if dotGitPath == "" {
		return nil, nil
	}

	// The repository is opened from the directory holding .git, not from .git itself.
	openOpts := &git.PlainOpenOptions{EnableDotGitCommonDir: true}
	repo, err := git.PlainOpenWithOptions(filepath.Dir(dotGitPath), openOpts)
	switch {
	case err == git.ErrRepositoryNotExists:
		return nil, nil
	case err != nil:
		return nil, fmt.Errorf("reading git repository: %w", err)
	}
	return repo, nil
}
// GetGitHubProjectForOrigin returns the owner, repo name and VCS kind parsed from the URL of the
// "origin" remote of the git repository that contains dir.
//
// An error is returned when looking up the repository fails, when no repository is found from
// dir, when the "origin" remote cannot be read, or when its URL cannot be parsed.
func GetGitHubProjectForOrigin(dir string) (*VCSInfo, error) {
	repo, err := GetGitRepository(dir)
	// The error must be checked before the nil repo: GetGitRepository returns a nil repository
	// together with every error, so testing repo first would hide the real failure behind a
	// misleading "no git repository found" message.
	if err != nil {
		return nil, err
	}
	if repo == nil {
		return nil, fmt.Errorf("no git repository found from %v", dir)
	}

	remoteURL, err := GetGitRemoteURL(repo, "origin")
	if err != nil {
		return nil, err
	}
	return TryGetVCSInfo(remoteURL)
}
// GetGitRemoteURL looks up the remote called remoteName in repo and returns the first URL
// configured for it, or the empty string when the remote has no URLs.
func GetGitRemoteURL(repo *git.Repository, remoteName string) (string, error) {
	remote, err := repo.Remote(remoteName)
	if err != nil {
		return "", fmt.Errorf("could not read origin information: %w", err)
	}

	urls := remote.Config().URLs
	if len(urls) == 0 {
		return "", nil
	}
	return urls[0], nil
}
// IsGitOriginURLGitHub reports whether remoteURL is detected as a GitHub URL, which is the case
// when the GitHub host name occurs anywhere within it.
func IsGitOriginURLGitHub(remoteURL string) bool {
	mentionsGitHub := strings.Contains(remoteURL, GitHubHostName)
	return mentionsGitHub
}
// TryGetVCSInfo attempts to detect whether the provided remoteURL
// is an SSH or an HTTPS remote URL. It then extracts the repo, owner name,
// and the type (kind) of VCS from it.
//
// Azure DevOps remotes get special treatment: the repo is reported as `project/_git/repo`
// (the `_git` segment is inserted when missing), and legacy `<owner>.visualstudio.com` hosts
// are reported with the Azure DevOps host name as their kind.
//
// An error is returned when no project path can be extracted from remoteURL or when the path
// does not have enough segments to identify an owner and a repo.
func TryGetVCSInfo(remoteURL string) (*VCSInfo, error) {
	project := ""
	vcsKind := ""

	// If the remote is using git SSH, then we extract the named groups by matching
	// with the pre-compiled regex pattern.
	if strings.HasPrefix(remoteURL, "git@") {
		// Most cloud-hosted VCS have the ssh URL of the format git@somehostname.com:owner/repo
		if cloudSourceControlSSHRegex.MatchString(remoteURL) {
			groups := getMatchedGroupsFromRegex(cloudSourceControlSSHRegex, remoteURL)
			vcsKind = groups["host_name"]
			project = groups["owner_and_repo"]
			project = strings.TrimSuffix(project, defaultGitCloudRepositorySuffix)
		} else if azureSourceControlSSHRegex.MatchString(remoteURL) {
			// Azure's DevOps service uses a git SSH url, that is completely different
			// from the rest of the services.
			groups := getMatchedGroupsFromRegex(azureSourceControlSSHRegex, remoteURL)
			vcsKind = groups["host_name"]
			project = groups["owner_and_repo"]
			project = strings.TrimSuffix(project, defaultGitCloudRepositorySuffix)
		}
	} else if strings.HasPrefix(remoteURL, "http") {
		// This could be an HTTP(S)-based remote.
		if parsedURL, err := url.Parse(remoteURL); err == nil {
			vcsKind = parsedURL.Host
			project = parsedURL.Path
			// Remove the .git extension from the path.
			project = strings.TrimSuffix(project, defaultGitCloudRepositorySuffix)
			// Remove the prefix "/". TrimPrefix returns the same value if there is no prefix.
			// So it is safe to use it instead of doing any sort of substring matches.
			project = strings.TrimPrefix(project, "/")
		}
	}

	if project == "" {
		return nil, fmt.Errorf("detecting the VCS info from the remote URL %v", remoteURL)
	}

	// For Azure, we will have more than 2 parts in the array.
	// Ex: owner/project/repo.git
	if vcsKind == AzureDevOpsHostName {
		azureSplit := strings.SplitN(project, "/", 2)
		// A path holding only the owner (no "/") cannot name a repo. Report it as an error
		// instead of indexing past the end of the slice.
		if len(azureSplit) != 2 {
			return nil, fmt.Errorf("could not detect VCS project from url: %v", remoteURL)
		}

		// Azure DevOps repo links are in the format `owner/project/_git/repo`. Some remote URLs do
		// not include the `_git` piece, which results in the reconstructed URL linking to the
		// project dashboard. To remedy this, we will add the _git portion to the URL if its
		// missing.
		project = azureSplit[1]
		if !strings.Contains(project, "_git") {
			projectSplit := strings.SplitN(project, "/", 2)
			// Both a project and a repo segment are needed to insert `_git` between them.
			if len(projectSplit) != 2 {
				return nil, fmt.Errorf("could not detect VCS project from url: %v", remoteURL)
			}
			project = projectSplit[0] + "/_git/" + projectSplit[1]
		}

		return &VCSInfo{
			Owner: azureSplit[0],
			Repo:  project,
			Kind:  vcsKind,
		}, nil
	}

	// Legacy Azure URLs have the owner as part of the host name. We will convert the Git info to
	// reflect the newer Azure DevOps URLs. This allows the UI to properly construct the repo URL
	// and group it with other projects/stacks that have been pulled with a newer version of the Git
	// URL.
	if legacyAzureSourceControlRegex.MatchString(vcsKind) {
		groups := getMatchedGroupsFromRegex(legacyAzureSourceControlRegex, vcsKind)
		return &VCSInfo{
			Owner: groups["owner"],
			Repo:  project,
			Kind:  AzureDevOpsHostName,
		}, nil
	}

	// Since the vcsKind is not Azure, we can try to detect the other kinds of VCS.
	// We are splitting in two because some VCS providers (e.g. GitLab) allow for
	// subgroups.
	split := strings.SplitN(project, "/", 2)
	if len(split) != 2 {
		return nil, fmt.Errorf("could not detect VCS project from url: %v", remoteURL)
	}

	return &VCSInfo{
		Owner: split[0],
		Repo:  split[1],
		Kind:  vcsKind,
	}, nil
}
// getMatchedGroupsFromRegex runs regex against remoteURL and returns the submatches of the
// first match, keyed by capture-group name.
//
// The whole match and any unnamed groups share the empty-string key (the last one wins), so
// callers should only read named groups. When remoteURL does not match at all, an empty,
// non-nil map is returned.
func getMatchedGroupsFromRegex(regex *regexp.Regexp, remoteURL string) map[string]string {
	groups := map[string]string{}

	// FindStringSubmatch yields nil when there is no match; bail out rather than indexing
	// into a missing result.
	matches := regex.FindStringSubmatch(remoteURL)
	if matches == nil {
		return groups
	}

	// SubexpNames is index-aligned with the submatches: entry 0 is the whole match.
	groupNames := regex.SubexpNames()
	for i, value := range matches {
		groups[groupNames[i]] = value
	}
	return groups
}
// parseAuthURL separates the credentials embedded in remoteURL from the URL itself.
// It returns the URL re-serialized without its user info and, when the URL carried user info,
// a BasicAuth populated with the user name and (if one was given) the password. The returned
// auth is nil for a URL without user info.
func parseAuthURL(remoteURL string) (string, *http.BasicAuth, error) {
	parsed, err := url.Parse(remoteURL)
	if err != nil {
		return "", nil, err
	}
	if parsed.User == nil {
		return parsed.String(), nil, nil
	}

	creds := &http.BasicAuth{Username: parsed.User.Username()}
	if pw, hasPw := parsed.User.Password(); hasPw {
		creds.Password = pw
	}

	// Drop the user info so the credentials do not appear in the returned URL.
	parsed.User = nil
	return parsed.String(), creds, nil
}
// GitCloneAndCheckoutCommit clones the Git repository at url into path and then force-checks
// out the given commit. Credentials embedded in url are used to authenticate the clone.
func GitCloneAndCheckoutCommit(url string, commit plumbing.Hash, path string) error {
	cleanURL, auth, err := parseAuthURL(url)
	if err != nil {
		return err
	}

	cloneOpts := &git.CloneOptions{URL: cleanURL, Auth: auth}
	repo, err := git.PlainClone(path, false, cloneOpts)
	if err != nil {
		return err
	}

	worktree, err := repo.Worktree()
	if err != nil {
		return err
	}

	checkoutOpts := &git.CheckoutOptions{Hash: commit, Force: true}
	return worktree.Checkout(checkoutOpts)
}
// GitCloneOrPull clones the referenceName of the repository at rawurl into path, or pulls when a
// clone already exists there. Repositories hosted on Azure DevOps are handled by the
// system-installed git; every other host goes through go-git.
func GitCloneOrPull(rawurl string, referenceName plumbing.ReferenceName, path string, shallow bool) error {
	logging.V(10).Infof("Attempting to clone from %s at ref %s", rawurl, referenceName)

	parsed, err := url.Parse(rawurl)
	isAzureDevOps := err == nil && parsed.Hostname() == AzureDevOpsHostName
	if !isAzureDevOps {
		return gitCloneOrPull(rawurl, referenceName, path, shallow)
	}

	// system-installed git is used to clone Azure DevOps repositories
	// due to https://github.com/go-git/go-git/issues/64
	return gitCloneOrPullSystemGit(rawurl, referenceName, path, shallow)
}
// gitCloneOrPull clones or updates the specified referenceName (branch or tag) of a Git repository
// using go-git.
//
// When path already holds a repository, the worktree is hard-reset and then pulled instead of
// being cloned again. With shallow set, the clone is limited to a depth of 1. Credentials
// embedded in url are stripped from it and used to authenticate the clone.
// NOTE(review): the pull is issued without those credentials — confirm whether updating an
// existing clone of a private repository is expected to work.
//
// Returns nil on success (including an already up-to-date pull) and otherwise the clone or pull
// error. An "unstaged changes" error is expanded into a report listing the affected files.
func gitCloneOrPull(url string, referenceName plumbing.ReferenceName, path string, shallow bool) error {
	// For shallow clones, use a depth of 1.
	depth := 0
	if shallow {
		depth = 1
	}

	u, auth, err := parseAuthURL(url)
	if err != nil {
		return err
	}

	// Attempt to clone the repo.
	_, cloneErr := git.PlainClone(path, false, &git.CloneOptions{
		URL:           u,
		Auth:          auth,
		ReferenceName: referenceName,
		SingleBranch:  true,
		Depth:         depth,
		Tags:          git.NoTags,
	})

	// If the repo already exists, open it and pull.
	if errors.Is(cloneErr, git.ErrRepositoryAlreadyExists) {
		repo, err := git.PlainOpen(path)
		if err != nil {
			return err
		}

		w, err := repo.Worktree()
		if err != nil {
			return err
		}

		// There are cases where go-git gets confused about files that were included in .gitignore
		// and then later removed from .gitignore and added to the repository, leaving unstaged
		// changes in the working directory after a pull. To address this, we'll first do a hard
		// reset of the worktree before pulling to ensure it's in a good state.
		if err := w.Reset(&git.ResetOptions{
			Mode: git.HardReset,
		}); err != nil {
			return err
		}

		// From here on cloneErr carries the outcome of the pull.
		cloneErr = w.Pull(&git.PullOptions{
			ReferenceName: referenceName,
			SingleBranch:  true,
			Force:         true,
		})
		if errors.Is(cloneErr, git.NoErrAlreadyUpToDate) {
			return nil
		}
	}

	if errors.Is(cloneErr, git.ErrUnstagedChanges) {
		// See https://github.com/pulumi/pulumi/issues/11121. We seem to be getting intermittent unstaged
		// changes errors, which is very hard to reproduce. This block of code catches this error and tries to
		// do a diff to see what the unstaged change is and tells the user to report this error to the above
		// ticket.
		repo, err := git.PlainOpen(path)
		if err != nil {
			return fmt.Errorf(
				"GitCloneOrPull reported unstaged changes, but the repo couldn't be opened to check: %w\n"+
					"Please report this to https://github.com/pulumi/pulumi/issues/11121.", err)
		}

		worktree, err := repo.Worktree()
		if err != nil {
			return fmt.Errorf(
				"GitCloneOrPull reported unstaged changes, but the worktree couldn't be opened to check: %w\n"+
					"Please report this to https://github.com/pulumi/pulumi/issues/11121.", err)
		}

		status, err := worktree.Status()
		if err != nil {
			return fmt.Errorf(
				"GitCloneOrPull reported unstaged changes, but the worktree status couldn't be fetched to check: %w\n"+
					"Please report this to https://github.com/pulumi/pulumi/issues/11121.", err)
		}

		messages := make([]string, 0, len(status))
		for file, stat := range status {
			if stat.Worktree != git.Unmodified {
				messages = append(messages, fmt.Sprintf("%s was %c", file, rune(stat.Worktree)))
			}
		}
		// The status is a map, so sort the lines to keep the report stable between runs.
		sort.Strings(messages)

		return fmt.Errorf("GitCloneOrPull reported unstaged changes: %s\n"+
			"Please report this to https://github.com/pulumi/pulumi/issues/11121.",
			strings.Join(messages, "\n"))
	}

	return cloneErr
}
// gitCloneOrPullSystemGit uses the `git` command to pull or clone repositories.
//
// If path does not contain a ".git" entry, url is cloned into path (with `--depth 1` when
// shallow is set); otherwise `git pull` is run inside path. referenceName is currently not
// used by this function.
//
// On failure the returned error wraps the underlying execution error and includes whatever the
// git command printed, so the cause can be diagnosed.
func gitCloneOrPullSystemGit(url string, referenceName plumbing.ReferenceName, path string, shallow bool) error {
	// Assume repo already exists, pull changes.
	gitArgs := []string{"pull"}
	if _, err := os.Stat(filepath.Join(path, ".git")); os.IsNotExist(err) {
		// Repo does not exist, clone it.
		gitArgs = []string{"clone", url, "."}
		// For shallow clones, use a depth of 1.
		if shallow {
			gitArgs = append(gitArgs, "--depth", "1")
		}
	}

	cmd := exec.Command("git", gitArgs...)
	cmd.Dir = path
	output, err := cmd.CombinedOutput()
	if err == nil {
		return nil
	}

	cmdline := strings.Join(gitArgs, " ")
	if details := strings.TrimSpace(string(output)); details != "" {
		return fmt.Errorf("failed to run `git %v`: %w: %s", cmdline, err, details)
	}
	return fmt.Errorf("failed to run `git %v`: %w", cmdline, err)
}
// parseGistURL converts a Gist URL of the form https://gist.github.com/owner/id into the URL of
// the Gist's git repository, scheme://host/id.git.
//
// Only the owner/id form is accepted at the moment. Supporting https://gist.github.com/id, as
// well as arbitrary revisions such as https://gist.github.com/owner/id/commit, may be worth
// considering at some point.
func parseGistURL(u *url.URL) (string, error) {
	segments := strings.Split(strings.Trim(u.Path, "/"), "/")
	if len(segments) != 2 {
		return "", errors.New("invalid Gist URL")
	}

	owner, gistID := segments[0], segments[1]
	switch {
	case owner == "":
		return "", errors.New("invalid Gist URL; no owner")
	case gistID == "":
		return "", errors.New("invalid Gist URL; no id")
	}

	if !strings.HasSuffix(gistID, ".git") {
		gistID += ".git"
	}
	return u.Scheme + "://" + u.Host + "/" + gistID, nil
}
// parseHostAuth renders the authority part of u: "host" when u has no user info, "user@host"
// when only a user name is present, and "user:password@host" when a password is set as well.
func parseHostAuth(u *url.URL) string {
	info := u.User
	if info == nil {
		return u.Host
	}

	credentials := info.Username()
	if password, hasPassword := info.Password(); hasPassword {
		credentials += ":" + password
	}
	return credentials + "@" + u.Host
}
// ParseGitRepoURL returns the URL to the Git repository and path from a raw URL.
// For example, an input of "https://github.com/pulumi/templates/templates/javascript" returns
// "https://github.com/pulumi/templates.git" and "templates/javascript".
// Additionally, it supports nested git projects, as used by GitLab.
// For example, "https://github.com/pulumi/platform-team/templates.git/templates/javascript"
// returns "https://github.com/pulumi/platform-team/templates.git" and "templates/javascript"
//
// A ".git" only marks the end of the repository when it ends a path segment, so names such as
// ".github" or "repo.github.io" are not mistaken for the repository suffix.
//
// Only the "https" scheme is accepted; any other scheme yields an error.
//
// Note: URL with a hostname of `dev.azure.com`, are currently treated as a raw git clone url
// and currently do not support subpaths.
func ParseGitRepoURL(rawurl string) (string, string, error) {
	u, err := url.Parse(rawurl)
	if err != nil {
		return "", "", err
	}

	if u.Scheme != "https" {
		return "", "", errors.New("invalid URL scheme")
	}

	// Special case Gists.
	if u.Hostname() == "gist.github.com" {
		repo, err := parseGistURL(u)
		if err != nil {
			return "", "", err
		}
		return repo, "", nil
	}

	// Special case Azure DevOps.
	if u.Hostname() == AzureDevOpsHostName {
		// Specifying branch/ref and subpath is currently unsupported.
		return rawurl, "", nil
	}

	path := strings.TrimPrefix(u.Path, "/")
	paths := strings.Split(path, "/")
	if len(paths) < 2 {
		return "", "", errors.New("invalid Git URL")
	}

	// Shortcut for general case: URI Path contains a segment ending in '.git'
	// Cleave URI into what comes before and what comes after.
	if extensionOffset := gitSuffixEnd(path); extensionOffset != -1 {
		resultURL := u.Scheme + "://" + parseHostAuth(u) + "/" + path[:extensionOffset]
		resultPath := strings.Trim(path[extensionOffset:], "/")
		return resultURL, resultPath, nil
	}

	owner := paths[0]
	if owner == "" {
		return "", "", errors.New("invalid Git URL; no owner")
	}

	repo := paths[1]
	if repo == "" {
		return "", "", errors.New("invalid Git URL; no repository")
	}
	if !strings.HasSuffix(repo, ".git") {
		repo = repo + ".git"
	}

	resultURL := u.Scheme + "://" + parseHostAuth(u) + "/" + owner + "/" + repo
	resultPath := strings.TrimSuffix(strings.Join(paths[2:], "/"), "/")
	return resultURL, resultPath, nil
}

// gitSuffixEnd returns the offset just past the last ".git" in path that terminates a path
// segment (it is followed by "/" or by the end of path), or -1 when there is none.
func gitSuffixEnd(path string) int {
	search := path
	for {
		loc := strings.LastIndex(search, defaultGitCloudRepositorySuffix)
		if loc == -1 {
			return -1
		}
		end := loc + len(defaultGitCloudRepositorySuffix)
		if end == len(path) || path[end] == '/' {
			return end
		}
		// This occurrence sits in the middle of a name; keep looking further left.
		search = search[:loc]
	}
}
// gitSHARegex matches a string made up of exactly 40 hexadecimal characters, i.e. something
// that looks like a full SHA-1 commit hash.
var gitSHARegex = regexp.MustCompile(`^[0-9a-fA-F]{40}$`)
// GetGitReferenceNameOrHashAndSubDirectory splits urlPath into a git reference (or commit hash)
// and the sub directory beneath it. The sub directory always uses "/" as the separator.
//
// An empty urlPath, or one that does not start with "tree", resolves to HEAD with the whole path
// as the sub directory. For "tree/<x>/...", <x> is taken as a commit hash when it looks like a
// SHA; otherwise the branches and tags of the repository at url are listed and the longest ref
// name that prefixes the path is chosen. Paths containing "." or ".." components, and "tree"
// paths for which nothing matches, are rejected as invalid.
func GetGitReferenceNameOrHashAndSubDirectory(url string, urlPath string) (
	plumbing.ReferenceName, plumbing.Hash, string, error,
) {
	// No path at all means HEAD.
	if urlPath == "" {
		return plumbing.HEAD, plumbing.ZeroHash, "", nil
	}

	// Strip one leading and one trailing separator before splitting.
	trimmed := strings.TrimSuffix(strings.TrimPrefix(urlPath, "/"), "/")
	segments := strings.Split(trimmed, "/")

	// Relative components are never allowed.
	for _, segment := range segments {
		switch segment {
		case ".", "..":
			return "", plumbing.ZeroHash, "", errors.New("invalid Git URL")
		}
	}

	// Without a leading "tree" the entire path is a sub directory of HEAD.
	if segments[0] != "tree" {
		return plumbing.HEAD, plumbing.ZeroHash, strings.Join(segments, "/"), nil
	}

	// "tree" must be followed by at least one more component.
	if len(segments) < 2 {
		return "", plumbing.ZeroHash, "", errors.New("invalid Git URL")
	}
	afterTree := segments[1:]

	// Something that looks like a SHA is used as the commit hash directly.
	if gitSHARegex.MatchString(afterTree[0]) {
		return "", plumbing.NewHash(afterTree[0]), strings.Join(afterTree[1:], "/"), nil
	}

	// Otherwise match against the repo's refs, which arrive sorted longest name first so that
	// the longest matching ref wins.
	refs, err := GitListBranchesAndTags(url)
	if err != nil {
		return "", plumbing.ZeroHash, "", err
	}

	refAndDir := strings.Join(afterTree, "/") + "/"
	for _, ref := range refs {
		refPrefix := ref.Short() + "/"
		if !strings.HasPrefix(refAndDir, refPrefix) {
			continue
		}
		subDir := strings.TrimSuffix(refAndDir[len(refPrefix):], "/")
		return ref, plumbing.ZeroHash, subDir, nil
	}

	// No ref matched the path.
	return "", plumbing.ZeroHash, "", errors.New("invalid Git URL")
}
// GitListBranchesAndTags lists the references of the remote Git repository at url and returns
// the names of HEAD and of every branch and tag, ordered from the longest short name to the
// shortest.
func GitListBranchesAndTags(url string) ([]plumbing.ReferenceName, error) {
	// Listing references needs no checkout, so an in-memory repository is enough.
	scratchRepo, err := git.Init(memory.NewStorage(), nil)
	if err != nil {
		return nil, err
	}

	originCfg := &config.RemoteConfig{
		Name: "origin",
		URLs: []string{url},
	}
	origin, err := scratchRepo.CreateRemote(originCfg)
	if err != nil {
		return nil, err
	}

	advertised, err := origin.List(&git.ListOptions{})
	if err != nil {
		return nil, err
	}

	var names []plumbing.ReferenceName
	for _, ref := range advertised {
		name := ref.Name()
		wanted := name == plumbing.HEAD || name.IsBranch() || name.IsTag()
		if !wanted {
			continue
		}
		names = append(names, name)
	}

	sort.Sort(byShortNameLengthDesc(names))
	return names, nil
}
// byShortNameLengthDesc implements sort.Interface, ordering reference names from the longest
// short name to the shortest.
type byShortNameLengthDesc []plumbing.ReferenceName

// Len reports the number of reference names.
func (r byShortNameLengthDesc) Len() int {
	return len(r)
}

// Swap exchanges the reference names at positions i and j.
func (r byShortNameLengthDesc) Swap(i, j int) {
	r[j], r[i] = r[i], r[j]
}

// Less reports whether the short name at i is strictly longer than the one at j.
func (r byShortNameLengthDesc) Less(i, j int) bool {
	left, right := r[i].Short(), r[j].Short()
	return len(left) > len(right)
}