// Copyright 2016-2018, Pulumi Corporation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package resource

import (
	"archive/tar"
	"archive/zip"
	"bytes"
	"compress/gzip"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/pkg/errors"

	"github.com/pulumi/pulumi/pkg/util/contract"
	"github.com/pulumi/pulumi/pkg/util/httputil"
	"github.com/pulumi/pulumi/pkg/workspace"
)

// Asset is a serialized asset reference.  It is a union: thus, only one of its fields will be non-nil.  Several helper
// routines exist as members in order to easily interact with the assets referenced by an instance of this type.
// nolint: lll
type Asset struct {
	Sig  string `json:"4dabf18193072939515e22adb298388d" yaml:"4dabf18193072939515e22adb298388d"` // the unique asset signature (see properties.go).
	Hash string `json:"hash,omitempty" yaml:"hash,omitempty"`                                     // the SHA256 hash of the asset contents.
	Text string `json:"text,omitempty" yaml:"text,omitempty"`                                     // a textual asset.
	Path string `json:"path,omitempty" yaml:"path,omitempty"`                                     // a file on the current filesystem.
	URI  string `json:"uri,omitempty" yaml:"uri,omitempty"`                                       // a URI (file://, http://, https://, or custom).
}

const (
	AssetSig          = "c44067f5952c0a294b673a41bacd8c17" // a randomly assigned type hash for assets.
	AssetHashProperty = "hash"                             // the dynamic property for an asset's hash.
	AssetTextProperty = "text"                             // the dynamic property for an asset's text.
	AssetPathProperty = "path"                             // the dynamic property for an asset's path.
	AssetURIProperty  = "uri"                              // the dynamic property for an asset's URI.
)

// NewTextAsset produces a new asset and its corresponding SHA256 hash from the given text.
func NewTextAsset(text string) (*Asset, error) {
	a := &Asset{Sig: AssetSig, Text: text}
	err := a.EnsureHash()
	return a, err
}

// NewPathAsset produces a new asset and its corresponding SHA256 hash from the given filesystem path.
func NewPathAsset(path string) (*Asset, error) {
	a := &Asset{Sig: AssetSig, Path: path}
	err := a.EnsureHash()
	return a, err
}

// NewURIAsset produces a new asset and its corresponding SHA256 hash from the given network URI.
func NewURIAsset(uri string) (*Asset, error) {
	a := &Asset{Sig: AssetSig, URI: uri}
	err := a.EnsureHash()
	return a, err
}

func (a *Asset) IsText() bool { return !a.IsPath() && !a.IsURI() }
func (a *Asset) IsPath() bool { return a.Path != "" }
func (a *Asset) IsURI() bool  { return a.URI != "" }

func (a *Asset) GetText() (string, bool) {
	if a.IsText() {
		return a.Text, true
	}
	return "", false
}

func (a *Asset) GetPath() (string, bool) {
	if a.IsPath() {
		return a.Path, true
	}
	return "", false
}

func (a *Asset) GetURI() (string, bool) {
	if a.IsURI() {
		return a.URI, true
	}
	return "", false
}

var (
	functionRegexp    = regexp.MustCompile(`function __.*`)
	withRegexp        = regexp.MustCompile(`    with\({ .* }\) {`)
	environmentRegexp = regexp.MustCompile(`  }\).apply\(.*\).apply\(this, arguments\);`)
	preambleRegexp    = regexp.MustCompile(
		`function __.*\(\) {\n  return \(function\(\) {\n    with \(__closure\) {\n\nreturn `)
	postambleRegexp = regexp.MustCompile(
		`;\n\n    }\n  }\).apply\(__environment\).apply\(this, arguments\);\n}`)
)

// IsUserProgramCode checks to see if this is the special asset containing the users's code
func (a *Asset) IsUserProgramCode() bool {
	if !a.IsText() {
		return false
	}

	text := a.Text

	return functionRegexp.MatchString(text) &&
		withRegexp.MatchString(text) &&
		environmentRegexp.MatchString(text)
}

// MassageIfUserProgramCodeAsset takes the text for a function and cleans it up a bit to make the
// user visible diffs less noisy.  Specifically:
//   1. it tries to condense things by changling multiple blank lines into a single blank line.
//   2. it normalizs the sha hashes we emit so that changes to them don't appear in the diff.
//   3. it elides the with-capture headers, as changes there are not generally meaningful.
//
// TODO(https://github.com/pulumi/pulumi/issues/592) this is baking in a lot of knowledge about
// pulumi serialized functions.  We should try to move to an alternative mode that isn't so brittle.
// Options include:
//   1. Have a documented delimeter format that plan.go will look for.  Have the function serializer
//      emit those delimeters around code that should be ignored.
//   2. Have our resource generation code supply not just the resource, but the "user presentable"
//      resource that cuts out a lot of cruft.  We could then just diff that content here.
func MassageIfUserProgramCodeAsset(asset *Asset, debug bool) *Asset {
	if debug {
		return asset
	}

	// Only do this for strings that match our serialized function pattern.
	if !asset.IsUserProgramCode() {
		return asset
	}

	text := asset.Text
	replaceNewlines := func() {
		for {
			newText := strings.Replace(text, "\n\n\n", "\n\n", -1)
			if len(newText) == len(text) {
				break
			}

			text = newText
		}
	}

	replaceNewlines()

	firstFunc := functionRegexp.FindStringIndex(text)
	text = text[firstFunc[0]:]

	text = withRegexp.ReplaceAllString(text, "    with (__closure) {")
	text = environmentRegexp.ReplaceAllString(text, "  }).apply(__environment).apply(this, arguments);")

	text = preambleRegexp.ReplaceAllString(text, "")
	text = postambleRegexp.ReplaceAllString(text, "")

	replaceNewlines()

	return &Asset{Text: text}
}

// GetURIURL returns the underlying URI as a parsed URL, provided it is one.  If there was an error parsing the URI, it
// will be returned as a non-nil error object.
func (a *Asset) GetURIURL() (*url.URL, bool, error) {
	if uri, isuri := a.GetURI(); isuri {
		url, err := url.Parse(uri)
		if err != nil {
			return nil, true, err
		}
		return url, true, nil
	}
	return nil, false, nil
}

// Equals returns true if a is value-equal to other. In this case, value equality is determined only by the hash: even
// if the contents of two assets come from different sources, they are treated as equal if their hashes match.
// Similarly, if the contents of two assets come from the same source but the assets have different hashes, the assets
// are not equal.
func (a *Asset) Equals(other *Asset) bool {
	if a == nil {
		return other == nil
	} else if other == nil {
		return false
	}

	// If we can't get a hash for both assets, treat them as differing.
	if err := a.EnsureHash(); err != nil {
		return false
	}
	if err := other.EnsureHash(); err != nil {
		return false
	}
	return a.Hash == other.Hash
}

// Serialize returns a weakly typed map that contains the right signature for serialization purposes.
func (a *Asset) Serialize() map[string]interface{} {
	result := map[string]interface{}{
		string(SigKey): AssetSig,
	}
	if a.Hash != "" {
		result[AssetHashProperty] = a.Hash
	}
	if a.Text != "" {
		result[AssetTextProperty] = a.Text
	}
	if a.Path != "" {
		result[AssetPathProperty] = a.Path
	}
	if a.URI != "" {
		result[AssetURIProperty] = a.URI
	}
	return result
}

// DeserializeAsset checks to see if the map contains an asset, using its signature, and if so deserializes it.
func DeserializeAsset(obj map[string]interface{}) (*Asset, bool, error) {
	// If not an asset, return false immediately.
	if obj[string(SigKey)] != AssetSig {
		return &Asset{}, false, nil
	}

	// Else, deserialize the possible fields.
	var hash string
	if v, has := obj[AssetHashProperty]; has {
		hash = v.(string)
	}
	var text string
	if v, has := obj[AssetTextProperty]; has {
		text = v.(string)
	}
	var path string
	if v, has := obj[AssetPathProperty]; has {
		path = v.(string)
	}
	var uri string
	if v, has := obj[AssetURIProperty]; has {
		uri = v.(string)
	}

	return &Asset{Hash: hash, Text: text, Path: path, URI: uri}, true, nil
}

// HasContents indicates whether or not an asset's contents can be read.
func (a *Asset) HasContents() bool {
	return a.IsText() || a.IsPath() || a.IsURI()
}

// Bytes returns the contents of the asset as a byte slice.
func (a *Asset) Bytes() ([]byte, error) {
	// If this is a text asset, just return its bytes directly.
	if text, istext := a.GetText(); istext {
		return []byte(text), nil
	}

	blob, err := a.Read()
	if err != nil {
		return nil, err
	}
	return ioutil.ReadAll(blob)
}

// Read begins reading an asset.
func (a *Asset) Read() (*Blob, error) {
	if a.IsText() {
		return a.readText()
	} else if a.IsPath() {
		return a.readPath()
	} else if a.IsURI() {
		return a.readURI()
	}
	return nil, errors.New("unrecognized asset type")
}

func (a *Asset) readText() (*Blob, error) {
	text, istext := a.GetText()
	contract.Assertf(istext, "Expected a text-based asset")
	return NewByteBlob([]byte(text)), nil
}

func (a *Asset) readPath() (*Blob, error) {
	path, ispath := a.GetPath()
	contract.Assertf(ispath, "Expected a path-based asset")

	file, err := os.Open(path)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to open asset file '%v'", path)
	}

	// Do a quick check to make sure it's a file, so we can fail gracefully if someone passes a directory.
	info, err := file.Stat()
	if err != nil {
		contract.IgnoreClose(file)
		return nil, errors.Wrapf(err, "failed to stat asset file '%v'", path)
	}
	if info.IsDir() {
		contract.IgnoreClose(file)
		return nil, errors.Errorf("asset path '%v' is a directory; try using an archive", path)
	}

	blob := &Blob{
		rd: file,
		sz: info.Size(),
	}
	return blob, nil
}

func (a *Asset) readURI() (*Blob, error) {
	url, isurl, err := a.GetURIURL()
	if err != nil {
		return nil, err
	}
	contract.Assertf(isurl, "Expected a URI-based asset")
	switch s := url.Scheme; s {
	case "http", "https":
		resp, err := httputil.GetWithRetry(url.String(), http.DefaultClient)
		if err != nil {
			return nil, err
		}
		return NewReadCloserBlob(resp.Body)
	case "file":
		contract.Assert(url.User == nil)
		contract.Assert(url.RawQuery == "")
		contract.Assert(url.Fragment == "")
		if url.Host != "" && url.Host != "localhost" {
			return nil, errors.Errorf("file:// host '%v' not supported (only localhost)", url.Host)
		}
		f, err := os.Open(url.Path)
		if err != nil {
			return nil, err
		}
		return NewFileBlob(f)
	default:
		return nil, errors.Errorf("Unrecognized or unsupported URI scheme: %v", s)
	}
}

// EnsureHash computes the SHA256 hash of the asset's contents and stores it on the object.
func (a *Asset) EnsureHash() error {
	if a.Hash == "" {
		blob, err := a.Read()
		if err != nil {
			return err
		}
		defer contract.IgnoreClose(blob)

		hash := sha256.New()
		_, err = io.Copy(hash, blob)
		if err != nil {
			return err
		}
		a.Hash = hex.EncodeToString(hash.Sum(nil))
	}
	return nil
}

// Blob is a blob that implements ReadCloser and offers Len functionality.
type Blob struct {
	rd io.ReadCloser // an underlying reader.
	sz int64         // the size of the blob.
}

func (blob *Blob) Close() error               { return blob.rd.Close() }
func (blob *Blob) Read(p []byte) (int, error) { return blob.rd.Read(p) }
func (blob *Blob) Size() int64                { return blob.sz }

// NewByteBlob creates a new byte blob.
func NewByteBlob(data []byte) *Blob {
	return &Blob{
		rd: ioutil.NopCloser(bytes.NewReader(data)),
		sz: int64(len(data)),
	}
}

// NewFileBlob creates a new asset blob whose size is known thanks to stat.
func NewFileBlob(f *os.File) (*Blob, error) {
	stat, err := f.Stat()
	if err != nil {
		return nil, err
	}
	return &Blob{
		rd: f,
		sz: stat.Size(),
	}, nil
}

// NewReadCloserBlob turn any old ReadCloser into an Blob, usually by making a copy.
func NewReadCloserBlob(r io.ReadCloser) (*Blob, error) {
	if f, isf := r.(*os.File); isf {
		// If it's a file, we can "fast path" the asset creation without making a copy.
		return NewFileBlob(f)
	}
	// Otherwise, read it all in, and create a blob out of that.
	defer contract.IgnoreClose(r)
	data, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}
	return NewByteBlob(data), nil
}

// Archive is a serialized archive reference.  It is a union: thus, only one of its fields will be non-nil.  Several
// helper routines exist as members in order to easily interact with archives of different kinds.
//nolint: lll
type Archive struct {
	Sig    string                 `json:"4dabf18193072939515e22adb298388d" yaml:"4dabf18193072939515e22adb298388d"` // the unique asset signature (see properties.go).
	Hash   string                 `json:"hash,omitempty" yaml:"hash,omitempty"`                                     // the SHA256 hash of the archive contents.
	Assets map[string]interface{} `json:"assets,omitempty" yaml:"assets,omitempty"`                                 // a collection of other assets/archives.
	Path   string                 `json:"path,omitempty" yaml:"path,omitempty"`                                     // a file on the current filesystem.
	URI    string                 `json:"uri,omitempty" yaml:"uri,omitempty"`                                       // a remote URI (file://, http://, https://, etc).
}

const (
	ArchiveSig            = "0def7320c3a5731c473e5ecbe6d01bc7" // a randomly assigned archive type signature.
	ArchiveHashProperty   = "hash"                             // the dynamic property for an archive's hash.
	ArchiveAssetsProperty = "assets"                           // the dynamic property for an archive's assets.
	ArchivePathProperty   = "path"                             // the dynamic property for an archive's path.
	ArchiveURIProperty    = "uri"                              // the dynamic property for an archive's URI.
)

func NewAssetArchive(assets map[string]interface{}) (*Archive, error) {
	// Ensure all elements are either assets or archives.
	for _, asset := range assets {
		switch t := asset.(type) {
		case *Asset, *Archive:
			// ok
		default:
			return &Archive{}, errors.Errorf("type %v is not a valid archive element", t)
		}
	}
	a := &Archive{Sig: ArchiveSig, Assets: assets}
	err := a.EnsureHash()
	return a, err
}

func NewPathArchive(path string) (*Archive, error) {
	a := &Archive{Sig: ArchiveSig, Path: path}
	err := a.EnsureHash()
	return a, err
}

func NewURIArchive(uri string) (*Archive, error) {
	a := &Archive{Sig: ArchiveSig, URI: uri}
	err := a.EnsureHash()
	return a, err
}

func (a *Archive) IsAssets() bool { return a.Assets != nil }
func (a *Archive) IsPath() bool   { return a.Path != "" }
func (a *Archive) IsURI() bool    { return a.URI != "" }

func (a *Archive) GetAssets() (map[string]interface{}, bool) {
	if a.IsAssets() {
		return a.Assets, true
	}
	return nil, false
}

func (a *Archive) GetPath() (string, bool) {
	if a.IsPath() {
		return a.Path, true
	}
	return "", false
}

func (a *Archive) GetURI() (string, bool) {
	if a.IsURI() {
		return a.URI, true
	}
	return "", false
}

// GetURIURL returns the underlying URI as a parsed URL, provided it is one.  If there was an error parsing the URI, it
// will be returned as a non-nil error object.
func (a *Archive) GetURIURL() (*url.URL, bool, error) {
	if uri, isuri := a.GetURI(); isuri {
		url, err := url.Parse(uri)
		if err != nil {
			return nil, true, err
		}
		return url, true, nil
	}
	return nil, false, nil
}

// Equals returns true if a is value-equal to other. In this case, value equality is determined only by the hash: even
// if the contents of two archives come from different sources, they are treated as equal if their hashes match.
// Similarly, if the contents of two archives come from the same source but the archives have different hashes, the
// archives are not equal.
func (a *Archive) Equals(other *Archive) bool {
	if a == nil {
		return other == nil
	} else if other == nil {
		return false
	}

	// If we can't get a hash for both archives, treat them as differing.
	if err := a.EnsureHash(); err != nil {
		return false
	}
	if err := other.EnsureHash(); err != nil {
		return false
	}
	return a.Hash == other.Hash
}

// Serialize returns a weakly typed map that contains the right signature for serialization purposes.
func (a *Archive) Serialize() map[string]interface{} {
	result := map[string]interface{}{
		string(SigKey): ArchiveSig,
	}
	if a.Hash != "" {
		result[ArchiveHashProperty] = a.Hash
	}
	if a.Assets != nil {
		assets := make(map[string]interface{})
		for k, v := range a.Assets {
			switch t := v.(type) {
			case *Asset:
				assets[k] = t.Serialize()
			case *Archive:
				assets[k] = t.Serialize()
			default:
				contract.Failf("Unrecognized asset map type %v", reflect.TypeOf(t))
			}
		}
		result[ArchiveAssetsProperty] = assets
	}
	if a.Path != "" {
		result[ArchivePathProperty] = a.Path
	}
	if a.URI != "" {
		result[ArchiveURIProperty] = a.URI
	}
	return result
}

// DeserializeArchive checks to see if the map contains an archive, using its signature, and if so deserializes it.
func DeserializeArchive(obj map[string]interface{}) (*Archive, bool, error) {
	// If not an archive, return false immediately.
	if obj[string(SigKey)] != ArchiveSig {
		return &Archive{}, false, nil
	}

	var hash string
	if v, has := obj[ArchiveHashProperty]; has {
		hash = v.(string)
	}

	var assets map[string]interface{}
	if v, has := obj[ArchiveAssetsProperty]; has {
		assets = make(map[string]interface{})
		if v != nil {
			for k, elem := range v.(map[string]interface{}) {
				switch t := elem.(type) {
				case *Asset:
					assets[k] = t
				case *Archive:
					assets[k] = t
				case map[string]interface{}:
					a, isa, err := DeserializeAsset(t)
					if err != nil {
						return &Archive{}, false, err
					} else if isa {
						assets[k] = a
					} else {
						arch, isarch, err := DeserializeArchive(t)
						if err != nil {
							return &Archive{}, false, err
						} else if !isarch {
							return &Archive{}, false, errors.Errorf("archive member '%v' is not an asset or archive", k)
						}
						assets[k] = arch
					}
				default:
					return &Archive{}, false, nil
				}
			}
		}
	}
	var path string
	if v, has := obj[ArchivePathProperty]; has {
		path = v.(string)
	}
	var uri string
	if v, has := obj[ArchiveURIProperty]; has {
		uri = v.(string)
	}

	return &Archive{Hash: hash, Assets: assets, Path: path, URI: uri}, true, nil
}

// HasContents indicates whether or not an archive's contents can be read.
func (a *Archive) HasContents() bool {
	return a.IsAssets() || a.IsPath() || a.IsURI()
}

// ArchiveReader presents the contents of an archive as a stream of named blobs.
type ArchiveReader interface {
	// Next returns the name and contents of the next member of the archive. If there are no more members in the
	// archive, this function returns ("", nil, io.EOF). The blob returned by a call to Next() must be read in full
	// before the next call to Next().
	Next() (string, *Blob, error)

	// Close terminates the stream.
	Close() error
}

// Open returns an ArchiveReader that can be used to iterate over the named blobs that comprise the archive.
func (a *Archive) Open() (ArchiveReader, error) {
	contract.Assertf(a.HasContents(), "cannot read an archive that has no contents")
	if a.IsAssets() {
		return a.readAssets()
	} else if a.IsPath() {
		return a.readPath()
	} else if a.IsURI() {
		return a.readURI()
	}
	return nil, nil
}

// assetsArchiveReader is used to read an Assets archive.
type assetsArchiveReader struct {
	assets      map[string]interface{}
	keys        []string
	archive     ArchiveReader
	archiveRoot string
}

func (r *assetsArchiveReader) Next() (string, *Blob, error) {
	for {
		// If we're currently flattening out a subarchive, first check to see if it has any more members. If it does,
		// return the next member.
		if r.archive != nil {
			name, blob, err := r.archive.Next()
			switch {
			case err == io.EOF:
				// The subarchive is complete. Nil it out and continue on.
				r.archive = nil
			case err != nil:
				// The subarchive produced a legitimate error; return it.
				return "", nil, err
			default:
				// The subarchive produced a valid blob. Return it.
				return filepath.Join(r.archiveRoot, name), blob, nil
			}
		}

		// If there are no more members in this archive, return io.EOF.
		if len(r.keys) == 0 {
			return "", nil, io.EOF
		}

		// Fetch the next key in the archive and slice it off of the list.
		name := r.keys[0]
		r.keys = r.keys[1:]

		asset := r.assets[name]
		switch t := asset.(type) {
		case *Asset:
			// An asset can be produced directly.
			blob, err := t.Read()
			if err != nil {
				return "", nil, errors.Wrapf(err, "failed to expand archive asset '%v'", name)
			}
			return name, blob, nil
		case *Archive:
			// An archive must be flattened into its constituent blobs. Open the archive for reading and loop.
			archive, err := t.Open()
			if err != nil {
				return "", nil, errors.Wrapf(err, "failed to expand sub-archive '%v'", name)
			}
			r.archive = archive
			r.archiveRoot = name
		}
	}
}

func (r *assetsArchiveReader) Close() error {
	if r.archive != nil {
		return r.archive.Close()
	}
	return nil
}

func (a *Archive) readAssets() (ArchiveReader, error) {
	// To read a map-based archive, just produce a map from each asset to its associated reader.
	m, isassets := a.GetAssets()
	contract.Assertf(isassets, "Expected an asset map-based archive")

	// Calculate and sort the list of member names s.t. it is deterministically orderered.
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	r := &assetsArchiveReader{
		assets: m,
		keys:   keys,
	}
	return r, nil
}

// directoryArchiveReader is used to read an archive that is represented by a directory in the host filesystem.
type directoryArchiveReader struct {
	directoryPath string
	assetPaths    []string
}

func (r *directoryArchiveReader) Next() (string, *Blob, error) {
	// If there are no more members in this archive, return io.EOF.
	if len(r.assetPaths) == 0 {
		return "", nil, io.EOF
	}

	// Fetch the next path in the archive and slice it off of the list.
	assetPath := r.assetPaths[0]
	r.assetPaths = r.assetPaths[1:]

	// Crop the asset's path s.t. it is relative to the directory path.
	name, err := filepath.Rel(r.directoryPath, assetPath)
	if err != nil {
		return "", nil, err
	}
	name = filepath.Clean(name)

	// Open and return the blob.
	blob, err := (&Asset{Path: assetPath}).Read()
	if err != nil {
		return "", nil, err
	}
	return name, blob, nil
}

func (r *directoryArchiveReader) Close() error {
	return nil
}

func (a *Archive) readPath() (ArchiveReader, error) {
	// To read a path-based archive, read that file and use its extension to ascertain what format to use.
	path, ispath := a.GetPath()
	contract.Assertf(ispath, "Expected a path-based asset")

	format := detectArchiveFormat(path)

	if format == NotArchive {
		// If not an archive, it could be a directory; if so, simply expand it out uncompressed as an archive.
		info, err := os.Stat(path)
		if err != nil {
			return nil, errors.Wrapf(err, "couldn't read archive path '%v'", path)
		} else if !info.IsDir() {
			return nil, errors.Wrapf(err, "'%v' is neither a recognized archive type nor a directory", path)
		}

		// Accumulate the list of asset paths. This list is ordered deterministically by filepath.Walk.
		assetPaths := []string{}
		if walkerr := filepath.Walk(path, func(filePath string, f os.FileInfo, fileerr error) error {
			// If there was an error, exit.
			if fileerr != nil {
				return fileerr
			}

			// If this is a .pulumi directory, we will skip this by default.
			// TODO[pulumi/pulumi#122]: when we support .pulumiignore, this will be customizable.
			if f.Name() == workspace.BookkeepingDir {
				if f.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}

			// If this was a directory or a symlink, skip it.
			if f.IsDir() || f.Mode()&os.ModeSymlink != 0 {
				return nil
			}

			// Otherwise, add this asset to the list of paths and keep going.
			assetPaths = append(assetPaths, filePath)
			return nil
		}); walkerr != nil {
			return nil, walkerr
		}

		r := &directoryArchiveReader{
			directoryPath: path,
			assetPaths:    assetPaths,
		}
		return r, nil
	}

	// Otherwise, it's an archive file, and we will go ahead and open it up and read it.
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	return readArchive(file, format)
}

func (a *Archive) readURI() (ArchiveReader, error) {
	// To read a URI-based archive, fetch the contents remotely and use the extension to pick the format to use.
	url, isurl, err := a.GetURIURL()
	if err != nil {
		return nil, err
	}
	contract.Assertf(isurl, "Expected a URI-based asset")

	format := detectArchiveFormat(url.Path)
	if format == NotArchive {
		// IDEA: support (a) hints and (b) custom providers that default to certain formats.
		return nil, errors.Errorf("file at URL '%v' is not a recognized archive format", url)
	}

	ar, err := a.openURLStream(url)
	if err != nil {
		return nil, err
	}
	return readArchive(ar, format)
}

func (a *Archive) openURLStream(url *url.URL) (io.ReadCloser, error) {
	switch s := url.Scheme; s {
	case "http", "https":
		resp, err := httputil.GetWithRetry(url.String(), http.DefaultClient)
		if err != nil {
			return nil, err
		}
		return resp.Body, nil
	case "file":
		contract.Assert(url.Host == "")
		contract.Assert(url.User == nil)
		contract.Assert(url.RawQuery == "")
		contract.Assert(url.Fragment == "")
		return os.Open(url.Path)
	default:
		return nil, errors.Errorf("Unrecognized or unsupported URI scheme: %v", s)
	}
}

// Bytes fetches the archive contents as a byte slices.  This is almost certainly the least efficient way to deal with
// the underlying streaming capabilities offered by assets and archives, but can be used in a pinch to interact with
// APIs that demand []bytes.
func (a *Archive) Bytes(format ArchiveFormat) ([]byte, error) {
	var data bytes.Buffer
	if err := a.Archive(format, &data); err != nil {
		return nil, err
	}
	return data.Bytes(), nil
}

// Archive produces a single archive stream in the desired format.  It prefers to return the archive with as little
// copying as is feasible, however if the desired format is different from the source, it will need to translate.
func (a *Archive) Archive(format ArchiveFormat, w io.Writer) error {
	// If the source format is the same, just return that.
	if sf, ss, err := a.ReadSourceArchive(); sf != NotArchive && sf == format {
		if err != nil {
			return err
		}
		_, err := io.Copy(w, ss)
		return err
	}

	switch format {
	case TarArchive:
		return a.archiveTar(w)
	case TarGZIPArchive:
		return a.archiveTarGZIP(w)
	case ZIPArchive:
		return a.archiveZIP(w)
	default:
		contract.Failf("Illegal archive type: %v", format)
		return nil
	}
}

// addNextFileToTar adds the next file in the given archive to the given tar file. Returns io.EOF if the archive
// contains no more files.
func addNextFileToTar(r ArchiveReader, tw *tar.Writer, seenFiles map[string]bool) error {
	file, data, err := r.Next()
	if err != nil {
		return err
	}
	defer contract.IgnoreClose(data)

	// It's possible to run into the same file multiple times in the list of archives we're passed.
	// For example, if there is an archive pointing to foo/bar and an archive pointing to
	// foo/bar/baz/quux.  Because of this only include the file the first time we see it.
	if _, has := seenFiles[file]; has {
		return nil
	}
	seenFiles[file] = true

	sz := data.Size()
	if err = tw.WriteHeader(&tar.Header{
		Name: file,
		Mode: 0600,
		Size: sz,
	}); err != nil {
		return err
	}
	n, err := io.Copy(tw, data)
	if err == tar.ErrWriteTooLong {
		return errors.Wrap(err, fmt.Sprintf("incorrect blob size for %v: expected %v, got %v", file, sz, n))
	}
	return err
}

func (a *Archive) archiveTar(w io.Writer) error {
	// Open the archive.
	reader, err := a.Open()
	if err != nil {
		return err
	}
	defer contract.IgnoreClose(reader)

	// Now actually emit the contents, file by file.
	tw := tar.NewWriter(w)
	seenFiles := make(map[string]bool)
	for err == nil {
		err = addNextFileToTar(reader, tw, seenFiles)
	}
	if err != io.EOF {
		return err
	}

	return tw.Close()
}

func (a *Archive) archiveTarGZIP(w io.Writer) error {
	z := gzip.NewWriter(w)
	return a.archiveTar(z)
}

// addNextFileToZIP adds the next file in the given archive to the given ZIP file. Returns io.EOF if the archive
// contains no more files.
func addNextFileToZIP(r ArchiveReader, zw *zip.Writer, seenFiles map[string]bool) error {
	file, data, err := r.Next()
	if err != nil {
		return err
	}
	defer contract.IgnoreClose(data)

	// It's possible to run into the same file multiple times in the list of archives we're passed.
	// For example, if there is an archive pointing to foo/bar and an archive pointing to
	// foo/bar/baz/quux.  Because of this only include the file the first time we see it.
	if _, has := seenFiles[file]; has {
		return nil
	}
	seenFiles[file] = true

	fh := &zip.FileHeader{
		// These are the two fields set by zw.Create()
		Name:   file,
		Method: zip.Deflate,
	}

	// Set a nonzero -- but constant -- modification time. Otherwise, some agents (e.g. Azure
	// websites) can't extract the resulting archive. The date is comfortably after 1980 because
	// the ZIP format includes a date representation that starts at 1980. Use `SetModTime` to
	// remain compatible with Go 1.9.
	// nolint: megacheck
	fh.SetModTime(time.Date(1990, time.January, 1, 0, 0, 0, 0, time.UTC))

	fw, err := zw.CreateHeader(fh)
	if err != nil {
		return err
	}
	_, err = io.Copy(fw, data)
	return err
}

func (a *Archive) archiveZIP(w io.Writer) error {
	// Open the archive.
	reader, err := a.Open()
	if err != nil {
		return err
	}
	defer contract.IgnoreClose(reader)

	// Now actually emit the contents, file by file.
	zw := zip.NewWriter(w)
	seenFiles := make(map[string]bool)
	for err == nil {
		err = addNextFileToZIP(reader, zw, seenFiles)
	}
	if err != io.EOF {
		return err
	}

	return zw.Close()
}

// ReadSourceArchive returns a stream to the underlying archive, if there is one.
func (a *Archive) ReadSourceArchive() (ArchiveFormat, io.ReadCloser, error) {
	if path, ispath := a.GetPath(); ispath {
		if format := detectArchiveFormat(path); format != NotArchive {
			f, err := os.Open(path)
			return format, f, err
		}
	} else if url, isurl, urlerr := a.GetURIURL(); urlerr == nil && isurl {
		if format := detectArchiveFormat(url.Path); format != NotArchive {
			s, err := a.openURLStream(url)
			return format, s, err
		}
	}
	return NotArchive, nil, nil
}

// EnsureHash computes the SHA256 hash of the archive's contents and stores it on the object.
func (a *Archive) EnsureHash() error {
	if a.Hash == "" {
		hash := sha256.New()

		// Attempt to compute the hash in the most efficient way.  First try to open the archive directly and copy it
		// to the hash.  This avoids traversing any of the contents and just treats it as a byte stream.
		f, r, err := a.ReadSourceArchive()
		if err != nil {
			return err
		}
		if f != NotArchive && r != nil {
			defer contract.IgnoreClose(r)
			_, err = io.Copy(hash, r)
			if err != nil {
				return err
			}
		} else {
			// Otherwise, it's not an archive; we'll need to transform it into one.  Pick tar since it avoids
			// any superfluous compression which doesn't actually help us in this situation.
			err := a.Archive(TarArchive, hash)
			if err != nil {
				return err
			}
		}

		// Finally, encode the resulting hash as a string and we're done.
		a.Hash = hex.EncodeToString(hash.Sum(nil))
	}
	return nil
}

// ArchiveFormat indicates what archive and/or compression format an archive uses.
type ArchiveFormat int

const (
	NotArchive     = iota // not an archive.
	TarArchive            // a POSIX tar archive.
	TarGZIPArchive        // a POSIX tar archive that has been subsequently compressed using GZip.
	ZIPArchive            // a multi-file ZIP archive.
)

// ArchiveExts maps from a file extension and its associated archive and/or compression format.
var ArchiveExts = map[string]ArchiveFormat{
	".tar":    TarArchive,
	".tgz":    TarGZIPArchive,
	".tar.gz": TarGZIPArchive,
	".zip":    ZIPArchive,
}

// detectArchiveFormat takes a path and infers its archive format based on the file extension.
func detectArchiveFormat(path string) ArchiveFormat {
	ext := filepath.Ext(path)
	if moreext := filepath.Ext(strings.TrimRight(path, ext)); moreext != "" {
		ext = moreext + ext // this ensures we detect ".tar.gz" correctly.
	}
	format, has := ArchiveExts[ext]
	if !has {
		return NotArchive
	}
	return format
}

// readArchive takes a stream to an existing archive and returns a map of names to readers for the inner assets.
// The routine returns an error if something goes wrong and, no matter what, closes the stream before returning.
func readArchive(ar io.ReadCloser, format ArchiveFormat) (ArchiveReader, error) {
	switch format {
	case TarArchive:
		return readTarArchive(ar)
	case TarGZIPArchive:
		return readTarGZIPArchive(ar)
	case ZIPArchive:
		// Unfortunately, the ZIP archive reader requires ReaderAt functionality.  If it's a file, we can recover this
		// with a simple stat.  Otherwise, we will need to go ahead and make a copy in memory.
		var ra io.ReaderAt
		var sz int64
		if f, isf := ar.(*os.File); isf {
			stat, err := f.Stat()
			if err != nil {
				return nil, err
			}
			ra = f
			sz = stat.Size()
		} else if data, err := ioutil.ReadAll(ar); err != nil {
			return nil, err
		} else {
			ra = bytes.NewReader(data)
			sz = int64(len(data))
		}
		return readZIPArchive(ra, sz)
	default:
		contract.Failf("Illegal archive type: %v", format)
		return nil, nil
	}
}

// tarArchiveReader is used to read an archive that is stored in tar format.
type tarArchiveReader struct {
	ar io.ReadCloser
	tr *tar.Reader
}

func (r *tarArchiveReader) Next() (string, *Blob, error) {
	for {
		file, err := r.tr.Next()
		if err != nil {
			return "", nil, err
		}

		switch file.Typeflag {
		case tar.TypeDir:
			continue // skip directories
		case tar.TypeReg:
			// Return the tar reader for this file's contents.
			data := &Blob{
				rd: ioutil.NopCloser(r.tr),
				sz: file.Size,
			}
			name := filepath.Clean(file.Name)
			return name, data, nil
		default:
			contract.Failf("Unrecognized tar header typeflag: %v", file.Typeflag)
		}
	}
}

func (r *tarArchiveReader) Close() error {
	return r.ar.Close()
}

func readTarArchive(ar io.ReadCloser) (ArchiveReader, error) {
	r := &tarArchiveReader{
		ar: ar,
		tr: tar.NewReader(ar),
	}
	return r, nil
}

func readTarGZIPArchive(ar io.ReadCloser) (ArchiveReader, error) {
	// First decompress the GZIP stream.
	gz, err := gzip.NewReader(ar)
	if err != nil {
		return nil, err
	}

	// Now read the tarfile.
	return readTarArchive(gz)
}

// zipArchiveReader is used to read an archive that is stored in ZIP format.
type zipArchiveReader struct {
	ar    io.ReaderAt
	zr    *zip.Reader
	index int
}

func (r *zipArchiveReader) Next() (string, *Blob, error) {
	for r.index < len(r.zr.File) {
		file := r.zr.File[r.index]
		r.index++

		// Skip directories, since they aren't included in TAR and other archives above.
		if file.FileInfo().IsDir() {
			continue
		}

		// Open the next file and return its blob.
		body, err := file.Open()
		if err != nil {
			return "", nil, errors.Wrapf(err, "failed to read ZIP inner file %v", file.Name)
		}
		blob := &Blob{
			rd: body,
			sz: int64(file.UncompressedSize64),
		}
		name := filepath.Clean(file.Name)
		return name, blob, nil
	}
	return "", nil, io.EOF
}

func (r *zipArchiveReader) Close() error {
	if c, ok := r.ar.(io.Closer); ok {
		return c.Close()
	}
	return nil
}

func readZIPArchive(ar io.ReaderAt, size int64) (ArchiveReader, error) {
	zr, err := zip.NewReader(ar, size)
	if err != nil {
		return nil, errors.Wrap(err, "failed to read ZIP")
	}

	r := &zipArchiveReader{
		ar: ar,
		zr: zr,
	}
	return r, nil
}