pulumi/sdk/go/common/tokens/tokens.go

297 lines
9.6 KiB
Go
Raw Normal View History

2018-05-22 19:43:36 +00:00
// Copyright 2016-2018, Pulumi Corporation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
Begin overhauling semantic phases This change further merges the new AST and MuPack/MuIL formats and abstractions into the core of the compiler. A good amount of the old code is gone now; I decided against ripping it all out in one fell swoop so that I can methodically check that we are preserving all relevant decisions and/or functionality we had in the old model. The changes are too numerous to outline in this commit message, however, here are the noteworthy ones: * Split up the notion of symbols and tokens, resulting in: - pkg/symbols for true compiler symbols (bound nodes) - pkg/tokens for name-based tokens, identifiers, constants * Several packages move underneath pkg/compiler: - pkg/ast becomes pkg/compiler/ast - pkg/errors becomes pkg/compiler/errors - pkg/symbols becomes pkg/compiler/symbols * pkg/ast/... becomes pkg/compiler/legacy/ast/... * pkg/pack/ast becomes pkg/compiler/ast. * pkg/options goes away, merged back into pkg/compiler. * All binding functionality moves underneath a dedicated package, pkg/compiler/binder. The legacy.go file contains cruft that will eventually go away, while the other files represent a halfway point between new and old, but are expected to stay roughly in the current shape. * All parsing functionality is moved underneath a new pkg/compiler/metadata namespace, and we adopt new terminology "metadata reading" since real parsing happens in the MetaMu compilers. Hence, Parser has become metadata.Reader. * In general phases of the compiler no longer share access to the actual compiler.Compiler object. Instead, shared state is moved to the core.Context object underneath pkg/compiler/core. * Dependency resolution during binding has been rewritten to the new model, including stashing bound package symbols in the context object, and detecting import cycles. * Compiler construction does not take a workspace object. Instead, creation of a workspace is entirely hidden inside of the compiler's constructor logic. 
* There are three Compile* functions on the Compiler interface, to support different styles of invoking compilation: Compile() auto- detects a Mu package, based on the workspace; CompilePath(string) loads the target as a Mu package and compiles it, regardless of the workspace settings; and, CompilePackage(*pack.Package) will compile a pre-loaded package AST, again regardless of workspace. * Delete the _fe, _sema, and parsetree phases. They are no longer relevant and the functionality is largely subsumed by the above. ...and so very much more. I'm surprised I ever got this to compile again!
2017-01-18 20:18:37 +00:00
// Package tokens contains the core symbol and token types for referencing resources and related entities.
Begin overhauling semantic phases This change further merges the new AST and MuPack/MuIL formats and abstractions into the core of the compiler. A good amount of the old code is gone now; I decided against ripping it all out in one fell swoop so that I can methodically check that we are preserving all relevant decisions and/or functionality we had in the old model. The changes are too numerous to outline in this commit message, however, here are the noteworthy ones: * Split up the notion of symbols and tokens, resulting in: - pkg/symbols for true compiler symbols (bound nodes) - pkg/tokens for name-based tokens, identifiers, constants * Several packages move underneath pkg/compiler: - pkg/ast becomes pkg/compiler/ast - pkg/errors becomes pkg/compiler/errors - pkg/symbols becomes pkg/compiler/symbols * pkg/ast/... becomes pkg/compiler/legacy/ast/... * pkg/pack/ast becomes pkg/compiler/ast. * pkg/options goes away, merged back into pkg/compiler. * All binding functionality moves underneath a dedicated package, pkg/compiler/binder. The legacy.go file contains cruft that will eventually go away, while the other files represent a halfway point between new and old, but are expected to stay roughly in the current shape. * All parsing functionality is moved underneath a new pkg/compiler/metadata namespace, and we adopt new terminology "metadata reading" since real parsing happens in the MetaMu compilers. Hence, Parser has become metadata.Reader. * In general phases of the compiler no longer share access to the actual compiler.Compiler object. Instead, shared state is moved to the core.Context object underneath pkg/compiler/core. * Dependency resolution during binding has been rewritten to the new model, including stashing bound package symbols in the context object, and detecting import cycles. * Compiler construction does not take a workspace object. Instead, creation of a workspace is entirely hidden inside of the compiler's constructor logic. 
* There are three Compile* functions on the Compiler interface, to support different styles of invoking compilation: Compile() auto- detects a Mu package, based on the workspace; CompilePath(string) loads the target as a Mu package and compiles it, regardless of the workspace settings; and, CompilePackage(*pack.Package) will compile a pre-loaded package AST, again regardless of workspace. * Delete the _fe, _sema, and parsetree phases. They are no longer relevant and the functionality is largely subsumed by the above. ...and so very much more. I'm surprised I ever got this to compile again!
2017-01-18 20:18:37 +00:00
package tokens
import (
"fmt"
"strings"
"unicode"
"github.com/pulumi/pulumi/sdk/v3/go/common/util/contract"
)
// Token is a qualified name that is capable of resolving to a symbol entirely on its own. Most uses of tokens are
// typed based on the context, so that a subset of the token syntax is permissible (see the various typedefs below).
// However, in its full generality, a token can have a package part, a module part, a module-member part, and a
// class-member part. Obviously tokens that are meant to address just a module won't have the module-member part, and
// tokens addressing module members won't have the class-member part, etc.
//
// Token's grammar is as follows:
//
//	Token            = <Identifier> |
//	                   <QualifiedToken> |
//	                   <DecoratedType>
//	Identifier       = <Name>
//	QualifiedToken   = <PackageName> [ ":" <ModuleName> [ ":" <ModuleMemberName> [ ":" <ClassMemberName> ] ] ]
//	PackageName      = ... similar to <QName>, except dashes permitted ...
//	ModuleName       = <QName>
//	ModuleMemberName = <Name>
//	ClassMemberName  = <Name>
//
// A token may be a simple identifier in the case that it refers to a built-in symbol, like a primitive type, or a
// variable in scope, rather than a qualified token that is to be bound to a symbol through package/module resolution.
//
// Notice that both package and module names may be qualified names (meaning they can have "/"s in them; see QName's
// comments), and that module and class members must use unqualified, simple names (meaning they have no delimiters).
// The specialized token kinds differ only in what elements they require as part of the token string.
//
// Finally, a token may also be a decorated type. This is for built-in array, map, pointer, and function types:
//
//	DecoratedType = "*" <Token> |
//	                "[]" <Token> |
//	                "map[" <Token> "]" <Token> |
//	                "(" [ <Token> [ "," <Token> ]* ] ")" <Token>?
//
// Notice that a recursive parsing process is required to extract elements from a <DecoratedType> token.
type Token string

// TokenDelimiter is the character delimiting portions of a qualified token.
const TokenDelimiter string = ":"

// Delimiters reports how many TokenDelimiter separators occur in the token.
func (tok Token) Delimiters() int {
	return strings.Count(tok.String(), TokenDelimiter)
}

// HasModule reports whether the token contains at least one delimiter.
func (tok Token) HasModule() bool {
	return tok.Delimiters() >= 1
}

// HasModuleMember reports whether the token contains at least two delimiters.
func (tok Token) HasModuleMember() bool {
	return tok.Delimiters() >= 2
}

// Simple reports whether the token contains no delimiters at all.
func (tok Token) Simple() bool {
	return !tok.HasModule()
}

// String returns the token's underlying string.
func (tok Token) String() string {
	return string(tok)
}
// delimiter returns the Nth index of a delimiter, as specified by the argument.
func (tok Token) delimiter(n int) int {
ix := -1
for n > 0 {
// Make sure we still have space.
if ix+1 >= len(tok) {
ix = -1
break
}
// If we do, keep looking for the next delimiter.
nix := strings.Index(string(tok[ix+1:]), TokenDelimiter)
if nix == -1 {
break
}
ix += 1 + nix
n--
}
return ix
2017-01-22 17:45:58 +00:00
}
// Name converts the Token into a Name. Two requirements on the "tok" argument are checked through
// contract.Requiref first: the token must be simple (no delimiters) and its string form must satisfy IsName.
func (tok Token) Name() Name {
	text := tok.String()
	contract.Requiref(tok.Simple(), "tok", "Simple")
	contract.Requiref(IsName(text), "tok", "IsName(%v)", tok)
	return Name(text)
}
// Package extracts the package from the token, assuming one exists.
//
// A token without any delimiter is treated as a built-in (primitive or decorated) type and therefore has no
// package: the empty Package is returned. This is the same condition Type.Primitive tests. Otherwise the package
// is everything before the first delimiter.
func (tok Token) Package() Package {
	if !tok.HasModule() {
		return "" // decorated and primitive types are built-in (and hence have no package).
	}
	return Package(tok[:tok.delimiter(1)])
}
// Module extracts the module portion from the token, assuming one exists. The result is the token up to (but not
// including) its second delimiter, the whole token when it has only one delimiter, and the empty Module when it
// has none.
func (tok Token) Module() Module {
	switch {
	case !tok.HasModule():
		return Module("")
	case tok.HasModuleMember():
		return Module(tok[:tok.delimiter(2)])
	default:
		return Module(tok)
	}
}
// ModuleMember extracts the module member portion from the token, assuming one exists. A token with at least two
// delimiters is returned whole as a ModuleMember; anything else yields the empty ModuleMember.
func (tok Token) ModuleMember() ModuleMember {
	if !tok.HasModuleMember() {
		return ModuleMember("")
	}
	return ModuleMember(tok)
}
// Package is a token representing just a package. It uses a much simpler grammar:
//
//	Package = <PackageName>
//
// Note that a package name of "." means "current package", to simplify emission and lookups.
type Package Token
2017-01-21 20:25:59 +00:00
func NewPackageToken(nm PackageName) Package {
contract.Assertf(IsQName(string(nm)), "Package name '%v' is not a legal qualified name", nm)
return Package(nm)
}
func (tok Package) Name() PackageName {
return PackageName(tok)
}
2017-01-21 20:25:59 +00:00
func (tok Package) String() string { return string(tok) }
// Module is a token representing a module. It uses the following subset of the token grammar:
//
//	Module = <Package> ":" <ModuleName>
//
// Note that a module name of "." means "current module", to simplify emission and lookups.
type Module Token
2017-01-21 20:25:59 +00:00
func NewModuleToken(pkg Package, nm ModuleName) Module {
contract.Assertf(IsQName(string(nm)), "Package '%v' module name '%v' is not a legal qualified name", pkg, nm)
return Module(string(pkg) + TokenDelimiter + string(nm))
}
// Package returns the package portion of the module token: everything before its first delimiter. It asserts,
// via contract.Assertf, that the token contains a module delimiter.
func (tok Module) Package() Package {
	asToken := Token(tok)
	contract.Assertf(asToken.HasModule(), "Module token '%v' missing module delimiter", tok)
	end := asToken.delimiter(1)
	return Package(tok[:end])
}
func (tok Module) Name() ModuleName {
t := Token(tok)
contract.Assertf(t.HasModule(), "Module token '%v' missing module delimiter", tok)
return ModuleName(tok[t.delimiter(1)+1:])
}
2017-01-21 20:25:59 +00:00
func (tok Module) String() string { return string(tok) }
// ModuleMember is a token representing a module's member. It uses the following grammar. Note that the member is
// separated from its module by TokenDelimiter (":"), not by a slash: the member name is everything after the
// token's second delimiter, and the module is everything before it.
//
//	ModuleMember = <Module> ":" <ModuleMemberName>
type ModuleMember Token
2017-01-21 20:25:59 +00:00
func NewModuleMemberToken(mod Module, nm ModuleMemberName) ModuleMember {
contract.Assertf(IsName(string(nm)), "Module '%v' member name '%v' is not a legal name", mod, nm)
return ModuleMember(string(mod) + TokenDelimiter + string(nm))
}
// ParseModuleMember attempts to turn the string s into a module member, returning an error if it isn't a valid one.
// The only check performed is that s, viewed as a Token, contains a module member part (Token.HasModuleMember).
func ParseModuleMember(s string) (ModuleMember, error) {
	if Token(s).HasModuleMember() {
		return ModuleMember(s), nil
	}
	return "", fmt.Errorf("String '%v' is not a valid module member", s)
}
// Package returns the package of the module that this member belongs to.
func (tok ModuleMember) Package() Package {
	owner := tok.Module()
	return owner.Package()
}
// Module returns the module portion of the member token: everything before its second delimiter. It asserts,
// via contract.Assertf, that the token contains a module member delimiter.
func (tok ModuleMember) Module() Module {
	asToken := Token(tok)
	contract.Assertf(asToken.HasModuleMember(), "Module member token '%v' missing module member delimiter", tok)
	memberSep := asToken.delimiter(2)
	return Module(tok[:memberSep])
}
func (tok ModuleMember) Name() ModuleMemberName {
t := Token(tok)
contract.Assertf(t.HasModuleMember(), "Module member token '%v' missing module member delimiter", tok)
return ModuleMemberName(tok[t.delimiter(2)+1:])
}
2017-01-21 20:25:59 +00:00
func (tok ModuleMember) String() string { return string(tok) }
// Type is a token representing a type. It is either a primitive type name, reference to a module class, or decorated:
//
//	Type = <Name> | <ModuleMember> | <DecoratedType>
type Type Token
2017-01-21 20:25:59 +00:00
func NewTypeToken(mod Module, nm TypeName) Type {
contract.Assertf(IsName(string(nm)), "Module '%v' type name '%v' is not a legal name", mod, nm)
return Type(string(mod) + TokenDelimiter + string(nm))
}
// ParseTypeToken interprets an arbitrary string as a Type, returning an error if the string is not a valid Type.
// The only check performed is that s, viewed as a Token, contains a module member part (at least two delimiters).
func ParseTypeToken(s string) (Type, error) {
	if tok := Token(s); !tok.HasModuleMember() {
		return "", fmt.Errorf("Type '%s' is not a valid type token (must have format '*:*:*')", tok)
	}
	return Type(s), nil
}
func (tok Type) Package() Package {
if tok.Primitive() {
return Package("")
}
2017-01-27 23:42:39 +00:00
return ModuleMember(tok).Package()
}
func (tok Type) Module() Module {
if tok.Primitive() {
return Module("")
}
2017-01-27 23:42:39 +00:00
return ModuleMember(tok).Module()
}
func (tok Type) Name() TypeName {
if tok.Primitive() {
return TypeName(tok)
}
2017-01-27 23:42:39 +00:00
return TypeName(ModuleMember(tok).Name())
}
// Primitive indicates whether this type is a primitive type name (i.e., not qualified with a module, etc).
func (tok Type) Primitive() bool {
return !Token(tok).HasModule()
}
2017-01-21 20:25:59 +00:00
func (tok Type) String() string { return string(tok) }
// camelCase returns s with its first rune converted to lower case via unicode.ToLower; the remaining runes are
// kept. An empty string is returned unchanged.
func camelCase(s string) string {
	if s == "" {
		return s
	}
	letters := []rune(s)
	head, tail := letters[0], letters[1:]
	return string(unicode.ToLower(head)) + string(tail)
}
// DisplayName returns a simpler, user-readable version of this type name.
//
//	{package}:{module path truncated to the last slash}:{type name}
//
// The short form is produced only when the token has exactly three ":"-separated components, the type name is
// non-empty, the module contains a "/", and the module's final path segment equals camelCase of the type name
// (for example "aws:s3/bucket:Bucket" becomes "aws:s3:Bucket"). If not possible, it will return the string
// representation of the type.
func (tok Type) DisplayName() string {
	full := string(tok)

	parts := strings.Split(full, ":")
	if len(parts) != 3 || parts[2] == "" {
		return full
	}
	pkg, module, name := parts[0], parts[1], parts[2]

	// The module must end in "/<camelCase(name)>" for the trailing segment to be considered redundant.
	slash := strings.LastIndexByte(module, '/')
	if slash < 0 || module[slash+1:] != camelCase(name) {
		return full
	}
	return fmt.Sprintf("%v:%v:%v", pkg, module[:slash], name)
}