2020-04-20 23:36:05 +00:00
|
|
|
// Copyright 2016-2020, Pulumi Corporation.
|
2019-07-23 00:09:35 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package python
|
|
|
|
|
|
|
|
import (
|
|
|
|
"strings"
|
|
|
|
"unicode"
|
2020-04-30 20:22:13 +00:00
|
|
|
"unicode/utf8"
|
|
|
|
|
2021-03-17 13:20:05 +00:00
|
|
|
"github.com/pulumi/pulumi/pkg/v3/codegen"
|
2019-07-23 00:09:35 +00:00
|
|
|
)
|
|
|
|
|
2022-06-15 12:45:55 +00:00
|
|
|
// useLegacyName are names that should return a legacy result from PyName, for compatibility.
|
2022-05-02 18:16:21 +00:00
|
|
|
var useLegacyName = codegen.NewStringSet(
|
2020-08-21 03:51:32 +00:00
|
|
|
// The following property name of a nested type is a case where the newer algorithm produces an incorrect name
|
|
|
|
// (`open_xjson_ser_de`). It should be the legacy name of `open_x_json_ser_de`.
|
|
|
|
// TODO[pulumi/pulumi#5199]: We should see if we can fix this in the algorithm of PyName so it doesn't need to
|
|
|
|
// be special-cased in this set.
|
2022-05-02 18:16:21 +00:00
|
|
|
"openXJsonSerDe", // AWS
|
2020-08-21 03:51:32 +00:00
|
|
|
|
|
|
|
// The following function name has already shipped with the legacy name (`get_public_i_ps`).
|
|
|
|
// TODO[pulumi/pulumi#5200]: Consider emitting two functions: one with the correct name (`get_public_ips`)
|
|
|
|
// and another function with the legacy name (`get_public_i_ps`) marked as deprecated.
|
2022-05-02 18:16:21 +00:00
|
|
|
"GetPublicIPs", // Azure
|
2020-08-21 03:51:32 +00:00
|
|
|
|
|
|
|
// The following function name has already shipped with the legacy name (`get_uptime_check_i_ps`).
|
|
|
|
// TODO[pulumi/pulumi#5200]: Consider emitting two functions: one with the correct name (`get_uptime_check_ips`)
|
|
|
|
// and another function with the legacy name (`get_uptime_check_i_ps`) marked as deprecated.
|
2022-05-02 18:16:21 +00:00
|
|
|
"GetUptimeCheckIPs", // GCP
|
|
|
|
)
|
2020-08-21 03:51:32 +00:00
|
|
|
|
2019-07-23 00:09:35 +00:00
|
|
|
// PyName turns a variable or function name, normally using camelCase, to an underscore_case name.
|
|
|
|
func PyName(name string) string {
|
2020-09-14 15:42:01 +00:00
|
|
|
return pyName(name, useLegacyName.Has(name))
|
2020-08-05 17:08:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func pyName(name string, legacy bool) string {
|
2019-07-23 00:09:35 +00:00
|
|
|
// This method is a state machine with four states:
|
|
|
|
// stateFirst - the initial state.
|
|
|
|
// stateUpper - The last character we saw was an uppercase letter and the character before it
|
|
|
|
// was either a number or a lowercase letter.
|
|
|
|
// stateAcronym - The last character we saw was an uppercase letter and the character before it
|
|
|
|
// was an uppercase letter.
|
|
|
|
// stateLowerOrNumber - The last character we saw was a lowercase letter or a number.
|
|
|
|
//
|
|
|
|
// The following are the state transitions of this state machine:
|
|
|
|
// stateFirst -> (uppercase letter) -> stateUpper
|
|
|
|
// stateFirst -> (lowercase letter or number) -> stateLowerOrNumber
|
|
|
|
// Append the lower-case form of the character to currentComponent.
|
|
|
|
//
|
|
|
|
// stateUpper -> (uppercase letter) -> stateAcronym
|
|
|
|
// stateUpper -> (lowercase letter or number) -> stateLowerOrNumber
|
|
|
|
// Append the lower-case form of the character to currentComponent.
|
|
|
|
//
|
|
|
|
// stateAcronym -> (uppercase letter) -> stateAcronym
|
|
|
|
// Append the lower-case form of the character to currentComponent.
|
|
|
|
// stateAcronym -> (number) -> stateLowerOrNumber
|
|
|
|
// Append the character to currentComponent.
|
|
|
|
// stateAcronym -> (lowercase letter) -> stateLowerOrNumber
|
|
|
|
// Take all but the last character in currentComponent, turn that into
|
|
|
|
// a string, and append that to components. Set currentComponent to the
|
|
|
|
// last two characters seen.
|
|
|
|
//
|
|
|
|
// stateLowerOrNumber -> (uppercase letter) -> stateUpper
|
|
|
|
// Take all characters in currentComponent, turn that into a string,
|
|
|
|
// and append that to components. Set currentComponent to the last
|
|
|
|
// character seen.
|
|
|
|
// stateLowerOrNumber -> (lowercase letter) -> stateLowerOrNumber
|
|
|
|
// Append the character to currentComponent.
|
|
|
|
//
|
|
|
|
// The Go libraries that convert camelCase to snake_case deviate subtly from
|
|
|
|
// the semantics we're going for in this method, namely that they separate
|
|
|
|
// numbers and lowercase letters. We don't want this in all cases (we want e.g. Sha256Hash to
|
|
|
|
// be converted as sha256_hash). We also want SHA256Hash to be converted as sha256_hash, so
|
|
|
|
// we must at least be aware of digits when in the stateAcronym state.
|
|
|
|
//
|
|
|
|
// As for why this is a state machine, the libraries that do this all pretty much use
|
|
|
|
// either regular expressions or state machines, which I suppose are ultimately the same thing.
|
|
|
|
const (
|
|
|
|
stateFirst = iota
|
|
|
|
stateUpper
|
|
|
|
stateAcronym
|
|
|
|
stateLowerOrNumber
|
|
|
|
)
|
|
|
|
|
2020-04-30 20:22:13 +00:00
|
|
|
var result strings.Builder // The components of the name, joined together with underscores.
|
|
|
|
var currentComponent strings.Builder // The characters composing the current component being built
|
2021-07-16 22:45:00 +00:00
|
|
|
|
|
|
|
// Preallocate enough space for the name + 5 underscores. '5' is based on a wild guess that most names will consist
|
|
|
|
// of 5 or fewer words.
|
|
|
|
result.Grow(len(name) + 5)
|
|
|
|
currentComponent.Grow(len(name) + 5)
|
|
|
|
|
2019-07-23 00:09:35 +00:00
|
|
|
state := stateFirst
|
2020-04-30 20:22:13 +00:00
|
|
|
for _, char := range name {
|
|
|
|
// If this is an illegal character for a Python identifier, replace it.
|
|
|
|
if !isLegalIdentifierPart(char) {
|
|
|
|
char = '_'
|
|
|
|
}
|
|
|
|
|
2019-07-23 00:09:35 +00:00
|
|
|
switch state {
|
|
|
|
case stateFirst:
|
2020-04-30 20:22:13 +00:00
|
|
|
if !isLegalIdentifierStart(char) {
|
|
|
|
currentComponent.WriteRune('_')
|
|
|
|
}
|
|
|
|
|
2019-07-23 00:09:35 +00:00
|
|
|
if unicode.IsUpper(char) {
|
|
|
|
// stateFirst -> stateUpper
|
|
|
|
state = stateUpper
|
2020-04-30 20:22:13 +00:00
|
|
|
currentComponent.WriteRune(unicode.ToLower(char))
|
2019-07-23 00:09:35 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// stateFirst -> stateLowerOrNumber
|
|
|
|
state = stateLowerOrNumber
|
2020-04-30 20:22:13 +00:00
|
|
|
currentComponent.WriteRune(char)
|
2019-07-23 00:09:35 +00:00
|
|
|
continue
|
|
|
|
|
|
|
|
case stateUpper:
|
|
|
|
if unicode.IsUpper(char) {
|
|
|
|
// stateUpper -> stateAcronym
|
|
|
|
state = stateAcronym
|
2020-04-30 20:22:13 +00:00
|
|
|
currentComponent.WriteRune(unicode.ToLower(char))
|
2019-07-23 00:09:35 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// stateUpper -> stateLowerOrNumber
|
|
|
|
state = stateLowerOrNumber
|
2020-04-30 20:22:13 +00:00
|
|
|
currentComponent.WriteRune(char)
|
2019-07-23 00:09:35 +00:00
|
|
|
continue
|
|
|
|
|
|
|
|
case stateAcronym:
|
|
|
|
if unicode.IsUpper(char) {
|
|
|
|
// stateAcronym -> stateAcronym
|
2020-04-30 20:22:13 +00:00
|
|
|
currentComponent.WriteRune(unicode.ToLower(char))
|
2019-07-23 00:09:35 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-08-05 17:08:52 +00:00
|
|
|
// We want to fold digits (or the lowercase letter 's' if not the legacy algo) immediately following
|
|
|
|
// an acronym into the same component as the acronym.
|
|
|
|
if unicode.IsDigit(char) || (char == 's' && !legacy) {
|
2019-07-23 00:09:35 +00:00
|
|
|
// stateAcronym -> stateLowerOrNumber
|
|
|
|
state = stateLowerOrNumber
|
2020-04-30 20:22:13 +00:00
|
|
|
currentComponent.WriteRune(char)
|
2019-07-23 00:09:35 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// stateAcronym -> stateLowerOrNumber
|
2020-04-30 20:22:13 +00:00
|
|
|
component := currentComponent.String()
|
|
|
|
last, size := utf8.DecodeLastRuneInString(component)
|
|
|
|
if result.Len() != 0 {
|
|
|
|
result.WriteRune('_')
|
|
|
|
}
|
|
|
|
result.WriteString(component[:len(component)-size])
|
|
|
|
|
|
|
|
currentComponent.Reset()
|
|
|
|
currentComponent.WriteRune(last)
|
|
|
|
currentComponent.WriteRune(char)
|
2019-07-23 00:09:35 +00:00
|
|
|
state = stateLowerOrNumber
|
|
|
|
continue
|
|
|
|
|
|
|
|
case stateLowerOrNumber:
|
|
|
|
if unicode.IsUpper(char) {
|
|
|
|
// stateLowerOrNumber -> stateUpper
|
2020-04-30 20:22:13 +00:00
|
|
|
if result.Len() != 0 {
|
|
|
|
result.WriteRune('_')
|
|
|
|
}
|
|
|
|
result.WriteString(currentComponent.String())
|
|
|
|
|
|
|
|
currentComponent.Reset()
|
|
|
|
currentComponent.WriteRune(unicode.ToLower(char))
|
2019-07-23 00:09:35 +00:00
|
|
|
state = stateUpper
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// stateLowerOrNumber -> stateLowerOrNumber
|
2020-04-30 20:22:13 +00:00
|
|
|
currentComponent.WriteRune(char)
|
2019-07-23 00:09:35 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-30 20:22:13 +00:00
|
|
|
if currentComponent.Len() != 0 {
|
|
|
|
if result.Len() != 0 {
|
|
|
|
result.WriteRune('_')
|
|
|
|
}
|
|
|
|
result.WriteString(currentComponent.String())
|
|
|
|
}
|
|
|
|
return EnsureKeywordSafe(result.String())
|
2019-07-23 00:09:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Keywords is a map of reserved keywords used by Python 2 and 3. We use this to avoid generating unspeakable
|
|
|
|
// names in the resulting code. This map was sourced by merging the following reference material:
|
|
|
|
//
|
2022-09-14 02:12:02 +00:00
|
|
|
// - Python 2: https://docs.python.org/2.5/ref/keywords.html
|
|
|
|
// - Python 3: https://docs.python.org/3/reference/lexical_analysis.html#keywords
|
2020-04-30 20:22:13 +00:00
|
|
|
var Keywords = codegen.NewStringSet(
|
|
|
|
"False",
|
|
|
|
"None",
|
|
|
|
"True",
|
|
|
|
"and",
|
|
|
|
"as",
|
|
|
|
"assert",
|
|
|
|
"async",
|
|
|
|
"await",
|
|
|
|
"break",
|
|
|
|
"class",
|
|
|
|
"continue",
|
|
|
|
"def",
|
|
|
|
"del",
|
|
|
|
"elif",
|
|
|
|
"else",
|
|
|
|
"except",
|
|
|
|
"exec",
|
|
|
|
"finally",
|
|
|
|
"for",
|
|
|
|
"from",
|
|
|
|
"global",
|
|
|
|
"if",
|
|
|
|
"import",
|
|
|
|
"in",
|
|
|
|
"is",
|
|
|
|
"lambda",
|
|
|
|
"nonlocal",
|
|
|
|
"not",
|
|
|
|
"or",
|
|
|
|
"pass",
|
|
|
|
"print",
|
|
|
|
"raise",
|
|
|
|
"return",
|
|
|
|
"try",
|
|
|
|
"while",
|
|
|
|
"with",
|
|
|
|
"yield")
|
2019-07-23 00:09:35 +00:00
|
|
|
|
|
|
|
// EnsureKeywordSafe adds a trailing underscore if the generated name clashes with a Python 2 or 3 keyword, per
|
|
|
|
// PEP 8: https://www.python.org/dev/peps/pep-0008/?#function-and-method-arguments
|
|
|
|
func EnsureKeywordSafe(name string) string {
|
2020-04-30 20:22:13 +00:00
|
|
|
if Keywords.Has(name) {
|
2019-07-23 00:09:35 +00:00
|
|
|
return name + "_"
|
|
|
|
}
|
|
|
|
return name
|
|
|
|
}
|