From dbc13deb21f5d017e7ae9d8e3abe1a97514cd2e9 Mon Sep 17 00:00:00 2001 From: bryanl <bryanliles@gmail.com> Date: Tue, 13 Mar 2018 14:56:26 -0400 Subject: [PATCH] create util package for jsonnet Signed-off-by: bryanl <bryanliles@gmail.com> --- Gopkg.lock | 2 +- pkg/docparser/context.go | 380 +++++++ pkg/docparser/lexer.go | 823 ++++++++++++++ pkg/docparser/lexer_test.go | 295 +++++ pkg/docparser/literalfield_set.go | 172 +++ pkg/docparser/parser.go | 1223 +++++++++++++++++++++ pkg/docparser/parser_test.go | 271 +++++ pkg/docparser/static_error.go | 49 + pkg/util/jsonnet/import.go | 53 + pkg/util/jsonnet/import_test.go | 75 ++ pkg/util/jsonnet/object.go | 135 +++ pkg/util/jsonnet/object_test.go | 206 ++++ pkg/util/jsonnet/testdata/set-map.jsonnet | 14 + 13 files changed, 3697 insertions(+), 1 deletion(-) create mode 100644 pkg/docparser/context.go create mode 100644 pkg/docparser/lexer.go create mode 100644 pkg/docparser/lexer_test.go create mode 100644 pkg/docparser/literalfield_set.go create mode 100644 pkg/docparser/parser.go create mode 100644 pkg/docparser/parser_test.go create mode 100644 pkg/docparser/static_error.go create mode 100644 pkg/util/jsonnet/import.go create mode 100644 pkg/util/jsonnet/import_test.go create mode 100644 pkg/util/jsonnet/object.go create mode 100644 pkg/util/jsonnet/object_test.go create mode 100644 pkg/util/jsonnet/testdata/set-map.jsonnet diff --git a/Gopkg.lock b/Gopkg.lock index 95273dd4..29b829c9 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -610,6 +610,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "87834a6bde3f8fbe065d32e469dc94301fd94d942c4e2ad414bb3d756e71778d" + inputs-digest = "c2823dabf259fbe1a025ad57ce12af4869ab264ca610219a88c1b29142c4875a" solver-name = "gps-cdcl" solver-version = 1 diff --git a/pkg/docparser/context.go b/pkg/docparser/context.go new file mode 100644 index 00000000..3b709ae8 --- /dev/null +++ b/pkg/docparser/context.go @@ -0,0 +1,380 @@ +/* +Copyright 2017 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package docparser + +import ( + "fmt" + + "github.com/google/go-jsonnet/ast" + "github.com/ksonnet/ksonnet-lib/ksonnet-gen/astext" +) + +var topLevelContext = "$" + +const anonymous = "anonymous" + +// TODO(sbarzowski) polish children functions and consider moving to AST +// and exporting + +// directChildren are children of AST node that are executed in the same context +// and environment as their parent +// +// They must satisfy the following rules: +// * (no-delayed-evaluation) They are evaluated when their parent is evaluated or never. 
+// * (no-indirect-evaluation) They cannot be evaluated during evaluation of any non-direct children +// * (same-environment) They must be evaluated in the same environment as their parent +func directChildren(node ast.Node) []ast.Node { + switch node := node.(type) { + case *ast.Apply: + return []ast.Node{node.Target} + // TODO(sbarzowski) tailstrict call arguments (once we have tailstrict) + case *ast.ApplyBrace: + return []ast.Node{node.Left, node.Right} + case *ast.Array: + return nil + case *ast.Assert: + return []ast.Node{node.Cond, node.Message, node.Rest} + case *ast.Binary: + return []ast.Node{node.Left, node.Right} + case *ast.Conditional: + return []ast.Node{node.Cond, node.BranchTrue, node.BranchFalse} + case *ast.Dollar: + return nil + case *ast.Error: + return []ast.Node{node.Expr} + case *ast.Function: + return nil + case *ast.Import: + return nil + case *ast.ImportStr: + return nil + case *ast.Index: + return []ast.Node{node.Target, node.Index} + case *ast.Slice: + return []ast.Node{node.Target, node.BeginIndex, node.EndIndex, node.Step} + case *ast.Local: + return []ast.Node{node.Body} + case *ast.LiteralBoolean: + return nil + case *ast.LiteralNull: + return nil + case *ast.LiteralNumber: + return nil + case *ast.LiteralString: + return nil + case *astext.Object: + return objectFieldsDirectChildren(node.Fields) + case *ast.ArrayComp: + result := []ast.Node{} + spec := &node.Spec + for spec != nil { + result = append(result, spec.Expr) + for _, ifspec := range spec.Conditions { + result = append(result, ifspec.Expr) + } + spec = spec.Outer + } + return result + case *ast.ObjectComp: + var fields astext.ObjectFields + for _, field := range node.Fields { + f := astext.ObjectField{ + ObjectField: field, + } + + fields = append(fields, f) + } + result := objectFieldsDirectChildren(fields) + spec := &node.Spec + for spec != nil { + result = append(result, spec.Expr) + for _, ifspec := range spec.Conditions { + result = append(result, ifspec.Expr) + } + spec = spec.Outer + } + return result + case *ast.Self: + return nil + case *ast.SuperIndex: + return []ast.Node{node.Index} + case *ast.InSuper: + return []ast.Node{node.Index} + case *ast.Unary: + return []ast.Node{node.Expr} + case *ast.Var: + return nil + } + panic(fmt.Sprintf("directChildren: Unknown node %#v", node)) +} + +// thunkChildren are children of AST node that are executed in a new context +// and capture environment from parent (thunked) +// TODO(sbarzowski) Make sure it works well with boundary cases like tailstrict arguments, +// make it more precise. +// Rules: +// * (same-environment) They must be evaluated in the same environment as their parent +// * (not-direct) If they can be direct children, they should (and cannot be thunked). 
+func thunkChildren(node ast.Node) []ast.Node { + switch node := node.(type) { + case *ast.Apply: + var nodes []ast.Node + for _, arg := range node.Arguments.Positional { + nodes = append(nodes, arg) + } + for _, arg := range node.Arguments.Named { + nodes = append(nodes, arg.Arg) + } + return nodes + case *ast.ApplyBrace: + return nil + case *ast.Array: + return node.Elements + case *ast.Assert: + return nil + case *ast.Binary: + return nil + case *ast.Conditional: + return nil + case *ast.Dollar: + return nil + case *ast.Error: + return nil + case *ast.Function: + return nil + case *ast.Import: + return nil + case *ast.ImportStr: + return nil + case *ast.Index: + return nil + case *ast.Slice: + return nil + case *ast.Local: + // TODO(sbarzowski) complicated + return nil + case *ast.LiteralBoolean: + return nil + case *ast.LiteralNull: + return nil + case *ast.LiteralNumber: + return nil + case *ast.LiteralString: + return nil + case *astext.Object: + return nil + case *ast.ArrayComp: + return []ast.Node{node.Body} + case *ast.ObjectComp: + return nil + case *ast.Self: + return nil + case *ast.SuperIndex: + return nil + case *ast.InSuper: + return nil + case *ast.Unary: + return nil + case *ast.Var: + return nil + } + panic(fmt.Sprintf("thunkChildren: Unknown node %#v", node)) +} + +func objectFieldsDirectChildren(fields astext.ObjectFields) ast.Nodes { + result := ast.Nodes{} + for _, field := range fields { + if field.Expr1 != nil { + result = append(result, field.Expr1) + } + } + return result +} + +func inObjectFieldsChildren(fields ast.ObjectFields) ast.Nodes { + result := ast.Nodes{} + for _, field := range fields { + if field.MethodSugar { + result = append(result, field.Method) + } else { + if field.Expr2 != nil { + result = append(result, field.Expr2) + } + if field.Expr3 != nil { + result = append(result, field.Expr3) + } + } + } + return result +} + +// children that are neither direct nor thunked, e.g. object field body +// They are evaluated in a different environment from their parent. +func specialChildren(node ast.Node) []ast.Node { + switch node := node.(type) { + case *ast.Apply: + return nil + case *ast.ApplyBrace: + return nil + case *ast.Array: + return nil + case *ast.Assert: + return nil + case *ast.Binary: + return nil + case *ast.Conditional: + return nil + case *ast.Dollar: + return nil + case *ast.Error: + return nil + case *ast.Function: + // TODO(sbarzowski) this + return nil + case *ast.Import: + return nil + case *ast.ImportStr: + return nil + case *ast.Index: + return nil + case *ast.Slice: + return nil + case *ast.Local: + return nil + case *ast.LiteralBoolean: + return nil + case *ast.LiteralNull: + return nil + case *ast.LiteralNumber: + return nil + case *ast.LiteralString: + return nil + case *ast.Object: + return inObjectFieldsChildren(node.Fields) + case *ast.ArrayComp: + return []ast.Node{node.Body} + case *ast.ObjectComp: + + case *ast.Self: + return nil + case *ast.SuperIndex: + return nil + case *ast.InSuper: + return nil + case *ast.Unary: + return nil + case *ast.Var: + return nil + } + panic(fmt.Sprintf("specialChildren: Unknown node %#v", node)) +} + +func Children(node ast.Node) []ast.Node { + var result []ast.Node + result = append(result, directChildren(node)...) + result = append(result, thunkChildren(node)...) + result = append(result, specialChildren(node)...) 
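+	// Together these three groups cover every child node: direct, thunked, and specially-evaluated children.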
+ return result +} + +func functionContext(funcName string) *string { + r := "function <" + funcName + ">" + return &r +} + +func objectContext(objName string) *string { + r := "object <" + objName + ">" + return &r +} + +// addContext adds context to a node and its whole subtree. +// +// context is the surrounding context of a node (e.g. a function it's in) +// +// bind is a name that the node is bound to, i.e. if node is a local bind body +// then bind is its name. For nodes that are not bound to variables `anonymous` +// should be passed. For example: +// local x = 2 + 2; x +// In such case bind for binary node 2 + 2 is "x" and for every other node, +// including its children, its anonymous. +func addContext(node ast.Node, context *string, bind string) { + if node == nil { + return + } + + node.SetContext(context) + + switch node := node.(type) { + case *ast.Function: + funContext := functionContext(bind) + addContext(node.Body, funContext, anonymous) + for i := range node.Parameters.Optional { + // Default arguments have the same context as the function body. + addContext(node.Parameters.Optional[i].DefaultArg, funContext, anonymous) + } + case *ast.Object: + // TODO(sbarzowski) include fieldname, maybe even chains + + outOfObject := directChildren(node) + for _, f := range outOfObject { + // This actually is evaluated outside of object + addContext(f, context, anonymous) + } + + objContext := objectContext(bind) + inObject := inObjectFieldsChildren(node.Fields) + for _, f := range inObject { + // This actually is evaluated outside of object + addContext(f, objContext, anonymous) + } + + case *ast.ObjectComp: + outOfObject := directChildren(node) + for _, f := range outOfObject { + // This actually is evaluated outside of object + addContext(f, context, anonymous) + } + + objContext := objectContext(bind) + inObject := inObjectFieldsChildren(node.Fields) + for _, f := range inObject { + // This actually is evaluated outside of object + addContext(f, objContext, anonymous) + } + + case *ast.Local: + for _, bind := range node.Binds { + namedThunkContext := "thunk <" + string(bind.Variable) + "> from <" + *context + ">" + if bind.Fun != nil { + addContext(bind.Fun, &namedThunkContext, string(bind.Variable)) + } else { + addContext(bind.Body, &namedThunkContext, string(bind.Variable)) + } + } + addContext(node.Body, context, bind) + default: + for _, child := range directChildren(node) { + addContext(child, context, anonymous) + } + + // TODO(sbarzowski) avoid "thunk from <thunk from..." + thunkContext := "thunk from <" + *context + ">" + for _, child := range thunkChildren(node) { + addContext(child, &thunkContext, anonymous) + } + } +} diff --git a/pkg/docparser/lexer.go b/pkg/docparser/lexer.go new file mode 100644 index 00000000..2e82f607 --- /dev/null +++ b/pkg/docparser/lexer.go @@ -0,0 +1,823 @@ +/* +Copyright 2016 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package docparser + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "unicode/utf8" + + "github.com/google/go-jsonnet/ast" +) + +// --------------------------------------------------------------------------- +// Fodder +// +// Fodder is stuff that is usually thrown away by lexers/preprocessors but is +// kept so that the source can be round tripped with full fidelity. +type FodderKind int + +const ( + FodderWhitespace FodderKind = iota + FodderCommentC + FodderCommentCpp + FodderCommentHash +) + +type FodderElement struct { + Kind FodderKind + Data string +} + +type fodder []FodderElement + +// --------------------------------------------------------------------------- +// Token + +type tokenKind int + +const ( + // Symbols + tokenBraceL tokenKind = iota + tokenBraceR + tokenBracketL + tokenBracketR + tokenComma + tokenDollar + tokenDot + tokenParenL + tokenParenR + tokenSemicolon + + // Arbitrary length lexemes + tokenIdentifier + tokenNumber + tokenOperator + tokenStringBlock + tokenStringDouble + tokenStringSingle + tokenVerbatimStringDouble + tokenVerbatimStringSingle + + // Keywords + tokenAssert + tokenElse + tokenError + tokenFalse + tokenFor + tokenFunction + tokenIf + tokenImport + tokenImportStr + tokenIn + tokenLocal + tokenNullLit + tokenSelf + tokenSuper + tokenTailStrict + tokenThen + tokenTrue + + // A special token that holds line/column information about the end of the + // file. + tokenEndOfFile +) + +var tokenKindStrings = []string{ + // Symbols + tokenBraceL: "\"{\"", + tokenBraceR: "\"}\"", + tokenBracketL: "\"[\"", + tokenBracketR: "\"]\"", + tokenComma: "\",\"", + tokenDollar: "\"$\"", + tokenDot: "\".\"", + tokenParenL: "\"(\"", + tokenParenR: "\")\"", + tokenSemicolon: "\";\"", + + // Arbitrary length lexemes + tokenIdentifier: "IDENTIFIER", + tokenNumber: "NUMBER", + tokenOperator: "OPERATOR", + tokenStringBlock: "STRING_BLOCK", + tokenStringDouble: "STRING_DOUBLE", + tokenStringSingle: "STRING_SINGLE", + tokenVerbatimStringDouble: "VERBATIM_STRING_DOUBLE", + tokenVerbatimStringSingle: "VERBATIM_STRING_SINGLE", + + // Keywords + tokenAssert: "assert", + tokenElse: "else", + tokenError: "error", + tokenFalse: "false", + tokenFor: "for", + tokenFunction: "function", + tokenIf: "if", + tokenImport: "import", + tokenImportStr: "importstr", + tokenIn: "in", + tokenLocal: "local", + tokenNullLit: "null", + tokenSelf: "self", + tokenSuper: "super", + tokenTailStrict: "tailstrict", + tokenThen: "then", + tokenTrue: "true", + + // A special token that holds line/column information about the end of the + // file. + tokenEndOfFile: "end of file", +} + +func (tk tokenKind) String() string { + if tk < 0 || int(tk) >= len(tokenKindStrings) { + panic(fmt.Sprintf("INTERNAL ERROR: Unknown token kind:: %d", tk)) + } + return tokenKindStrings[tk] +} + +type token struct { + kind tokenKind // The type of the token + fodder fodder // Any fodder the occurs before this token + data string // Content of the token if it is not a keyword + + // Extra info for when kind == tokenStringBlock + stringBlockIndent string // The sequence of whitespace that indented the block. + stringBlockTermIndent string // This is always fewer whitespace characters than in stringBlockIndent. 
+ + loc ast.LocationRange +} + +type tokens []token + +func (t *token) String() string { + if t.data == "" { + return t.kind.String() + } else if t.kind == tokenOperator { + return fmt.Sprintf("\"%v\"", t.data) + } else { + return fmt.Sprintf("(%v, \"%v\")", t.kind, t.data) + } +} + +// --------------------------------------------------------------------------- +// Helpers + +func isUpper(r rune) bool { + return r >= 'A' && r <= 'Z' +} + +func isLower(r rune) bool { + return r >= 'a' && r <= 'z' +} + +func isNumber(r rune) bool { + return r >= '0' && r <= '9' +} + +func isIdentifierFirst(r rune) bool { + return isUpper(r) || isLower(r) || r == '_' +} + +func isIdentifier(r rune) bool { + return isIdentifierFirst(r) || isNumber(r) +} + +func isSymbol(r rune) bool { + switch r { + case '!', '$', ':', '~', '+', '-', '&', '|', '^', '=', '<', '>', '*', '/', '%': + return true + } + return false +} + +// Check that b has at least the same whitespace prefix as a and returns the +// amount of this whitespace, otherwise returns 0. If a has no whitespace +// prefix than return 0. +func checkWhitespace(a, b string) int { + i := 0 + for ; i < len(a); i++ { + if a[i] != ' ' && a[i] != '\t' { + // a has run out of whitespace and b matched up to this point. Return + // result. + return i + } + if i >= len(b) { + // We ran off the edge of b while a still has whitespace. Return 0 as + // failure. + return 0 + } + if a[i] != b[i] { + // a has whitespace but b does not. Return 0 as failure. + return 0 + } + } + // We ran off the end of a and b kept up + return i +} + +// --------------------------------------------------------------------------- +// Lexer + +type position struct { + byteNo int // Byte position of last rune read + lineNo int // Line number + lineStart int // Rune position of the last newline +} + +type lexer struct { + fileName string // The file name being lexed, only used for errors + input string // The input string + source *ast.Source + + pos position // Current position in input + prev position // Previous position in input + + tokens tokens // The tokens that we've generated so far + + // Information about the token we are working on right now + fodder fodder + tokenStart int + tokenStartLoc ast.Location +} + +const lexEOF = -1 + +func makeLexer(fn string, input string) *lexer { + return &lexer{ + fileName: fn, + input: input, + source: ast.BuildSource(input), + pos: position{byteNo: 0, lineNo: 1, lineStart: 0}, + prev: position{byteNo: lexEOF, lineNo: 0, lineStart: 0}, + tokenStartLoc: ast.Location{Line: 1, Column: 1}, + } +} + +// next returns the next rune in the input. +func (l *lexer) next() rune { + if int(l.pos.byteNo) >= len(l.input) { + l.prev = l.pos + return lexEOF + } + r, w := utf8.DecodeRuneInString(l.input[l.pos.byteNo:]) + l.prev = l.pos + l.pos.byteNo += w + if r == '\n' { + l.pos.lineStart = l.pos.byteNo + l.pos.lineNo++ + } + return r +} + +func (l *lexer) acceptN(n int) { + for i := 0; i < n; i++ { + l.next() + } +} + +// peek returns but does not consume the next rune in the input. +func (l *lexer) peek() rune { + r := l.next() + l.backup() + return r +} + +// backup steps back one rune. Can only be called once per call of next. 
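+// A second consecutive call panics because the previous position has already been discarded.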
+func (l *lexer) backup() { + if l.prev.byteNo == lexEOF { + panic("backup called with no valid previous rune") + } + l.pos = l.prev + l.prev = position{byteNo: lexEOF} +} + +func locationFromPosition(pos position) ast.Location { + return ast.Location{Line: pos.lineNo, Column: pos.byteNo - pos.lineStart + 1} +} + +func (l *lexer) location() ast.Location { + return locationFromPosition(l.pos) +} + +func (l *lexer) prevLocation() ast.Location { + if l.prev.byteNo == lexEOF { + panic("prevLocation called with no valid previous rune") + } + return locationFromPosition(l.prev) +} + +// Reset the current working token start to the current cursor position. This +// may throw away some characters. This does not throw away any accumulated +// fodder. +func (l *lexer) resetTokenStart() { + l.tokenStart = l.pos.byteNo + l.tokenStartLoc = l.location() +} + +func (l *lexer) emitFullToken(kind tokenKind, data, stringBlockIndent, stringBlockTermIndent string) { + l.tokens = append(l.tokens, token{ + kind: kind, + fodder: l.fodder, + data: data, + stringBlockIndent: stringBlockIndent, + stringBlockTermIndent: stringBlockTermIndent, + loc: ast.MakeLocationRange(l.fileName, l.source, l.tokenStartLoc, l.location()), + }) + l.fodder = fodder{} +} + +func (l *lexer) emitToken(kind tokenKind) { + l.emitFullToken(kind, l.input[l.tokenStart:l.pos.byteNo], "", "") + l.resetTokenStart() +} + +func (l *lexer) addWhitespaceFodder() { + fodderData := l.input[l.tokenStart:l.pos.byteNo] + if len(l.fodder) == 0 || l.fodder[len(l.fodder)-1].Kind != FodderWhitespace { + l.fodder = append(l.fodder, FodderElement{Kind: FodderWhitespace, Data: fodderData}) + } else { + l.fodder[len(l.fodder)-1].Data += fodderData + } + l.resetTokenStart() +} + +func (l *lexer) addCommentFodder(kind FodderKind) { + fodderData := l.input[l.tokenStart:l.pos.byteNo] + l.fodder = append(l.fodder, FodderElement{Kind: kind, Data: fodderData}) + l.resetTokenStart() +} + +func (l *lexer) addFodder(kind FodderKind, data string) { + l.fodder = append(l.fodder, FodderElement{Kind: kind, Data: data}) +} + +func (l *lexer) makeStaticErrorPoint(msg string, loc ast.Location) StaticError { + return StaticError{Msg: msg, Loc: ast.MakeLocationRange(l.fileName, l.source, loc, loc)} +} + +// lexNumber will consume a number and emit a token. It is assumed +// that the next rune to be served by the lexer will be a leading digit. +func (l *lexer) lexNumber() error { + // This function should be understood with reference to the linked image: + // http://www.json.org/number.gif + + // Note, we deviate from the json.org documentation as follows: + // There is no reason to lex negative numbers as atomic tokens, it is better to parse them + // as a unary operator combined with a numeric literal. This avoids x-1 being tokenized as + // <identifier> <number> instead of the intended <identifier> <binop> <number>. + + type numLexState int + const ( + numBegin numLexState = iota + numAfterZero + numAfterOneToNine + numAfterDot + numAfterDigit + numAfterE + numAfterExpSign + numAfterExpDigit + ) + + state := numBegin + +outerLoop: + for true { + r := l.next() + switch state { + case numBegin: + switch { + case r == '0': + state = numAfterZero + case r >= '1' && r <= '9': + state = numAfterOneToNine + default: + // The caller should ensure the first rune is a digit. 
+ panic("Couldn't lex number") + } + case numAfterZero: + switch r { + case '.': + state = numAfterDot + case 'e', 'E': + state = numAfterE + default: + break outerLoop + } + case numAfterOneToNine: + switch { + case r == '.': + state = numAfterDot + case r == 'e' || r == 'E': + state = numAfterE + case r >= '0' && r <= '9': + state = numAfterOneToNine + default: + break outerLoop + } + case numAfterDot: + switch { + case r >= '0' && r <= '9': + state = numAfterDigit + default: + return l.makeStaticErrorPoint( + fmt.Sprintf("Couldn't lex number, junk after decimal point: %v", strconv.QuoteRuneToASCII(r)), + l.prevLocation()) + } + case numAfterDigit: + switch { + case r == 'e' || r == 'E': + state = numAfterE + case r >= '0' && r <= '9': + state = numAfterDigit + default: + break outerLoop + } + case numAfterE: + switch { + case r == '+' || r == '-': + state = numAfterExpSign + case r >= '0' && r <= '9': + state = numAfterExpDigit + default: + return l.makeStaticErrorPoint( + fmt.Sprintf("Couldn't lex number, junk after 'E': %v", strconv.QuoteRuneToASCII(r)), + l.prevLocation()) + } + case numAfterExpSign: + if r >= '0' && r <= '9' { + state = numAfterExpDigit + } else { + return l.makeStaticErrorPoint( + fmt.Sprintf("Couldn't lex number, junk after exponent sign: %v", strconv.QuoteRuneToASCII(r)), + l.prevLocation()) + } + + case numAfterExpDigit: + if r >= '0' && r <= '9' { + state = numAfterExpDigit + } else { + break outerLoop + } + } + } + + l.backup() + l.emitToken(tokenNumber) + return nil +} + +// lexIdentifier will consume a identifer and emit a token. It is assumed +// that the next rune to be served by the lexer will be a leading digit. This +// may emit a keyword or an identifier. +func (l *lexer) lexIdentifier() { + r := l.next() + if !isIdentifierFirst(r) { + panic("Unexpected character in lexIdentifier") + } + for ; r != lexEOF; r = l.next() { + if !isIdentifier(r) { + break + } + } + l.backup() + + switch l.input[l.tokenStart:l.pos.byteNo] { + case "assert": + l.emitToken(tokenAssert) + case "else": + l.emitToken(tokenElse) + case "error": + l.emitToken(tokenError) + case "false": + l.emitToken(tokenFalse) + case "for": + l.emitToken(tokenFor) + case "function": + l.emitToken(tokenFunction) + case "if": + l.emitToken(tokenIf) + case "import": + l.emitToken(tokenImport) + case "importstr": + l.emitToken(tokenImportStr) + case "in": + l.emitToken(tokenIn) + case "local": + l.emitToken(tokenLocal) + case "null": + l.emitToken(tokenNullLit) + case "self": + l.emitToken(tokenSelf) + case "super": + l.emitToken(tokenSuper) + case "tailstrict": + l.emitToken(tokenTailStrict) + case "then": + l.emitToken(tokenThen) + case "true": + l.emitToken(tokenTrue) + default: + // Not a keyword, assume it is an identifier + l.emitToken(tokenIdentifier) + } +} + +// lexSymbol will lex a token that starts with a symbol. This could be a +// C or C++ comment, block quote or an operator. This function assumes that the next +// rune to be served by the lexer will be the first rune of the new token. 
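+// Comments are recorded as fodder rather than emitted as tokens; text blocks and operators become tokens.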
+func (l *lexer) lexSymbol() error { + r := l.next() + + // Single line C++ style comment + if r == '/' && l.peek() == '/' { + l.next() + l.resetTokenStart() // Throw out the leading // + for r = l.next(); r != lexEOF && r != '\n'; r = l.next() { + } + // Leave the '\n' in the lexer to be fodder for the next round + l.backup() + l.addCommentFodder(FodderCommentCpp) + return nil + } + + if r == '/' && l.peek() == '*' { + commentStartLoc := l.tokenStartLoc + l.next() // consume the '*' + l.resetTokenStart() // Throw out the leading /* + for r = l.next(); ; r = l.next() { + if r == lexEOF { + return l.makeStaticErrorPoint("Multi-line comment has no terminating */", + commentStartLoc) + } + if r == '*' && l.peek() == '/' { + commentData := l.input[l.tokenStart : l.pos.byteNo-1] // Don't include trailing */ + l.addFodder(FodderCommentC, commentData) + l.next() // Skip past '/' + l.resetTokenStart() // Start next token at this point + return nil + } + } + } + + if r == '|' && strings.HasPrefix(l.input[l.pos.byteNo:], "||") { + commentStartLoc := l.tokenStartLoc + l.acceptN(2) // Skip "||" + var cb bytes.Buffer + + // Skip whitespace + for r = l.next(); r == ' ' || r == '\t' || r == '\r'; r = l.next() { + } + + // Skip \n + if r != '\n' { + return l.makeStaticErrorPoint("Text block requires new line after |||.", + commentStartLoc) + } + + // Process leading blank lines before calculating stringBlockIndent + for r = l.next(); r == '\n'; r = l.next() { + cb.WriteRune(r) + } + l.backup() + numWhiteSpace := checkWhitespace(l.input[l.pos.byteNo:], l.input[l.pos.byteNo:]) + stringBlockIndent := l.input[l.pos.byteNo : l.pos.byteNo+numWhiteSpace] + if numWhiteSpace == 0 { + return l.makeStaticErrorPoint("Text block's first line must start with whitespace", + commentStartLoc) + } + + for { + if numWhiteSpace <= 0 { + panic("Unexpected value for numWhiteSpace") + } + l.acceptN(numWhiteSpace) + for r = l.next(); r != '\n'; r = l.next() { + if r == lexEOF { + return l.makeStaticErrorPoint("Unexpected EOF", commentStartLoc) + } + cb.WriteRune(r) + } + cb.WriteRune('\n') + + // Skip any blank lines + for r = l.next(); r == '\n'; r = l.next() { + cb.WriteRune(r) + } + l.backup() + + // Look at the next line + numWhiteSpace = checkWhitespace(stringBlockIndent, l.input[l.pos.byteNo:]) + if numWhiteSpace == 0 { + // End of the text block + var stringBlockTermIndent string + for r = l.next(); r == ' ' || r == '\t'; r = l.next() { + stringBlockTermIndent += string(r) + } + l.backup() + if !strings.HasPrefix(l.input[l.pos.byteNo:], "|||") { + return l.makeStaticErrorPoint("Text block not terminated with |||", commentStartLoc) + } + l.acceptN(3) // Skip '|||' + l.emitFullToken(tokenStringBlock, cb.String(), + stringBlockIndent, stringBlockTermIndent) + l.resetTokenStart() + return nil + } + } + } + + // Assume any string of symbols is a single operator. + for r = l.next(); isSymbol(r); r = l.next() { + // Not allowed // in operators + if r == '/' && strings.HasPrefix(l.input[l.pos.byteNo:], "/") { + break + } + // Not allowed /* in operators + if r == '/' && strings.HasPrefix(l.input[l.pos.byteNo:], "*") { + break + } + // Not allowed ||| in operators + if r == '|' && strings.HasPrefix(l.input[l.pos.byteNo:], "||") { + break + } + } + + l.backup() + + // Operators are not allowed to end with + - ~ ! unless they are one rune long. + // So, wind it back if we need to, but stop at the first rune. 
+ // This relies on the hack that all operator symbols are ASCII and thus there is + // no need to treat this substring as general UTF-8. + for r = rune(l.input[l.pos.byteNo-1]); l.pos.byteNo > l.tokenStart+1; l.pos.byteNo-- { + switch r { + case '+', '-', '~', '!': + continue + } + break + } + + if l.input[l.tokenStart:l.pos.byteNo] == "$" { + l.emitToken(tokenDollar) + } else { + l.emitToken(tokenOperator) + } + return nil +} + +func Lex(fn string, input string) (tokens, error) { + l := makeLexer(fn, input) + + var err error + + for r := l.next(); r != lexEOF; r = l.next() { + switch r { + case ' ', '\t', '\r', '\n': + l.addWhitespaceFodder() + continue + case '{': + l.emitToken(tokenBraceL) + case '}': + l.emitToken(tokenBraceR) + case '[': + l.emitToken(tokenBracketL) + case ']': + l.emitToken(tokenBracketR) + case ',': + l.emitToken(tokenComma) + case '.': + l.emitToken(tokenDot) + case '(': + l.emitToken(tokenParenL) + case ')': + l.emitToken(tokenParenR) + case ';': + l.emitToken(tokenSemicolon) + + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + l.backup() + err = l.lexNumber() + if err != nil { + return nil, err + } + + // String literals + case '"': + stringStartLoc := l.prevLocation() + for r = l.next(); ; r = l.next() { + if r == lexEOF { + return nil, l.makeStaticErrorPoint("Unterminated String", stringStartLoc) + } + if r == '"' { + // Don't include the quotes in the token data + l.emitFullToken(tokenStringDouble, l.input[l.tokenStart+1:l.pos.byteNo-1], "", "") + l.resetTokenStart() + break + } + if r == '\\' && l.peek() != lexEOF { + r = l.next() + } + } + case '\'': + stringStartLoc := l.prevLocation() + for r = l.next(); ; r = l.next() { + if r == lexEOF { + return nil, l.makeStaticErrorPoint("Unterminated String", stringStartLoc) + } + if r == '\'' { + // Don't include the quotes in the token data + l.emitFullToken(tokenStringSingle, l.input[l.tokenStart+1:l.pos.byteNo-1], "", "") + l.resetTokenStart() + break + } + if r == '\\' && l.peek() != lexEOF { + r = l.next() + } + } + case '@': + // Verbatim string literals. + // ' and " quoting is interpreted here, unlike non-verbatim strings + // where it is done later by jsonnet_string_unescape. This is OK + // in this case because no information is lost by resoving the + // repeated quote into a single quote, so we can go back to the + // original form in the formatter. 
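+			// A doubled quote character inside a verbatim string denotes a single literal quote.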
+ var data []rune + stringStartLoc := l.prevLocation() + quot := l.next() + var kind tokenKind + if quot == '"' { + kind = tokenVerbatimStringDouble + } else if quot == '\'' { + kind = tokenVerbatimStringSingle + } else { + return nil, l.makeStaticErrorPoint( + fmt.Sprintf("Couldn't lex verbatim string, junk after '@': %v", quot), + stringStartLoc, + ) + } + for r = l.next(); ; r = l.next() { + if r == lexEOF { + return nil, l.makeStaticErrorPoint("Unterminated String", stringStartLoc) + } else if r == quot { + if l.peek() == quot { + l.next() + data = append(data, r) + } else { + l.emitFullToken(kind, string(data), "", "") + l.resetTokenStart() + break + } + } else { + data = append(data, r) + } + } + + case '#': + l.resetTokenStart() // Throw out the leading # + for r = l.next(); r != lexEOF && r != '\n'; r = l.next() { + } + // Leave the '\n' in the lexer to be fodder for the next round + l.backup() + l.addCommentFodder(FodderCommentHash) + + default: + if isIdentifierFirst(r) { + l.backup() + l.lexIdentifier() + } else if isSymbol(r) { + l.backup() + err = l.lexSymbol() + if err != nil { + return nil, err + } + } else { + return nil, l.makeStaticErrorPoint( + fmt.Sprintf("Could not lex the character %s", strconv.QuoteRuneToASCII(r)), + l.prevLocation()) + } + + } + } + + // We are currently at the EOF. Emit a special token to capture any + // trailing fodder + l.emitToken(tokenEndOfFile) + return l.tokens, nil +} diff --git a/pkg/docparser/lexer_test.go b/pkg/docparser/lexer_test.go new file mode 100644 index 00000000..188cac81 --- /dev/null +++ b/pkg/docparser/lexer_test.go @@ -0,0 +1,295 @@ +/* +Copyright 2016 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +package docparser + +import ( + "testing" +) + +type lexTest struct { + name string + input string + tokens tokens + errString string +} + +var ( + tEOF = token{kind: tokenEndOfFile} +) + +var lexTests = []lexTest{ + {"empty", "", tokens{}, ""}, + {"whitespace", " \t\n\r\r\n", tokens{}, ""}, + + {"brace L", "{", tokens{{kind: tokenBraceL, data: "{"}}, ""}, + {"brace R", "}", tokens{{kind: tokenBraceR, data: "}"}}, ""}, + {"bracket L", "[", tokens{{kind: tokenBracketL, data: "["}}, ""}, + {"bracket R", "]", tokens{{kind: tokenBracketR, data: "]"}}, ""}, + {"colon", ":", tokens{{kind: tokenOperator, data: ":"}}, ""}, + {"colon2", "::", tokens{{kind: tokenOperator, data: "::"}}, ""}, + {"colon3", ":::", tokens{{kind: tokenOperator, data: ":::"}}, ""}, + {"arrow right", "->", tokens{{kind: tokenOperator, data: "->"}}, ""}, + {"less than minus", "<-", tokens{{kind: tokenOperator, data: "<"}, + {kind: tokenOperator, data: "-"}}, ""}, + {"comma", ",", tokens{{kind: tokenComma, data: ","}}, ""}, + {"dollar", "$", tokens{{kind: tokenDollar, data: "$"}}, ""}, + {"dot", ".", tokens{{kind: tokenDot, data: "."}}, ""}, + {"paren L", "(", tokens{{kind: tokenParenL, data: "("}}, ""}, + {"paren R", ")", tokens{{kind: tokenParenR, data: ")"}}, ""}, + {"semicolon", ";", tokens{{kind: tokenSemicolon, data: ";"}}, ""}, + + {"not 1", "!", tokens{{kind: tokenOperator, data: "!"}}, ""}, + {"not 2", "! ", tokens{{kind: tokenOperator, data: "!"}}, ""}, + {"not equal", "!=", tokens{{kind: tokenOperator, data: "!="}}, ""}, + {"tilde", "~", tokens{{kind: tokenOperator, data: "~"}}, ""}, + {"plus", "+", tokens{{kind: tokenOperator, data: "+"}}, ""}, + {"minus", "-", tokens{{kind: tokenOperator, data: "-"}}, ""}, + + {"number 0", "0", tokens{{kind: tokenNumber, data: "0"}}, ""}, + {"number 1", "1", tokens{{kind: tokenNumber, data: "1"}}, ""}, + {"number 1.0", "1.0", tokens{{kind: tokenNumber, data: "1.0"}}, ""}, + {"number 0.10", "0.10", tokens{{kind: tokenNumber, data: "0.10"}}, ""}, + {"number 0e100", "0e100", tokens{{kind: tokenNumber, data: "0e100"}}, ""}, + {"number 1e100", "1e100", tokens{{kind: tokenNumber, data: "1e100"}}, ""}, + {"number 1.1e100", "1.1e100", tokens{{kind: tokenNumber, data: "1.1e100"}}, ""}, + {"number 1.1e-100", "1.1e-100", tokens{{kind: tokenNumber, data: "1.1e-100"}}, ""}, + {"number 1.1e+100", "1.1e+100", tokens{{kind: tokenNumber, data: "1.1e+100"}}, ""}, + {"number 0100", "0100", tokens{ + {kind: tokenNumber, data: "0"}, + {kind: tokenNumber, data: "100"}, + }, ""}, + {"number 10+10", "10+10", tokens{ + {kind: tokenNumber, data: "10"}, + {kind: tokenOperator, data: "+"}, + {kind: tokenNumber, data: "10"}, + }, ""}, + {"number 1.+3", "1.+3", tokens{}, "number 1.+3:1:3 Couldn't lex number, junk after decimal point: '+'"}, + {"number 1e!", "1e!", tokens{}, "number 1e!:1:3 Couldn't lex number, junk after 'E': '!'"}, + {"number 1e+!", "1e+!", tokens{}, "number 1e+!:1:4 Couldn't lex number, junk after exponent sign: '!'"}, + + {"double string \"hi\"", "\"hi\"", tokens{{kind: tokenStringDouble, data: "hi"}}, ""}, + {"double string \"hi nl\"", "\"hi\n\"", tokens{{kind: tokenStringDouble, data: "hi\n"}}, ""}, + {"double string \"hi\\\"\"", "\"hi\\\"\"", tokens{{kind: tokenStringDouble, data: "hi\\\""}}, ""}, + {"double string \"hi\\nl\"", "\"hi\\\n\"", tokens{{kind: tokenStringDouble, data: "hi\\\n"}}, ""}, + {"double string \"hi", "\"hi", tokens{}, "double string \"hi:1:1 Unterminated String"}, + + {"single string 'hi'", "'hi'", tokens{{kind: tokenStringSingle, data: "hi"}}, ""}, + {"single 
string 'hi nl'", "'hi\n'", tokens{{kind: tokenStringSingle, data: "hi\n"}}, ""}, + {"single string 'hi\\''", "'hi\\''", tokens{{kind: tokenStringSingle, data: "hi\\'"}}, ""}, + {"single string 'hi\\nl'", "'hi\\\n'", tokens{{kind: tokenStringSingle, data: "hi\\\n"}}, ""}, + {"single string 'hi", "'hi", tokens{}, "single string 'hi:1:1 Unterminated String"}, + + {"assert", "assert", tokens{{kind: tokenAssert, data: "assert"}}, ""}, + {"else", "else", tokens{{kind: tokenElse, data: "else"}}, ""}, + {"error", "error", tokens{{kind: tokenError, data: "error"}}, ""}, + {"false", "false", tokens{{kind: tokenFalse, data: "false"}}, ""}, + {"for", "for", tokens{{kind: tokenFor, data: "for"}}, ""}, + {"function", "function", tokens{{kind: tokenFunction, data: "function"}}, ""}, + {"if", "if", tokens{{kind: tokenIf, data: "if"}}, ""}, + {"import", "import", tokens{{kind: tokenImport, data: "import"}}, ""}, + {"importstr", "importstr", tokens{{kind: tokenImportStr, data: "importstr"}}, ""}, + {"in", "in", tokens{{kind: tokenIn, data: "in"}}, ""}, + {"local", "local", tokens{{kind: tokenLocal, data: "local"}}, ""}, + {"null", "null", tokens{{kind: tokenNullLit, data: "null"}}, ""}, + {"self", "self", tokens{{kind: tokenSelf, data: "self"}}, ""}, + {"super", "super", tokens{{kind: tokenSuper, data: "super"}}, ""}, + {"tailstrict", "tailstrict", tokens{{kind: tokenTailStrict, data: "tailstrict"}}, ""}, + {"then", "then", tokens{{kind: tokenThen, data: "then"}}, ""}, + {"true", "true", tokens{{kind: tokenTrue, data: "true"}}, ""}, + + {"identifier", "foobar123", tokens{{kind: tokenIdentifier, data: "foobar123"}}, ""}, + {"identifier", "foo bar123", tokens{{kind: tokenIdentifier, data: "foo"}, {kind: tokenIdentifier, data: "bar123"}}, ""}, + + {"c++ comment", "// hi", tokens{}, ""}, // This test doesn't look at fodder (yet?) + {"hash comment", "# hi", tokens{}, ""}, // This test doesn't look at fodder (yet?) + {"c comment", "/* hi */", tokens{}, ""}, // This test doesn't look at fodder (yet?) + {"c comment no term", "/* hi", tokens{}, "c comment no term:1:1 Multi-line comment has no terminating */"}, // This test doesn't look at fodder (yet?) 
+ + { + "block string spaces", + `||| + test + more + ||| + foo +|||`, + tokens{ + { + kind: tokenStringBlock, + data: "test\n more\n|||\n foo\n", + stringBlockIndent: " ", + stringBlockTermIndent: "", + }, + }, + "", + }, + { + "block string tabs", + `||| + test + more + ||| + foo +|||`, + tokens{ + { + kind: tokenStringBlock, + data: "test\n more\n|||\n foo\n", + stringBlockIndent: "\t", + stringBlockTermIndent: "", + }, + }, + "", + }, + { + "block string mixed", + `||| + test + more + ||| + foo +|||`, + tokens{ + { + kind: tokenStringBlock, + data: "test\n more\n|||\n foo\n", + stringBlockIndent: "\t \t", + stringBlockTermIndent: "", + }, + }, + "", + }, + { + "block string blanks", + `||| + + test + + + more + ||| + foo +|||`, + tokens{ + { + kind: tokenStringBlock, + data: "\ntest\n\n\n more\n|||\n foo\n", + stringBlockIndent: " ", + stringBlockTermIndent: "", + }, + }, + "", + }, + { + "block string bad indent", + `||| + test + foo +|||`, + tokens{}, + "block string bad indent:1:1 Text block not terminated with |||", + }, + { + "block string eof", + `||| + test`, + tokens{}, + "block string eof:1:1 Unexpected EOF", + }, + { + "block string not term", + `||| + test +`, + tokens{}, + "block string not term:1:1 Text block not terminated with |||", + }, + { + "block string no ws", + `||| +test +|||`, + tokens{}, + "block string no ws:1:1 Text block's first line must start with whitespace", + }, + + {"verbatim_string1", `@""`, tokens{{kind: tokenVerbatimStringDouble, data: ""}}, ""}, + {"verbatim_string2", `@''`, tokens{{kind: tokenVerbatimStringSingle, data: ""}}, ""}, + {"verbatim_string3", `@""""`, tokens{{kind: tokenVerbatimStringDouble, data: `"`}}, ""}, + {"verbatim_string4", `@''''`, tokens{{kind: tokenVerbatimStringSingle, data: "'"}}, ""}, + {"verbatim_string5", `@"\n"`, tokens{{kind: tokenVerbatimStringDouble, data: "\\n"}}, ""}, + {"verbatim_string6", `@"''"`, tokens{{kind: tokenVerbatimStringDouble, data: "''"}}, ""}, + + {"verbatim_string_unterminated", `@"blah blah`, tokens{}, "verbatim_string_unterminated:1:1 Unterminated String"}, + {"verbatim_string_junk", `@blah blah`, tokens{}, "verbatim_string_junk:1:1 Couldn't lex verbatim string, junk after '@': 98"}, + + {"op *", "*", tokens{{kind: tokenOperator, data: "*"}}, ""}, + {"op /", "/", tokens{{kind: tokenOperator, data: "/"}}, ""}, + {"op %", "%", tokens{{kind: tokenOperator, data: "%"}}, ""}, + {"op &", "&", tokens{{kind: tokenOperator, data: "&"}}, ""}, + {"op |", "|", tokens{{kind: tokenOperator, data: "|"}}, ""}, + {"op ^", "^", tokens{{kind: tokenOperator, data: "^"}}, ""}, + {"op =", "=", tokens{{kind: tokenOperator, data: "="}}, ""}, + {"op <", "<", tokens{{kind: tokenOperator, data: "<"}}, ""}, + {"op >", ">", tokens{{kind: tokenOperator, data: ">"}}, ""}, + {"op >==|", ">==|", tokens{{kind: tokenOperator, data: ">==|"}}, ""}, + + {"junk", "💩", tokens{}, "junk:1:1 Could not lex the character '\\U0001f4a9'"}, +} + +func tokensEqual(ts1, ts2 tokens) bool { + if len(ts1) != len(ts2) { + return false + } + for i := range ts1 { + t1, t2 := ts1[i], ts2[i] + if t1.kind != t2.kind { + return false + } + if t1.data != t2.data { + return false + } + if t1.stringBlockIndent != t2.stringBlockIndent { + return false + } + if t1.stringBlockTermIndent != t2.stringBlockTermIndent { + return false + } + } + return true +} + +func TestLex(t *testing.T) { + for _, test := range lexTests { + // Copy the test tokens and append an EOF token + testTokens := append(tokens(nil), test.tokens...) 
+ testTokens = append(testTokens, tEOF) + tokens, err := Lex(test.name, test.input) + var errString string + if err != nil { + errString = err.Error() + } + if errString != test.errString { + t.Errorf("%s: error result does not match. got\n\t%+v\nexpected\n\t%+v", + test.name, errString, test.errString) + } + if err == nil && !tokensEqual(tokens, testTokens) { + t.Errorf("%s: got\n\t%+v\nexpected\n\t%+v", test.name, tokens, testTokens) + } + } +} + +// TODO: test fodder, test position reporting diff --git a/pkg/docparser/literalfield_set.go b/pkg/docparser/literalfield_set.go new file mode 100644 index 00000000..d78ae6b0 --- /dev/null +++ b/pkg/docparser/literalfield_set.go @@ -0,0 +1,172 @@ +// Generated by: main +// TypeWriter: set +// Directive: +gen on literalField + +package docparser + +// Set is a modification of https://github.com/deckarep/golang-set +// The MIT License (MIT) +// Copyright (c) 2013 Ralph Caraveo (deckarep@gmail.com) + +// literalFieldSet is the primary type that represents a set +type literalFieldSet map[LiteralField]struct{} + +// NewliteralFieldSet creates and returns a reference to an empty set. +func NewliteralFieldSet(a ...LiteralField) literalFieldSet { + s := make(literalFieldSet) + for _, i := range a { + s.Add(i) + } + return s +} + +// ToSlice returns the elements of the current set as a slice +func (set literalFieldSet) ToSlice() []LiteralField { + var s []LiteralField + for v := range set { + s = append(s, v) + } + return s +} + +// Add adds an item to the current set if it doesn't already exist in the set. +func (set literalFieldSet) Add(i LiteralField) bool { + _, found := set[i] + set[i] = struct{}{} + return !found //False if it existed already +} + +// Contains determines if a given item is already in the set. +func (set literalFieldSet) Contains(i LiteralField) bool { + _, found := set[i] + return found +} + +// ContainsAll determines if the given items are all in the set +func (set literalFieldSet) ContainsAll(i ...LiteralField) bool { + for _, v := range i { + if !set.Contains(v) { + return false + } + } + return true +} + +// IsSubset determines if every item in the other set is in this set. +func (set literalFieldSet) IsSubset(other literalFieldSet) bool { + for elem := range set { + if !other.Contains(elem) { + return false + } + } + return true +} + +// IsSuperset determines if every item of this set is in the other set. +func (set literalFieldSet) IsSuperset(other literalFieldSet) bool { + return other.IsSubset(set) +} + +// Union returns a new set with all items in both sets. +func (set literalFieldSet) Union(other literalFieldSet) literalFieldSet { + unionedSet := NewliteralFieldSet() + + for elem := range set { + unionedSet.Add(elem) + } + for elem := range other { + unionedSet.Add(elem) + } + return unionedSet +} + +// Intersect returns a new set with items that exist only in both sets. 
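+// The smaller of the two sets is iterated, so the work is proportional to the smaller cardinality.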
+func (set literalFieldSet) Intersect(other literalFieldSet) literalFieldSet { + intersection := NewliteralFieldSet() + // loop over smaller set + if set.Cardinality() < other.Cardinality() { + for elem := range set { + if other.Contains(elem) { + intersection.Add(elem) + } + } + } else { + for elem := range other { + if set.Contains(elem) { + intersection.Add(elem) + } + } + } + return intersection +} + +// Difference returns a new set with items in the current set but not in the other set +func (set literalFieldSet) Difference(other literalFieldSet) literalFieldSet { + differencedSet := NewliteralFieldSet() + for elem := range set { + if !other.Contains(elem) { + differencedSet.Add(elem) + } + } + return differencedSet +} + +// SymmetricDifference returns a new set with items in the current set or the other set but not in both. +func (set literalFieldSet) SymmetricDifference(other literalFieldSet) literalFieldSet { + aDiff := set.Difference(other) + bDiff := other.Difference(set) + return aDiff.Union(bDiff) +} + +// Clear clears the entire set to be the empty set. +func (set *literalFieldSet) Clear() { + *set = make(literalFieldSet) +} + +// Remove allows the removal of a single item in the set. +func (set literalFieldSet) Remove(i LiteralField) { + delete(set, i) +} + +// Cardinality returns how many items are currently in the set. +func (set literalFieldSet) Cardinality() int { + return len(set) +} + +// Iter returns a channel of type literalField that you can range over. +func (set literalFieldSet) Iter() <-chan LiteralField { + ch := make(chan LiteralField) + go func() { + for elem := range set { + ch <- elem + } + close(ch) + }() + + return ch +} + +// Equal determines if two sets are equal to each other. +// If they both are the same size and have the same items they are considered equal. +// Order of items is not relevent for sets to be equal. +func (set literalFieldSet) Equal(other literalFieldSet) bool { + if set.Cardinality() != other.Cardinality() { + return false + } + for elem := range set { + if !other.Contains(elem) { + return false + } + } + return true +} + +// Clone returns a clone of the set. +// Does NOT clone the underlying elements. +func (set literalFieldSet) Clone() literalFieldSet { + clonedSet := NewliteralFieldSet() + for elem := range set { + clonedSet.Add(elem) + } + return clonedSet +} diff --git a/pkg/docparser/parser.go b/pkg/docparser/parser.go new file mode 100644 index 00000000..c11a4429 --- /dev/null +++ b/pkg/docparser/parser.go @@ -0,0 +1,1223 @@ +/* +Copyright 2016 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package docparser + +import ( + "fmt" + "strconv" + "strings" + + "github.com/google/go-jsonnet/ast" + "github.com/ksonnet/ksonnet-lib/ksonnet-gen/astext" +) + +type precedence int + +const ( + applyPrecedence precedence = 2 // ast.Function calls and indexing. 
+ unaryPrecedence precedence = 4 // Logical and bitwise negation, unary + - + maxPrecedence precedence = 16 // ast.Local, If, ast.Import, ast.Function, Error +) + +var bopPrecedence = map[ast.BinaryOp]precedence{ + ast.BopMult: 5, + ast.BopDiv: 5, + ast.BopPercent: 5, + ast.BopPlus: 6, + ast.BopMinus: 6, + ast.BopShiftL: 7, + ast.BopShiftR: 7, + ast.BopGreater: 8, + ast.BopGreaterEq: 8, + ast.BopLess: 8, + ast.BopLessEq: 8, + ast.BopIn: 8, + ast.BopManifestEqual: 9, + ast.BopManifestUnequal: 9, + ast.BopBitwiseAnd: 10, + ast.BopBitwiseXor: 11, + ast.BopBitwiseOr: 12, + ast.BopAnd: 13, + ast.BopOr: 14, +} + +// --------------------------------------------------------------------------- + +func makeUnexpectedError(t *token, while string) error { + return MakeStaticError( + fmt.Sprintf("Unexpected: %v while %v", t, while), t.loc) +} + +func locFromTokens(begin, end *token) ast.LocationRange { + return ast.LocationRangeBetween(&begin.loc, &end.loc) +} + +func locFromTokenAST(begin *token, end ast.Node) ast.LocationRange { + return ast.LocationRangeBetween(&begin.loc, end.Loc()) +} + +// --------------------------------------------------------------------------- + +type parser struct { + t tokens + currT int +} + +func makeParser(t tokens) *parser { + return &parser{ + t: t, + } +} + +func (p *parser) pop() *token { + t := &p.t[p.currT] + p.currT++ + return t +} + +func (p *parser) unexpectedTokenError(tk tokenKind, t *token) error { + if tk == t.kind { + panic("Unexpectedly expected token kind.") + } + return MakeStaticError(fmt.Sprintf("Expected token %v but got %v", tk, t), t.loc) +} + +func (p *parser) popExpect(tk tokenKind) (*token, error) { + t := p.pop() + if t.kind != tk { + return nil, p.unexpectedTokenError(tk, t) + } + return t, nil +} + +func (p *parser) popExpectOp(op string) (*token, error) { + t := p.pop() + if t.kind != tokenOperator || t.data != op { + return nil, MakeStaticError( + fmt.Sprintf("Expected operator %v but got %v", op, t), t.loc) + } + return t, nil +} + +func (p *parser) peek() *token { + return &p.t[p.currT] +} + +func (p *parser) doublePeek() *token { + return &p.t[p.currT+1] +} + +// in some cases it's convenient to parse something as an expression, and later +// decide that it should be just an identifer +func astVarToIdentifier(node ast.Node) (*ast.Identifier, bool) { + v, ok := node.(*ast.Var) + if ok { + return &v.Id, true + } + return nil, false +} + +func (p *parser) parseArgument() (*ast.Identifier, ast.Node, error) { + var id *ast.Identifier + if p.peek().kind == tokenIdentifier && p.doublePeek().kind == tokenOperator && p.doublePeek().data == "=" { + ident := p.pop() + var tmpID = ast.Identifier(ident.data) + id = &tmpID + p.pop() // "=" token + } + expr, err := p.parse(maxPrecedence) + if err != nil { + return nil, nil, err + } + return id, expr, nil +} + +// TODO(sbarzowski) - this returned bool is weird +// TODO(sbarzowski) - name - it's also used for parameters +func (p *parser) parseArguments(elementKind string) (*token, *ast.Arguments, bool, error) { + args := &ast.Arguments{} + gotComma := false + namedArgumentAdded := false + first := true + for { + next := p.peek() + + if next.kind == tokenParenR { + // gotComma can be true or false here. 
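+			// When true, the argument list ended with a trailing comma.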
+ return p.pop(), args, gotComma, nil + } + + if !first && !gotComma { + return nil, nil, false, MakeStaticError(fmt.Sprintf("Expected a comma before next %s, got %s.", elementKind, next), next.loc) + } + + id, expr, err := p.parseArgument() + if err != nil { + return nil, nil, false, err + } + if id == nil { + if namedArgumentAdded { + return nil, nil, false, MakeStaticError("Positional argument after a named argument is not allowed", next.loc) + } + args.Positional = append(args.Positional, expr) + } else { + namedArgumentAdded = true + args.Named = append(args.Named, ast.NamedArgument{Name: *id, Arg: expr}) + } + + if p.peek().kind == tokenComma { + p.pop() + gotComma = true + } else { + gotComma = false + } + + first = false + } +} + +// TODO(sbarzowski) - this returned bool is weird +func (p *parser) parseParameters(elementKind string) (*ast.Parameters, bool, error) { + _, args, trailingComma, err := p.parseArguments(elementKind) + if err != nil { + return nil, false, err + } + var params ast.Parameters + for _, arg := range args.Positional { + id, ok := astVarToIdentifier(arg) + if !ok { + return nil, false, MakeStaticError(fmt.Sprintf("Expected simple identifier but got a complex expression."), *arg.Loc()) + } + params.Required = append(params.Required, *id) + } + for _, arg := range args.Named { + params.Optional = append(params.Optional, ast.NamedParameter{Name: arg.Name, DefaultArg: arg.Arg}) + } + return ¶ms, trailingComma, nil +} + +// TODO(sbarzowski) add location to all individual binds +func (p *parser) parseBind(binds *ast.LocalBinds) error { + varID, err := p.popExpect(tokenIdentifier) + if err != nil { + return err + } + for _, b := range *binds { + if b.Variable == ast.Identifier(varID.data) { + return MakeStaticError(fmt.Sprintf("Duplicate local var: %v", varID.data), varID.loc) + } + } + + var fun *ast.Function + if p.peek().kind == tokenParenL { + p.pop() + params, gotComma, err := p.parseParameters("function parameter") + if err != nil { + return err + } + fun = &ast.Function{ + Parameters: *params, + TrailingComma: gotComma, + } + } + + _, err = p.popExpectOp("=") + if err != nil { + return err + } + body, err := p.parse(maxPrecedence) + if err != nil { + return err + } + + if fun != nil { + fun.NodeBase = ast.NewNodeBaseLoc(locFromTokenAST(varID, body)) + fun.Body = body + *binds = append(*binds, ast.LocalBind{ + Variable: ast.Identifier(varID.data), + Body: body, + Fun: fun, + }) + } else { + *binds = append(*binds, ast.LocalBind{ + Variable: ast.Identifier(varID.data), + Body: body, + }) + } + + return nil +} + +func (p *parser) parseObjectAssignmentOp() (plusSugar bool, hide ast.ObjectFieldHide, err error) { + op, err := p.popExpect(tokenOperator) + if err != nil { + return + } + opStr := op.data + if opStr[0] == '+' { + plusSugar = true + opStr = opStr[1:] + } + + numColons := 0 + for len(opStr) > 0 { + if opStr[0] != ':' { + err = MakeStaticError( + fmt.Sprintf("Expected one of :, ::, :::, +:, +::, +:::, got: %v", op.data), op.loc) + return + } + opStr = opStr[1:] + numColons++ + } + + switch numColons { + case 1: + hide = ast.ObjectFieldInherit + case 2: + hide = ast.ObjectFieldHidden + case 3: + hide = ast.ObjectFieldVisible + default: + err = MakeStaticError( + fmt.Sprintf("Expected one of :, ::, :::, +:, +::, +:::, got: %v", op.data), op.loc) + return + } + + return +} + +// +gen set +type LiteralField string + +// Parse object or object comprehension without leading brace +func (p *parser) parseObjectRemainder(tok *token) (ast.Node, *token, error) { + var 
fields astext.ObjectFields + literalFields := make(literalFieldSet) + binds := make(ast.IdentifierSet) + + gotComma := false + first := true + + for { + next := p.pop() + if !gotComma && !first { + if next.kind == tokenComma { + next = p.pop() + gotComma = true + } + } + + if next.kind == tokenBraceR { + return &astext.Object{ + Fields: fields, + Object: ast.Object{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(tok, next)), + TrailingComma: gotComma, + }, + }, next, nil + } + + if next.kind == tokenFor { + // It's a comprehension + numFields := 0 + numAsserts := 0 + field := astext.ObjectField{} + for _, f := range fields { + if f.Kind == ast.ObjectLocal { + continue + } + if f.Kind == ast.ObjectAssert { + numAsserts++ + continue + } + numFields++ + field = f + } + + if numAsserts > 0 { + return nil, nil, MakeStaticError("Object comprehension cannot have asserts.", next.loc) + } + if numFields != 1 { + return nil, nil, MakeStaticError("Object comprehension can only have one field.", next.loc) + } + if field.Hide != ast.ObjectFieldInherit { + return nil, nil, MakeStaticError("Object comprehensions cannot have hidden fields.", next.loc) + } + if field.Kind != ast.ObjectFieldExpr { + return nil, nil, MakeStaticError("Object comprehensions can only have [e] fields.", next.loc) + } + spec, last, err := p.parseComprehensionSpecs(tokenBraceR) + if err != nil { + return nil, nil, err + } + + var astFields ast.ObjectFields + for _, field := range fields { + astFields = append(astFields, field.ObjectField) + } + + return &ast.ObjectComp{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(tok, last)), + Fields: astFields, + TrailingComma: gotComma, + Spec: *spec, + }, last, nil + } + + if !gotComma && !first { + return nil, nil, MakeStaticError("Expected a comma before next field.", next.loc) + } + first = false + + switch next.kind { + case tokenBracketL, tokenIdentifier, tokenStringDouble, tokenStringSingle, + tokenStringBlock, tokenVerbatimStringDouble, tokenVerbatimStringSingle: + var kind ast.ObjectFieldKind + var expr1 ast.Node + var id *ast.Identifier + switch next.kind { + case tokenIdentifier: + kind = ast.ObjectFieldID + id = (*ast.Identifier)(&next.data) + case tokenStringDouble, tokenStringSingle, + tokenStringBlock, tokenVerbatimStringDouble, tokenVerbatimStringSingle: + kind = ast.ObjectFieldStr + expr1 = tokenStringToAst(next) + default: + kind = ast.ObjectFieldExpr + var err error + expr1, err = p.parse(maxPrecedence) + if err != nil { + return nil, nil, err + } + _, err = p.popExpect(tokenBracketR) + if err != nil { + return nil, nil, err + } + } + + isMethod := false + methComma := false + var params *ast.Parameters + if p.peek().kind == tokenParenL { + p.pop() + var err error + params, methComma, err = p.parseParameters("method parameter") + if err != nil { + return nil, nil, err + } + isMethod = true + } + + plusSugar, hide, err := p.parseObjectAssignmentOp() + if err != nil { + return nil, nil, err + } + + if plusSugar && isMethod { + return nil, nil, MakeStaticError( + fmt.Sprintf("Cannot use +: syntax sugar in a method: %v", next.data), next.loc) + } + + if kind != ast.ObjectFieldExpr { + if !literalFields.Add(LiteralField(next.data)) { + return nil, nil, MakeStaticError( + fmt.Sprintf("Duplicate field: %v", next.data), next.loc) + } + } + + body, err := p.parse(maxPrecedence) + if err != nil { + return nil, nil, err + } + + var method *ast.Function + if isMethod { + method = &ast.Function{ + Parameters: *params, + TrailingComma: methComma, + Body: body, + } + } + + var comment 
*astext.Comment + + if len(next.fodder) > 0 { + var comments []string + for _, f := range next.fodder { + if f.Kind != FodderCommentCpp { + continue + } + + text := strings.TrimSpace(f.Data) + comments = append(comments, text) + } + + s := strings.Join(comments, "\n") + if len(s) > 0 { + comment = &astext.Comment{Text: s} + } + } + + fields = append(fields, astext.ObjectField{ + Comment: comment, + ObjectField: ast.ObjectField{ + Kind: kind, + Hide: hide, + SuperSugar: plusSugar, + MethodSugar: isMethod, + Method: method, + Expr1: expr1, + Id: id, + Params: params, + TrailingComma: methComma, + Expr2: body, + }, + }) + + case tokenLocal: + varID, err := p.popExpect(tokenIdentifier) + if err != nil { + return nil, nil, err + } + + id := ast.Identifier(varID.data) + + if binds.Contains(id) { + return nil, nil, MakeStaticError(fmt.Sprintf("Duplicate local var: %v", id), varID.loc) + } + + // TODO(sbarzowski) Can we reuse regular local bind parsing here? + + isMethod := false + funcComma := false + var params *ast.Parameters + if p.peek().kind == tokenParenL { + p.pop() + isMethod = true + params, funcComma, err = p.parseParameters("function parameter") + if err != nil { + return nil, nil, err + } + } + _, err = p.popExpectOp("=") + if err != nil { + return nil, nil, err + } + + body, err := p.parse(maxPrecedence) + if err != nil { + return nil, nil, err + } + + var method *ast.Function + if isMethod { + method = &ast.Function{ + Parameters: *params, + TrailingComma: funcComma, + Body: body, + } + } + + binds.Add(id) + + fields = append(fields, astext.ObjectField{ + ObjectField: ast.ObjectField{ + Kind: ast.ObjectLocal, + Hide: ast.ObjectFieldVisible, + SuperSugar: false, + MethodSugar: isMethod, + Method: method, + Id: &id, + Params: params, + TrailingComma: funcComma, + Expr2: body, + }, + }) + + case tokenAssert: + cond, err := p.parse(maxPrecedence) + if err != nil { + return nil, nil, err + } + var msg ast.Node + if p.peek().kind == tokenOperator && p.peek().data == ":" { + p.pop() + msg, err = p.parse(maxPrecedence) + if err != nil { + return nil, nil, err + } + } + + fields = append(fields, astext.ObjectField{ + ObjectField: ast.ObjectField{ + Kind: ast.ObjectAssert, + Hide: ast.ObjectFieldVisible, + Expr2: cond, + Expr3: msg, + }, + }) + default: + return nil, nil, makeUnexpectedError(next, "parsing field definition") + } + gotComma = false + } +} + +/* parses for x in expr for y in expr if expr for z in expr ... 
*/ +func (p *parser) parseComprehensionSpecs(end tokenKind) (*ast.ForSpec, *token, error) { + var parseComprehensionSpecsHelper func(outer *ast.ForSpec) (*ast.ForSpec, *token, error) + parseComprehensionSpecsHelper = func(outer *ast.ForSpec) (*ast.ForSpec, *token, error) { + var ifSpecs []ast.IfSpec + + varID, err := p.popExpect(tokenIdentifier) + if err != nil { + return nil, nil, err + } + id := ast.Identifier(varID.data) + _, err = p.popExpect(tokenIn) + if err != nil { + return nil, nil, err + } + arr, err := p.parse(maxPrecedence) + if err != nil { + return nil, nil, err + } + forSpec := &ast.ForSpec{ + VarName: id, + Expr: arr, + Outer: outer, + } + + maybeIf := p.pop() + for ; maybeIf.kind == tokenIf; maybeIf = p.pop() { + cond, err := p.parse(maxPrecedence) + if err != nil { + return nil, nil, err + } + ifSpecs = append(ifSpecs, ast.IfSpec{ + Expr: cond, + }) + } + forSpec.Conditions = ifSpecs + if maybeIf.kind == end { + return forSpec, maybeIf, nil + } + + if maybeIf.kind != tokenFor { + return nil, nil, MakeStaticError( + fmt.Sprintf("Expected for, if or %v after for clause, got: %v", end, maybeIf), maybeIf.loc) + } + + return parseComprehensionSpecsHelper(forSpec) + } + return parseComprehensionSpecsHelper(nil) +} + +// Assumes that the leading '[' has already been consumed and passed as tok. +// Should read up to and consume the trailing ']' +func (p *parser) parseArray(tok *token) (ast.Node, error) { + next := p.peek() + if next.kind == tokenBracketR { + p.pop() + return &ast.Array{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(tok, next)), + }, nil + } + + first, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + var gotComma bool + next = p.peek() + if next.kind == tokenComma { + p.pop() + next = p.peek() + gotComma = true + } + + if next.kind == tokenFor { + // It's a comprehension + p.pop() + spec, last, err := p.parseComprehensionSpecs(tokenBracketR) + if err != nil { + return nil, err + } + return &ast.ArrayComp{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(tok, last)), + Body: first, + TrailingComma: gotComma, + Spec: *spec, + }, nil + } + // Not a comprehension: It can have more elements. 
+ elements := ast.Nodes{first} + + for { + if next.kind == tokenBracketR { + // TODO(dcunnin): SYNTAX SUGAR HERE (preserve comma) + p.pop() + break + } + if !gotComma { + return nil, MakeStaticError("Expected a comma before next array element.", next.loc) + } + nextElem, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + elements = append(elements, nextElem) + next = p.peek() + if next.kind == tokenComma { + p.pop() + next = p.peek() + gotComma = true + } else { + gotComma = false + } + } + + return &ast.Array{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(tok, next)), + Elements: elements, + TrailingComma: gotComma, + }, nil +} + +func tokenStringToAst(tok *token) *ast.LiteralString { + switch tok.kind { + case tokenStringSingle: + return &ast.LiteralString{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Value: tok.data, + Kind: ast.StringSingle, + } + case tokenStringDouble: + return &ast.LiteralString{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Value: tok.data, + Kind: ast.StringDouble, + } + case tokenStringBlock: + return &ast.LiteralString{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Value: tok.data, + Kind: ast.StringBlock, + BlockIndent: tok.stringBlockIndent, + } + case tokenVerbatimStringDouble: + return &ast.LiteralString{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Value: tok.data, + Kind: ast.VerbatimStringDouble, + } + case tokenVerbatimStringSingle: + return &ast.LiteralString{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Value: tok.data, + Kind: ast.VerbatimStringSingle, + } + default: + panic(fmt.Sprintf("Not a string token %#+v", tok)) + } +} + +func (p *parser) parseTerminal() (ast.Node, error) { + tok := p.pop() + switch tok.kind { + case tokenAssert, tokenBraceR, tokenBracketR, tokenComma, tokenDot, tokenElse, + tokenError, tokenFor, tokenFunction, tokenIf, tokenIn, tokenImport, tokenImportStr, + tokenLocal, tokenOperator, tokenParenR, tokenSemicolon, tokenTailStrict, tokenThen: + return nil, makeUnexpectedError(tok, "parsing terminal") + + case tokenEndOfFile: + return nil, MakeStaticError("Unexpected end of file.", tok.loc) + + case tokenBraceL: + obj, _, err := p.parseObjectRemainder(tok) + return obj, err + + case tokenBracketL: + return p.parseArray(tok) + + case tokenParenL: + inner, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + _, err = p.popExpect(tokenParenR) + if err != nil { + return nil, err + } + return inner, nil + + // Literals + case tokenNumber: + // This shouldn't fail as the lexer should make sure we have good input but + // we handle the error regardless. 
+ num, err := strconv.ParseFloat(tok.data, 64) + if err != nil { + return nil, MakeStaticError("Could not parse floating point number.", tok.loc) + } + return &ast.LiteralNumber{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Value: num, + OriginalString: tok.data, + }, nil + case tokenStringDouble, tokenStringSingle, + tokenStringBlock, tokenVerbatimStringDouble, tokenVerbatimStringSingle: + return tokenStringToAst(tok), nil + case tokenFalse: + return &ast.LiteralBoolean{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Value: false, + }, nil + case tokenTrue: + return &ast.LiteralBoolean{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Value: true, + }, nil + case tokenNullLit: + return &ast.LiteralNull{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + }, nil + + // Variables + case tokenDollar: + return &ast.Dollar{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + }, nil + case tokenIdentifier: + return &ast.Var{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Id: ast.Identifier(tok.data), + }, nil + case tokenSelf: + return &ast.Self{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + }, nil + case tokenSuper: + next := p.pop() + var index ast.Node + var id *ast.Identifier + switch next.kind { + case tokenDot: + fieldID, err := p.popExpect(tokenIdentifier) + if err != nil { + return nil, err + } + id = (*ast.Identifier)(&fieldID.data) + case tokenBracketL: + var err error + index, err = p.parse(maxPrecedence) + if err != nil { + return nil, err + } + _, err = p.popExpect(tokenBracketR) + if err != nil { + return nil, err + } + default: + return nil, MakeStaticError("Expected . or [ after super.", tok.loc) + } + return &ast.SuperIndex{ + NodeBase: ast.NewNodeBaseLoc(tok.loc), + Index: index, + Id: id, + }, nil + } + + return nil, MakeStaticError(fmt.Sprintf("INTERNAL ERROR: Unknown tok kind: %v", tok.kind), tok.loc) +} + +func (p *parser) parsingFailure(msg string, tok *token) (ast.Node, error) { + return nil, MakeStaticError(msg, tok.loc) +} + +func (p *parser) parse(prec precedence) (ast.Node, error) { + begin := p.peek() + + switch begin.kind { + // These cases have effectively maxPrecedence as the first + // call to parse will parse them. 
+ case tokenAssert: + p.pop() + cond, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + var msg ast.Node + if p.peek().kind == tokenOperator && p.peek().data == ":" { + p.pop() + msg, err = p.parse(maxPrecedence) + if err != nil { + return nil, err + } + } + _, err = p.popExpect(tokenSemicolon) + if err != nil { + return nil, err + } + rest, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + return &ast.Assert{ + NodeBase: ast.NewNodeBaseLoc(locFromTokenAST(begin, rest)), + Cond: cond, + Message: msg, + Rest: rest, + }, nil + + case tokenError: + p.pop() + expr, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + return &ast.Error{ + NodeBase: ast.NewNodeBaseLoc(locFromTokenAST(begin, expr)), + Expr: expr, + }, nil + + case tokenIf: + p.pop() + cond, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + _, err = p.popExpect(tokenThen) + if err != nil { + return nil, err + } + branchTrue, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + var branchFalse ast.Node + lr := locFromTokenAST(begin, branchTrue) + if p.peek().kind == tokenElse { + p.pop() + branchFalse, err = p.parse(maxPrecedence) + if err != nil { + return nil, err + } + lr = locFromTokenAST(begin, branchFalse) + } + return &ast.Conditional{ + NodeBase: ast.NewNodeBaseLoc(lr), + Cond: cond, + BranchTrue: branchTrue, + BranchFalse: branchFalse, + }, nil + + case tokenFunction: + p.pop() + next := p.pop() + if next.kind == tokenParenL { + params, gotComma, err := p.parseParameters("function parameter") + if err != nil { + return nil, err + } + body, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + return &ast.Function{ + NodeBase: ast.NewNodeBaseLoc(locFromTokenAST(begin, body)), + Parameters: *params, + TrailingComma: gotComma, + Body: body, + }, nil + } + return nil, MakeStaticError(fmt.Sprintf("Expected ( but got %v", next), next.loc) + + case tokenImport: + p.pop() + body, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + if lit, ok := body.(*ast.LiteralString); ok { + if lit.Kind == ast.StringBlock { + return nil, MakeStaticError("Block string literals not allowed in imports", *body.Loc()) + } + return &ast.Import{ + NodeBase: ast.NewNodeBaseLoc(locFromTokenAST(begin, body)), + File: lit, + }, nil + } + return nil, MakeStaticError("Computed imports are not allowed", *body.Loc()) + + case tokenImportStr: + p.pop() + body, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + if lit, ok := body.(*ast.LiteralString); ok { + if lit.Kind == ast.StringBlock { + return nil, MakeStaticError("Block string literals not allowed in imports", *body.Loc()) + } + return &ast.ImportStr{ + NodeBase: ast.NewNodeBaseLoc(locFromTokenAST(begin, body)), + File: lit, + }, nil + } + return nil, MakeStaticError("Computed imports are not allowed", *body.Loc()) + + case tokenLocal: + p.pop() + var binds ast.LocalBinds + for { + err := p.parseBind(&binds) + if err != nil { + return nil, err + } + delim := p.pop() + if delim.kind != tokenSemicolon && delim.kind != tokenComma { + return nil, MakeStaticError(fmt.Sprintf("Expected , or ; but got %v", delim), delim.loc) + } + if delim.kind == tokenSemicolon { + break + } + } + body, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + return &ast.Local{ + NodeBase: ast.NewNodeBaseLoc(locFromTokenAST(begin, body)), + Binds: binds, + Body: body, + }, nil + + default: + // ast.Unary operator + if begin.kind == tokenOperator { + 
uop, ok := ast.UopMap[begin.data] + if !ok { + return nil, MakeStaticError(fmt.Sprintf("Not a unary operator: %v", begin.data), begin.loc) + } + if prec == unaryPrecedence { + op := p.pop() + expr, err := p.parse(prec) + if err != nil { + return nil, err + } + return &ast.Unary{ + NodeBase: ast.NewNodeBaseLoc(locFromTokenAST(op, expr)), + Op: uop, + Expr: expr, + }, nil + } + } + + // Base case + if prec == 0 { + return p.parseTerminal() + } + + lhs, err := p.parse(prec - 1) + if err != nil { + return nil, err + } + + for { + // Then next token must be a binary operator. + + var bop ast.BinaryOp + + // Check precedence is correct for this level. If we're parsing operators + // with higher precedence, then return lhs and let lower levels deal with + // the operator. + switch p.peek().kind { + case tokenIn: + bop = ast.BopIn + if bopPrecedence[bop] != prec { + return lhs, nil + } + case tokenOperator: + _ = "breakpoint" + if p.peek().data == ":" { + // Special case for the colons in assert. Since COLON is no-longer a + // special token, we have to make sure it does not trip the + // op_is_binary test below. It should terminate parsing of the + // expression here, returning control to the parsing of the actual + // assert AST. + return lhs, nil + } + if p.peek().data == "::" { + // Special case for [e::] + // We need to stop parsing e when we see the :: and + // avoid tripping the op_is_binary test below. + return lhs, nil + } + var ok bool + bop, ok = ast.BopMap[p.peek().data] + if !ok { + return nil, MakeStaticError(fmt.Sprintf("Not a binary operator: %v", p.peek().data), p.peek().loc) + } + + if bopPrecedence[bop] != prec { + return lhs, nil + } + + case tokenDot, tokenBracketL, tokenParenL, tokenBraceL: + if applyPrecedence != prec { + return lhs, nil + } + default: + return lhs, nil + } + + op := p.pop() + switch op.kind { + case tokenBracketL: + // handle slice + var indexes [3]ast.Node + colonsConsumed := 0 + + var end *token + readyForNextIndex := true + for colonsConsumed < 3 { + if p.peek().kind == tokenBracketR { + end = p.pop() + break + } else if p.peek().data == ":" { + colonsConsumed++ + end = p.pop() + readyForNextIndex = true + } else if p.peek().data == "::" { + colonsConsumed += 2 + end = p.pop() + readyForNextIndex = true + } else if readyForNextIndex { + indexes[colonsConsumed], err = p.parse(maxPrecedence) + if err != nil { + return nil, err + } + readyForNextIndex = false + } else { + return nil, p.unexpectedTokenError(tokenBracketR, p.peek()) + } + } + if colonsConsumed > 2 { + // example: target[42:42:42:42] + return p.parsingFailure("Invalid slice: too many colons", end) + } + if colonsConsumed == 0 && readyForNextIndex { + // example: target[] + return p.parsingFailure("ast.Index requires an expression", end) + } + isSlice := colonsConsumed > 0 + + if isSlice { + lhs = &ast.Slice{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(begin, end)), + Target: lhs, + BeginIndex: indexes[0], + EndIndex: indexes[1], + Step: indexes[2], + } + } else { + lhs = &ast.Index{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(begin, end)), + Target: lhs, + Index: indexes[0], + } + } + case tokenDot: + fieldID, err := p.popExpect(tokenIdentifier) + if err != nil { + return nil, err + } + id := ast.Identifier(fieldID.data) + lhs = &ast.Index{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(begin, fieldID)), + Target: lhs, + Id: &id, + } + case tokenParenL: + end, args, gotComma, err := p.parseArguments("function argument") + if err != nil { + return nil, err + } + tailStrict := false + if 
p.peek().kind == tokenTailStrict { + p.pop() + tailStrict = true + } + lhs = &ast.Apply{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(begin, end)), + Target: lhs, + Arguments: *args, + TrailingComma: gotComma, + TailStrict: tailStrict, + } + case tokenBraceL: + obj, end, err := p.parseObjectRemainder(op) + if err != nil { + return nil, err + } + lhs = &ast.ApplyBrace{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(begin, end)), + Left: lhs, + Right: obj, + } + default: + if op.kind == tokenIn && p.peek().kind == tokenSuper { + super := p.pop() + lhs = &ast.InSuper{ + NodeBase: ast.NewNodeBaseLoc(locFromTokens(begin, super)), + Index: lhs, + } + } else { + rhs, err := p.parse(prec - 1) + if err != nil { + return nil, err + } + lhs = &ast.Binary{ + NodeBase: ast.NewNodeBaseLoc(locFromTokenAST(begin, rhs)), + Left: lhs, + Op: bop, + Right: rhs, + } + } + } + } + } +} + +// --------------------------------------------------------------------------- + +func Parse(t tokens) (ast.Node, error) { + p := makeParser(t) + expr, err := p.parse(maxPrecedence) + if err != nil { + return nil, err + } + + if p.peek().kind != tokenEndOfFile { + return nil, MakeStaticError(fmt.Sprintf("Did not expect: %v", p.peek()), p.peek().loc) + } + + addContext(expr, &topLevelContext, anonymous) + + return expr, nil +} diff --git a/pkg/docparser/parser_test.go b/pkg/docparser/parser_test.go new file mode 100644 index 00000000..37388eb7 --- /dev/null +++ b/pkg/docparser/parser_test.go @@ -0,0 +1,271 @@ +/* +Copyright 2016 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +package docparser + +import ( + "fmt" + "testing" +) + +var tests = []string{ + `true`, + `1`, + `1.2e3`, + `!true`, + `null`, + + `$.foo.bar`, + `self.foo.bar`, + `super.foo.bar`, + `super[1]`, + `error "Error!"`, + + `"world"`, + `'world'`, + `||| + world +|||`, + + `foo(bar)`, + `foo(bar,)`, + `foo(bar) tailstrict`, + `foo(bar=42)`, + `foo(bar=42,)`, + `foo(bar, baz=42)`, + `foo.bar`, + `foo[bar]`, + + `true || false`, + `0 && 1 || 0`, + `0 && (1 || 0)`, + + `function(x) x`, + `function(x=5) x`, + `function(x, y=5) x`, + + `local foo = "bar"; foo`, + `local foo(bar) = bar; foo(1)`, + `{ local foo = "bar", baz: 1}`, + `{ local foo(bar) = bar, baz: foo(1)}`, + + `{ foo(bar, baz): bar+baz }`, + + `{ ["foo" + "bar"]: 3 }`, + `{ ["field" + x]: x for x in [1, 2, 3] }`, + `{ local y = x, ["field" + x]: x for x in [1, 2, 3] }`, + `{ ["field" + x]: x for x in [1, 2, 3] if x <= 2 }`, + `{ ["field" + x + y]: x + y for x in [1, 2, 3] if x <= 2 for y in [4, 5, 6]}`, + + `[]`, + `[a, b, c]`, + `[x for x in [1,2,3] ]`, + `[x for x in [1,2,3] if x <= 2]`, + `[x+y for x in [1,2,3] if x <= 2 for y in [4, 5, 6]]`, + + `{}`, + `{ hello: "world" }`, + `{ hello +: "world" }`, + `{ + hello: "world", + "name":: joe, + 'mood'::: "happy", + ||| + key type +|||: "block", +}`, + + `assert true: 'woah!'; true`, + `{ assert true: 'woah!', foo: bar }`, + + `if n > 1 then 'foos' else 'foo'`, + + `local foo = function(x) x + 1; true`, + `local foo = function(x=5) x + 1; true`, + `local foo = function(x=5) x + 1; x(x=3)`, + + `import 'foo.jsonnet'`, + `importstr 'foo.text'`, + + `{a: b} + {c: d}`, + `{a: b}{c: d}`, + + // no colons + `[][0]`, + // one colon + `[][:]`, + `[][1:]`, + `[][:1]`, + `[][1:2]`, + // two colons + `[][::]`, + `[][1::]`, + `[][:1:]`, + `[][::1]`, + `[][1:1:]`, + `[][:1:1]`, + `[][1::1]`, + `[][1:1:1]`, + + `a in b`, + `{ x: if "opt" in super then "x" else "y" }`, +} + +func TestParser(t *testing.T) { + for _, s := range tests { + t.Run(s, func(t *testing.T) { + fmt.Println(s) + tokens, err := Lex("test", s) + if err != nil { + t.Errorf("Unexpected lex error\n input: %v\n error: %v", s, err) + return + } + _, err = Parse(tokens) + if err != nil { + t.Errorf("Unexpected parse error\n input: %v\n error: %v", s, err) + } + }) + + } +} + +type testError struct { + input string + err string +} + +var errorTests = []testError{ + {`,`, `test:1:1-2 Unexpected: (",", ",") while parsing terminal`}, + {`function(a, b c)`, `test:1:15-16 Expected a comma before next function parameter, got (IDENTIFIER, "c").`}, + {`function(a, 1)`, `test:1:13-14 Expected simple identifier but got a complex expression.`}, + {`function(,)`, `test:1:10-11 Unexpected: (",", ",") while parsing terminal`}, + {`function(a=)`, `test:1:12-13 Unexpected: (")", ")") while parsing terminal`}, + {`function(a=,)`, `test:1:12-13 Unexpected: (",", ",") while parsing terminal`}, + {`function(a=5, b)`, `test:1:15-16 Positional argument after a named argument is not allowed`}, + {`a b`, `test:1:3-4 Did not expect: (IDENTIFIER, "b")`}, + {`foo(a, bar(a b))`, `test:1:14-15 Expected a comma before next function argument, got (IDENTIFIER, "b").`}, + + {`local`, `test:1:6 Expected token IDENTIFIER but got end of file`}, + {`local foo = 1, foo = 2; true`, `test:1:16-19 Duplicate local var: foo`}, + {`local foo(a b) = a; true`, `test:1:13-14 Expected a comma before next function parameter, got (IDENTIFIER, "b").`}, + {`local foo(a): a; true`, `test:1:13-14 Expected operator = but got ":"`}, + {`local foo(a) = bar(a b); true`, `test:1:22-23 
Expected a comma before next function argument, got (IDENTIFIER, "b").`}, + {`local foo: 1; true`, `test:1:10-11 Expected operator = but got ":"`}, + {`local foo = bar(a b); true`, `test:1:19-20 Expected a comma before next function argument, got (IDENTIFIER, "b").`}, + + {`{a b}`, `test:1:4-5 Expected token OPERATOR but got (IDENTIFIER, "b")`}, + {`{a = b}`, `test:1:4-5 Expected one of :, ::, :::, +:, +::, +:::, got: =`}, + {`{a :::: b}`, `test:1:4-8 Expected one of :, ::, :::, +:, +::, +:::, got: ::::`}, + + {`{assert x for x in [1, 2, 3]}`, `test:1:11-14 Object comprehension cannot have asserts.`}, + {`{['foo' + x]: true, [x]: x for x in [1, 2, 3]}`, `test:1:28-31 Object comprehension can only have one field.`}, + {`{foo: x for x in [1, 2, 3]}`, `test:1:9-12 Object comprehensions can only have [e] fields.`}, + {`{[x]:: true for x in [1, 2, 3]}`, `test:1:13-16 Object comprehensions cannot have hidden fields.`}, + {`{[x]: true for 1 in [1, 2, 3]}`, `test:1:16-17 Expected token IDENTIFIER but got (NUMBER, "1")`}, + {`{[x]: true for x at [1, 2, 3]}`, `test:1:18-20 Expected token in but got (IDENTIFIER, "at")`}, + {`{[x]: true for x in [1, 2 3]}`, `test:1:27-28 Expected a comma before next array element.`}, + {`{[x]: true for x in [1, 2, 3] if (a b)}`, `test:1:37-38 Expected token ")" but got (IDENTIFIER, "b")`}, + {`{[x]: true for x in [1, 2, 3] if a b}`, `test:1:36-37 Expected for, if or "}" after for clause, got: (IDENTIFIER, "b")`}, + + {`{a: b c:d}`, `test:1:7-8 Expected a comma before next field.`}, + + {`{[(x y)]: z}`, `test:1:6-7 Expected token ")" but got (IDENTIFIER, "y")`}, + {`{[x y]: z}`, `test:1:5-6 Expected token "]" but got (IDENTIFIER, "y")`}, + + {`{foo(x y): z}`, `test:1:8-9 Expected a comma before next method parameter, got (IDENTIFIER, "y").`}, + {`{foo(x)+: z}`, `test:1:2-5 Cannot use +: syntax sugar in a method: foo`}, + {`{foo: 1, foo: 2}`, `test:1:10-13 Duplicate field: foo`}, + {`{foo: (1 2)}`, `test:1:10-11 Expected token ")" but got (NUMBER, "2")`}, + + {`{local 1 = 3, true}`, `test:1:8-9 Expected token IDENTIFIER but got (NUMBER, "1")`}, + {`{local foo = 1, local foo = 2, true}`, `test:1:23-26 Duplicate local var: foo`}, + {`{local foo(a b) = 1, a: true}`, `test:1:14-15 Expected a comma before next function parameter, got (IDENTIFIER, "b").`}, + {`{local foo(a): 1, a: true}`, `test:1:14-15 Expected operator = but got ":"`}, + {`{local foo(a) = (a b), a: true}`, `test:1:20-21 Expected token ")" but got (IDENTIFIER, "b")`}, + + {`{assert (a b), a: true}`, `test:1:12-13 Expected token ")" but got (IDENTIFIER, "b")`}, + {`{assert a: (a b), a: true}`, `test:1:15-16 Expected token ")" but got (IDENTIFIER, "b")`}, + + {`{function(a, b) a+b: true}`, `test:1:2-10 Unexpected: (function, "function") while parsing field definition`}, + + {`[(a b), 2, 3]`, `test:1:5-6 Expected token ")" but got (IDENTIFIER, "b")`}, + {`[1, (a b), 2, 3]`, `test:1:8-9 Expected token ")" but got (IDENTIFIER, "b")`}, + {`[a for b in [1 2 3]]`, `test:1:16-17 Expected a comma before next array element.`}, + + {`for`, `test:1:1-4 Unexpected: (for, "for") while parsing terminal`}, + {``, `test:1:1 Unexpected end of file.`}, + {`((a b))`, `test:1:5-6 Expected token ")" but got (IDENTIFIER, "b")`}, + {`a.1`, `test:1:3-4 Expected token IDENTIFIER but got (NUMBER, "1")`}, + {`super.1`, `test:1:7-8 Expected token IDENTIFIER but got (NUMBER, "1")`}, + {`super[(a b)]`, `test:1:10-11 Expected token ")" but got (IDENTIFIER, "b")`}, + {`super[a b]`, `test:1:9-10 Expected token "]" but got (IDENTIFIER, "b")`}, 
+ {`super`, `test:1:1-6 Expected . or [ after super.`}, + + {`assert (a b); true`, `test:1:11-12 Expected token ")" but got (IDENTIFIER, "b")`}, + {`assert a: (a b); true`, `test:1:14-15 Expected token ")" but got (IDENTIFIER, "b")`}, + {`assert a: 'foo', true`, `test:1:16-17 Expected token ";" but got (",", ",")`}, + {`assert a: 'foo'; (a b)`, `test:1:21-22 Expected token ")" but got (IDENTIFIER, "b")`}, + + {`error (a b)`, `test:1:10-11 Expected token ")" but got (IDENTIFIER, "b")`}, + + {`if (a b) then c`, `test:1:7-8 Expected token ")" but got (IDENTIFIER, "b")`}, + {`if a b c`, `test:1:6-7 Expected token then but got (IDENTIFIER, "b")`}, + {`if a then (b c)`, `test:1:14-15 Expected token ")" but got (IDENTIFIER, "c")`}, + {`if a then b else (c d)`, `test:1:21-22 Expected token ")" but got (IDENTIFIER, "d")`}, + + {`function(a) (a b)`, `test:1:16-17 Expected token ")" but got (IDENTIFIER, "b")`}, + {`function a a`, `test:1:10-11 Expected ( but got (IDENTIFIER, "a")`}, + + {`import (a b)`, `test:1:11-12 Expected token ")" but got (IDENTIFIER, "b")`}, + {`import (a+b)`, `test:1:9-12 Computed imports are not allowed`}, + {`importstr (a b)`, `test:1:14-15 Expected token ")" but got (IDENTIFIER, "b")`}, + {`importstr (a+b)`, `test:1:12-15 Computed imports are not allowed`}, + + {`local a = b ()`, `test:1:15 Expected , or ; but got end of file`}, + {`local a = b; (a b)`, `test:1:17-18 Expected token ")" but got (IDENTIFIER, "b")`}, + + {`1+ <<`, `test:1:4-6 Not a unary operator: <<`}, + {`-(a b)`, `test:1:5-6 Expected token ")" but got (IDENTIFIER, "b")`}, + {`1~2`, `test:1:2-3 Not a binary operator: ~`}, + + {`a[(b c)]`, `test:1:6-7 Expected token ")" but got (IDENTIFIER, "c")`}, + {`a[b c]`, `test:1:5-6 Expected token "]" but got (IDENTIFIER, "c")`}, + {`a[]`, `test:1:3-4 ast.Index requires an expression`}, + {`a[42:42:42:42]`, `test:1:11-12 Invalid slice: too many colons`}, + {`a[42:42::42]`, `test:1:8-10 Invalid slice: too many colons`}, + + {`a{b c}`, `test:1:5-6 Expected token OPERATOR but got (IDENTIFIER, "c")`}, +} + +func TestParserErrors(t *testing.T) { + for _, s := range errorTests { + t.Run(s.input, func(t *testing.T) { + tokens, err := Lex("test", s.input) + if err != nil { + t.Errorf("Unexpected lex error\n input: %v\n error: %v", s.input, err) + return + } + _, err = Parse(tokens) + if err == nil { + t.Errorf("Expected parse error but got success\n input: %v", s.input) + return + } + if err.Error() != s.err { + t.Errorf("Error string not as expected\n input: %v\n expected error: %v\n actual error: %v", s.input, s.err, err.Error()) + } + }) + } + +} diff --git a/pkg/docparser/static_error.go b/pkg/docparser/static_error.go new file mode 100644 index 00000000..58232d32 --- /dev/null +++ b/pkg/docparser/static_error.go @@ -0,0 +1,49 @@ +/* +Copyright 2016 Google Inc. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package docparser
+
+import (
+ "fmt"
+
+ "github.com/google/go-jsonnet/ast"
+)
+
+//////////////////////////////////////////////////////////////////////////////
+// StaticError
+
+// StaticError represents an error during parsing/lexing or static analysis.
+// TODO(sbarzowski) Make it possible to have multiple static errors and warnings
+type StaticError struct {
+ Loc ast.LocationRange
+ Msg string
+}
+
+func MakeStaticErrorMsg(msg string) StaticError {
+ return StaticError{Msg: msg}
+}
+
+func MakeStaticError(msg string, lr ast.LocationRange) StaticError {
+ return StaticError{Msg: msg, Loc: lr}
+}
+
+func (err StaticError) Error() string {
+ loc := ""
+ if err.Loc.IsSet() {
+ loc = err.Loc.String()
+ }
+ return fmt.Sprintf("%v %v", loc, err.Msg)
+}
diff --git a/pkg/util/jsonnet/import.go b/pkg/util/jsonnet/import.go
new file mode 100644
index 00000000..56015d9b
--- /dev/null
+++ b/pkg/util/jsonnet/import.go
@@ -0,0 +1,53 @@
+package jsonnet
+
+import (
+ "github.com/ksonnet/ksonnet-lib/ksonnet-gen/astext"
+ "github.com/ksonnet/ksonnet/pkg/docparser"
+ "github.com/pkg/errors"
+ "github.com/spf13/afero"
+)
+
+var (
+ // importFs is the default filesystem Import uses when no filesystem is supplied.
+ importFs = afero.NewOsFs()
+)
+
+// Import imports jsonnet from a path.
+func Import(filename string) (*astext.Object, error) {
+ return ImportFromFs(filename, importFs)
+}
+
+// ImportFromFs imports jsonnet from a path on an afero filesystem.
+func ImportFromFs(filename string, fs afero.Fs) (*astext.Object, error) {
+ if filename == "" {
+ return nil, errors.New("filename was blank")
+ }
+
+ b, err := afero.ReadFile(fs, filename)
+ if err != nil {
+ return nil, errors.Wrap(err, "read lib")
+ }
+
+ return Parse(filename, string(b))
+
+}
+
+// Parse converts a jsonnet snippet to AST. 
+func Parse(filename, src string) (*astext.Object, error) {
+ tokens, err := docparser.Lex(filename, src)
+ if err != nil {
+ return nil, errors.Wrap(err, "lex jsonnet snippet")
+ }
+
+ node, err := docparser.Parse(tokens)
+ if err != nil {
+ return nil, errors.Wrap(err, "parse jsonnet snippet")
+ }
+
+ root, ok := node.(*astext.Object)
+ if !ok {
+ return nil, errors.New("root was not an object")
+ }
+
+ return root, nil
+}
diff --git a/pkg/util/jsonnet/import_test.go b/pkg/util/jsonnet/import_test.go
new file mode 100644
index 00000000..b0dee6fe
--- /dev/null
+++ b/pkg/util/jsonnet/import_test.go
@@ -0,0 +1,75 @@
+package jsonnet
+
+import (
+ "testing"
+
+ "github.com/google/go-jsonnet/ast"
+ "github.com/ksonnet/ksonnet-lib/ksonnet-gen/astext"
+ "github.com/spf13/afero"
+ "github.com/stretchr/testify/require"
+)
+
+func stageContent(t *testing.T, fs afero.Fs, path string, data []byte) {
+ err := afero.WriteFile(fs, path, data, 0644)
+ require.NoError(t, err)
+}
+
+func TestImport(t *testing.T) {
+ ogFs := importFs
+ defer func(ogFs afero.Fs) {
+ importFs = ogFs
+ }(ogFs)
+
+ importFs = afero.NewMemMapFs()
+
+ stageContent(t, importFs, "/obj.jsonnet", []byte("{}"))
+ stageContent(t, importFs, "/array.jsonnet", []byte(`["a", "b"]`))
+ stageContent(t, importFs, "/parser.jsonnet", []byte("localï¸ a = b; []"))
+
+ cases := []struct {
+ name string
+ path string
+ isErr bool
+ }{
+ {
+ name: "with an existing jsonnet file",
+ path: "/obj.jsonnet",
+ },
+ {
+ name: "no filename",
+ isErr: true,
+ },
+ {
+ name: "invalid file",
+ path: "/invalid",
+ isErr: true,
+ },
+ {
+ name: "parser error",
+ path: "/parser.jsonnet",
+ isErr: true,
+ },
+ {
+ name: "not an object",
+ path: "/array.jsonnet",
+ isErr: true,
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ obj, err := Import(tc.path)
+ if tc.isErr {
+ require.Error(t, err)
+ } else {
+ require.NoError(t, err)
+
+ obj.NodeBase = ast.NodeBase{}
+ expected := &astext.Object{}
+
+ require.Equal(t, expected, obj)
+ }
+ })
+ }
+
+}
diff --git a/pkg/util/jsonnet/object.go b/pkg/util/jsonnet/object.go
new file mode 100644
index 00000000..6062d75f
--- /dev/null
+++ b/pkg/util/jsonnet/object.go
@@ -0,0 +1,135 @@
+package jsonnet
+
+import (
+ "fmt"
+
+ "github.com/google/go-jsonnet/ast"
+ "github.com/ksonnet/ksonnet-lib/ksonnet-gen/astext"
+ "github.com/pkg/errors"
+)
+
+// Set sets an object key at path to a value. 
+func Set(object *astext.Object, path []string, value ast.Node) error { + if len(path) == 0 { + return errors.New("path was empty") + } + + curObj := object + + for i, k := range path { + field, err := findField(curObj, k) + if err != nil { + switch err.(type) { + default: + return err + case *unknownField: + field, err = astext.CreateField(k) + if err != nil { + return err + } + field.Hide = ast.ObjectFieldInherit + curObj.Fields = append(curObj.Fields, *field) + } + } + + if i == len(path)-1 { + field, _ = findField(curObj, k) + if canUpdateObject(field.Expr2, value) { + return errors.New("can't set object to non object") + } + field.Expr2 = value + return nil + } + + if field.Expr2 == nil { + curObj = &astext.Object{} + field.Expr2 = curObj + } else if obj, ok := field.Expr2.(*astext.Object); ok { + curObj = obj + } else { + return errors.Errorf("child is not an object at %q", k) + } + } + + return nil +} + +func canUpdateObject(node1, node2 ast.Node) bool { + return isNodeObject(node1) && !isNodeObject(node2) +} + +func isNodeObject(node ast.Node) bool { + _, ok := node.(*astext.Object) + return ok +} + +type unknownField struct { + name string +} + +func (e *unknownField) Error() string { + return fmt.Sprintf("unable to find field %q", e.name) +} + +func findField(object *astext.Object, id string) (*astext.ObjectField, error) { + for i := range object.Fields { + fieldID, err := FieldID(object.Fields[i]) + if err != nil { + return nil, err + } + + if id == fieldID { + return &object.Fields[i], nil + } + } + + return nil, &unknownField{name: id} +} + +// FindObject finds a path in an object. +func FindObject(object *astext.Object, path []string) (*astext.Object, error) { + if len(path) == 0 { + return nil, errors.New("search path was empty") + } + + for i := range object.Fields { + id, err := FieldID(object.Fields[i]) + if err != nil { + return nil, err + } + + if path[0] == id { + if len(path) == 1 { + + return object, nil + } + + child, ok := object.Fields[i].Expr2.(*astext.Object) + if !ok { + return nil, errors.Errorf("child is a %T. expected an object", object.Fields[i].Expr2) + } + + return FindObject(child, path[1:]) + } + } + + return nil, errors.New("path was not found") +} + +// FieldID returns the id for an object field. 
+func FieldID(field astext.ObjectField) (string, error) { + if field.Expr1 != nil { + lf, ok := field.Expr1.(*ast.LiteralString) + if !ok { + return "", errors.New("field Expr1 is not a string") + } + + return lf.Value, nil + } + + if field.Id == nil { + return "", errors.New("field does not have an ID") + } + + return string(*field.Id), nil +} diff --git a/pkg/util/jsonnet/object_test.go b/pkg/util/jsonnet/object_test.go new file mode 100644 index 00000000..126e6224 --- /dev/null +++ b/pkg/util/jsonnet/object_test.go @@ -0,0 +1,206 @@ +package jsonnet + +import ( + "bytes" + "io/ioutil" + "testing" + + "github.com/google/go-jsonnet/ast" + "github.com/ksonnet/ksonnet-lib/ksonnet-gen/astext" + nm "github.com/ksonnet/ksonnet-lib/ksonnet-gen/nodemaker" + "github.com/ksonnet/ksonnet-lib/ksonnet-gen/printer" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSet(t *testing.T) { + labels := map[string]interface{}{ + "metadata": map[string]interface{}{ + "labels": map[string]interface{}{ + "label": "label", + }, + }, + } + + labelsObject, err := nm.KVFromMap(labels) + require.NoError(t, err) + + cases := []struct { + name string + updatePath []string + update ast.Node + expected string + isErr bool + }{ + { + name: "update existing field", + updatePath: []string{"a", "b", "c"}, + update: nm.NewInt(9).Node(), + expected: "{\n a:: {\n b:: {\n c:: 9,\n },\n },\n}", + }, + { + name: "set map", + updatePath: []string{"a", "d"}, + update: labelsObject.Node(), + expected: string(testdata(t, "set-map.jsonnet")), + }, + { + name: "set new field", + updatePath: []string{"a", "e"}, + update: nm.NewInt(9).Node(), + expected: "{\n a:: {\n b:: {\n c:: \"value\",\n },\n e: 9,\n },\n}", + }, + { + name: "set object field to non object", + updatePath: []string{"a"}, + update: nm.NewInt(9).Node(), + isErr: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + b := nm.NewObject() + b.Set(nm.NewKey("c"), nm.NewStringDouble("value")) + + a := nm.NewObject() + a.Set(nm.NewKey("b"), b) + + object := nm.NewObject() + object.Set(nm.NewKey("a"), a) + + astObject := object.Node().(*astext.Object) + + err := Set(astObject, tc.updatePath, tc.update) + if tc.isErr { + require.Error(t, err) + } else { + require.NoError(t, err) + + var got bytes.Buffer + err = printer.Fprint(&got, astObject) + require.NoError(t, err) + + require.Equal(t, tc.expected, got.String()) + } + }) + } +} + +func TestFindObject(t *testing.T) { + b := nm.NewObject() + b.Set(nm.NewKey("c"), nm.NewStringDouble("value")) + + a := nm.NewObject() + a.Set(nm.NewKey("b"), b) + a.Set(nm.NewKey("d-1", nm.KeyOptCategory(ast.ObjectFieldStr)), nm.NewStringDouble("string")) + + object := nm.NewObject() + object.Set(nm.NewKey("a"), a) + + astObject := object.Node().(*astext.Object) + + cases := []struct { + name string + path []string + expected ast.Node + isErr bool + }{ + { + name: "find nested object", + path: []string{"a", "b", "c"}, + expected: b.Node(), + }, + { + name: "find string id object", + path: []string{"a", "d-1"}, + expected: a.Node(), + }, + { + name: "invalid path", + path: []string{"z"}, + isErr: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + node, err := FindObject(astObject, tc.path) + if tc.isErr { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, node) + + } + + }) + } +} + +func TestFieldID(t *testing.T) { + + expr1Field := astext.ObjectField{ + ObjectField: ast.ObjectField{ + Expr1: 
nm.NewStringDouble("my-field").Node(), + }, + } + + invalidExpr1Field := astext.ObjectField{ + ObjectField: ast.ObjectField{ + Expr1: nm.NewInt(1).Node(), + }, + } + + id := ast.Identifier("my-field") + idField := astext.ObjectField{ + ObjectField: ast.ObjectField{ + Id: &id, + }, + } + + cases := []struct { + name string + field astext.ObjectField + expected string + isErr bool + }{ + { + name: "no id", + isErr: true, + }, + { + name: "field with id in Expr1", + field: expr1Field, + expected: "my-field", + }, + { + name: "field with invalid Expr1", + field: invalidExpr1Field, + isErr: true, + }, + { + name: "field with id as Identifier", + field: idField, + expected: "my-field", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + id, err := FieldID(tc.field) + if tc.isErr { + require.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tc.expected, id) + } + }) + } +} + +func testdata(t *testing.T, name string) []byte { + b, err := ioutil.ReadFile("testdata/" + name) + require.NoError(t, err, "read testdata %s", name) + return b +} diff --git a/pkg/util/jsonnet/testdata/set-map.jsonnet b/pkg/util/jsonnet/testdata/set-map.jsonnet new file mode 100644 index 00000000..badd3afe --- /dev/null +++ b/pkg/util/jsonnet/testdata/set-map.jsonnet @@ -0,0 +1,14 @@ +{ + a:: { + b:: { + c:: "value", + }, + d: { + metadata: { + labels: { + label: "label", + }, + }, + }, + }, +} \ No newline at end of file -- GitLab