diff --git a/Package.swift b/Package.swift index ddf795d3de7..060d19e7576 100644 --- a/Package.swift +++ b/Package.swift @@ -45,6 +45,7 @@ let package = Package( .macCatalyst(.v13), ], products: [ + .library(name: "SwiftParser", type: .static, targets: ["SwiftParser"]), .library(name: "SwiftSyntax", type: .static, targets: ["SwiftSyntax"]), .library(name: "SwiftSyntaxParser", type: .static, targets: ["SwiftSyntaxParser"]), .library(name: "SwiftSyntaxBuilder", type: .static, targets: ["SwiftSyntaxBuilder"]), @@ -101,10 +102,18 @@ let package = Package( name: "_SwiftSyntaxTestSupport", dependencies: ["SwiftSyntax"] ), + .target( + name: "SwiftParser", + dependencies: ["SwiftSyntax"] + ), .executableTarget( name: "lit-test-helper", dependencies: ["SwiftSyntax", "SwiftSyntaxParser"] ), + .executableTarget( + name: "swift-parser-test", + dependencies: ["SwiftSyntax", "SwiftParser", .product(name: "ArgumentParser", package: "swift-argument-parser")] + ), .executableTarget( name: "generate-swift-syntax-builder", dependencies: ["SwiftSyntaxBuilder"], @@ -144,8 +153,23 @@ let package = Package( ), .testTarget( name: "PerformanceTest", - dependencies: ["SwiftSyntax", "SwiftSyntaxParser"], + dependencies: ["SwiftSyntax", "SwiftSyntaxParser", "SwiftParser"], exclude: ["Inputs"] ), + .testTarget( + name: "SwiftParserTest", + dependencies: ["SwiftParser", "_SwiftSyntaxTestSupport"] + ), ] ) + +if ProcessInfo.processInfo.environment["SWIFTCI_USE_LOCAL_DEPS"] == nil { + // Building standalone. 
extension Parser {
  /// Parses a run of consecutive attributes, each introduced by an `@` sign.
  ///
  /// - Returns: The list of parsed attributes, or `nil` when the current token
  ///   is not an `@` sign and there is therefore nothing to parse.
  mutating func parseAttributeList() -> RawAttributeListSyntax? {
    if !self.at(.atSign) {
      return nil
    }

    // The check above guarantees at least one attribute. After that, keep
    // parsing for as long as the next token opens another attribute.
    var attributes: [RawSyntax] = [self.parseAttribute()]
    while self.at(.atSign) {
      attributes.append(self.parseAttribute())
    }
    return RawAttributeListSyntax(elements: attributes, arena: self.arena)
  }
}
"unowned" + case _effects = "_effects" + case __objc_bridged = "__objc_bridged" + case NSApplicationMain = "NSApplicationMain" + case _objc_non_lazy_realization = "_objc_non_lazy_realization" + case __synthesized_protocol = "__synthesized_protocol" + case testable = "testable" + case _alignment = "_alignment" + case `rethrows` = "rethrows" + case _swift_native_objc_runtime_base = "_swift_native_objc_runtime_base" + case indirect = "indirect" + case warn_unqualified_access = "warn_unqualified_access" + case _show_in_interface = "_show_in_interface" + case _cdecl = "_cdecl" + case usableFromInline = "usableFromInline" + case discardableResult = "discardableResult" + case GKInspectable = "GKInspectable" + case _implements = "_implements" + case _objcRuntimeName = "_objcRuntimeName" + case _staticInitializeObjCMetadata = "_staticInitializeObjCMetadata" + case _restatedObjCConformance = "_restatedObjCConformance" + case _typeSequence = "_typeSequence" + case _optimize = "_optimize" + case _clangImporterSynthesizedType = "_clangImporterSynthesizedType" + case _weakLinked = "_weakLinked" + case frozen = "frozen" + case _frozen = "_frozen" + case _forbidSerializingReference = "_forbidSerializingReference" + case _hasInitialValue = "_hasInitialValue" + case _nonoverride = "_nonoverride" + case _dynamicReplacement = "_dynamicReplacement" + case _borrowed = "_borrowed" + case _private = "_private" + case _alwaysEmitIntoClient = "_alwaysEmitIntoClient" + case _implementationOnly = "_implementationOnly" + case _custom = "_custom" + case propertyWrapper = "propertyWrapper" + case _disfavoredOverload = "_disfavoredOverload" + case resultBuilder = "resultBuilder" + case _projectedValueProperty = "_projectedValueProperty" + case _nonEphemeral = "_nonEphemeral" + case differentiable = "differentiable" + case _hasMissingDesignatedInitializers = "_hasMissingDesignatedInitializers" + case _inheritsConvenienceInitializers = "_inheritsConvenienceInitializers" + case _typeEraser = 
"_typeEraser" + case IBSegueAction = "IBSegueAction" + case _originallyDefinedIn = "_originallyDefinedIn" + case derivative = "derivative" + case _spi = "_spi" + case transpose = "transpose" + case noDerivative = "noDerivative" + case actor = "actor" + case isolated = "isolated" + case globalActor = "globalActor" + case _specializeExtension = "_specializeExtension" + case async = "async" + case Sendable = "Sendable" + case _marker = "_marker" + case reasync = "reasync" + case nonisolated = "nonisolated" + case _unsafeInheritExecutor = "_unsafeInheritExecutor" + case _implicitSelfCapture = "_implicitSelfCapture" + case _inheritActorContext = "_inheritActorContext" + case distributed = "distributed" + case _assemblyVision = "_assemblyVision" + case _nonSendable = "_nonSendable" + case _noImplicitCopy = "_noImplicitCopy" + case _noLocks = "_noLocks" + case _noAllocation = "_noAllocation" + case preconcurrency = "preconcurrency" + case _const = "_const" + case _unavailableFromAsync = "_unavailableFromAsync" + case exclusivity = "exclusivity" + case _backDeploy = "_backDeploy" + case _local = "_local" + } + + mutating func parseAttribute() -> RawSyntax { + switch self.peek().tokenText { + case "available": + return RawSyntax(self.parseAvailabilityAttribute()) + case "differentiable": + return RawSyntax(self.parseDifferentiableAttribute()) + case "objc": + return RawSyntax(self.parseObjectiveCAttribute()) + case "_specialize": + return RawSyntax(self.parseSpecializeAttribute()) + case "_private": + return RawSyntax(self.parsePrivateImportAttribute()) + case "_dynamicReplacement": + return RawSyntax(self.parseDynamicReplacementAttribute()) + case "_spi": + return RawSyntax(self.parseSPIAttribute()) + default: + break + } + + guard DeclarationAttribute(rawValue: self.peek().tokenText) != nil else { + return RawSyntax(self.parseCustomAttribute()) + } + + let atSign = self.eat(.atSign) + let ident = self.consumeIdentifier() + let leftParen: RawTokenSyntax? 
+ let arg: RawSyntax? + let unexpectedBeforeRightParen: RawUnexpectedNodesSyntax? + let rightParen: RawTokenSyntax? + if self.at(.leftParen) { + var args = [RawTokenSyntax]() + leftParen = self.eat(.leftParen) + while !self.at(.eof), !self.at(.rightParen) { + args.append(self.consumeAnyToken()) + } + arg = RawSyntax(RawTokenListSyntax(elements: args, arena: self.arena)) + (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen) + } else { + leftParen = nil + arg = nil + unexpectedBeforeRightParen = nil + rightParen = nil + } + return RawSyntax(RawAttributeSyntax( + atSignToken: atSign, + attributeName: ident, + leftParen: leftParen, + argument: arg, + unexpectedBeforeRightParen, + rightParen: rightParen, + tokenList: nil, + arena: self.arena)) + } + + mutating func parseCustomAttribute() -> RawCustomAttributeSyntax { + let atSign = self.eat(.atSign) + let attrName = self.parseType() + + // Custom attributes are stricter than normal attributes about their + // argument lists "immediately" following the attribute name. 
extension Parser {
  /// Parses an `@available(...)` attribute.
  ///
  /// The current token must be the `@` sign and the token after it must spell
  /// `available` (checked with an assertion).
  ///
  /// The argument list is parsed in one of two forms, chosen by looking one
  /// token past the first argument token:
  ///  - if that token is an integer or floating-point literal, the arguments
  ///    are parsed with `parseAvailabilitySpecList(from: .available)`;
  ///  - otherwise they are parsed with `parseExtendedAvailabilitySpecList()`.
  mutating func parseAvailabilityAttribute() -> RawAttributeSyntax {
    let atSign = self.eat(.atSign)
    assert(self.currentToken.tokenText == "available")
    let attributeName = self.consumeAnyToken()
    let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)

    // A literal right after the first token means a version follows a
    // platform name directly, which only the spec-list form allows.
    let specList: RawAvailabilitySpecListSyntax
    switch self.peek().tokenKind {
    case .integerLiteral, .floatingLiteral:
      specList = self.parseAvailabilitySpecList(from: .available)
    default:
      specList = self.parseExtendedAvailabilitySpecList()
    }

    let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)

    return RawAttributeSyntax(
      atSignToken: atSign,
      attributeName: attributeName,
      unexpectedBeforeLeftParen,
      leftParen: leftParen,
      argument: RawSyntax(specList),
      unexpectedBeforeRightParen,
      rightParen: rightParen,
      tokenList: nil,
      arena: self.arena)
  }
}
self.expect(.leftParen) + + let argument = self.parseDifferentiableAttributeArguments() + let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen) + + return RawAttributeSyntax( + atSignToken: atSign, + attributeName: differentiable, + unexpectedBeforeLeftParen, + leftParen: leftParen, + argument: RawSyntax(argument), + unexpectedBeforeRightParen, + rightParen: rightParen, + tokenList: nil, + arena: self.arena) + } + + enum DifferentiabilityKind: SyntaxText { + case reverse = "reverse" + case linear = "_linear" + case forward = "_forward" + } + + mutating func parseDifferentiableAttributeArguments() -> RawDifferentiableAttributeArgumentsSyntax { + let diffKind: RawTokenSyntax? + let diffKindComma: RawTokenSyntax? + if + .identifier == self.currentToken.tokenKind, + DifferentiabilityKind(rawValue: self.currentToken.tokenText) != nil + { + diffKind = self.consumeIdentifier() + diffKindComma = self.consume(if: .comma) + } else { + diffKind = nil + diffKindComma = nil + } + + let diffParams: RawDifferentiabilityParamsClauseSyntax? + let diffParamsComma: RawTokenSyntax? + if self.currentToken.tokenText == "wrt" { + diffParams = self.parseDifferentiabilityParameters() + diffParamsComma = self.consume(if: .comma) + } else { + diffParams = nil + diffParamsComma = nil + } + + let whereClause: RawGenericWhereClauseSyntax? + if self.at(.whereKeyword) { + whereClause = self.parseGenericWhereClause() + } else { + whereClause = nil + } + return RawDifferentiableAttributeArgumentsSyntax( + diffKind: diffKind, + diffKindComma: diffKindComma, + diffParams: diffParams, + diffParamsComma: diffParamsComma, + whereClause: whereClause, + arena: self.arena) + } + + mutating func parseDifferentiabilityParameters() -> RawDifferentiabilityParamsClauseSyntax { + let wrt = self.consumeIdentifier() + let (unexpectedBeforeColon, colon) = self.expect(.colon) + + guard self.at(.leftParen) else { + // If no opening '(' for parameter list, parse a single parameter. 
+ let param = self.parseDifferentiabilityParameter().map(RawSyntax.init(_:)) + ?? RawSyntax(RawTokenListSyntax(elements: self.recover(), arena: self.arena)) + return RawDifferentiabilityParamsClauseSyntax( + wrtLabel: wrt, + unexpectedBeforeColon, + colon: colon, + parameters: param, + arena: self.arena + ) + } + + let leftParen = self.eat(.leftParen) + var elements = [RawDifferentiabilityParamSyntax]() + while !self.at(.eof) && !self.at(.rightParen) { + guard let param = self.parseDifferentiabilityParameter() else { + break + } + elements.append(param) + } + let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen) + + let parameters = RawDifferentiabilityParamListSyntax(elements: elements, arena: self.arena) + let list = RawDifferentiabilityParamsSyntax( + leftParen: leftParen, + diffParams: parameters, + unexpectedBeforeRightParen, + rightParen: rightParen, + arena: self.arena + ) + return RawDifferentiabilityParamsClauseSyntax( + wrtLabel: wrt, + unexpectedBeforeColon, + colon: colon, + parameters: RawSyntax(list), + arena: self.arena + ) + } + + mutating func parseDifferentiabilityParameter() -> RawDifferentiabilityParamSyntax? 
extension Parser {
  /// Parses an `@objc` attribute together with its optional parenthesized
  /// selector.
  ///
  /// The current token must be the `@` sign and the token after it must spell
  /// `objc` (checked with an assertion). When no `(` follows the name, the
  /// attribute is produced without parentheses or an argument.
  mutating func parseObjectiveCAttribute() -> RawAttributeSyntax {
    let atSign = self.eat(.atSign)
    assert(self.currentToken.tokenText == "objc")
    let objc = self.consumeAnyToken()

    let leftParen: RawTokenSyntax?
    let argument: RawObjCSelectorSyntax?
    let unexpectedBeforeRightParen: RawUnexpectedNodesSyntax?
    let rightParen: RawTokenSyntax?
    if self.at(.leftParen) {
      leftParen = self.eat(.leftParen)
      argument = self.parseObjectiveCSelector()
      (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
    } else {
      leftParen = nil
      argument = nil
      unexpectedBeforeRightParen = nil
      rightParen = nil
    }

    return RawAttributeSyntax(
      atSignToken: atSign, attributeName: objc,
      leftParen: leftParen,
      argument: argument.map(RawSyntax.init),
      unexpectedBeforeRightParen,
      rightParen: rightParen,
      tokenList: nil,
      arena: self.arena)
  }

  /// Parses the selector inside `@objc(...)`, stopping before the closing `)`.
  ///
  /// A selector is a sequence of pieces. Each piece is either a lone `:` (an
  /// empty piece) or a name (identifier or keyword) followed by `:`. A single
  /// name directly followed by `)` is accepted as a zero-argument selector
  /// and is recorded without a colon.
  ///
  /// Parsing stops at the first token that cannot start a selector piece, so
  /// the loop always makes progress; the caller's `expect(.rightParen)` deals
  /// with whatever is left.
  mutating func parseObjectiveCSelector() -> RawObjCSelectorSyntax {
    var elements = [RawObjCSelectorPieceSyntax]()
    while !self.at(.eof) && !self.at(.rightParen) {
      // Empty selector piece.
      if self.at(.colon) {
        let colon = self.eat(.colon)
        elements.append(RawObjCSelectorPieceSyntax(
          name: nil, colon: colon, arena: self.arena))
        continue
      }

      // Anything that is neither a colon nor a name cannot be part of a
      // selector. Bail out instead of spinning on a token we never consume.
      guard self.currentToken.isIdentifier || self.currentToken.isKeyword else {
        break
      }

      let name = self.consumeAnyToken()

      // `@objc(foo)`: a lone name is a complete zero-argument selector, so no
      // colon is expected here.
      if self.at(.rightParen) && elements.isEmpty {
        elements.append(RawObjCSelectorPieceSyntax(
          name: name, colon: nil, arena: self.arena))
        break
      }

      let (unexpectedBeforeColon, colon) = self.expect(.colon)
      elements.append(RawObjCSelectorPieceSyntax(
        name: name,
        unexpectedBeforeColon,
        colon: colon,
        arena: self.arena))
    }
    return RawObjCSelectorSyntax(elements: elements, arena: self.arena)
  }
}

extension Parser {
  /// Parses a `@_specialize(...)` attribute.
  ///
  /// The current token must be the `@` sign and the token after it must spell
  /// `_specialize` (checked with an assertion).
  mutating func parseSpecializeAttribute() -> RawAttributeSyntax {
    let atSign = self.eat(.atSign)
    assert(self.currentToken.tokenText == "_specialize")
    let specializeToken = self.consumeAnyToken()
    let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
    let argument = self.parseSpecializeAttributeSpecList()
    let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
    return RawAttributeSyntax(
      atSignToken: atSign,
      attributeName: specializeToken,
      unexpectedBeforeLeftParen,
      leftParen: leftParen,
      argument: RawSyntax(argument),
      unexpectedBeforeRightParen,
      rightParen: rightParen,
      tokenList: nil,
      arena: self.arena)
  }

  /// The argument labels recognized inside `@_specialize(...)`.
  enum SpecializeParameter: SyntaxText {
    case target
    case availability
    case exported
    case kind
    case spi
    case spiModule
    case available
  }

  /// Parses the argument list of a `@_specialize` attribute: zero or more
  /// labeled parameters followed by an optional generic `where` clause.
  ///
  /// Labeled parameters are parsed until the end of input, a `)`, a `where`
  /// keyword, or a token that is not one of the `SpecializeParameter` labels.
  /// An unrecognized label is left unconsumed for the caller's
  /// `expect(.rightParen)`, so malformed input never traps.
  mutating func parseSpecializeAttributeSpecList() -> RawSpecializeAttributeSpecListSyntax {
    var elements = [RawSyntax]()
    // Parse optional "exported" and "kind" labeled parameters.
    while !self.at(.eof) && !self.at(.rightParen) && !self.at(.whereKeyword) {
      // Classify the label before consuming it so that an unknown one can be
      // left in place.
      guard let knownParameter = SpecializeParameter(rawValue: self.currentToken.tokenText) else {
        break
      }
      let ident = self.parseAnyIdentifier()
      let (unexpectedBeforeColon, colon) = self.expect(.colon)

      switch knownParameter {
      case .target:
        let (targetFunction, args) = self.parseDeclNameRef([ .zeroArgCompoundNames, .keywordsUsingSpecialNames, .operators ])
        let declName = RawDeclNameSyntax(
          declBaseName: RawSyntax(targetFunction),
          declNameArguments: args,
          arena: self.arena)
        let comma = self.consume(if: .comma)
        elements.append(RawSyntax(RawTargetFunctionEntrySyntax(
          label: ident,
          unexpectedBeforeColon,
          colon: colon,
          declname: declName,
          trailingComma: comma,
          arena: self.arena
        )))
      case .availability:
        let availability = self.parseAvailabilitySpecList(from: .available)
        // FIXME: This is modeled incorrectly in libSyntax.
        let semi = RawTokenSyntax(missing: .semicolon, arena: self.arena)
        elements.append(RawSyntax(RawAvailabilityEntrySyntax(
          label: ident,
          unexpectedBeforeColon,
          colon: colon,
          availabilityList: availability,
          semicolon: semi,
          arena: self.arena
        )))
      case .available:
        // FIXME: It is unclear what value this label is supposed to carry.
        // The syntax tree only allows a single token here, so any token is
        // accepted.
        let available = self.consumeAnyToken()
        let comma = self.consume(if: .comma)
        elements.append(RawSyntax(RawLabeledSpecializeEntrySyntax(
          label: ident,
          unexpectedBeforeColon,
          colon: colon,
          value: available,
          trailingComma: comma,
          arena: self.arena
        )))
      case .exported:
        let (unexpectedBeforeValue, value) = self.expectAny([.trueKeyword, .falseKeyword], default: .falseKeyword)
        let comma = self.consume(if: .comma)
        elements.append(RawSyntax(RawLabeledSpecializeEntrySyntax(
          label: ident,
          unexpectedBeforeColon,
          colon: colon,
          unexpectedBeforeValue,
          value: value,
          trailingComma: comma,
          arena: self.arena
        )))
      case .kind:
        let valueLabel = self.parseAnyIdentifier()
        let comma = self.consume(if: .comma)
        elements.append(RawSyntax(RawLabeledSpecializeEntrySyntax(
          label: ident,
          unexpectedBeforeColon,
          colon: colon,
          value: valueLabel,
          trailingComma: comma,
          arena: self.arena
        )))
      case .spiModule, .spi:
        let valueLabel = self.consumeAnyToken()
        let comma = self.consume(if: .comma)
        elements.append(RawSyntax(RawLabeledSpecializeEntrySyntax(
          label: ident,
          unexpectedBeforeColon,
          colon: colon,
          value: valueLabel,
          trailingComma: comma,
          arena: self.arena
        )))
      }
    }

    // Parse the where clause.
    if self.at(.whereKeyword) {
      let whereClause = self.parseGenericWhereClause()
      elements.append(RawSyntax(whereClause))
    }
    return RawSpecializeAttributeSpecListSyntax(elements: elements, arena: self.arena)
  }
}
extension Parser {
  /// Parses a `@_dynamicReplacement(for: <decl-name>)` attribute.
  ///
  /// The current token must be the `@` sign and the token after it must spell
  /// `_dynamicReplacement` (checked with an assertion). When the argument
  /// label is not `for`, a missing `for` token is synthesized in its place.
  mutating func parseDynamicReplacementAttribute() -> RawAttributeSyntax {
    let atSign = self.eat(.atSign)
    assert(self.currentToken.tokenText == "_dynamicReplacement")
    let attributeName = self.consumeAnyToken()
    let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)

    let forLabel: RawTokenSyntax = self.currentToken.tokenText == "for"
      ? self.consumeAnyToken()
      : RawTokenSyntax(missing: .forKeyword, arena: self.arena)
    let (unexpectedBeforeColon, colon) = self.expect(.colon)

    // The replaced declaration is written as a (possibly compound) decl name.
    let (baseName, nameArguments) = self.parseDeclNameRef([
      .zeroArgCompoundNames, .keywordsUsingSpecialNames, .operators,
    ])
    let replacedDecl = RawDeclNameSyntax(
      declBaseName: RawSyntax(baseName),
      declNameArguments: nameArguments,
      arena: self.arena)
    let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)

    let argument = RawNamedAttributeStringArgumentSyntax(
      nameTok: forLabel,
      unexpectedBeforeColon,
      colon: colon,
      stringOrDeclname: RawSyntax(replacedDecl),
      arena: self.arena
    )
    return RawAttributeSyntax(
      atSignToken: atSign,
      attributeName: attributeName,
      unexpectedBeforeLeftParen,
      leftParen: leftParen,
      argument: RawSyntax(argument),
      unexpectedBeforeRightParen,
      rightParen: rightParen,
      tokenList: nil,
      arena: self.arena)
  }
}
extension Parser.Lookahead {
  /// Decides whether the parenthesized group at the current position is the
  /// argument list of a custom attribute.
  ///
  /// A fresh lookahead skips the group with `skipSingle()` and inspects the
  /// token after it. If that token is one that follows a function type's
  /// parameter list (`->`, `throw`, `throws`, `rethrows`, `async`, `reasync`),
  /// the group is a parameter list rather than an attribute argument. If it is
  /// a closing delimiter (`)`, `}`, `]`, `>`), nothing follows the attribute,
  /// so the parentheses describe what the attribute is attached to. The
  /// answer is `false` in both situations and `true` otherwise.
  func isCustomAttributeArgument() -> Bool {
    var probe = self.lookahead()
    probe.skipSingle()

    let following = probe.currentToken
    switch following.tokenKind {
    case .arrow,
         .throwKeyword,
         .throwsKeyword,
         .rethrowsKeyword,
         .rightParen,
         .rightBrace,
         .rightSquareBracket,
         .rightAngle:
      return false
    default:
      // `async` and `reasync` are contextual keywords, so they are matched
      // by spelling rather than by token kind.
      let isEffectSpecifier = following.isContextualKeyword("async")
        || following.isContextualKeyword("reasync")
      return !isEffectSpecifier
    }
  }

  /// Reports whether a custom attribute can be parsed at the current
  /// position, advancing this lookahead past it on success.
  ///
  /// A custom attribute is a type optionally followed by an argument list.
  /// The argument list is skipped only when its `(` is on the same line as
  /// the type and `isCustomAttributeArgument()` accepts it.
  ///
  /// - Returns: `false` when no type can be parsed, otherwise `true`.
  mutating func canParseCustomAttribute() -> Bool {
    if !self.canParseType() {
      return false
    }

    let hasAdjacentArguments = self.at(.leftParen)
      && !self.currentToken.isAtStartOfLine
      && self.lookahead().isCustomAttributeArgument()
    if hasAdjacentArguments {
      self.skipSingle()
    }

    return true
  }
}
= nil + repeat { + let entry: RawSyntax + switch source { + case .available where self.currentToken.isIdentifier, + .unavailable where self.currentToken.isIdentifier: + entry = RawSyntax(self.parseAvailabilityMacro()) + default: + entry = self.parseAvailabilitySpec() + } + + keepGoing = self.consume(if: .comma) + elements.append(RawAvailabilityArgumentSyntax( + entry: entry, trailingComma: keepGoing, arena: self.arena)) + + // Before continuing to parse the next specification, we check that it's + // also in the shorthand syntax and recover from it. + if + keepGoing != nil, + self.currentToken.isIdentifier, + AvailabilityArgumentKind(rawValue: self.currentToken.tokenText) != nil + { + var tokens = [RawTokenSyntax]() + tokens.append(self.consumeAnyToken()) + while !self.at(.eof) && !self.at(.comma) && !self.at(.rightParen) { + tokens.append(self.consumeAnyToken()) + } + let syntax = RawTokenListSyntax(elements: tokens, arena: self.arena) + keepGoing = self.consume(if: .comma) + elements.append(RawAvailabilityArgumentSyntax( + entry: RawSyntax(syntax), trailingComma: keepGoing, arena: self.arena)) + } + } while keepGoing != nil + } + + return RawAvailabilitySpecListSyntax(elements: elements, arena: self.arena) + } + + enum AvailabilityArgumentKind: SyntaxText { + case message + case renamed + case introduced + case deprecated + case obsoleted + case unavailable + case noasync + } + + mutating func parseExtendedAvailabilitySpecList() -> RawAvailabilitySpecListSyntax { + var elements = [RawAvailabilityArgumentSyntax]() + + // Parse the platform from the first element. + let platform = self.consumeAnyToken() + var keepGoing: RawTokenSyntax? 
= self.consume(if: .comma) + elements.append(RawAvailabilityArgumentSyntax( + entry: RawSyntax(platform), trailingComma: keepGoing, arena: self.arena)) + + do { + while keepGoing != nil { + guard self.currentToken.tokenKind == .identifier, + let argKind = AvailabilityArgumentKind(rawValue: self.currentToken.tokenText) else { + // Not sure what this label is but, let's just eat it and + // keep going. + let arg = self.consumeAnyToken() + keepGoing = self.consume(if: .comma) + elements.append(RawAvailabilityArgumentSyntax( + entry: RawSyntax(arg), trailingComma: keepGoing, arena: self.arena)) + continue + } + + let entry: RawSyntax + switch argKind { + case .message, .renamed: + let argumentLabel = self.consumeAnyToken() + let (unexpectedBeforeColon, colon) = self.expect(.colon) + // FIXME: Make sure this is a string literal with no interpolation. + let stringValue = self.consumeAnyToken() + + entry = RawSyntax(RawAvailabilityLabeledArgumentSyntax( + label: argumentLabel, + unexpectedBeforeColon, + colon: colon, + value: RawSyntax(stringValue), + arena: self.arena + )) + case .introduced, .obsoleted: + let argumentLabel = self.consumeAnyToken() + let (unexpectedBeforeColon, colon) = self.expect(.colon) + let version = self.parseVersionTuple() + entry = RawSyntax(RawAvailabilityLabeledArgumentSyntax( + label: argumentLabel, + unexpectedBeforeColon, + colon: colon, + value: RawSyntax(version), + arena: self.arena + )) + case .deprecated: + let argumentLabel = self.consumeAnyToken() + if self.at(.colon) { + let colon = self.eat(.colon) + let version = self.parseVersionTuple() + entry = RawSyntax(RawAvailabilityLabeledArgumentSyntax( + label: argumentLabel, colon: colon, value: RawSyntax(version), arena: self.arena)) + } else { + entry = RawSyntax(argumentLabel) + } + case .unavailable, .noasync: + let argument = self.consumeAnyToken() + // FIXME: Can we model this in SwiftSyntax by making the + // 'labeled' argument part optional? 
entry = RawSyntax(argument)
        }

        keepGoing = self.consume(if: .comma)
        elements.append(RawAvailabilityArgumentSyntax(
          entry: entry, trailingComma: keepGoing, arena: self.arena))
      }
    }
    return RawAvailabilitySpecListSyntax(elements: elements, arena: self.arena)
  }

  /// Parse one availability argument.
  ///
  /// Grammar
  /// =======
  ///
  /// availability-argument → platform-name platform-version
  /// availability-argument → *
  ///
  /// A `*` operator token is returned as a bare token. The names `swift` and
  /// `_PackageDescription` are parsed as platform-agnostic constraints; any
  /// other token is parsed as a platform-specific constraint.
  mutating func parseAvailabilitySpec() -> RawSyntax {
    let isWildcardStar =
      self.currentToken.isBinaryOperator && self.currentToken.tokenText == "*"
    guard !isWildcardStar else {
      // FIXME: Use makeAvailabilityVersionRestriction here - but swift-format
      // doesn't expect it.
      return RawSyntax(self.consumeAnyToken())
    }

    let mayBeAgnosticName = self.currentToken.isIdentifier || self.at(.wildcardKeyword)
    if mayBeAgnosticName {
      let spelling = self.currentToken.tokenText
      if spelling == "swift" || spelling == "_PackageDescription" {
        return RawSyntax(self.parsePlatformAgnosticVersionConstraintSpec())
      }
    }

    return RawSyntax(self.parsePlatformVersionConstraintSpec())
  }

  /// Parse a platform-agnostic version constraint: one name token followed by
  /// a version tuple.
  ///
  /// The current token must be an identifier or the wildcard keyword; this is
  /// only checked by an `assert`.
  mutating func parsePlatformAgnosticVersionConstraintSpec() -> RawAvailabilityVersionRestrictionSyntax {
    assert(self.currentToken.isIdentifier || self.at(.wildcardKeyword))
    let constraintName = self.consumeAnyToken()
    let constraintVersion = self.parseVersionTuple()
    return RawAvailabilityVersionRestrictionSyntax(
      platform: constraintName,
      version: constraintVersion,
      arena: self.arena)
  }

  /// Parse a platform-specific version constraint.
  ///
  /// The grammar calls out Apple-specific names, even though the Swift compiler
  /// will accept any identifier here. The compiler will diagnose usages of
  /// platforms it doesn't know about later.
  ///
  /// Grammar
  /// =======
  ///
  /// platform-name → iOS | iOSApplicationExtension
  /// platform-name → macOS | macOSApplicationExtension
  /// platform-name → macCatalyst | macCatalystApplicationExtension
  /// platform-name → watchOS
  /// platform-name → tvOS
  mutating func parsePlatformVersionConstraintSpec() -> RawAvailabilityVersionRestrictionSyntax {
    // The platform name is consumed remapped to a contextual keyword token.
    let platformName = self.consume(remapping: .contextualKeyword)
    let platformVersion = self.parseVersionTuple()
    return RawAvailabilityVersionRestrictionSyntax(
      platform: platformName,
      version: platformVersion,
      arena: self.arena)
  }

  /// Parse an availability macro.
  ///
  /// Availability macros are not an official part of the Swift language.
  ///
  /// Grammar
  /// =======
  ///
  /// availability-argument → macro-name platform-version
  ///
  /// The version is only parsed when the token after the macro name is an
  /// integer or floating-point literal; otherwise the result has no version.
  mutating func parseAvailabilityMacro() -> RawAvailabilityVersionRestrictionSyntax {
    let macroName = self.consumeAnyToken()

    let macroVersion: RawVersionTupleSyntax?
    switch self.currentToken.tokenKind {
    case .integerLiteral, .floatingLiteral:
      macroVersion = self.parseVersionTuple()
    default:
      macroVersion = nil
    }

    return RawAvailabilityVersionRestrictionSyntax(
      platform: macroName,
      version: macroVersion,
      arena: self.arena)
  }

  /// Parse a dot-separated list of version numbers.
  ///
  /// Grammar
  /// =======
  ///
  /// platform-version → decimal-digits
  /// platform-version → decimal-digits '.' decimal-digits
  /// platform-version → decimal-digits '.' decimal-digits '.' decimal-digits
  ///
  /// An integer literal forms the whole version. Any other token is taken as
  /// the major/minor part and may be followed by a period and one patch token.
  mutating func parseVersionTuple() -> RawVersionTupleSyntax {
    let isBareInteger = self.currentToken.tokenKind == .integerLiteral
    let leadingPart = self.consumeAnyToken()
    guard !isBareInteger else {
      return RawVersionTupleSyntax(
        majorMinor: RawSyntax(leadingPart), patchPeriod: nil, patchVersion: nil,
        arena: self.arena)
    }

    let dot = self.consume(if: .period)
    // A patch component is only consumed when a period was actually present.
    let patchPart: RawTokenSyntax? = dot == nil ? nil : self.consumeAnyToken()

    return RawVersionTupleSyntax(
      majorMinor: RawSyntax(leadingPart), patchPeriod: dot, patchVersion: patchPart,
      arena: self.arena)
  }
}
diff --git a/Sources/SwiftParser/CharacterInfo.swift b/Sources/SwiftParser/CharacterInfo.swift
new file mode 100644
index 00000000000..d6bcddd7e75
--- /dev/null
+++ b/Sources/SwiftParser/CharacterInfo.swift
@@ -0,0 +1,210 @@
//===----------------------- CharacterInfo.swift --------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

extension Character {
  fileprivate struct Info: OptionSet {
    var rawValue: UInt8

    init(rawValue: UInt8) {
      self.rawValue = rawValue
    }

    static let SPACE   = Character.Info(rawValue: 0x01) // ' '
    static let DIGIT   = Character.Info(rawValue: 0x02) // 0-9
    static let XLETTER = Character.Info(rawValue: 0x04) // a-f,A-F
    static let UPPER   = Character.Info(rawValue: 0x08) // A-Z
    static let LOWER   = Character.Info(rawValue: 0x10) // a-z
    static let UNDER   = Character.Info(rawValue: 0x20) // _
    static let PERIOD  = Character.Info(rawValue: 0x40) // .
static let PUNCT   = Character.Info(rawValue: 0x80) // `$@()

    static let XUPPER: Character.Info = [ .XLETTER, .UPPER ]

    static let XLOWER: Character.Info = [ .XLETTER, .LOWER ]
  }
}

extension Unicode.Scalar {
  /// Whether this scalar's value is at most 127.
  var isASCII: Bool {
    return self.value <= 127
  }

  /// Whether this scalar is an ASCII letter or an underscore, i.e. may start
  /// an ASCII identifier.
  var isAsciiIdentifierStart: Bool {
    return self.testRawInfoTable { entry in
      !entry.isDisjoint(with: [ .UPPER, .LOWER, .UNDER ])
    }
  }

  /// Whether this scalar is an ASCII letter, an ASCII digit or an underscore,
  /// i.e. may appear after the first character of an ASCII identifier.
  var isAsciiIdentifierContinue: Bool {
    return self.testRawInfoTable { entry in
      !entry.isDisjoint(with: [ .UPPER, .LOWER, .DIGIT, .UNDER ])
    }
  }

  /// Whether this scalar is one of the ASCII digits `0`-`9`.
  var isDigit: Bool {
    return self.testRawInfoTable { entry in
      entry.contains(.DIGIT)
    }
  }

  /// Whether this scalar is an uppercase or lowercase ASCII letter.
  var isLetter: Bool {
    return self.testRawInfoTable { entry in
      !entry.isDisjoint(with: [ .UPPER, .LOWER ])
    }
  }

  /// Whether this scalar is an ASCII hexadecimal digit (`0`-`9`, `a`-`f`,
  /// `A`-`F`).
  var isHexDigit: Bool {
    return self.testRawInfoTable { entry in
      !entry.isDisjoint(with: [ .DIGIT, .XLETTER ])
    }
  }
}

extension Unicode.Scalar {
  /// Looks up this scalar's entry in `InfoTable` and passes it to
  /// `performTest`.
  ///
  /// - Parameter performTest: Predicate evaluated on the table entry.
  /// - Returns: `false` for every non-ASCII scalar (the predicate is not
  ///   called); otherwise the predicate's result.
  private func testRawInfoTable(
    _ performTest: (Character.Info) -> Bool
  ) -> Bool {
    // Only the 128 ASCII code points have a table entry.
    guard self.isASCII else {
      return false
    }
    return withUnsafePointer(to: &InfoTable) { tableStart in
      // View the 128-element tuple as a buffer of `Character.Info` so it can
      // be indexed by the scalar value.
      let entries = UnsafeRawBufferPointer(start: tableStart, count: 0x80)
        .assumingMemoryBound(to: Character.Info.self)
      return performTest(entries[Int(self.value)])
    }
  }
}

#if swift(<5.7)
extension UnsafeRawBufferPointer {
  /// Returns a typed buffer to the memory referenced by this buffer,
  /// assuming that the memory is already bound to the specified type.
  ///
  /// Use this method when you have a raw buffer to memory that has *already*
  /// been bound to the specified type. The memory starting at this pointer
  /// must be bound to the type `T`. Accessing memory through the returned
  /// pointer is undefined if the memory has not been bound to `T`. To bind
  /// memory to `T`, use `bindMemory(to:capacity:)` instead of this method.
  ///
  /// - Note: The buffer's base address must match the
  ///   alignment of `T` (as reported by `MemoryLayout<T>.alignment`).
  ///   That is, `Int(bitPattern: self.baseAddress) % MemoryLayout<T>.alignment`
  ///   must equal zero.
  ///
  /// - Parameter to: The type `T` that the memory has already been bound to.
  /// - Returns: A typed pointer to the same memory as this raw pointer. An
  ///   empty buffer with a `nil` base address is returned when this buffer
  ///   has no base address.
  fileprivate func assumingMemoryBound<T>(
    to: T.Type
  ) -> UnsafeBufferPointer<T> {
    guard let base = self.baseAddress else {
      return .init(start: nil, count: 0)
    }
    // The element count is the byte count divided by the element stride.
    let elementCount = self.count / MemoryLayout<T>.stride
    return .init(start: base.assumingMemoryBound(to: T.self), count: elementCount)
  }
}
#endif

/// Classification flags for the 128 ASCII code points, indexed by code point.
private var InfoTable: CharacterInfoTable = (
  // 0 NUL   1 SOH   2 STX   3 ETX
  // 4 EOT   5 ENQ   6 ACK   7 BEL
  [], [], [], [],
  [], [], [], [],
  // 8 BS    9 HT   10 NL   11 VT
  //12 NP   13 CR   14 SO   15 SI
  [], [], [], [],
  [], [], [], [],
  //16 DLE  17 DC1  18 DC2  19 DC3
  //20 DC4  21 NAK  22 SYN  23 ETB
  [], [], [], [],
  [], [], [], [],
  //24 CAN  25 EM   26 SUB  27 ESC
  //28 FS   29 GS   30 RS   31 US
  [], [], [], [],
  [], [], [], [],
  //32 SP   33  !   34  "   35  #
  //36  $   37  %   38  &   39  '
  .SPACE, [], [], [],
  .PUNCT, [], [], [],
  //40  (   41  )   42  *   43  +
  //44  ,   45  -   46  .   47  /
  .PUNCT, .PUNCT, [], [],
  [], [], .PERIOD, [],
  //48  0   49  1   50  2   51  3
  //52  4   53  5   54  6   55  7
  .DIGIT, .DIGIT, .DIGIT, .DIGIT,
  .DIGIT, .DIGIT, .DIGIT, .DIGIT,
  //56  8   57  9   58  :   59  ;
  //60  <   61  =   62  >   63  ?
  .DIGIT, .DIGIT, [], [],
  [], [], [], [],
  //64  @   65  A   66  B   67  C
  //68  D   69  E   70  F   71  G
  .PUNCT, .XUPPER, .XUPPER, .XUPPER,
  .XUPPER, .XUPPER, .XUPPER, .UPPER,
  //72  H   73  I   74  J   75  K
  //76  L   77  M   78  N   79  O
  .UPPER, .UPPER, .UPPER, .UPPER,
  .UPPER, .UPPER, .UPPER, .UPPER,
  //80  P   81  Q   82  R   83  S
  //84  T   85  U   86  V   87  W
  .UPPER, .UPPER, .UPPER, .UPPER,
  .UPPER, .UPPER, .UPPER, .UPPER,
  //88  X   89  Y   90  Z   91  [
  //92  \   93  ]   94  ^   95  _
  .UPPER, .UPPER, .UPPER, [],
  .PUNCT, [], [], .UNDER,
  //96  `   97  a   98  b   99  c
  //100 d  101  e  102  f  103  g
  .PUNCT, .XLOWER, .XLOWER, .XLOWER,
  .XLOWER, .XLOWER, .XLOWER, .LOWER,
  //104 h  105  i  106  j  107  k
  //108 l  109  m  110  n  111  o
  .LOWER, .LOWER, .LOWER, .LOWER,
  .LOWER, .LOWER, .LOWER, .LOWER,
  //112 p  113  q  114  r  115  s
  //116 t  117  u  118  v  119  w
  .LOWER, .LOWER, .LOWER, .LOWER,
  .LOWER, .LOWER, .LOWER, .LOWER,
  //120 x  121  y  122  z  123  {
  //124 |  125  }  126  ~  127 DEL
  .LOWER, .LOWER, .LOWER, [],
  [], [], [], [])

/// A 128-element homogeneous tuple of `Character.Info`, one per ASCII code
/// point.
private typealias CharacterInfoTable = (
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
  Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info
)
diff --git a/Sources/SwiftParser/Declarations.swift b/Sources/SwiftParser/Declarations.swift
new file mode 100644
index 00000000000..db71bdc7bc8
--- /dev/null
+++ b/Sources/SwiftParser/Declarations.swift
@@ -0,0 +1,1845 @@
//===----------------------- Declarations.swift ---------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2022 Apple Inc.
// and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

@_spi(RawSyntax) import SwiftSyntax

extension Parser {
  /// The attribute list and modifier list parsed ahead of a declaration
  /// keyword; either may be absent.
  @_spi(RawSyntax)
  public struct DeclAttributes {
    public var attributes: RawAttributeListSyntax?
    public var modifiers: RawModifierListSyntax?

    public init(attributes: RawAttributeListSyntax?, modifiers: RawModifierListSyntax?) {
      self.attributes = attributes
      self.modifiers = modifiers
    }
  }

  /// Parse a declaration.
  ///
  /// Grammar
  /// =======
  ///
  /// declaration → import-declaration
  /// declaration → constant-declaration
  /// declaration → variable-declaration
  /// declaration → typealias-declaration
  /// declaration → function-declaration
  /// declaration → enum-declaration
  /// declaration → struct-declaration
  /// declaration → class-declaration
  /// declaration → actor-declaration
  /// declaration → protocol-declaration
  /// declaration → initializer-declaration
  /// declaration → deinitializer-declaration
  /// declaration → extension-declaration
  /// declaration → subscript-declaration
  /// declaration → operator-declaration
  /// declaration → precedence-group-declaration
  ///
  /// declarations → declaration declarations?
  ///
  /// `#if` directives and `#warning`/`#error` are handled before attributes
  /// and modifiers are parsed. When no declaration keyword follows, a missing
  /// declaration carrying the parsed attributes and modifiers is returned.
  @_spi(RawSyntax)
  public mutating func parseDeclaration() -> RawDeclSyntax {
    if self.at(.poundIfKeyword) {
      return RawDeclSyntax(self.parsePoundIfDirective { parser in
        var member = parser.parseDeclaration()
        if member.is(RawMissingDeclSyntax.self) {
          // Try to recover from a bogus decl: collect recovered tokens until
          // the end of the file or the next directive clause is reached.
          var skipped = [RawTokenSyntax]()
          while !(parser.at(.eof) || parser.at(.poundElseKeyword)
                    || parser.at(.poundElseifKeyword) || parser.at(.poundEndifKeyword)) {
            let recovered = parser.recover()
            if recovered.isEmpty {
              break
            }
            skipped.append(contentsOf: recovered)
          }
          let unexpected = RawUnexpectedNodesSyntax(
            elements: skipped.map(RawSyntax.init), arena: parser.arena)
          member = RawDeclSyntax(RawMissingDeclSyntax(
            unexpected, attributes: nil, modifiers: nil, arena: parser.arena))
        }
        let terminator = parser.consume(if: .semicolon)
        return RawMemberDeclListItemSyntax(
          decl: member,
          semicolon: terminator,
          arena: parser.arena)
      }
      syntax: { parser, elements in
        return RawSyntax(RawMemberDeclListSyntax(elements: elements, arena: parser.arena))
      })
    }
    if self.at(.poundWarningKeyword) || self.at(.poundErrorKeyword) {
      return self.parsePoundDiagnosticDeclaration()
    }

    // Attributes are parsed before modifiers.
    let leadingAttributes = self.parseAttributeList()
    let leadingModifiers = self.parseModifierList()
    let attrs = DeclAttributes(attributes: leadingAttributes, modifiers: leadingModifiers)

    switch self.currentToken.tokenKind {
    case .importKeyword:
      return RawDeclSyntax(self.parseImportDeclaration(attrs))
    case .classKeyword:
      return RawDeclSyntax(self.parseClassDeclaration(attrs))
    case .enumKeyword:
      return RawDeclSyntax(self.parseEnumDeclaration(attrs))
    case .caseKeyword:
      return RawDeclSyntax(self.parseDeclEnumCase(attrs))
    case .structKeyword:
      return RawDeclSyntax(self.parseStructDeclaration(attrs))
    case .protocolKeyword:
      return RawDeclSyntax(self.parseProtocolDeclaration(attrs))
    case .associatedtypeKeyword:
      return RawDeclSyntax(self.parseAssociatedTypeDeclaration(attrs))
    case .typealiasKeyword:
      return RawDeclSyntax(self.parseTypealiasDeclaration(attrs))
    case .extensionKeyword:
      return RawDeclSyntax(self.parseExtensionDeclaration(attrs))
    case .funcKeyword:
      return RawDeclSyntax(self.parseFuncDeclaration(attrs))
    case .subscriptKeyword:
      return RawDeclSyntax(self.parseSubscriptDeclaration(attrs))
    case .letKeyword, .varKeyword:
      return RawDeclSyntax(self.parseLetOrVarDeclaration(attrs))
    case .initKeyword:
      return RawDeclSyntax(self.parseInitializerDeclaration(attrs))
    case .deinitKeyword:
      return RawDeclSyntax(self.parseDeinitializerDeclaration(attrs))
    case .operatorKeyword:
      return RawDeclSyntax(self.parseOperatorDeclaration(attrs))
    case .precedencegroupKeyword:
      return RawDeclSyntax(self.parsePrecedenceGroupDeclaration(attrs))
    case _ where self.currentToken.isContextualKeyword("actor"):
      return RawDeclSyntax(self.parseActorDeclaration(attrs))
    default:
      return RawDeclSyntax(RawMissingDeclSyntax(
        attributes: attrs.attributes,
        modifiers: attrs.modifiers,
        arena: self.arena))
    }
  }
}

extension Parser {
  /// Parse an import declaration.
  ///
  /// Grammar
  /// =======
  ///
  /// import-declaration → attributes? 'import' import-kind? import-path
  /// import-kind → 'typealias' | 'struct' | 'class' | 'enum' | 'protocol' | 'let' | 'var' | 'func'
  /// import-path → identifier | identifier '.' import-path
  @_spi(RawSyntax)
  public mutating func parseImportDeclaration(_ attrs: DeclAttributes) -> RawImportDeclSyntax {
    let importToken = self.eat(.importKeyword)
    let importKind = self.parseImportKind()
    let accessPath = self.parseImportAccessPath()
    return RawImportDeclSyntax(
      attributes: attrs.attributes,
      modifiers: attrs.modifiers,
      importTok: importToken,
      importKind: importKind,
      path: accessPath,
      arena: self.arena)
  }

  /// Consume the optional import-kind keyword.
  ///
  /// - Returns: The consumed keyword token, or `nil` (consuming nothing) when
  ///   the current token is not one of the import kinds.
  @_spi(RawSyntax)
  public mutating func parseImportKind() -> RawTokenSyntax? {
    guard self.currentToken.tokenKind.isKeyword else {
      return nil
    }

    switch self.currentToken.tokenKind {
    case .typealiasKeyword, .structKeyword, .classKeyword, .enumKeyword,
         .protocolKeyword, .varKeyword, .letKeyword, .funcKeyword:
      return self.consumeAnyToken()
    default:
      return nil
    }
  }

  /// Parse a period-separated import path. At least one component is always
  /// produced; parsing continues for as long as a period follows a component.
  @_spi(RawSyntax)
  public mutating func parseImportAccessPath() -> RawAccessPathSyntax {
    var components = [RawAccessPathComponentSyntax]()
    while true {
      let componentName = self.parseAnyIdentifier()
      let dot = self.consume(if: .period)
      components.append(RawAccessPathComponentSyntax(
        name: componentName, trailingDot: dot, arena: self.arena))
      if dot == nil {
        break
      }
    }
    return RawAccessPathSyntax(elements: components, arena: self.arena)
  }
}

extension Parser {
  /// Parse an extension declaration.
  ///
  /// Grammar
  /// =======
  ///
  /// extension-declaration → attributes? access-level-modifier? 'extension' type-identifier type-inheritance-clause? generic-where-clause? extension-body
  /// extension-body → '{' extension-members? '}'
  /// extension-members → extension-member extension-members?
  /// extension-member → declaration | compiler-control-statement
  @_spi(RawSyntax)
  public mutating func parseExtensionDeclaration(_ attrs: DeclAttributes) -> RawExtensionDeclSyntax {
    let extensionKeyword = self.eat(.extensionKeyword)
    let type = self.parseType()

    let inheritance: RawTypeInheritanceClauseSyntax?
    if self.at(.colon) {
      inheritance = self.parseInheritance()
    } else {
      inheritance = nil
    }

    let whereClause: RawGenericWhereClauseSyntax?
if self.at(.whereKeyword) {
      whereClause = self.parseGenericWhereClause()
    } else {
      whereClause = nil
    }
    let members = self.parseMemberDeclList()
    return RawExtensionDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      extensionKeyword: extensionKeyword,
      extendedType: type,
      inheritanceClause: inheritance,
      genericWhereClause: whereClause,
      members: members,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse a generic parameter clause: `<`, a comma-separated list of
  /// parameters, and `>`.
  ///
  /// The current token must start with `<` (checked by `assert`). Each
  /// parameter is an attribute list, an identifier and an optional
  /// `':' type`. A missing `>` is represented by a missing token.
  @_spi(RawSyntax)
  public mutating func parseGenericParameters() -> RawGenericParameterClauseSyntax {
    assert(self.currentToken.starts(with: "<"))

    let langle = self.consume(remapping: .leftAngle)
    var elements = [RawGenericParameterSyntax]()
    do {
      var keepGoing: RawTokenSyntax? = nil
      repeat {
        let attributes = self.parseAttributeList()

        let name = self.consumeIdentifier()
        // No name at the very first position: leave the list empty.
        if name.isMissing && elements.isEmpty {
          break
        }

        // Parse the ':' followed by a type.
        let colon = self.consume(if: .colon)
        var inherited: RawTypeSyntax? = nil
        if colon != nil {
          switch self.currentToken.tokenKind {
          case .identifier, .protocolKeyword, .anyKeyword:
            inherited = self.parseType()
          default:
            break
          }
        }
        keepGoing = self.consume(if: .comma)
        elements.append(RawGenericParameterSyntax(
          attributes: attributes,
          name: name,
          colon: colon,
          inheritedType: inherited,
          trailingComma: keepGoing,
          arena: self.arena))
      } while keepGoing != nil
    }

    let rangle: RawTokenSyntax
    if self.currentToken.starts(with: ">") {
      rangle = self.consume(remapping: .rightAngle)
    } else {
      rangle = RawTokenSyntax(missing: .rightAngle, arena: self.arena)
    }

    // The original branched on `elements.isEmpty && rangle.isMissing`, but
    // both branches built the same list, so a single construction suffices.
    let parameters = RawGenericParameterListSyntax(elements: elements, arena: self.arena)
    return RawGenericParameterClauseSyntax(
      leftAngleBracket: langle,
      genericParameterList: parameters,
      rightAngleBracket: rangle,
      arena: self.arena)
  }

  /// The layout-constraint names recognized after the ':' of a generic
  /// requirement, keyed by their source spelling.
  enum LayoutConstraint: SyntaxText {
    case trivialLayout = "_Trivial"
    case trivialAtMostLayout = "_TrivialAtMost"
    case unknownLayout = "_UnknownLayout"
    case refCountedObjectLayout = "_RefCountedObject"
    case nativeRefCountedObjectLayout = "_NativeRefCountedObject"
    case classLayout = "_Class"
    case nativeClassLayout = "_NativeClass"

    /// Whether the constraint can be followed by a parenthesized argument
    /// list.
    var hasArguments: Bool {
      switch self {
      case .trivialLayout,
           .trivialAtMostLayout:
        return true

      case .unknownLayout,
           .refCountedObjectLayout,
           .nativeRefCountedObjectLayout,
           .classLayout,
           .nativeClassLayout:
        return false
      }
    }
  }

  /// Parse a generic `where` clause: the `where` keyword followed by a
  /// comma-separated list of requirements.
  ///
  /// Each requirement starts with a type. A following ':' introduces either a
  /// layout constraint (when the next identifier is a `LayoutConstraint`
  /// spelling) or a conformance requirement; `==` or `=` introduces a
  /// same-type requirement. When the leading type is missing, a missing
  /// requirement body is recorded instead.
  @_spi(RawSyntax)
  public mutating func parseGenericWhereClause() -> RawGenericWhereClauseSyntax {
    let whereKeyword = self.eat(.whereKeyword)

    var elements = [RawGenericRequirementSyntax]()
    do {
      var keepGoing: RawTokenSyntax? = nil
      repeat {
        let firstType = self.parseType()
        guard !firstType.is(RawMissingTypeSyntax.self) else {
          keepGoing = self.consume(if: .comma)
          elements.append(RawGenericRequirementSyntax(
            body: RawSyntax(RawMissingSyntax(arena: self.arena)),
            trailingComma: keepGoing,
            arena: self.arena
          ))
          continue
        }

        let requirement: RawSyntax
        if self.at(.colon) {
          // A conformance-requirement.
          let colon = self.eat(.colon)
          if self.currentToken.isIdentifier, let layoutConstraint = LayoutConstraint(rawValue: self.currentToken.tokenText) {
            // Parse a layout constraint.
            let constraint = self.consumeIdentifier()

            let unexpectedBeforeLeftParen: RawUnexpectedNodesSyntax?
            let leftParen: RawTokenSyntax?
            let size: RawTokenSyntax?
            let comma: RawTokenSyntax?
            let alignment: RawTokenSyntax?
            let unexpectedBeforeRightParen: RawUnexpectedNodesSyntax?
            let rightParen: RawTokenSyntax?
            // Unlike the other layout constraints, _Trivial's argument list
            // is optional.
            if layoutConstraint.hasArguments && !(layoutConstraint == .trivialLayout && !self.at(.leftParen)) {
              (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
              size = self.consumeInteger()
              comma = self.consume(if: .comma)
              if comma != nil {
                alignment = self.consumeInteger()
              } else {
                alignment = nil
              }
              (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
            } else {
              unexpectedBeforeLeftParen = nil
              leftParen = nil
              size = nil
              comma = nil
              alignment = nil
              unexpectedBeforeRightParen = nil
              rightParen = nil
            }

            requirement = RawSyntax(RawLayoutRequirementSyntax(
              typeIdentifier: firstType,
              colon: colon, layoutConstraint: constraint,
              unexpectedBeforeLeftParen,
              leftParen: leftParen,
              size: size,
              comma: comma,
              alignment: alignment,
              unexpectedBeforeRightParen,
              rightParen: rightParen,
              arena: self.arena))
          } else {
            // Parse the protocol or composition.
            let secondType = self.parseType()
            requirement = RawSyntax(RawConformanceRequirementSyntax(
              leftTypeIdentifier: firstType,
              colon: colon,
              rightTypeIdentifier: secondType,
              arena: self.arena))
          }
        } else if (self.currentToken.isAnyOperator && self.currentToken.tokenText == "==") || self.at(.equal) {
          let equal = self.consumeAnyToken()
          let secondType = self.parseType()
          requirement = RawSyntax(RawSameTypeRequirementSyntax(
            leftTypeIdentifier: firstType,
            equalityToken: equal,
            rightTypeIdentifier: secondType,
            arena: self.arena))
        } else {
          // Neither ':' nor '==' follows the type. Keep the type that was
          // already parsed so its tokens stay in the tree, and mark only the
          // operator and the right-hand side as missing.
          requirement = RawSyntax(RawSameTypeRequirementSyntax(
            leftTypeIdentifier: firstType,
            equalityToken: RawTokenSyntax(missing: .equal, arena: self.arena),
            rightTypeIdentifier: RawTypeSyntax(RawMissingTypeSyntax(arena: self.arena)),
            arena: self.arena
          ))
        }

        keepGoing = self.consume(if: .comma)
        elements.append(RawGenericRequirementSyntax(
          body: requirement, trailingComma: keepGoing, arena: self.arena))
      } while keepGoing != nil
    }

    return RawGenericWhereClauseSyntax(
      whereKeyword: whereKeyword,
      requirementList: RawGenericRequirementListSyntax(elements: elements, arena: self.arena),
      arena: self.arena)
  }
}

extension Parser {
  /// Parse a brace-delimited member block.
  ///
  /// Members are parsed until `}` or end of file. Parsing also stops after
  /// the first member that comes back as a missing declaration; that member
  /// is still recorded in the list.
  @_spi(RawSyntax)
  public mutating func parseMemberDeclList() -> RawMemberDeclBlockSyntax {
    var elements = [RawMemberDeclListItemSyntax]()
    let (unexpectedBeforeLBrace, lbrace) = self.expect(.leftBrace)
    do {
      while !self.at(.eof) && !self.at(.rightBrace) {
        let decl = self.parseDeclaration()
        let semi = self.consume(if: .semicolon)
        elements.append(RawMemberDeclListItemSyntax(
          decl: decl, semicolon: semi, arena: self.arena))
        if decl.is(RawMissingDeclSyntax.self) {
          break
        }
      }
    }
    let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace)
    // The original branched on empty elements with a missing brace, but both
    // branches built the same list, so a single construction suffices.
    let members = RawMemberDeclListSyntax(elements: elements, arena: self.arena)

    return RawMemberDeclBlockSyntax(
      unexpectedBeforeLBrace,
      leftBrace: lbrace,
      members: members,
      unexpectedBeforeRBrace,
      rightBrace: rbrace,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse a class declaration.
  ///
  /// Grammar
  /// =======
  ///
  /// class-declaration → attributes? access-level-modifier? final? 'class' class-name generic-parameter-clause? type-inheritance-clause? generic-where-clause? class-body
  /// class-declaration → attributes? final access-level-modifier? 'class' class-name generic-parameter-clause? type-inheritance-clause? generic-where-clause? class-body
  ///
  /// class-name → identifier
  ///
  /// class-body → '{' class-members? '}'
  ///
  /// class-members → class-member class-members?
  /// class-member → declaration | compiler-control-statement
  @_spi(RawSyntax)
  public mutating func parseClassDeclaration(_ attrs: DeclAttributes) -> RawClassDeclSyntax {
    let classKeyword = self.eat(.classKeyword)
    let name = self.consumeIdentifier()
    if name.isMissing {
      return RawClassDeclSyntax(
        attributes: attrs.attributes,
        modifiers: attrs.modifiers,
        classKeyword: classKeyword,
        identifier: name,
        genericParameterClause: nil,
        inheritanceClause: nil,
        genericWhereClause: nil,
        members: RawMemberDeclBlockSyntax(
          leftBrace: RawTokenSyntax(missing: .leftBrace, arena: self.arena),
          members: RawMemberDeclListSyntax(elements: [], arena: self.arena),
          rightBrace: RawTokenSyntax(missing: .rightBrace, arena: self.arena),
          arena: self.arena
        ),
        arena: self.arena)
    }

    let generics: RawGenericParameterClauseSyntax?
    if self.currentToken.starts(with: "<") {
      generics = self.parseGenericParameters()
    } else {
      generics = nil
    }

    let inheritance: RawTypeInheritanceClauseSyntax?
if self.at(.colon) {
      inheritance = self.parseInheritance()
    } else {
      inheritance = nil
    }

    // Parse a 'where' clause if present.
    let whereClause: RawGenericWhereClauseSyntax?
    if self.at(.whereKeyword) {
      whereClause = self.parseGenericWhereClause()
    } else {
      whereClause = nil
    }

    let members = self.parseMemberDeclList()
    return RawClassDeclSyntax(
      attributes: attrs.attributes,
      modifiers: attrs.modifiers,
      classKeyword: classKeyword,
      identifier: name,
      genericParameterClause: generics,
      inheritanceClause: inheritance,
      genericWhereClause: whereClause,
      members: members,
      arena: self.arena)
  }

  /// Parse a type inheritance clause.
  ///
  /// Grammar
  /// =======
  ///
  /// type-inheritance-clause → ':' type-inheritance-list
  /// type-inheritance-list → attributes? type-identifier | attributes? type-identifier ',' type-inheritance-list
  ///
  /// A `class` keyword in the list is recorded as a class-restriction type;
  /// every other entry is parsed as a type.
  @_spi(RawSyntax)
  public mutating func parseInheritance() -> RawTypeInheritanceClauseSyntax {
    let colon = self.eat(.colon)
    var inheritedTypes = [RawInheritedTypeSyntax]()
    var separator: RawTokenSyntax? = nil
    repeat {
      let inheritedType: RawTypeSyntax
      if self.at(.classKeyword) {
        let restriction = self.eat(.classKeyword)
        inheritedType = RawTypeSyntax(RawClassRestrictionTypeSyntax(
          classKeyword: restriction,
          arena: self.arena))
      } else {
        inheritedType = self.parseType()
      }

      separator = self.consume(if: .comma)
      inheritedTypes.append(RawInheritedTypeSyntax(
        typeName: inheritedType, trailingComma: separator, arena: self.arena))
    } while separator != nil

    let typeList = RawInheritedTypeListSyntax(elements: inheritedTypes, arena: self.arena)
    return RawTypeInheritanceClauseSyntax(
      colon: colon,
      inheritedTypeCollection: typeList,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse an enum declaration.
  ///
  /// Grammar
  /// =======
  ///
  /// enum-declaration → attributes? access-level-modifier? union-style-enum
  /// enum-declaration → attributes? access-level-modifier? raw-value-style-enum
  ///
  /// union-style-enum → 'indirect'? 'enum' enum-name generic-parameter-clause? type-inheritance-clause? generic-where-clause? '{' union-style-enum-members? '}'
  /// union-style-enum-members → union-style-enum-member union-style-enum-members?
  /// union-style-enum-member → declaration | union-style-enum-case-clause | compiler-control-statement
  ///
  /// enum-name → identifier
  /// enum-case-name → identifier
  ///
  /// raw-value-style-enum → 'enum' enum-name generic-parameter-clause? type-inheritance-clause generic-where-clause? '{' raw-value-style-enum-members '}'
  /// raw-value-style-enum-members → raw-value-style-enum-member raw-value-style-enum-members?
  /// raw-value-style-enum-member → declaration | raw-value-style-enum-case-clause | compiler-control-statement
  @_spi(RawSyntax)
  public mutating func parseEnumDeclaration(_ attrs: DeclAttributes) -> RawEnumDeclSyntax {
    let enumKeyword = self.eat(.enumKeyword)
    let name = self.consumeIdentifier()
    if name.isMissing {
      return RawEnumDeclSyntax(
        attributes: attrs.attributes,
        modifiers: attrs.modifiers,
        enumKeyword: enumKeyword,
        identifier: name,
        genericParameters: nil,
        inheritanceClause: nil,
        genericWhereClause: nil,
        members: RawMemberDeclBlockSyntax(
          leftBrace: RawTokenSyntax(missing: .leftBrace, arena: self.arena),
          members: RawMemberDeclListSyntax(elements: [], arena: self.arena),
          rightBrace: RawTokenSyntax(missing: .rightBrace, arena: self.arena),
          arena: self.arena
        ),
        arena: self.arena)
    }

    let generics: RawGenericParameterClauseSyntax?
    if self.currentToken.starts(with: "<") {
      generics = self.parseGenericParameters()
    } else {
      generics = nil
    }

    let inheritance: RawTypeInheritanceClauseSyntax?
    if self.at(.colon) {
      inheritance = self.parseInheritance()
    } else {
      inheritance = nil
    }

    // Parse a 'where' clause if present.
    let whereClause: RawGenericWhereClauseSyntax?
if self.at(.whereKeyword) {
      whereClause = self.parseGenericWhereClause()
    } else {
      whereClause = nil
    }

    let members = self.parseMemberDeclList()
    return RawEnumDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      enumKeyword: enumKeyword,
      identifier: name,
      genericParameters: generics,
      inheritanceClause: inheritance,
      genericWhereClause: whereClause,
      members: members,
      arena: self.arena)
  }

  /// Parse an enum 'case' declaration.
  ///
  /// Grammar
  /// =======
  ///
  /// union-style-enum-case-clause → attributes? 'indirect'? 'case' union-style-enum-case-list
  /// union-style-enum-case-list → union-style-enum-case | union-style-enum-case ',' union-style-enum-case-list
  /// union-style-enum-case → enum-case-name tuple-type?
  ///
  /// raw-value-style-enum-case-clause → attributes? 'case' raw-value-style-enum-case-list
  /// raw-value-style-enum-case-list → raw-value-style-enum-case | raw-value-style-enum-case ',' raw-value-style-enum-case-list
  /// raw-value-style-enum-case → enum-case-name raw-value-assignment?
  /// raw-value-assignment → = raw-value-literal
  /// raw-value-literal → numeric-literal | static-string-literal | boolean-literal
  ///
  /// An associated-value clause is only parsed when the `(` is not at the
  /// start of a line.
  @_spi(RawSyntax)
  public mutating func parseDeclEnumCase(_ attrs: DeclAttributes) -> RawEnumCaseDeclSyntax {
    let caseKeyword = self.eat(.caseKeyword)
    var caseElements = [RawEnumCaseElementSyntax]()
    var separator: RawTokenSyntax? = nil
    repeat {
      let caseName = self.consumeIdentifier()

      var payload: RawParameterClauseSyntax? = nil
      if self.at(.leftParen) && !self.currentToken.isAtStartOfLine {
        payload = self.parseParameterClause()
      }

      // See if there's a raw value expression.
      var rawValueClause: RawInitializerClauseSyntax? = nil
      if self.at(.equal) {
        let equalToken = self.eat(.equal)
        let valueExpr = self.parseExpression()
        rawValueClause = RawInitializerClauseSyntax(
          equal: equalToken, value: valueExpr, arena: self.arena)
      }

      // Continue through the comma-separated list.
      separator = self.consume(if: .comma)
      caseElements.append(RawEnumCaseElementSyntax(
        identifier: caseName,
        associatedValue: payload,
        rawValue: rawValueClause,
        trailingComma: separator,
        arena: self.arena))
    } while separator != nil

    let elementList = RawEnumCaseElementListSyntax(elements: caseElements, arena: self.arena)
    return RawEnumCaseDeclSyntax(
      attributes: attrs.attributes,
      modifiers: attrs.modifiers,
      caseKeyword: caseKeyword,
      elements: elementList,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse a struct declaration.
  ///
  /// Grammar
  /// =======
  ///
  /// struct-declaration → attributes? access-level-modifier? struct struct-name generic-parameter-clause? type-inheritance-clause? generic-where-clause? struct-body
  ///
  /// struct-name → identifier
  ///
  /// struct-body → '{' struct-members? '}'
  ///
  /// struct-members → struct-member struct-members?
  /// struct-member → declaration | compiler-control-statement
  @_spi(RawSyntax)
  public mutating func parseStructDeclaration(_ attrs: DeclAttributes) -> RawStructDeclSyntax {
    let structKeyword = self.eat(.structKeyword)
    let name = self.consumeIdentifier()
    if name.isMissing {
      return RawStructDeclSyntax(
        attributes: attrs.attributes,
        modifiers: attrs.modifiers,
        structKeyword: structKeyword,
        identifier: name,
        genericParameterClause: nil,
        inheritanceClause: nil,
        genericWhereClause: nil,
        members: RawMemberDeclBlockSyntax(
          leftBrace: RawTokenSyntax(missing: .leftBrace, arena: self.arena),
          members: RawMemberDeclListSyntax(elements: [], arena: self.arena),
          rightBrace: RawTokenSyntax(missing: .rightBrace, arena: self.arena),
          arena: self.arena
        ),
        arena: self.arena)
    }

    let generics: RawGenericParameterClauseSyntax?
+ if self.currentToken.starts(with: "<") { + generics = self.parseGenericParameters() + } else { + generics = nil + } + + let inheritance: RawTypeInheritanceClauseSyntax? + if self.at(.colon) { + inheritance = self.parseInheritance() + } else { + inheritance = nil + } + + // Parse a 'where' clause if present. + let whereClause: RawGenericWhereClauseSyntax? + if self.at(.whereKeyword) { + whereClause = self.parseGenericWhereClause() + } else { + whereClause = nil + } + + let members = self.parseMemberDeclList() + return RawStructDeclSyntax( + attributes: attrs.attributes, modifiers: attrs.modifiers, + structKeyword: structKeyword, + identifier: name, + genericParameterClause: generics, + inheritanceClause: inheritance, + genericWhereClause: whereClause, + members: members, + arena: self.arena) + } +} + +extension Parser { + @_spi(RawSyntax) + public mutating func parsePrimaryAssociatedTypes() -> RawPrimaryAssociatedTypeClauseSyntax { + let langle = self.consume(remapping: .leftAngle) + var associatedTypes = [RawPrimaryAssociatedTypeSyntax]() + do { + var keepGoing: RawTokenSyntax? = nil + repeat { + // Parse the name of the parameter. + let name = self.consumeIdentifier() + keepGoing = self.consume(if: .comma) + associatedTypes.append(RawPrimaryAssociatedTypeSyntax( + name: name, + trailingComma: keepGoing, + arena: self.arena)) + } while keepGoing != nil + } + let rangle = self.consume(remapping: .rightAngle) + return RawPrimaryAssociatedTypeClauseSyntax( + leftAngleBracket: langle, + primaryAssociatedTypeList: RawPrimaryAssociatedTypeListSyntax(elements: associatedTypes, arena: self.arena), + rightAngleBracket: rangle, + arena: self.arena) + } + + /// Parse a protocol declaration. + /// + /// Grammar + /// ======= + /// + /// protocol-declaration → attributes? access-level-modifier? 'protocol' protocol-name type-inheritance-clause? generic-where-clause? protocol-body + /// + /// protocol-name → identifier + /// protocol-body → '{' protocol-members? 
/// '}'
  ///
  /// protocol-members → protocol-member protocol-members?
  /// protocol-member → protocol-member-declaration | compiler-control-statement
  ///
  /// protocol-member-declaration → protocol-property-declaration
  /// protocol-member-declaration → protocol-method-declaration
  /// protocol-member-declaration → protocol-initializer-declaration
  /// protocol-member-declaration → protocol-subscript-declaration
  /// protocol-member-declaration → protocol-associated-type-declaration
  /// protocol-member-declaration → typealias-declaration
  ///
  /// When the protocol name is missing, none of the optional clauses are
  /// parsed and the result carries a member block built from missing braces.
  @_spi(RawSyntax)
  public mutating func parseProtocolDeclaration(_ attrs: DeclAttributes) -> RawProtocolDeclSyntax {
    let protocolKeyword = self.eat(.protocolKeyword)
    let name = self.consumeIdentifier()
    if name.isMissing {
      return RawProtocolDeclSyntax(
        attributes: attrs.attributes,
        modifiers: attrs.modifiers,
        protocolKeyword: protocolKeyword,
        identifier: name,
        primaryAssociatedTypeClause: nil,
        inheritanceClause: nil,
        genericWhereClause: nil,
        members: RawMemberDeclBlockSyntax(
          leftBrace: RawTokenSyntax(missing: .leftBrace, arena: self.arena),
          members: RawMemberDeclListSyntax(elements: [], arena: self.arena),
          rightBrace: RawTokenSyntax(missing: .rightBrace, arena: self.arena),
          arena: self.arena
        ),
        arena: self.arena)
    }

    // Parse the primary associated types, if any.
    let primaries: RawPrimaryAssociatedTypeClauseSyntax? =
      self.currentToken.starts(with: "<") ? self.parsePrimaryAssociatedTypes() : nil

    // Parse optional inheritance clause.
    let inheritance: RawTypeInheritanceClauseSyntax? =
      self.at(.colon) ? self.parseInheritance() : nil

    // Parse a 'where' clause if present.
    let whereClause: RawGenericWhereClauseSyntax? =
      self.at(.whereKeyword) ? self.parseGenericWhereClause() : nil

    let members = self.parseMemberDeclList()

    return RawProtocolDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      protocolKeyword: protocolKeyword,
      identifier: name,
      primaryAssociatedTypeClause: primaries,
      inheritanceClause: inheritance,
      genericWhereClause: whereClause,
      members: members,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse an associated type declaration.
  ///
  /// Grammar
  /// =======
  ///
  ///     protocol-associated-type-declaration → attributes? access-level-modifier? 'associatedtype' typealias-name type-inheritance-clause? typealias-assignment? generic-where-clause?
  ///
  /// When the name is missing, the declaration is returned immediately with
  /// no inheritance clause, default type, or 'where' clause.
  @_spi(RawSyntax)
  public mutating func parseAssociatedTypeDeclaration(_ attrs: DeclAttributes) -> RawAssociatedtypeDeclSyntax {
    let assocKeyword = self.eat(.associatedtypeKeyword)
    let name = self.consumeIdentifier()
    if name.isMissing {
      return RawAssociatedtypeDeclSyntax(
        attributes: attrs.attributes, modifiers: attrs.modifiers,
        associatedtypeKeyword: assocKeyword,
        identifier: name,
        inheritanceClause: nil,
        initializer: nil,
        genericWhereClause: nil,
        arena: self.arena)
    }

    // Parse optional inheritance clause.
    let inheritance: RawTypeInheritanceClauseSyntax? =
      self.at(.colon) ? self.parseInheritance() : nil

    // Parse default type, if any.
    let defaultType: RawTypeInitializerClauseSyntax?
    if self.at(.equal) {
      let equal = self.eat(.equal)
      let type = self.parseType()
      defaultType = RawTypeInitializerClauseSyntax(
        equal: equal, value: type,
        arena: self.arena)
    } else {
      defaultType = nil
    }

    // Parse a 'where' clause if present.
    let whereClause: RawGenericWhereClauseSyntax? =
      self.at(.whereKeyword) ? self.parseGenericWhereClause() : nil

    return RawAssociatedtypeDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      associatedtypeKeyword: assocKeyword,
      identifier: name,
      inheritanceClause: inheritance,
      initializer: defaultType,
      genericWhereClause: whereClause,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse an actor declaration.
  ///
  /// Grammar
  /// =======
  ///
  ///     actor-declaration → attributes? access-level-modifier? 'actor' actor-name generic-parameter-clause? type-inheritance-clause? generic-where-clause? actor-body
  ///     actor-name → identifier
  ///     actor-body → '{' actor-members? '}'
  ///     actor-members → actor-member actor-members?
  ///     actor-member → declaration | compiler-control-statement
  ///
  /// The current token must be the contextual keyword 'actor' (asserted).
  /// Unlike the enum, struct, and protocol parsers, there is no early return
  /// for a missing name.
  @_spi(RawSyntax)
  public mutating func parseActorDeclaration(_ attrs: DeclAttributes) -> RawActorDeclSyntax {
    assert(self.currentToken.isContextualKeyword("actor"))
    // 'actor' is consumed through the identifier path, since it is only a
    // contextual keyword.
    let actorKeyword = self.consumeIdentifier()
    let name = self.consumeIdentifier()

    let generics: RawGenericParameterClauseSyntax? =
      self.currentToken.starts(with: "<") ? self.parseGenericParameters() : nil

    // Parse optional inheritance clause.
    let inheritance: RawTypeInheritanceClauseSyntax? =
      self.at(.colon) ? self.parseInheritance() : nil

    // Parse a 'where' clause if present.
    let whereClause: RawGenericWhereClauseSyntax? =
      self.at(.whereKeyword) ? self.parseGenericWhereClause() : nil

    let members = self.parseMemberDeclList()
    return RawActorDeclSyntax(
      attributes: attrs.attributes,
      modifiers: attrs.modifiers,
      actorKeyword: actorKeyword,
      identifier: name,
      genericParameterClause: generics,
      inheritanceClause: inheritance,
      genericWhereClause: whereClause,
      members: members,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse an initializer declaration.
  ///
  /// Grammar
  /// =======
  ///
  ///     initializer-declaration → initializer-head generic-parameter-clause? parameter-clause 'async'? 'throws'? generic-where-clause? initializer-body
  ///     initializer-declaration → initializer-head generic-parameter-clause? parameter-clause 'async'? 'rethrows' generic-where-clause? initializer-body
  ///
  ///     initializer-head → attributes? declaration-modifiers? 'init'
  ///     initializer-head → attributes? declaration-modifiers? 'init' '?'
  ///     initializer-head → attributes? declaration-modifiers? 'init' '!'
  ///     initializer-body → code-block
  ///
  /// The current token must be 'init' (asserted). The body is parsed with
  /// `parseOptionalCodeBlock()`.
  @_spi(RawSyntax)
  public mutating func parseInitializerDeclaration(_ attrs: DeclAttributes) -> RawInitializerDeclSyntax {
    assert(self.at(.initKeyword))
    let initKeyword = self.eat(.initKeyword)

    // Parse the '!' or '?' for a failable initializer. The '!' is accepted
    // both as its own token kind and as an operator token spelled "!".
    let atFailableMark = self.at(.exclamationMark)
      || (self.currentToken.isAnyOperator && self.currentToken.tokenText == "!")
      || self.at(.postfixQuestionMark)
    let failable: RawTokenSyntax? = atFailableMark ? self.consumeAnyToken() : nil

    let generics: RawGenericParameterClauseSyntax? =
      self.currentToken.starts(with: "<") ? self.parseGenericParameters() : nil

    // Parse the signature.
    let signature = self.parseFunctionSignature()

    let whereClause: RawGenericWhereClauseSyntax? =
      self.at(.whereKeyword) ? self.parseGenericWhereClause() : nil

    let items = self.parseOptionalCodeBlock()

    return RawInitializerDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      initKeyword: initKeyword,
      optionalMark: failable,
      genericParameterClause: generics,
      signature: signature,
      genericWhereClause: whereClause,
      body: items,
      arena: self.arena)
  }

  /// Parse a deinitializer declaration.
  ///
  /// Grammar
  /// =======
  ///
  ///     deinitializer-declaration → attributes? 'deinit' code-block
  @_spi(RawSyntax)
  public mutating func parseDeinitializerDeclaration(_ attrs: DeclAttributes) -> RawDeinitializerDeclSyntax {
    let deinitKeyword = self.eat(.deinitKeyword)
    let items = self.parseCodeBlock()
    return RawDeinitializerDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      deinitKeyword: deinitKeyword, body: items,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse a parenthesized, comma-separated parameter list.
  ///
  /// - Parameter isClosure: When `true`, the ':' after the parameter name(s)
  ///   is optional and a type is only parsed if the ':' is present. When
  ///   `false`, the ':' is expected and a type is always parsed.
  /// - Returns: The parameter clause. Missing or unexpected tokens around
  ///   either parenthesis are recorded by `expect(_:)`.
  ///
  /// The list stops at ')', at end of file, or after the first parameter that
  /// is not followed by a comma.
  @_spi(RawSyntax)
  public mutating func parseParameterClause(isClosure: Bool = false) -> RawParameterClauseSyntax {
    let (unexpectedBeforeLParen, lparen) = self.expect(.leftParen)
    var elements = [RawFunctionParameterSyntax]()
    do {
      var keepGoing = true
      while !self.at(.eof) && !self.at(.rightParen) && keepGoing {
        // Attributes.
        let attrs = self.parseAttributeList()

        let firstName: RawTokenSyntax?
        let secondName: RawTokenSyntax?
        let unexpectedBeforeColon: RawUnexpectedNodesSyntax?
        let colon: RawTokenSyntax?
        let shouldParseType: Bool

        if self.lookahead().startsParameterName(isClosure) {
          // Up to two labels: the external name and the internal name.
          firstName = self.currentToken.canBeArgumentLabel ? self.parseArgumentLabel() : nil
          secondName = self.currentToken.canBeArgumentLabel ? self.parseArgumentLabel() : nil

          if isClosure {
            unexpectedBeforeColon = nil
            colon = self.consume(if: .colon)
            shouldParseType = (colon != nil)
          } else {
            (unexpectedBeforeColon, colon) = self.expect(.colon)
            shouldParseType = true
          }
        } else {
          // No parameter name ahead: treat what follows as a bare type.
          firstName = nil
          secondName = nil
          unexpectedBeforeColon = nil
          colon = nil
          shouldParseType = true
        }

        let type: RawTypeSyntax? = shouldParseType ? self.parseType() : nil

        let ellipsis: RawTokenSyntax? =
          self.currentToken.isEllipsis ? self.consume(remapping: .ellipsis) : nil

        let defaultArgument: RawInitializerClauseSyntax? =
          self.at(.equal) ? self.parseDefaultArgument() : nil

        let trailingComma = self.consume(if: .comma)
        keepGoing = trailingComma != nil
        elements.append(RawFunctionParameterSyntax(
          attributes: attrs,
          firstName: firstName,
          secondName: secondName,
          unexpectedBeforeColon,
          colon: colon,
          type: type,
          ellipsis: ellipsis,
          defaultArgument: defaultArgument,
          trailingComma: trailingComma,
          arena: self.arena))
      }
    }
    let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen)

    // The previous code branched on "no elements and a missing parenthesis",
    // but both arms built the same list, so a single construction suffices.
    let parameters = RawFunctionParameterListSyntax(elements: elements, arena: self.arena)

    return RawParameterClauseSyntax(
      unexpectedBeforeLParen,
      leftParen: lparen,
      parameterList: parameters,
      unexpectedBeforeRParen,
      rightParen: rparen,
      arena: self.arena)
  }
/// Parse a function return clause: '->' followed by the result type.
  ///
  /// The current token must be '->'. A stray 'try', 'throw', or 'throws'
  /// directly after the arrow is consumed and recorded as an unexpected node
  /// in front of the return type.
  @_spi(RawSyntax)
  public mutating func parseFunctionReturnClause() -> RawReturnClauseSyntax {
    let arrow = self.eat(.arrow)
    let unexpectedBeforeReturnType: RawUnexpectedNodesSyntax?
    if let unexpectedToken = self.consume(ifAny: .tryKeyword, .throwKeyword, .throwsKeyword) {
      unexpectedBeforeReturnType = RawUnexpectedNodesSyntax(elements: [RawSyntax(unexpectedToken)], arena: self.arena)
    } else {
      unexpectedBeforeReturnType = nil
    }
    let result = self.parseType()
    return RawReturnClauseSyntax(
      arrow: arrow,
      unexpectedBeforeReturnType,
      returnType: result,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse a function declaration introduced by 'func'.
  ///
  /// The name is either an identifier or an operator. The body is parsed with
  /// `parseOptionalCodeBlock()`.
  @_spi(RawSyntax)
  public mutating func parseFuncDeclaration(_ attrs: DeclAttributes) -> RawFunctionDeclSyntax {
    let funcKeyword = self.eat(.funcKeyword)
    let identifier: RawTokenSyntax
    if self.currentToken.isAnyOperator || self.at(.exclamationMark) || self.at(.prefixAmpersand) {
      var name = self.currentToken.tokenText
      // An operator token ending in '<' that is followed by an identifier is
      // split: only the part before the '<' is consumed as the name, which
      // leaves the '<' for the generic parameter check below.
      if name.count > 1 && name.hasSuffix("<") && self.peek().isIdentifier {
        name = SyntaxText(rebasing: name.dropLast())
      }
      identifier = self.consumePrefix(name, as: .spacedBinaryOperator)
    } else {
      identifier = self.consumeIdentifier()
    }

    let genericParams: RawGenericParameterClauseSyntax? =
      self.currentToken.starts(with: "<") ? self.parseGenericParameters() : nil

    let signature = self.parseFunctionSignature()

    let generics: RawGenericWhereClauseSyntax? =
      self.at(.whereKeyword) ? self.parseGenericWhereClause() : nil

    let body = self.parseOptionalCodeBlock()
    return RawFunctionDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      funcKeyword: funcKeyword,
      identifier: identifier,
      genericParameterClause: genericParams,
      signature: signature,
      genericWhereClause: generics,
      body: body,
      arena: self.arena)
  }

  /// Parse a function signature: the parameter clause, then an optional
  /// 'async' or 'reasync', an optional 'throws' or 'rethrows', and an
  /// optional return clause.
  @_spi(RawSyntax)
  public mutating func parseFunctionSignature() -> RawFunctionSignatureSyntax {
    let input = self.parseParameterClause()

    // The node slot is `asyncOrReasyncKeyword`, so accept both spellings.
    // Previously only 'async' was consumed here and 'reasync' was left behind
    // for the caller to trip over.
    let async: RawTokenSyntax?
    if self.currentToken.isContextualKeyword("async")
        || self.currentToken.isContextualKeyword("reasync") {
      async = self.consume(remapping: .contextualKeyword)
    } else {
      async = nil
    }

    let throwsKeyword: RawTokenSyntax? =
      (self.at(.throwsKeyword) || self.at(.rethrowsKeyword)) ? self.consumeAnyToken() : nil

    let output: RawReturnClauseSyntax? =
      self.at(.arrow) ? self.parseFunctionReturnClause() : nil

    return RawFunctionSignatureSyntax(
      input: input,
      asyncOrReasyncKeyword: async,
      throwsOrRethrowsKeyword: throwsKeyword,
      output: output,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse a subscript declaration.
  ///
  /// Grammar
  /// =======
  ///
  ///     subscript-declaration → subscript-head subscript-result generic-where-clause? code-block
  ///     subscript-declaration → subscript-head subscript-result generic-where-clause? getter-setter-block
  ///     subscript-declaration → subscript-head subscript-result generic-where-clause? getter-setter-keyword-block
  ///     subscript-head → attributes? declaration-modifiers? 'subscript' generic-parameter-clause? parameter-clause
  ///     subscript-result → '->' attributes? type
  ///
  /// When no '->' follows the parameter clause, the result clause is built
  /// from a missing arrow and a missing type.
  @_spi(RawSyntax)
  public mutating func parseSubscriptDeclaration(_ attrs: DeclAttributes) -> RawSubscriptDeclSyntax {
    let subscriptKeyword = self.eat(.subscriptKeyword)
    let genericParameterClause: RawGenericParameterClauseSyntax? =
      self.currentToken.starts(with: "<") ? self.parseGenericParameters() : nil

    let indices = self.parseParameterClause()

    let result: RawReturnClauseSyntax
    if self.at(.arrow) {
      result = self.parseFunctionReturnClause()
    } else {
      result = RawReturnClauseSyntax(
        arrow: RawTokenSyntax(missing: .arrow, arena: self.arena),
        returnType: RawTypeSyntax(RawMissingTypeSyntax(arena: self.arena)),
        arena: self.arena
      )
    }

    // Parse a 'where' clause if present.
    let genericWhereClause: RawGenericWhereClauseSyntax? =
      self.at(.whereKeyword) ? self.parseGenericWhereClause() : nil

    // Parse getter and setter.
    let accessor: RawSyntax? = self.at(.leftBrace) ? self.parseGetSet() : nil

    return RawSubscriptDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      subscriptKeyword: subscriptKeyword,
      genericParameterClause: genericParameterClause,
      indices: indices,
      result: result,
      genericWhereClause: genericWhereClause,
      accessor: accessor,
      arena: self.arena)
  }
}

extension Parser {
  /// Parse a variable declaration starting with a leading 'let' or 'var' keyword.
  ///
  /// Grammar
  /// =======
  ///
  /// constant-declaration → attributes? declaration-modifiers? 'let' pattern-initializer-list
  /// pattern-initializer-list → pattern-initializer | pattern-initializer ',' pattern-initializer-list
  /// pattern-initializer → pattern initializer?
/// initializer → = expression
  ///
  /// The current token must be 'let' or 'var'. Each binding may carry an
  /// initializer and an accessor block, and bindings continue for as long as
  /// a trailing comma is found.
  @_spi(RawSyntax)
  public mutating func parseLetOrVarDeclaration(_ attrs: DeclAttributes) -> RawVariableDeclSyntax {
    let isLet = self.at(.letKeyword)
    assert(isLet || self.at(.varKeyword))
    let introducer = isLet ? self.eat(.letKeyword) : self.eat(.varKeyword)

    var bindings = [RawPatternBindingSyntax]()
    while true {
      let (pattern, type) = self.parseTypedPattern()

      // Parse an initializer if present.
      let initializer: RawInitializerClauseSyntax?
      if self.at(.equal) {
        let equal = self.eat(.equal)
        let expression = self.parseExpression()
        initializer = RawInitializerClauseSyntax(
          equal: equal, value: expression,
          arena: self.arena)
      } else {
        initializer = nil
      }

      // A '{' after the pattern or initializer starts an accessor block.
      let accessor: RawSyntax? = self.at(.leftBrace) ? self.parseGetSet() : nil

      let trailingComma = self.consume(if: .comma)
      bindings.append(RawPatternBindingSyntax(
        pattern: pattern,
        typeAnnotation: type,
        initializer: initializer,
        accessor: accessor,
        trailingComma: trailingComma,
        arena: self.arena))

      // Only a trailing comma keeps the binding list going.
      guard trailingComma != nil else { break }
    }

    return RawVariableDeclSyntax(
      attributes: attrs.attributes, modifiers: attrs.modifiers,
      letOrVarKeyword: introducer,
      bindings: RawPatternBindingListSyntax(elements: bindings, arena: self.arena),
      arena: self.arena)
  }

  /// The accessor introducers recognized by `parseAccessorIntroducer()`.
  /// Each raw value is the exact source spelling of the introducer.
  enum AccessorKind: SyntaxText, Equatable {
    case `get` = "get"
    case `set` = "set"
    case `didSet` = "didSet"
    case `willSet` = "willSet"
    case unsafeAddress = "unsafeAddress"
    case addressWithOwner = "addressWithOwner"
    case addressWithNativeOwner = "addressWithNativeOwner"
    case unsafeMutableAddress = "unsafeMutableAddress"
    case mutableAddressWithOwner = "mutableAddressWithOwner"
    case mutableAddressWithNativeOwner = "mutableAddressWithNativeOwner"
    case _read = "_read"
    case _modify = "_modify"
  }

  /// Everything that can precede an accessor body.
  struct AccessorIntroducer {
    /// Attributes parsed in front of the accessor, if any.
    var attributes: RawAttributeListSyntax?
    /// The 'mutating', 'nonmutating', or '__consuming' modifier, if any.
    var modifier: RawDeclModifierSyntax?
    /// The accessor kind and its token; `nil` when the current token does
    /// not spell a known accessor.
    var introducer: (AccessorKind, RawTokenSyntax)?
  }

  /// Parse the attributes, optional mutation modifier, and accessor keyword
  /// that introduce an accessor.
  ///
  /// Attributes and a modifier are consumed even when no accessor keyword
  /// follows; in that case `introducer` is `nil`.
  mutating func parseAccessorIntroducer() -> AccessorIntroducer {
    let attributes = self.parseAttributeList()

    // 'mutating', 'nonmutating', and '__consuming' are contextual keywords
    // that may appear before the accessor keyword.
    let atMutationModifier = self.currentToken.isContextualKeyword("mutating")
      || self.currentToken.isContextualKeyword("nonmutating")
      || self.currentToken.isContextualKeyword("__consuming")
    let mutationModifier: RawDeclModifierSyntax?
    if atMutationModifier {
      mutationModifier = RawDeclModifierSyntax(
        name: self.consumeAnyToken(), detail: nil,
        arena: self.arena)
    } else {
      mutationModifier = nil
    }

    // The accessor keyword itself is an identifier with a known spelling.
    if self.currentToken.isIdentifier,
       let kind = AccessorKind(rawValue: self.currentToken.tokenText) {
      let kindToken = self.consume(remapping: .contextualKeyword)
      return AccessorIntroducer(
        attributes: attributes, modifier: mutationModifier, introducer: (kind, kindToken))
    }

    return AccessorIntroducer(
      attributes: attributes, modifier: mutationModifier, introducer: nil)
  }

  /// Consume a single effects specifier, if the current token is one.
  ///
  /// Accepts 'async', 'reasync', 'throws', and 'rethrows'. It also accepts
  /// 'throw' and 'try' when they are not at the start of a line, so they can
  /// be diagnosed later. Returns `nil` without consuming anything otherwise.
  @_spi(RawSyntax)
  public mutating func parseEffectsSpecifier() -> RawTokenSyntax? {
    // 'async' / 'reasync'
    if self.currentToken.isContextualKeyword("async")
        || self.currentToken.isContextualKeyword("reasync") {
      return self.consume(remapping: .contextualKeyword)
    }

    // 'throws' / 'rethrows'
    if self.at(.throwsKeyword) || self.at(.rethrowsKeyword) {
      return self.consumeAnyToken()
    }

    // diagnose 'throw'/'try'.
    let atMisspelledSpecifier = self.at(.throwKeyword) || self.at(.tryKeyword)
    if atMisspelledSpecifier && !self.currentToken.isAtStartOfLine {
      return self.consumeAnyToken()
    }

    return nil
  }

  /// Consume consecutive effects specifiers until `parseEffectsSpecifier()`
  /// returns `nil`, and return them in source order.
  @_spi(RawSyntax)
  public mutating func parseEffectsSpecifiers() -> [RawTokenSyntax] {
    var collected = [RawTokenSyntax]()
    while let next = self.parseEffectsSpecifier() {
      collected.append(next)
    }
    return collected
  }

  /// Parse the body of a variable declaration.
This can include explicit + /// getters, setters, and observers, or the body of a computed property. + /// + /// Grammar + /// ======= + /// + /// getter-setter-block → code-block + /// getter-setter-block → { getter-clause setter-clause opt } + /// getter-setter-block → { setter-clause getter-clause } + /// getter-clause → attributes opt mutation-modifier opt get code-block + /// setter-clause → attributes opt mutation-modifier opt set setter-name opt code-block + /// setter-name → ( identifier ) + /// getter-setter-keyword-block → { getter-keyword-clause setter-keyword-clause opt } + /// getter-setter-keyword-block → { setter-keyword-clause getter-keyword-clause } + /// getter-keyword-clause → attributes opt mutation-modifier opt get + /// setter-keyword-clause → attributes opt mutation-modifier opt set + /// willSet-didSet-block → { willSet-clause didSet-clause opt } + /// willSet-didSet-block → { didSet-clause willSet-clause opt } + /// willSet-clause → attributes opt willSet setter-name opt code-block + /// didSet-clause → attributes opt didSet setter-name opt code-block + @_spi(RawSyntax) + public mutating func parseGetSet() -> RawSyntax { + // Parse getter and setter. + let lbrace = self.eat(.leftBrace) + // Collect all explicit accessors to a list. + var elements = [RawAccessorDeclSyntax]() + do { + while !self.at(.eof) && !self.at(.rightBrace) { + let introducer = self.parseAccessorIntroducer() + guard let (kind, kindToken) = introducer.introducer else { + // There can only be an implicit getter if no other accessors were + // seen before this one. + guard elements.isEmpty else { + // Recover until the matching right brace. It's a little + // presumptuous of us to assume everything between here and there + // is an accessor, but we cannot stick unexpected anywhere for the + // moment... 
+ while !self.at(.eof) && !self.at(.rightBrace) { + for token in self.recover() { + elements.append(RawAccessorDeclSyntax( + attributes: nil, modifier: nil, + accessorKind: token, + parameter: nil, + asyncKeyword: nil, throwsKeyword: nil, + body: nil, + arena: self.arena)) + } + } + + let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace) + return RawSyntax(RawAccessorBlockSyntax( + leftBrace: lbrace, + accessors: RawAccessorListSyntax(elements: elements, arena: self.arena), + unexpectedBeforeRBrace, + rightBrace: rbrace, + arena: self.arena)) + } + + var body = [RawCodeBlockItemSyntax]() + while !self.at(.eof) && !self.at(.rightBrace) { + body.append(self.parseCodeBlockItem()) + } + let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace) + return RawSyntax(RawCodeBlockSyntax( + leftBrace: lbrace, + statements: RawCodeBlockItemListSyntax(elements: body, arena: self.arena), + unexpectedBeforeRBrace, + rightBrace: rbrace, + arena: self.arena)) + } + + // 'set' and 'willSet' can have an optional name. This isn't valid in a + // protocol, but we parse and then reject it for better QoI. + // + // set-name ::= '(' identifier ')' + let parameter: RawAccessorParameterSyntax? + if self.at(.leftParen) && [ AccessorKind.set, .willSet, .didSet ].firstIndex(of: kind) != nil { + let lparen = self.eat(.leftParen) + let name = self.consumeIdentifier() + let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + parameter = RawAccessorParameterSyntax( + leftParen: lparen, + name: name, + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena + ) + } else { + parameter = nil + } + + // Next, parse effects specifiers. While it's only valid to have them + // on 'get' accessors, we also emit diagnostics if they show up on others. + let asyncKeyword: RawTokenSyntax? + let throwsKeyword: RawTokenSyntax? 
+ if self.currentToken.isEffectsSpecifier { + asyncKeyword = self.parseEffectsSpecifier() + throwsKeyword = self.parseEffectsSpecifier() + } else { + asyncKeyword = nil + throwsKeyword = nil + } + + let body = self.parseOptionalCodeBlock() + + elements.append(RawAccessorDeclSyntax( + attributes: introducer.attributes, + modifier: introducer.modifier, + accessorKind: kindToken, + parameter: parameter, + asyncKeyword: asyncKeyword, + throwsKeyword: throwsKeyword, + body: body, + arena: self.arena)) + } + } + + let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace) + return RawSyntax(RawAccessorBlockSyntax( + leftBrace: lbrace, + accessors: RawAccessorListSyntax(elements: elements, arena: self.arena), + unexpectedBeforeRBrace, + rightBrace: rbrace, + arena: self.arena)) + } +} + +extension Parser { + /// Parse a typealias declaration. + /// + /// Grammar + /// ======= + /// + /// typealias-declaration → attributes? access-level-modifier? 'typealias' typealias-name generic-parameter-clause? typealias-assignment + /// typealias-name → identifier + /// typealias-assignment → '=' type + @_spi(RawSyntax) + public mutating func parseTypealiasDeclaration(_ attrs: DeclAttributes) -> RawTypealiasDeclSyntax { + let typealiasKeyword = self.eat(.typealiasKeyword) + let name = self.consumeIdentifier() + + // Parse a generic parameter list if it is present. + let generics: RawGenericParameterClauseSyntax? + if self.currentToken.starts(with: "<") { + generics = self.parseGenericParameters() + } else { + generics = nil + } + + // Parse the binding alias. + let (unexpectedBeforeEqual, equal) = self.expect(.equal) + let value = self.parseType() + let initializer = RawTypeInitializerClauseSyntax( + unexpectedBeforeEqual, + equal: equal, + value: value, + arena: self.arena + ) + + // Parse a 'where' clause if present. + let genericWhereClause: RawGenericWhereClauseSyntax? 
+ if self.at(.whereKeyword) { + genericWhereClause = self.parseGenericWhereClause() + } else { + genericWhereClause = nil + } + + return RawTypealiasDeclSyntax( + attributes: attrs.attributes, modifiers: attrs.modifiers, + typealiasKeyword: typealiasKeyword, + identifier: name, + genericParameterClause: generics, + initializer: initializer, + genericWhereClause: genericWhereClause, + arena: self.arena) + } +} + +extension Parser { + /// Parse an operator declaration. + /// + /// Grammar + /// ======= + /// + /// operator-declaration → prefix-operator-declaration | postfix-operator-declaration | infix-operator-declaration + /// prefix-operator-declaration → 'prefix' 'operator' operator + /// postfix-operator-declaration → 'postfix' 'operator' operator + /// infix-operator-declaration → 'infix' 'operator' operator infix-operator-group? + /// infix-operator-group → ':' precedence-group-name + @_spi(RawSyntax) + public mutating func parseOperatorDeclaration(_ attrs: DeclAttributes) -> RawOperatorDeclSyntax { + let operatorKeyword = self.eat(.operatorKeyword) + let identifier = self.consumeAnyToken() + + // Parse (or diagnose) a specified precedence group and/or + // designated protocol. These both look like identifiers, so we + // parse them both as identifiers here and sort it out in type + // checking. + let precedenceAndTypes: RawOperatorPrecedenceAndTypesSyntax? 
+ if self.at(.colon) { + let colon = self.eat(.colon) + let identifier = self.consumeIdentifier() + precedenceAndTypes = RawOperatorPrecedenceAndTypesSyntax( + colon: colon, + precedenceGroupAndDesignatedTypes: RawIdentifierListSyntax(elements: [ identifier ], arena: self.arena), + arena: self.arena) + } else { + precedenceAndTypes = nil + } + return RawOperatorDeclSyntax( + attributes: attrs.attributes, modifiers: attrs.modifiers, + operatorKeyword: operatorKeyword, + identifier: identifier, + operatorPrecedenceAndTypes: precedenceAndTypes, + arena: self.arena) + } + + + /// Parse a precedence group declaration. + /// + /// Grammar + /// ======= + /// + /// precedence-group-declaration → precedencegroup precedence-group-name '{' precedence-group-attributes? '}' + /// + /// precedence-group-attributes → precedence-group-attribute precedence-group-attributes? + /// precedence-group-attribute → precedence-group-relation + /// precedence-group-attribute → precedence-group-assignment + /// precedence-group-attribute → precedence-group-associativity + /// + /// precedence-group-relation → 'higherThan' ':' precedence-group-names + /// precedence-group-relation → 'lowerThan' ':' precedence-group-names + /// + /// precedence-group-assignment → 'assignment' ':' boolean-literal + /// + /// precedence-group-associativity → 'associativity' ':' 'left' + /// precedence-group-associativity → 'associativity' ':' 'right' + /// precedence-group-associativity → 'associativity' ':' 'none' + /// + /// precedence-group-names → precedence-group-name | precedence-group-name ',' precedence-group-names + /// precedence-group-name → identifier + @_spi(RawSyntax) + public mutating func parsePrecedenceGroupDeclaration(_ attrs: DeclAttributes) -> RawPrecedenceGroupDeclSyntax { + let group = self.eat(.precedencegroupKeyword) + let identifier = self.consumeIdentifier() + let (unexpectedBeforeLBrace, lbrace) = self.expect(.leftBrace) + var elements = [RawSyntax]() + do { + while !self.at(.eof) && 
!self.at(.rightBrace) { + switch self.currentToken.tokenText { + case "associativity": + let associativity = self.consumeIdentifier() + let (unexpectedBeforeColon, colon) = self.expect(.colon) + let value = self.consumeIdentifier() + elements.append(RawSyntax(RawPrecedenceGroupAssociativitySyntax( + associativityKeyword: associativity, + unexpectedBeforeColon, + colon: colon, + value: value, + arena: self.arena + ))) + case "assignment": + let assignmentKeyword = self.consumeIdentifier() + let (unexpectedBeforeColon, colon) = self.expect(.colon) + let flag: RawTokenSyntax + if self.at(.trueKeyword) { + flag = self.eat(.trueKeyword) + } else if self.at(.falseKeyword) { + flag = self.eat(.falseKeyword) + } else { + flag = RawTokenSyntax(missing: .trueKeyword, arena: self.arena) + } + elements.append(RawSyntax(RawPrecedenceGroupAssignmentSyntax( + assignmentKeyword: assignmentKeyword, + unexpectedBeforeColon, + colon: colon, + flag: flag, + arena: self.arena + ))) + case "higherThan", "lowerThan": + // "lowerThan" and "higherThan" are contextual keywords. + let level = self.consume(remapping: .contextualKeyword) + let (unexpectedBeforeColon, colon) = self.expect(.colon) + var names = [RawPrecedenceGroupNameElementSyntax]() + do { + var keepGoing: RawTokenSyntax? 
= nil + repeat { + let name = self.consumeIdentifier() + keepGoing = self.consume(if: .comma) + names.append(RawPrecedenceGroupNameElementSyntax( + name: name, trailingComma: keepGoing, + arena: self.arena)) + } while keepGoing != nil + } + elements.append(RawSyntax(RawPrecedenceGroupRelationSyntax( + higherThanOrLowerThan: level, + unexpectedBeforeColon, + colon: colon, + otherNames: RawPrecedenceGroupNameListSyntax(elements: names, arena: self.arena), + arena: self.arena))) + default: + var tokenList = [RawTokenSyntax]() + while !self.at(.eof) && !self.at(.rightBrace) { + let tokens = self.recover() + guard !tokens.isEmpty else { + break + } + tokenList.append(contentsOf: tokens) + } + elements.append(RawSyntax(RawTokenListSyntax(elements: tokenList, arena: self.arena))) + } + } + } + let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace) + return RawPrecedenceGroupDeclSyntax( + attributes: attrs.attributes, modifiers: attrs.modifiers, + precedencegroupKeyword: group, + identifier: identifier, + unexpectedBeforeLBrace, + leftBrace: lbrace, + groupAttributes: RawPrecedenceGroupAttributeListSyntax(elements: elements, arena: self.arena), + unexpectedBeforeRBrace, + rightBrace: rbrace, + arena: self.arena) + } +} + +extension Parser { + /// Which diagnostic directive was consumed, carrying the directive's keyword token. + enum PoundDiagnosticKind { + case error(RawTokenSyntax) + case warning(RawTokenSyntax) + } + + /// Parse a `#error` or `#warning` declaration: the directive keyword followed by a parenthesized string literal. + /// + /// The parser must be positioned at `#error` or `#warning` (asserted). If the token after the left paren is not a string literal, a literal with missing quotes and no segments is synthesized instead. + /// + /// - Returns: A `RawPoundErrorDeclSyntax` for `#error` or a `RawPoundWarningDeclSyntax` for `#warning`, wrapped in `RawDeclSyntax`. + @_spi(RawSyntax) + public mutating func parsePoundDiagnosticDeclaration() -> RawDeclSyntax { + assert(self.at(.poundErrorKeyword) || self.at(.poundWarningKeyword)) + + let directive: PoundDiagnosticKind + if self.at(.poundErrorKeyword) { + directive = .error(self.eat(.poundErrorKeyword)) + } else { + // Must be tagged `.warning`: tagging it `.error` would make every '#warning' come out as a pound-error node and leave the `.warning` case of the switch below unreachable. + directive = .warning(self.eat(.poundWarningKeyword)) + } + + let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen) + let stringLiteral: RawStringLiteralExprSyntax + if self.currentToken.tokenKind == .stringLiteral { + stringLiteral = self.parseStringLiteral() + } else { + stringLiteral = RawStringLiteralExprSyntax(
openDelimiter: nil, + openQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena), + segments: RawStringLiteralSegmentsSyntax(elements: [], arena: self.arena), + closeQuote: RawTokenSyntax(missing: .stringQuote, arena: self.arena), + closeDelimiter: nil, + arena: self.arena + ) + } + let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen) + + switch directive { + case .error(let tok): + return RawDeclSyntax(RawPoundErrorDeclSyntax( + poundError: tok, + unexpectedBeforeLeftParen, + leftParen: leftParen, + message: stringLiteral, + unexpectedBeforeRightParen, + rightParen: rightParen, + arena: self.arena)) + case .warning(let tok): + return RawDeclSyntax(RawPoundWarningDeclSyntax( + poundWarning: tok, + unexpectedBeforeLeftParen, + leftParen: leftParen, + message: stringLiteral, + unexpectedBeforeRightParen, + rightParen: rightParen, + arena: self.arena)) + } + } +} diff --git a/Sources/SwiftParser/Diagnostics/Diagnostic.swift b/Sources/SwiftParser/Diagnostics/Diagnostic.swift new file mode 100644 index 00000000000..d16cc776ebf --- /dev/null +++ b/Sources/SwiftParser/Diagnostics/Diagnostic.swift @@ -0,0 +1,43 @@ +//===--- Diagnostics.swift ------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +import SwiftSyntax + +public struct Diagnostic { + /// The message that should be displayed to the user + public let diagMessage: DiagnosticMessage + + /// The node at whose start location the message should be displayed. 
+ public let node: Syntax + + init(node: Syntax, message: DiagnosticMessage) { + self.diagMessage = message + self.node = node + } + + /// The message that should be displayed to the user. + public var message: String { + return diagMessage.message + } + + /// An ID that identifies the diagnostic's message. + /// See ``DiagnosticMessageID``. + public var diagnosticID: DiagnosticMessageID { + return diagMessage.diagnosticID + } + + /// The location at which the diagnostic should be displayed. + public func location(converter: SourceLocationConverter) -> SourceLocation { + return node.startLocation(converter: converter) + } +} + diff --git a/Sources/SwiftParser/Diagnostics/DiagnosticMessage.swift b/Sources/SwiftParser/Diagnostics/DiagnosticMessage.swift new file mode 100644 index 00000000000..dfb86fc6017 --- /dev/null +++ b/Sources/SwiftParser/Diagnostics/DiagnosticMessage.swift @@ -0,0 +1,51 @@ +//===--- DiagnosticMessage.swift ------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + + +/// An identifier that identifies a diagnostic message's type. +/// Fundamentally different diagnostics should have a different `diagnosticID` +/// so that clients may filter/prioritise/highlight/... certain diagnostics. +/// Two diagnostics with the same ID don’t need to necessarily have the exact +/// same wording. Eg. it’s possible that the message contains more context when +/// available. 
+public struct DiagnosticMessageID: Hashable { + private let value: String + + public init(_ value: String) { + self.value = value + } +} + +/// Types conforming to this protocol represent diagnostic messages that can be +/// shown to the client. +public protocol DiagnosticMessage { + /// The diagnostic message that should be displayed in the client. + var message: String { get } + + /// See ``DiagnosticMessageID``. + var diagnosticID: DiagnosticMessageID { get } +} + +/// A diagnostic whose ID is determined by the diagnostic's type. +public protocol TypedDiagnosticMessage: DiagnosticMessage { + /// Restates the ``DiagnosticMessage`` requirement; the extension below supplies a default implementation. + var diagnosticID: DiagnosticMessageID { get } +} + +public extension TypedDiagnosticMessage { + /// The ID shared by every value of the conforming type, built from the type's textual description (`"\(self)"`). + static var diagnosticID: DiagnosticMessageID { + return DiagnosticMessageID("\(self)") + } + + /// Forwards to the type-level `diagnosticID`. + var diagnosticID: DiagnosticMessageID { + return Self.diagnosticID + } +} diff --git a/Sources/SwiftParser/Diagnostics/ParseDiagnosticsGenerator.swift b/Sources/SwiftParser/Diagnostics/ParseDiagnosticsGenerator.swift new file mode 100644 index 00000000000..ce2f5c08081 --- /dev/null +++ b/Sources/SwiftParser/Diagnostics/ParseDiagnosticsGenerator.swift @@ -0,0 +1,118 @@ +//===--- ParseDiagnosticsGenerator.swift ----------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc.
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +import SwiftSyntax + +extension UnexpectedNodesSyntax { + func tokens(satisfying isIncluded: (TokenSyntax) -> Bool) -> [TokenSyntax] { + return self.children(viewMode: .sourceAccurate).compactMap({ $0.as(TokenSyntax.self) }).filter(isIncluded) + } + + func tokens(withKind kind: TokenKind) -> [TokenSyntax] { + return self.tokens(satisfying: { $0.tokenKind == kind }) + } +} + +public class ParseDiagnosticsGenerator: SyntaxAnyVisitor { + private var diagnostics: [Diagnostic] = [] + + /// IDs of nodes for which we already generated diagnostics in a parent's visit + /// method and that should thus not be visited. + private var handledNodes: [SyntaxIdentifier] = [] + + private init() { + super.init(viewMode: .all) + } + + /// Walks `tree` with a fresh generator and returns the diagnostics its visit methods collected. + public static func diagnostics(for tree: SyntaxProtocol) -> [Diagnostic] { + let diagProducer = ParseDiagnosticsGenerator() + diagProducer.walk(tree) + return diagProducer.diagnostics + } + + // MARK: - Private helper functions + + /// Produce a diagnostic with the given message, anchored at `node`. + /// + /// Generic over the node type (the generic parameter list was missing, leaving `T` undeclared); `Syntax(node)` needs `T` to be a `SyntaxProtocol`. + private func addDiagnostic<T: SyntaxProtocol>(_ node: T, _ message: DiagnosticMessage) { + diagnostics.append(Diagnostic(node: Syntax(node), message: message)) + } + + /// If a diagnostic is generated that covers multiple syntax nodes, mark them as handled so they don't produce the generic diagnostics anymore. + private func markNodesAsHandled(_ nodes: SyntaxIdentifier...) { + handledNodes.append(contentsOf: nodes) + } + + /// Whether the node should be skipped for diagnostic emission. + /// Every visit method must check this at the beginning.
+ private func shouldSkip<T: SyntaxProtocol>(_ node: T) -> Bool { + // Generic over the node type (the generic parameter list was missing, leaving `T` undeclared); only the node's `id` is consulted. + return handledNodes.contains(node.id) + } + + // MARK: - Generic diagnostic generation + + public override func visitAny(_ node: Syntax) -> SyntaxVisitorContinueKind { + if shouldSkip(node) { + return .skipChildren + } + return .visitChildren + } + + /// Emits one `UnexpectedNodesDiagnostic` for the whole unexpected-nodes collection and does not descend into it. + public override func visit(_ node: UnexpectedNodesSyntax) -> SyntaxVisitorContinueKind { + if shouldSkip(node) { + return .skipChildren + } + addDiagnostic(node, UnexpectedNodesDiagnostic(unexpectedNodes: node)) + return .skipChildren + } + + /// Emits a `MissingTokenDiagnostic` for tokens whose `isMissing` is true; other tokens produce nothing. + public override func visit(_ node: TokenSyntax) -> SyntaxVisitorContinueKind { + if shouldSkip(node) { + return .skipChildren + } + if node.isMissing { + addDiagnostic(node, MissingTokenDiagnostic(missingToken: node)) + } + return .skipChildren + } + + // MARK: - Specialized diagnostic generation + + public override func visit(_ node: ForInStmtSyntax) -> SyntaxVisitorContinueKind { + if shouldSkip(node) { + return .skipChildren + } + // This is mostly a proof-of-concept implementation to produce more complex diagnostics.
+ if let unexpectedCondition = node.body.unexpectedBeforeLeftBrace { + // Detect C-style for loops based on two semicolons which could not be parsed between the 'for' keyword and the '{' + if unexpectedCondition.tokens(withKind: .semicolon).count == 2 { + addDiagnostic(node, CStyleForLoopDiagnostic()) + markNodesAsHandled(node.inKeyword.id, unexpectedCondition.id) + } + } + return .visitChildren + } + + public override func visit(_ node: FunctionSignatureSyntax) -> SyntaxVisitorContinueKind { + if shouldSkip(node) { + return .skipChildren + } + if let output = node.output, let unexpectedBeforeReturnType = output.unexpectedBetweenArrowAndReturnType { + if let throwsInReturnPosition = unexpectedBeforeReturnType.tokens(withKind: .throwsKeyword).first { + addDiagnostic(throwsInReturnPosition, ThrowsInReturnPositionDiagnostic()) + markNodesAsHandled(unexpectedBeforeReturnType.id, throwsInReturnPosition.id) + return .visitChildren + } + } + return .visitChildren + } +} + diff --git a/Sources/SwiftParser/Diagnostics/ParserDiagnosticMessages.swift b/Sources/SwiftParser/Diagnostics/ParserDiagnosticMessages.swift new file mode 100644 index 00000000000..172d881b526 --- /dev/null +++ b/Sources/SwiftParser/Diagnostics/ParserDiagnosticMessages.swift @@ -0,0 +1,81 @@ +//===--- ParserDiagnosticKinds.swift --------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +import SwiftSyntax + +extension Syntax { + // FIXME: These should be defined in gyb_syntax_support. + var nodeTypeNameForDiagnostics: String? 
{ + if self.is(DeclSyntax.self) { + return "declaration" + } else if self.is(ExprSyntax.self) { + return "expression" + } else if self.is(PatternSyntax.self) { + return "pattern" + } else if self.is(StmtSyntax.self) { + return "statement" + } else if self.is(TypeSyntax.self) { + return "type" + } else if self.is(FunctionParameterSyntax.self) { + return "function parameter" + } else if self.is(ParameterClauseSyntax.self) { + return "parameter clause" + } else { + return nil + } + } +} + +// MARK: - Diagnostics (please sort alphabetically) + +public struct CStyleForLoopDiagnostic: TypedDiagnosticMessage { + public var message = "C-style for statement has been removed in Swift 3" +} + +public struct MissingTokenDiagnostic: TypedDiagnosticMessage { + public let missingToken: TokenSyntax + + public var message: String { + guard let parent = missingToken.parent, let parentTypeName = parent.nodeTypeNameForDiagnostics else { + return "Expected '\(missingToken.text)'" + } + switch missingToken.tokenKind { + case .leftAngle, .leftBrace, .leftParen, .leftSquareBracket: + if parent.children(viewMode: .fixedUp).first?.as(TokenSyntax.self) == missingToken { + return "Expected '\(missingToken.text)' to start \(parentTypeName)" + } + case .rightAngle, .rightBrace, .rightParen, .rightSquareBracket: + if parent.children(viewMode: .fixedUp).last?.as(TokenSyntax.self) == missingToken { + return "Expected '\(missingToken.text)' to end \(parentTypeName)" + } + default: + break + } + return "Expected '\(missingToken.text)' in \(parentTypeName)" + } +} + +public struct ThrowsInReturnPositionDiagnostic: TypedDiagnosticMessage { + public let message = "'throws' may only occur before '->'" +} + +public struct UnexpectedNodesDiagnostic: TypedDiagnosticMessage { + public let unexpectedNodes: UnexpectedNodesSyntax + + public var message: String { + if let parentTypeName = unexpectedNodes.parent?.nodeTypeNameForDiagnostics { + return "Unexpected text '\(unexpectedNodes.description)' found in 
\(parentTypeName)" + } else { + return "Unexpected text '\(unexpectedNodes.description)'" + } + } +} diff --git a/Sources/SwiftParser/Directives.swift b/Sources/SwiftParser/Directives.swift new file mode 100644 index 00000000000..39e8b631f2c --- /dev/null +++ b/Sources/SwiftParser/Directives.swift @@ -0,0 +1,178 @@ +//===------------------------ Directives.swift ----------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +extension Parser { + /// Parse a conditional compilation block. + /// + /// This function should be used to parse conditional compilation statements, + /// declarations, and expressions. It is generic over the particular kind of + /// parse that must occur for these elements, and allows a context-specific + /// syntax kind to be emitted to collect the results. For example, declaration + /// parsing parses items and collects the items into a `MemberDeclListSyntax` + /// node. + /// + /// Grammar + /// ======= + /// + /// conditional-compilation-block → if-directive-clause elseif-directive-clauses? else-directive-clause? endif-directive + /// + /// if-directive-clause → if-directive compilation-condition statements? + /// elseif-directive-clauses → elseif-directive-clause elseif-directive-clauses? + /// elseif-directive-clause → elseif-directive compilation-condition statements? + /// else-directive-clause → else-directive statements? 
+ /// if-directive → '#if' + /// elseif-directive → '#elseif' + /// else-directive → '#else' + /// endif-directive → '#endif' + /// + /// compilation-condition → platform-condition + /// compilation-condition → identifier + /// compilation-condition → boolean-literal + /// compilation-condition → '(' compilation-condition ')' + /// compilation-condition → '!' compilation-condition + /// compilation-condition → compilation-condition '&&' compilation-condition + /// compilation-condition → compilation-condition '||' compilation-condition + /// + /// platform-condition → 'os' '(' operating-system ')' + /// platform-condition → 'arch' '(' architecture ')' + /// platform-condition → 'swift' '(' '>=' swift-version ')' | 'swift' '(' '<' swift-version ')' + /// platform-condition → 'compiler' '(' '>=' swift-version ')' | 'compiler' '(' '<' swift-version ')' + /// platform-condition → 'canImport' '(' import-path ')' + /// platform-condition → 'targetEnvironment' '(' environment ')' + /// + /// operating-system → 'macOS' | 'iOS' | 'watchOS' | 'tvOS' | 'Linux' | 'Windows' + /// architecture → 'i386' | 'x86_64' | 'arm' | 'arm64' + /// swift-version → decimal-digits swift-version-continuation? + /// swift-version-continuation → '.' decimal-digits swift-version-continuation? + /// environment → 'simulator' | 'macCatalyst' + /// + /// - Parameters: + /// - parseElement: Parse an element of the conditional compilation block. + /// Returning `nil` ends the element list of the current clause. + /// - syntax: A function that aggregates the parsed conditional elements + /// into a syntax collection. + @_spi(RawSyntax) + public mutating func parsePoundIfDirective<Element>( + _ parseElement: (inout Parser) -> Element?, + syntax: (inout Parser, [Element]) -> RawSyntax + ) -> RawIfConfigDeclSyntax { + // NOTE(review): the generic parameter list was missing from this signature, leaving `Element` undeclared. It is restored unconstrained, which is all this body needs - confirm whether a protocol bound is intended. + var clauses = [RawIfConfigClauseSyntax]() + do { + var poundIf = self.eat(.poundIfKeyword) + repeat { + // Parse the condition. + let condition: RawExprSyntax?
+ if self.at(.poundElseKeyword) { + poundIf = self.consumeAnyToken() + condition = nil + } else if self.at(.poundElseifKeyword) { + poundIf = self.consumeAnyToken() + condition = RawExprSyntax(self.parseSequenceExpression(.basic, forDirective: true)) + } else { + assert(poundIf.tokenKind == .poundIfKeyword) + condition = RawExprSyntax(self.parseSequenceExpression(.basic, forDirective: true)) + } + + var elements = [Element]() + do { + while !self.at(.eof) && !self.at(.poundElseKeyword) && !self.at(.poundElseifKeyword) && !self.at(.poundEndifKeyword) { + guard let element = parseElement(&self) else { + break + } + elements.append(element) + } + } + + clauses.append(RawIfConfigClauseSyntax( + poundKeyword: poundIf, + condition: condition, + elements: syntax(&self, elements), + arena: self.arena)) + } while self.at(.poundElseifKeyword) || self.at(.poundElseKeyword) + } + + let (unexpectedBeforePoundEndIf, poundEndIf) = self.expect(.poundEndifKeyword) + return RawIfConfigDeclSyntax( + clauses: RawIfConfigClauseListSyntax(elements: clauses, arena: self.arena), + unexpectedBeforePoundEndIf, + poundEndif: poundEndIf, + arena: self.arena) + } +} + +extension Parser { + /// Parse a #line literal. + /// + /// Grammar + /// ======= + /// + /// literal-expression → '#line' + @_spi(RawSyntax) + public mutating func parsePoundLineDirective() -> RawPoundLineExprSyntax { + let token = self.eat(.poundLineKeyword) + return RawPoundLineExprSyntax(poundLine: token, arena: self.arena) + } + + /// Parse a line control directive. 
+ /// + /// Grammar + /// ======= + /// + /// line-control-statement → '#sourceLocation' '(' 'file' ':' file-path ',' 'line' ':' line-number ')' + /// line-control-statement → '#sourceLocation' '(' ')' + /// line-number → `A decimal integer greater than zero` + /// file-path → static-string-literal + @_spi(RawSyntax) + public mutating func parsePoundSourceLocationDirective() -> RawPoundSourceLocationSyntax { + // The directive keyword itself; consumed unconditionally. + let poundSourceLocation = self.consumeAnyToken() + let (unexpectedBeforeLParen, lparen) = self.expect(.leftParen) + let args: RawPoundSourceLocationArgsSyntax? + if self.at(.rightParen) { + // Argument-less form: '#sourceLocation' '(' ')'. + args = nil + } else { + // 'file' ':' file-path ',' + let fileLabel = self.consumeIdentifier() + let (unexpectedBeforeFileColon, fileColon) = self.expect(.colon) + let (unexpectedBeforeFileName, fileName) = self.expect(.stringLiteral) + let (unexpectedBeforeComma, comma) = self.expect(.comma) + + // 'line' ':' line-number + let lineLabel = self.consumeIdentifier() + let (unexpectedBeforeLineColon, lineColon) = self.expect(.colon) + let lineNumber = self.consumeInteger() + + args = RawPoundSourceLocationArgsSyntax( + fileArgLabel: fileLabel, + unexpectedBeforeFileColon, + fileArgColon: fileColon, + unexpectedBeforeFileName, + fileName: fileName, + unexpectedBeforeComma, + comma: comma, + lineArgLabel: lineLabel, + unexpectedBeforeLineColon, + lineArgColon: lineColon, + lineNumber: lineNumber, + arena: self.arena + ) + } + let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + return RawPoundSourceLocationSyntax( + poundSourceLocation: poundSourceLocation, + unexpectedBeforeLParen, + leftParen: lparen, + args: args, + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena + ) + } +} diff --git a/Sources/SwiftParser/Expressions.swift b/Sources/SwiftParser/Expressions.swift new file mode 100644 index 00000000000..e9ccb1ccbb6 --- /dev/null +++ b/Sources/SwiftParser/Expressions.swift @@ -0,0 +1,2078 @@ +//===----------------------- Expressions.swift ----------------------------===// +// +// This source file is part of the Swift.org open source project +// +//
Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +extension Parser { + public enum ExprFlavor { + case basic + case trailingClosure + } + + /// Parse an expression. + /// + /// Grammar + /// ======= + /// + /// expression → try-operator? await-operator? prefix-expression infix-expressions? + /// expression-list → expression | expression ',' expression-list + @_spi(RawSyntax) + public mutating func parseExpression(_ flavor: ExprFlavor = .trailingClosure) -> RawExprSyntax { + // If we are parsing a refutable pattern, check to see if this is the start + // of a let/var/is pattern. If so, parse it as an UnresolvedPatternExpr and + // let pattern type checking determine its final form. + // + // Only do this if we're parsing a pattern, to improve QoI on malformed + // expressions followed by (e.g.) let/var decls. + if self.at(.varKeyword) || self.at(.letKeyword) || self.at(.isKeyword) { + let pattern = self.parseMatchingPattern() + return RawExprSyntax(RawUnresolvedPatternExprSyntax(pattern: pattern, arena: self.arena)) + } + return RawExprSyntax(self.parseSequenceExpression(flavor)) + } +} + +extension Parser { + struct PendingTernary { + var conditionExpression: RawExprSyntax + var questionMark: RawTokenSyntax + var firstChoice: RawExprSyntax + var unexpectedBeforeColon: RawUnexpectedNodesSyntax? 
+ var colonMark: RawTokenSyntax + /// Completes the pending ternary with `secondChoice` as the expression after the colon. + /// When `secondChoice` is `nil`, a missing expression allocated in `arena` is used instead. + func withSecondChoice(_ secondChoice: RawExprSyntax?, arena: SyntaxArena) -> RawTernaryExprSyntax { + return RawTernaryExprSyntax( + conditionExpression: conditionExpression, + questionMark: questionMark, + firstChoice: firstChoice, + unexpectedBeforeColon, + colonMark: colonMark, + secondChoice: secondChoice ?? RawExprSyntax(RawMissingExprSyntax(arena: arena)), + arena: arena) + } + } + /// Parse a sequence of expressions. + /// + /// Grammar + /// ======= + /// + /// infix-expression → infix-operator prefix-expression + /// infix-expression → assignment-operator try-operator? prefix-expression + /// infix-expression → conditional-operator try-operator? prefix-expression + /// infix-expression → type-casting-operator + /// infix-expressions → infix-expression infix-expressions? + /// + /// assignment-operator → '=' + /// conditional-operator → '?' expression ':' + /// type-casting-operator → 'is' type + /// type-casting-operator → 'as' type + /// type-casting-operator → 'as' '?' type + /// type-casting-operator → 'as' '!' type + @_spi(RawSyntax) + public mutating func parseSequenceExpression( + _ flavor: ExprFlavor, + forDirective: Bool = false, + inVarOrLet: Bool = false + ) -> RawExprSyntax { + var elements = [RawExprSyntax]() + var pendingTernary: PendingTernary? = nil + var sequenceLoopCondition = LoopProgressCondition() + SEQUENCE_LOOP: while sequenceLoopCondition.evaluate(currentToken) { + if forDirective && self.currentToken.isAtStartOfLine { + break SEQUENCE_LOOP + } + + // Parse a unary expression. + let unary = self.parseSequenceExpressionElement(flavor, forDirective: forDirective) + if unary.is(RawMissingExprSyntax.self) { + if !elements.isEmpty { + // If there are elements queued up we need to yield them all.
+ if let pendingTernary = pendingTernary { + elements.append(RawExprSyntax(pendingTernary.withSecondChoice(nil, arena: self.arena))) + } + return RawExprSyntax(RawSequenceExprSyntax( + elements: RawExprListSyntax(elements: elements, arena: self.arena), + arena: self.arena)) + } else if let pendingTernary = pendingTernary { + // Okay, so there aren't any items, but we have a ternary expression + // to yield. + return RawExprSyntax(pendingTernary.withSecondChoice(nil, arena: self.arena)) + } else { + // No items, no ternary expression, no other choice but to propagate + // our failure upwards. + return RawExprSyntax(RawMissingExprSyntax(arena: self.arena)) + } + } + elements.append(unary) + + // We know we can make a syntax node for ternary expression. + if let ternary = pendingTernary { + elements.append(RawExprSyntax(ternary.withSecondChoice(elements.popLast(), arena: self.arena))) + pendingTernary = nil + } + + if forDirective && self.currentToken.isAtStartOfLine { + break SEQUENCE_LOOP + } + + var operatorLoopCondition = LoopProgressCondition() + OPERATOR_LOOP: while operatorLoopCondition.evaluate(currentToken) { + switch self.currentToken.tokenKind { + case .spacedBinaryOperator, .unspacedBinaryOperator: + // If this is an "&& #available()" expression (or related things that + // show up in a stmt-condition production), then don't eat it. + // + // These are not general expressions, and && is an infix operator, + // so the code is invalid. We get better recovery if we bail out from + // this, because then we can produce a fixit to rewrite the && into a , + // if we're in a stmt-condition. + if self.currentToken.tokenText == "&&" + && [RawTokenKind.poundAvailableKeyword, .poundUnavailableKeyword, .letKeyword, .varKeyword, .caseKeyword] + .contains(self.peek().tokenKind) { + break SEQUENCE_LOOP + } + + // Parse the operator. 
+ let operatorToken = self.parseOperatorExpression() + elements.append(RawExprSyntax(RawBinaryOperatorExprSyntax(operatorToken: operatorToken, arena: self.arena))) + break OPERATOR_LOOP + case .infixQuestionMark: + // Save the '?'. + let question = self.eat(.infixQuestionMark) + let firstChoice = self.parseSequenceExpression(flavor) + + // Make sure there's a matching ':' after the middle expr. + let (unexpectedBeforeColon, colon) = self.expect(.colon) + + let condition = elements.popLast()! + pendingTernary = PendingTernary( + conditionExpression: condition, + questionMark: question, + firstChoice: RawExprSyntax(firstChoice), + unexpectedBeforeColon: unexpectedBeforeColon, + colonMark: colon) + + // If the colon is missing there's not much more structure we can + // expect out of this expression sequence. Push the pending ternary + // node on and break out to end parsing here. + if colon.isMissing { + elements.append(RawExprSyntax(pendingTernary!.withSecondChoice(nil, arena: self.arena))) + break SEQUENCE_LOOP + } + break OPERATOR_LOOP + case .equal: + guard !inVarOrLet else { + break SEQUENCE_LOOP + } + + let eq = self.eat(.equal) + elements.append(RawExprSyntax(RawAssignmentExprSyntax( + assignToken: eq, + arena: self.arena))) + break OPERATOR_LOOP + case .isKeyword: + let isKeyword = self.eat(.isKeyword) + let type = self.parseType() + elements.append(RawExprSyntax(RawIsExprSyntax( + isTok: isKeyword, typeName: type, + arena: self.arena))) + // We already parsed the right operand as part of the 'is' production. + // Jump directly to parsing another operator. + continue + case .asKeyword: + let asKeyword = self.eat(.asKeyword) + let failable: RawTokenSyntax? 
+ if self.at(.postfixQuestionMark) || self.at(.exclamationMark) { + failable = self.consumeAnyToken() + } else { + failable = nil + } + let type = self.parseType() + elements.append(RawExprSyntax(RawAsExprSyntax( + asTok: asKeyword, + questionOrExclamationMark: failable, + typeName: type, + arena: self.arena))) + // We already parsed the right operand as part of the 'as' production. + // Jump directly to parsing another operator. + continue + case .identifier: + // 'async' followed by 'throws' or '->' implies that we have an arrow + // expression. + guard + self.currentToken.isContextualKeyword("async"), + (self.peek().tokenKind == .arrow || self.peek().tokenKind == .throwsKeyword) + else { + break SEQUENCE_LOOP + } + fallthrough + case .arrow, .throwsKeyword: + let asyncKeyword: RawTokenSyntax? + if self.currentToken.isContextualKeyword("async") { + asyncKeyword = self.consume(remapping: .contextualKeyword) + } else { + asyncKeyword = nil + } + + let throwsKeyword = self.consume(if: .throwsKeyword) + let (unexpectedBeforeArrow, arrow) = self.expect(.arrow) + + elements.append(RawExprSyntax(RawArrowExprSyntax( + asyncKeyword: asyncKeyword, + throwsToken: throwsKeyword, + unexpectedBeforeArrow, + arrowToken: arrow, + arena: self.arena))) + break OPERATOR_LOOP + + default: + // If the next token is not a binary operator, we're done. + break SEQUENCE_LOOP + } + } + } + + // If we saw no operators, don't build a sequence. + if elements.count == 1, let first = elements.first { + return first + } + return RawExprSyntax(RawSequenceExprSyntax( + elements: RawExprListSyntax(elements: elements, arena: self.arena), + arena: self.arena)) + } + + + /// Parse an expression sequence element. + /// + /// Grammar + /// ======= + /// + /// expression → try-operator? await-operator? prefix-expression infix-expressions? 
/// expression-list → expression | expression ',' expression-list
///
/// - Parameters:
///   - flavor: Passed through unchanged to the nested expression parsers.
///   - forDirective: Passed to `parseUnaryExpression` on the plain path. The
///     recursive calls made after `await` and `try` rely on the default
///     value instead of forwarding it.
/// - Returns: An `await` expression, a type expression, a `try` expression,
///   or whatever `parseUnaryExpression` produces.
@_spi(RawSyntax)
public mutating func parseSequenceExpressionElement(
  _ flavor: ExprFlavor,
  forDirective: Bool = false
) -> RawExprSyntax {
  // A leading contextual 'await' wraps the element that follows it.
  if self.currentToken.isContextualKeyword("await") {
    let awaitKeyword = self.consumeAnyToken()
    let operand = self.parseSequenceExpressionElement(flavor)
    let awaitExpr = RawAwaitExprSyntax(
      awaitKeyword: awaitKeyword,
      expression: operand,
      arena: self.arena)
    return RawExprSyntax(awaitExpr)
  }

  // '@' or 'inout' may introduce an attributed type. Only commit to parsing
  // a type once a lookahead confirms that one can be parsed from here.
  let mayStartAttributedType = self.at(.atSign) || self.at(.inoutKeyword)
  if mayStartAttributedType {
    var probe = self.lookahead()
    if probe.canParseType() {
      let parsedType = self.parseType()
      return RawExprSyntax(RawTypeExprSyntax(type: parsedType, arena: self.arena))
    }
  }

  // Without a 'try' this is just a unary expression.
  if !self.at(.tryKeyword) {
    return self.parseUnaryExpression(flavor, forDirective: forDirective)
  }

  let tryKeyword = self.eat(.tryKeyword)

  // 'try' may be directly followed by '!' or '?'.
  let hasMark = self.at(.exclamationMark) || self.at(.postfixQuestionMark)
  let mark: RawTokenSyntax? = hasMark ? self.consumeAnyToken() : nil

  let operand = self.parseSequenceExpressionElement(flavor)
  let tryExpr = RawTryExprSyntax(
    tryKeyword: tryKeyword,
    questionOrExclamationMark: mark,
    expression: operand,
    arena: self.arena)
  return RawExprSyntax(tryExpr)
}

/// Parse an optional prefix operator followed by an expression.
///
/// Grammar
/// =======
///
/// prefix-expression → prefix-operator? postfix-expression
/// prefix-expression → in-out-expression
///
/// in-out-expression → '&' identifier
///
/// Dispatches on the kind of the current token: a prefix '&' yields an
/// in-out expression, a '\' yields a key path, a prefix operator yields a
/// prefix-operator expression, and anything else is handed to
/// `parsePostfixExpression`.
@_spi(RawSyntax)
public mutating func parseUnaryExpression(
  _ flavor: ExprFlavor,
  forDirective: Bool = false
) -> RawExprSyntax {
  // First check to see if we have the start of a regex literal `/.../`.
  //    tryLexRegexLiteral(/*forUnappliedOperator*/ false)
  switch self.currentToken.tokenKind {
  case .prefixAmpersand:
    let ampersand = self.eat(.prefixAmpersand)
    // Note: `forDirective` is not forwarded to the operand here.
    let operand = self.parseUnaryExpression(flavor)
    let inOut = RawInOutExprSyntax(
      ampersand: ampersand,
      expression: RawExprSyntax(operand),
      arena: self.arena)
    return RawExprSyntax(inOut)

  case .backslash:
    let keyPath = self.parseKeyPathExpression(forDirective: forDirective)
    return RawExprSyntax(keyPath)

  case .prefixOperator:
    let operatorToken = self.parseOperatorExpression()
    let operand = self.parseUnaryExpression(flavor, forDirective: forDirective)
    let prefixed = RawPrefixOperatorExprSyntax(
      operatorToken: operatorToken,
      postfixExpression: operand,
      arena: self.arena)
    return RawExprSyntax(prefixed)

  default:
    // Not an operator: this is a plain postfix expression.
    return self.parsePostfixExpression(flavor, forDirective: forDirective)
  }
}

/// Consume the current token as an operator.
///
/// The current token must satisfy `isAnyOperator`; this is only checked by
/// an `assert`.
@_spi(RawSyntax)
public mutating func parseOperatorExpression() -> RawTokenSyntax {
  assert(self.currentToken.isAnyOperator)
  return self.consumeAnyToken()
}

/// Parse a postfix expression applied to another expression.
///
/// Grammar
/// =======
///
/// postfix-expression → primary-expression
/// postfix-expression → postfix-expression postfix-operator
/// postfix-expression → function-call-expression
/// postfix-expression → initializer-expression
/// postfix-expression → explicit-member-expression
/// postfix-expression → postfix-self-expression
/// postfix-expression → subscript-expression
/// postfix-expression → forced-value-expression
/// postfix-expression → optional-chaining-expression
///
/// A primary expression is parsed first. Unless it is a missing expression,
/// any suffixes that follow are then folded onto it by
/// `parsePostfixExpressionSuffix`.
@_spi(RawSyntax)
public mutating func parsePostfixExpression(
  _ flavor: ExprFlavor,
  forDirective: Bool
) -> RawExprSyntax {
  let primary = self.parsePrimaryExpression(flavor)
  // A missing primary expression has nothing to attach suffixes to.
  if primary.is(RawMissingExprSyntax.self) {
    return primary
  }
  return self.parsePostfixExpressionSuffix(primary, flavor, forDirective: forDirective)
}

/// Parse the member reference that follows a '.'.
///
/// The current token must be a period (checked by an `assert`). The result
/// is a member access on `start` — for a tuple index (`x.42`), `x.self`, or
/// a declaration name — wrapped in a specialize expression when a generic
/// argument list follows the name.
///
/// - Parameter start: The base expression, or `nil` when there is none.
@_spi(RawSyntax)
public mutating func parseDottedExpressionSuffix(_ start: RawExprSyntax?) -> RawExprSyntax {
  assert(self.at(.period) || self.at(.prefixPeriod))

  // A key path is special, because it allows .[, unlike anywhere else. The
  // period itself should be left in the token stream. (.? and .! end up
  // being operators, and so aren't handled here.)
  //    if (periodHasKeyPathBehavior && peekToken().is(tok::l_square)) {
  //      break
  //    }

  let dot = self.consume(remapping: .period)

  // "x.42" (a tuple index) and "x.self" both name the member with a single
  // token and carry no argument names.
  let simpleName: RawTokenSyntax?
  if self.currentToken.tokenKind == .integerLiteral {
    simpleName = self.consumeAnyToken()
  } else if self.at(.selfKeyword) {
    simpleName = self.eat(.selfKeyword)
  } else {
    simpleName = nil
  }
  if let simpleName = simpleName {
    return RawExprSyntax(RawMemberAccessExprSyntax(
      base: start, dot: dot, name: simpleName, declNameArguments: nil,
      arena: self.arena))
  }

  // Otherwise the member is a (possibly compound, possibly keyword) name.
  let (declName, nameArguments) = self.parseDeclNameRef([ .keywords, .compoundNames ])
  let access = RawMemberAccessExprSyntax(
    base: start, dot: dot, name: declName, declNameArguments: nameArguments,
    arena: self.arena)

  // A generic argument list after the name turns this into a specialization.
  if self.lookahead().canParseAsGenericArgumentList() {
    let genericArguments = self.parseGenericArguments()
    return RawExprSyntax(RawSpecializeExprSyntax(
      expression: RawExprSyntax(access),
      genericArgumentClause: genericArguments,
      arena: self.arena))
  }
  return RawExprSyntax(access)
}

/// Parse a `#if` directive whose clauses are postfix-expression suffixes.
///
/// The current token must be `#if` (checked by an `assert`). Each clause has
/// to begin with a '.' member suffix or a nested `#if`; any other clause
/// yields no element.
///
/// - Parameters:
///   - start: The expression the directive is attached to, or `nil` when
///     this directive is itself nested inside another clause.
///   - flavor: Forwarded to the suffix parsers.
///   - forDirective: Forwarded to the suffix parsers.
@_spi(RawSyntax)
public mutating func parseIfConfigExpressionSuffix(
  _ start: RawExprSyntax?,
  _ flavor: ExprFlavor,
  forDirective: Bool
) -> RawExprSyntax {
  assert(self.at(.poundIfKeyword))

  let config = self.parsePoundIfDirective { parser -> RawExprSyntax? in
    let clauseHead: RawExprSyntax
    if parser.at(.period) || parser.at(.prefixPeriod) {
      clauseHead = parser.parseDottedExpressionSuffix(nil)
    } else if parser.at(.poundIfKeyword) {
      clauseHead = parser.parseIfConfigExpressionSuffix(nil, flavor, forDirective: forDirective)
    } else {
      // TODO: diagnose and skip.
      return nil
    }

    // TODO: diagnose and skip the remaining token in the current clause.
    return parser.parsePostfixExpressionSuffix(clauseHead, flavor, forDirective: forDirective)
  }
  syntax: { parser, elements in
    // A clause holds exactly one expression, or none at all.
    if elements.count == 1 {
      return RawSyntax(elements.first!)
    }
    assert(elements.isEmpty)
    return RawSyntax(RawMissingExprSyntax(arena: parser.arena))
  }

  let postfixIfConfig = RawPostfixIfConfigExprSyntax(
    base: start,
    config: config,
    arena: self.arena)
  return RawExprSyntax(postfixIfConfig)
}

/// Parse the suffix of a postfix expression.
///
/// Grammar
/// =======
///
/// postfix-expression → postfix-expression postfix-operator
/// postfix-expression → function-call-expression
/// postfix-expression → initializer-expression
/// postfix-expression → explicit-member-expression
/// postfix-expression → postfix-self-expression
/// postfix-expression → subscript-expression
/// postfix-expression → forced-value-expression
/// postfix-expression → optional-chaining-expression
///
/// - Parameters:
///   - start: The expression the suffixes are applied to.
///   - flavor: Controls whether trailing closures may follow a call or
///     subscript argument list.
///   - forDirective: When `true`, a token at the start of a line ends the
///     expression.
/// - Returns: `start` with every recognized suffix folded onto it.
@_spi(RawSyntax)
public mutating func parsePostfixExpressionSuffix(
  _ start: RawExprSyntax,
  _ flavor: ExprFlavor,
  forDirective: Bool
) -> RawExprSyntax {
  // `result` accumulates the expression as each suffix is folded onto it.
  var result = start
  var progress = LoopProgressCondition()
  while progress.evaluate(currentToken) {
    if forDirective && self.currentToken.isAtStartOfLine {
      return result
    }

    // `.foo` member suffix.
    if self.at(.period) || self.at(.prefixPeriod) {
      result = self.parseDottedExpressionSuffix(result)
      continue
    }

    // `(args)` call suffix; the '(' must be on the same line.
    if self.at(.leftParen) && !self.currentToken.isAtStartOfLine {
      let openParen = self.eat(.leftParen)
      let arguments = self.parseArgumentListElements()
      let (unexpectedBeforeCloseParen, closeParen) = self.expect(.rightParen)

      // Pick up trailing closures when the flavor permits them.
      var closure: RawClosureExprSyntax? = nil
      var extraClosures: RawMultipleTrailingClosureElementListSyntax? = nil
      if case .trailingClosure = flavor, self.at(.leftBrace), self.lookahead().isValidTrailingClosure(flavor) {
        (closure, extraClosures) = self.parseTrailingClosures(flavor)
      }

      let call = RawFunctionCallExprSyntax(
        calledExpression: result,
        leftParen: openParen,
        argumentList: RawTupleExprElementListSyntax(elements: arguments, arena: self.arena),
        unexpectedBeforeCloseParen,
        rightParen: closeParen,
        trailingClosure: closure,
        additionalTrailingClosures: extraClosures,
        arena: self.arena)
      result = RawExprSyntax(call)
      continue
    }

    // `[args]` subscript suffix; the '[' must be on the same line.
    if self.at(.leftSquareBracket) && !self.currentToken.isAtStartOfLine {
      let openSquare = self.eat(.leftSquareBracket)
      let arguments = self.parseArgumentListElements()
      let (unexpectedBeforeCloseSquare, closeSquare) = self.expect(.rightSquareBracket)

      // Pick up trailing closures when the flavor permits them.
      var closure: RawClosureExprSyntax? = nil
      var extraClosures: RawMultipleTrailingClosureElementListSyntax? = nil
      if case .trailingClosure = flavor, self.at(.leftBrace), self.lookahead().isValidTrailingClosure(flavor) {
        (closure, extraClosures) = self.parseTrailingClosures(flavor)
      }

      let subscriptExpr = RawSubscriptExprSyntax(
        calledExpression: result,
        leftBracket: openSquare,
        argumentList: RawTupleExprElementListSyntax(elements: arguments, arena: self.arena),
        unexpectedBeforeCloseSquare,
        rightBracket: closeSquare,
        trailingClosure: closure,
        additionalTrailingClosures: extraClosures,
        arena: self.arena)
      result = RawExprSyntax(subscriptExpr)
      continue
    }

    // A bare trailing closure forms a call with no parenthesized arguments.
    if self.at(.leftBrace) && self.lookahead().isValidTrailingClosure(flavor) {
      // FIXME: if Result has a trailing closure, break out.

      // The call node still needs an (empty) argument list.
      let noArguments = RawTupleExprElementListSyntax(elements: [], arena: self.arena)
      let (firstClosure, remainingClosures) = self.parseTrailingClosures(flavor)

      let call = RawFunctionCallExprSyntax(
        calledExpression: result,
        leftParen: nil,
        argumentList: noArguments,
        rightParen: nil,
        trailingClosure: firstClosure,
        additionalTrailingClosures: remainingClosures,
        arena: self.arena)
      result = RawExprSyntax(call)

      // We only allow a single trailing closure on a call. This could be
      // generalized in the future, but needs further design.
      if self.at(.leftBrace) {
        break
      }
      continue
    }

    // `?` optional-chaining suffix.
    if let questionMark = self.consume(if: .postfixQuestionMark) {
      let chained = RawOptionalChainingExprSyntax(
        expression: result,
        questionMark: questionMark,
        arena: self.arena)
      result = RawExprSyntax(chained)
      continue
    }

    // `!` forced-value suffix.
    if let exclamationMark = self.consume(if: .exclamationMark) {
      let forced = RawForcedValueExprSyntax(
        expression: result,
        exclamationMark: exclamationMark,
        arena: self.arena)
      result = RawExprSyntax(forced)
      continue
    }

    // Postfix operator suffix.
    if self.currentToken.tokenKind == .postfixOperator {
      // KeyPaths are more restricted in what can go after a ., and so we treat
      // them specially.
      //        if (periodHasKeyPathBehavior && startsWithSymbol(Tok, '.'))
      //          break

      let operatorToken = self.parseOperatorExpression()
      let postfixed = RawPostfixUnaryExprSyntax(
        expression: result,
        operatorToken: operatorToken,
        arena: self.arena)
      result = RawExprSyntax(postfixed)
      continue
    }

    if self.at(.poundIfKeyword) {
      // Only treat the directive as a "postfix ifconfig expression" when its
      // first body begins with a postfix suffix such as '.member'. Scan ahead
      // on a lookahead so that nothing is consumed if it does not. Several
      // '#if' lines may have to be skipped because directives can nest, e.g.
      //   baseExpr
      //   #if CONDITION_1
      //     #if CONDITION_2
      //       .someMember
      var probe = self.lookahead()
      repeat {
        probe.eat(.poundIfKeyword)
        // Skip the remainder of the '#if' condition line.
        while !probe.at(.eof) && !probe.currentToken.isAtStartOfLine {
          probe.skipSingle()
        }
      } while probe.at(.poundIfKeyword)

      if !probe.isAtStartOfPostfixExprSuffix() {
        break
      }

      result = self.parseIfConfigExpressionSuffix(
        result, flavor, forDirective: forDirective)
      continue
    }

    // Any other token ends the expression.
    break
  }
  return result
}
}

extension Parser {
/// Parse a keypath expression.
///
/// Grammar
/// =======
///
/// key-path-expression → '\' type? '.' key-path-components
///
/// key-path-components → key-path-component | key-path-component '.' key-path-components
/// key-path-component → identifier key-path-postfixes? | key-path-postfixes
///
/// key-path-postfixes → key-path-postfix key-path-postfixes?
/// key-path-postfix → '?' | '!' | 'self' | '[' function-call-argument-list ']'
///
/// The current token must be the leading '\'. When no path can be parsed
/// after the optional root, the path is a missing expression.
@_spi(RawSyntax)
public mutating func parseKeyPathExpression(forDirective: Bool) -> RawKeyPathExprSyntax {
  let backslash = self.eat(.backslash)

  // For uniformity, \.foo is parsed as if it were MAGIC.foo, so the '.' has
  // to be present, but parsing the '?' in \.? as '.?' doesn't make sense.
  // This is complicated by '.?.' being lexed as a single operator token.
  // Since a key path allows '.!', '.?' and '.[', only the leading '.' is
  // consumed when the token is a longer operator starting with '.', or when
  // the following token is '['.

  // A root is present only when the key path does not start with '.'.
  let root: RawExprSyntax? = self.currentToken.starts(with: ".")
    ? nil
    : self.parsePostfixExpression(.basic, forDirective: forDirective)

  let path: RawExprSyntax
  if (self.currentToken.isAnyOperator && self.currentToken.tokenText.count != 1)
      || self.peek().tokenKind == .leftSquareBracket {
    let leadingDot = self.consumePrefix(".", as: .period)
    let keyPathBase = RawExprSyntax(RawKeyPathBaseExprSyntax(period: leadingDot, arena: self.arena))
    path = self.parsePostfixExpressionSuffix(keyPathBase, .basic, forDirective: forDirective)
  } else if self.at(.period) || self.at(.prefixPeriod) {
    // Inside a keypath's path, the period always behaves normally: the key path
    // behavior is only the separation between type and path.
    let firstComponent = self.parseDottedExpressionSuffix(nil)
    path = self.parsePostfixExpressionSuffix(firstComponent, .basic, forDirective: forDirective)
  } else {
    path = RawExprSyntax(RawMissingExprSyntax(arena: self.arena))
  }

  return RawKeyPathExprSyntax(
    backslash: backslash,
    rootExpr: root,
    expression: path,
    arena: self.arena)
}
}

extension Parser {
/// Parse a "primary expression" - these are the most basic leaves of the
/// Swift expression grammar.
///
/// Grammar
/// =======
///
/// primary-expression → identifier generic-argument-clause?
/// primary-expression → literal-expression
/// primary-expression → self-expression
/// primary-expression → superclass-expression
/// primary-expression → closure-expression
/// primary-expression → parenthesized-expression
/// primary-expression → tuple-expression
/// primary-expression → implicit-member-expression
/// primary-expression → wildcard-expression
/// primary-expression → key-path-expression
/// primary-expression → selector-expression
/// primary-expression → key-path-string-expression
///
/// - Parameter flavor: Currently not inspected by this method.
/// - Returns: The parsed expression, or a `RawMissingExprSyntax` (with no
///   token consumed) when the current token cannot start a primary
///   expression.
@_spi(RawSyntax)
public mutating func parsePrimaryExpression(_ flavor: ExprFlavor) -> RawExprSyntax {
  switch self.currentToken.tokenKind {
  case .integerLiteral:
    let digits = self.eat(.integerLiteral)
    return RawExprSyntax(RawIntegerLiteralExprSyntax(digits: digits, arena: self.arena))
  case .floatingLiteral:
    let digits = self.eat(.floatingLiteral)
    return RawExprSyntax(RawFloatLiteralExprSyntax(floatingDigits: digits, arena: self.arena))
  case .stringLiteral:
    return RawExprSyntax(self.parseStringLiteral())
  case .regexLiteral:
    return RawExprSyntax(self.parseRegexLiteral())
  case .nilKeyword:
    let nilKeyword = self.eat(.nilKeyword)
    return RawExprSyntax(RawNilLiteralExprSyntax(nilKeyword: nilKeyword, arena: self.arena))
  case .trueKeyword, .falseKeyword:
    let tok = self.eat(self.currentToken.tokenKind)
    return RawExprSyntax(RawBooleanLiteralExprSyntax(booleanLiteral: tok, arena: self.arena))

  // Each magic literal has a legacy `__name__` spelling that produces the
  // same node as its `#name` spelling.
  case .__file__Keyword, .poundFileKeyword:
    let tok = self.eat(self.currentToken.tokenKind)
    return RawExprSyntax(RawPoundFileExprSyntax(poundFile: tok, arena: self.arena))
  case .poundFilePathKeyword:
    let tok = self.eat(.poundFilePathKeyword)
    return RawExprSyntax(RawPoundFilePathExprSyntax(poundFilePath: tok, arena: self.arena))
  case .poundFunctionKeyword, .__function__Keyword:
    let tok = self.eat(self.currentToken.tokenKind)
    return RawExprSyntax(RawPoundFunctionExprSyntax(poundFunction: tok, arena: self.arena))
  case .poundLineKeyword, .__line__Keyword:
    let tok = self.eat(self.currentToken.tokenKind)
    return RawExprSyntax(RawPoundLineExprSyntax(poundLine: tok, arena: self.arena))
  case .poundColumnKeyword, .__column__Keyword:
    let tok = self.eat(self.currentToken.tokenKind)
    return RawExprSyntax(RawPoundColumnExprSyntax(poundColumn: tok, arena: self.arena))
  case .poundDsohandleKeyword, .__dso_handle__Keyword:
    let tok = self.eat(self.currentToken.tokenKind)
    return RawExprSyntax(RawPoundDsohandleExprSyntax(poundDsohandle: tok, arena: self.arena))

  case .identifier, .selfKeyword:
    // 'any' followed by another identifier *on the same line* is an
    // existential type. An identifier on the next line starts a new
    // statement, so in that case 'any' is an ordinary identifier.
    if self.currentToken.isContextualKeyword("any"),
       self.peek().tokenKind == .identifier,
       !self.peek().isAtStartOfLine
    {
      let ty = self.parseType()
      return RawExprSyntax(RawTypeExprSyntax(type: ty, arena: self.arena))
    }

    return RawExprSyntax(self.parseIdentifierExpression())
  case .capitalSelfKeyword: // Self
    return RawExprSyntax(self.parseIdentifierExpression())
  case .anyKeyword: // Any
    let anyType = RawTypeSyntax(self.parseAnyType())
    return RawExprSyntax(RawTypeExprSyntax(type: anyType, arena: self.arena))
  case .dollarIdentifier:
    return RawExprSyntax(self.parseAnonymousClosureArgument())
  case .wildcardKeyword: // _
    let wild = self.eat(.wildcardKeyword)
    return RawExprSyntax(RawDiscardAssignmentExprSyntax(
      wildcard: wild, arena: self.arena))
  case .poundSelectorKeyword:
    return RawExprSyntax(self.parseObjectiveCSelectorLiteral())
  case .poundKeyPathKeyword:
    return RawExprSyntax(self.parseObjectiveCKeyPathExpression())

  case .poundColorLiteralKeyword,
       .poundImageLiteralKeyword,
       .poundFileLiteralKeyword:
    return RawExprSyntax(self.parseObjectLiteralExpression())

  case .leftBrace: // expr-closure
    return RawExprSyntax(self.parseClosureExpression())
  case .period,       // =.foo
       .prefixPeriod: // .foo
    // Implicit member expression: a member access without a base.
    let dot = self.consume(remapping: .prefixPeriod)
    let (name, args) = self.parseDeclNameRef([ .keywords, .compoundNames ])
    return RawExprSyntax(RawMemberAccessExprSyntax(
      base: nil, dot: dot, name: name, declNameArguments: args,
      arena: self.arena))
  case .superKeyword: // 'super'
    return RawExprSyntax(self.parseSuperExpression())

  case .leftParen:
    // Build a tuple expression syntax node.
    // AST differentiates paren and tuple expression where the former allows
    // only one element without label. However, libSyntax tree doesn't have this
    // differentiation. A tuple expression node in libSyntax can have a single
    // element without label.
    return RawExprSyntax(self.parseTupleExpression())

  case .leftSquareBracket:
    return self.parseCollectionLiteral()

  default:
    // Not the start of a primary expression; consume nothing.
    return RawExprSyntax(RawMissingExprSyntax(arena: self.arena))
  }
}
}

extension Parser {
/// Parse an identifier as an expression.
///
/// Grammar
/// =======
///
/// primary-expression → identifier
///
/// The name may be a compound name. When a generic argument list follows,
/// the identifier is wrapped in a specialize expression.
@_spi(RawSyntax)
public mutating func parseIdentifierExpression() -> RawExprSyntax {
  let (name, args) = self.parseDeclNameRef(.compoundNames)
  let identifier = RawIdentifierExprSyntax(
    identifier: name, declNameArguments: args,
    arena: self.arena)

  guard self.lookahead().canParseAsGenericArgumentList() else {
    return RawExprSyntax(identifier)
  }

  let generics = self.parseGenericArguments()
  return RawExprSyntax(RawSpecializeExprSyntax(
    expression: RawExprSyntax(identifier), genericArgumentClause: generics,
    arena: self.arena))
}
}

extension Parser {
/// Parse an object (playground) literal expression.
///
/// Grammar
/// =======
///
/// playground-literal → '#colorLiteral' '(' red ':' expression , green ':' expression , blue ':' expression , alpha ':' expression ')'
/// playground-literal → '#fileLiteral' '(' resourceName ':' expression ')'
/// playground-literal → '#imageLiteral' '(' resourceName ':' expression ')'
///
/// The current token is consumed as the literal keyword without being
/// checked. The arguments are parsed as a generic argument list; their
/// labels are not validated here.
@_spi(RawSyntax)
public mutating func parseObjectLiteralExpression() -> RawObjectLiteralExprSyntax {
  let poundKeyword = self.consumeAnyToken()
  let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen)
  let arguments = self.parseArgumentListElements()
  let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen)
  return RawObjectLiteralExprSyntax(
    identifier: poundKeyword,
    unexpectedBeforeLeftParen,
    leftParen: leftParen,
    arguments: RawTupleExprElementListSyntax(elements: arguments, arena: self.arena),
    unexpectedBeforeRightParen,
    rightParen: rightParen,
    arena: self.arena)
}
}

extension Parser {
/// Parse a string literal expression.
///
/// Grammar
/// =======
///
/// string-literal → static-string-literal | interpolated-string-literal
///
/// string-literal-opening-delimiter → extended-string-literal-delimiter? '"'
/// string-literal-closing-delimiter → '"' extended-string-literal-delimiter?
///
/// static-string-literal → string-literal-opening-delimiter quoted-text? string-literal-closing-delimiter
/// static-string-literal → multiline-string-literal-opening-delimiter multiline-quoted-text? multiline-string-literal-closing-delimiter
///
/// multiline-string-literal-opening-delimiter → extended-string-literal-delimiter? '"""'
/// multiline-string-literal-closing-delimiter → '"""' extended-string-literal-delimiter?
/// extended-string-literal-delimiter → '#' extended-string-literal-delimiter?
///
/// quoted-text → quoted-text-item quoted-text?
/// quoted-text-item → escaped-character
/// quoted-text-item → `Any Unicode scalar value except ", \, U+000A, or U+000D`
///
/// multiline-quoted-text → multiline-quoted-text-item multiline-quoted-text?
/// multiline-quoted-text-item → escaped-character
/// multiline-quoted-text-item → `Any Unicode scalar value except \`
/// multiline-quoted-text-item → escaped-newline
///
/// interpolated-string-literal → string-literal-opening-delimiter interpolated-text? string-literal-closing-delimiter
/// interpolated-string-literal → multiline-string-literal-opening-delimiter multiline-interpolated-text? multiline-string-literal-closing-delimiter
/// interpolated-text → interpolated-text-item interpolated-text?
/// interpolated-text-item → '\(' expression ')' | quoted-text-item
///
/// multiline-interpolated-text → multiline-interpolated-text-item multiline-interpolated-text?
/// multiline-interpolated-text-item → '\(' expression ')' | multiline-quoted-text-item
/// escape-sequence → \ extended-string-literal-delimiter
/// escaped-character → escape-sequence '0' | escape-sequence '\' | escape-sequence 't' | escape-sequence 'n' | escape-sequence 'r' | escape-sequence '"' | escape-sequence '''
///
/// escaped-character → escape-sequence 'u' '{' unicode-scalar-digits '}'
/// unicode-scalar-digits → Between one and eight hexadecimal digits
///
/// escaped-newline → escape-sequence inline-spaces? line-break
///
/// The current token's text is split into an optional opening delimiter,
/// the opening quote, the segments, the closing quote and an optional
/// closing delimiter. The token itself is consumed only after its text has
/// been split.
@_spi(RawSyntax)
public mutating func parseStringLiteral() -> RawStringLiteralExprSyntax {
  // `text` is the not-yet-processed remainder of the token's text.
  var text = self.currentToken.wholeText[self.currentToken.textRange]

  // Parse the opening raw string delimiter, if present.
  let openDelimiter = self.parseStringLiteralDelimiter(at: .leading, text: text)
  if let openDelimiter = openDelimiter {
    text = text.dropFirst(openDelimiter.tokenText.count)
  }

  // Parse the opening quote.
  let openQuote = self.parseStringLiteralQuote(
    at: openDelimiter != nil ? .leadingRaw : .leading, text: text)
  text = text.dropFirst(openQuote.tokenText.count)

  // Parse the segments.
  let (segments, closeStart) = self.parseStringLiteralSegments(
    text, openQuote, openDelimiter?.tokenText ?? "")
  text = text[closeStart...]

  // Parse the closing quote. `text` now begins at `closeStart`.
  let closeQuote = self.parseStringLiteralQuote(
    at: openDelimiter != nil ? .trailingRaw : .trailing,
    text: text)
  // NOTE(review): this step drops `byteLength` bytes whereas the other steps
  // drop `tokenText.count` — confirm the difference is intentional.
  text = text.dropFirst(closeQuote.byteLength)

  // Parse the closing raw string delimiter, if present.
  let closeDelimiter = self.parseStringLiteralDelimiter(at: .trailing, text: text)
  assert((openDelimiter == nil) == (closeDelimiter == nil),
         "existence of open/close delimiter should match")
  if let closeDelimiter = closeDelimiter {
    text = text.dropFirst(closeDelimiter.tokenText.count)
  }

  assert(text.isEmpty,
         "string literal parsing should consume all the literal text")

  // Discard the raw string literal token and create the structured string
  // literal expression.
  // FIXME: We should not instantiate `RawTokenSyntax` and discard it here.
  _ = self.consumeAnyToken()

  // Construct the literal expression.
  return RawStringLiteralExprSyntax(
    openDelimiter: openDelimiter,
    openQuote: openQuote,
    segments: segments,
    closeQuote: closeQuote,
    closeDelimiter: closeDelimiter,
    arena: self.arena)
}

/// Enumerates the positions that a quote or delimiter can appear in within
/// a string literal.
enum QuotePosition {
  /// The first piece of the literal: the opening quote of a plain string,
  /// or the opening delimiter of a raw string.
  ///
  /// ```swift
  /// "Hello World"
  /// ^
  /// ##"Hello World"##
  /// ^
  /// ```
  case leading

  /// The last piece of the literal: the closing quote of a plain string,
  /// or the closing delimiter of a raw string.
  ///
  /// ```swift
  /// "Hello World"
  ///             ^
  /// ##"Hello World"##
  ///                ^
  /// ```
  case trailing

  /// The opening quote of a raw string literal, after its delimiter.
  ///
  /// ```swift
  /// ##"Hello World"##
  ///   ^
  /// ```
  case leadingRaw

  /// The closing quote of a raw string literal, before its delimiter.
  ///
  /// ```swift
  /// ##"Hello World"##
  ///               ^
  /// ```
  case trailingRaw
}

/// Create string literal delimiter/quote token syntax for `position`.
///
/// `text` will be the token text of the token. The `text.base` must be the whole
/// text of the original `.stringLiteral` token including trivia.
+ private func makeStringLiteralQuoteToken( + _ kind: RawTokenKind, + text: Slice, + at position: QuotePosition + ) -> RawTokenSyntax { + let wholeText: SyntaxText + let textRange: Range + switch position { + case .leadingRaw, .trailingRaw: + wholeText = SyntaxText(rebasing: text) + textRange = wholeText.startIndex ..< wholeText.endIndex + case .leading: + wholeText = SyntaxText(rebasing: text.base[.. + ) -> RawTokenSyntax? { + assert(position != .leadingRaw && position != .trailingRaw) + var index = text.startIndex + while index < text.endIndex && text[index] == UInt8(ascii: "#") { + index = text.index(after: index) + } + guard index > text.startIndex else { + return nil + } + return makeStringLiteralQuoteToken( + .rawStringDelimiter, text: text[.. + ) -> RawTokenSyntax { + // Single quote. We only support single line literal. + if let first = text.first, first == UInt8(ascii: "'") { + let index = text.index(after: text.startIndex) + return makeStringLiteralQuoteToken( + .singleQuote, text: text[.., + _ closer: RawTokenSyntax, + _ delimiter: SyntaxText + ) -> (RawStringLiteralSegmentsSyntax, SyntaxText.Index) { + let allowsMultiline = closer.tokenKind == .multilineStringQuote + + var segments = [RawSyntax]() + var segment = text + var stringLiteralSegmentStart = segment.startIndex + while let slashIndex = segment.firstIndex(of: UInt8(ascii: "\\")), stringLiteralSegmentStart < segment.endIndex { + let delimiterStart = text.index(after: slashIndex) + guard (delimiterStart < segment.endIndex && + SyntaxText(rebasing: text[delimiterStart...]).hasPrefix(delimiter)) else { + // If `\` is not followed by the custom delimiter, it's not a segment delimiter. + // Restart after the `\`. + segment = text[text.index(after: delimiterStart)...] 
+ continue + } + + let contentStart = text.index(delimiterStart, offsetBy: delimiter.count) + guard (contentStart < segment.endIndex && + text[contentStart] == UInt8(ascii: "(")) else { + // If `\` (or `\#`) is not followed by `(`, it's not a segment delimiter. + // Restart after the `(`. + segment = text[text.index(after: contentStart)...] + continue + } + + // Collect ".stringSegment" before `\`. + let segmentToken = RawTokenSyntax( + kind: .stringSegment, + text: SyntaxText(rebasing: text[stringLiteralSegmentStart.. RawRegexLiteralExprSyntax { + let literal = self.eat(.regexLiteral) + return RawRegexLiteralExprSyntax(regex: literal, arena: self.arena) + } +} + +extension Parser { + /// Parse an Objective-C #keypath literal. + /// + /// Grammar + /// ======= + /// + /// key-path-string-expression → '#keyPath' '(' expression ')' + @_spi(RawSyntax) + public mutating func parseObjectiveCKeyPathExpression() -> RawObjcKeyPathExprSyntax { + let keyword = self.eat(.poundKeyPathKeyword) + // Parse the leading '('. + let (unexpectedBeforeLParen, lparen) = self.expect(.leftParen) + + // Parse the sequence of unqualified-names. + var elements = [RawObjcNamePieceSyntax]() + do { + var flags: DeclNameOptions = .compoundNames + var keepGoing: RawTokenSyntax? = nil + repeat { + // Parse the next name. + let (name, _) = self.parseDeclNameRef(flags) + + // After the first component, we can start parsing keywords. + flags.formUnion(.keywords) + + // Parse the next period to continue the path. + keepGoing = self.consume(if: .period) + elements.append(RawObjcNamePieceSyntax( + name: name, dot: keepGoing, arena: self.arena)) + } while keepGoing != nil + } + + // Parse the closing ')'. 
+ let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + return RawObjcKeyPathExprSyntax( + keyPath: keyword, + unexpectedBeforeLParen, + leftParen: lparen, + name: RawObjcNameSyntax(elements: elements, arena: self.arena), + unexpectedBeforeRParen, + rightParen: rparen, arena: self.arena) + } +} + +extension Parser { + /// Parse a 'super' reference to the superclass instance of a class. + /// + /// Grammar + /// ======= + /// + /// primary-expression → 'super' + @_spi(RawSyntax) + public mutating func parseSuperExpression() -> RawSuperRefExprSyntax { + // Parse the 'super' reference. + let superKeyword = self.eat(.superKeyword) + return RawSuperRefExprSyntax(superKeyword: superKeyword, arena: self.arena) + } +} + +extension Parser { + /// Parse a tuple expression. + /// + /// Grammar + /// ======= + /// + /// tuple-expression → '(' ')' | '(' tuple-element ',' tuple-element-list ')' + /// tuple-element-list → tuple-element | tuple-element ',' tuple-element-list + @_spi(RawSyntax) + public mutating func parseTupleExpression() -> RawTupleExprSyntax { + let lparen = self.eat(.leftParen) + let elements = self.parseArgumentListElements() + let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + return RawTupleExprSyntax( + leftParen: lparen, + elementList: RawTupleExprElementListSyntax(elements: elements, arena: self.arena), + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena) + } +} + +extension Parser { + enum CollectionKind { + case dictionary(key: RawExprSyntax, unexpectedBeforeColon: RawUnexpectedNodesSyntax?, colon: RawTokenSyntax, value: RawExprSyntax) + case array(RawExprSyntax) + } + + /// Parse an element of an array or dictionary literal. + /// + /// Grammar + /// ======= + /// + /// array-literal-item → expression + /// + /// dictionary-literal-item → expression ':' expression + mutating func parseCollectionElement(_ existing: CollectionKind?) 
-> CollectionKind { + let key = self.parseExpression() + switch existing { + case .array(_): + return .array(key) + case nil: + guard self.at(.colon) else { + return .array(key) + } + fallthrough + case .dictionary: + let (unexpectedBeforeColon, colon) = self.expect(.colon) + let value = self.parseExpression() + return .dictionary(key: key, unexpectedBeforeColon: unexpectedBeforeColon, colon: colon, value: value) + } + } + + /// Parse an array or dictionary literal. + /// + /// Grammar + /// ======= + /// + /// array-literal → '[' array-literal-items? ']' + /// array-literal-items → array-literal-item ','? | array-literal-item ',' array-literal-items + /// + /// dictionary-literal → '[' dictionary-literal-items ']' | '[' ':' ']' + /// dictionary-literal-items → dictionary-literal-item ','? | dictionary-literal-item ',' dictionary-literal-items + @_spi(RawSyntax) + public mutating func parseCollectionLiteral() -> RawExprSyntax { + let lsquare = self.eat(.leftSquareBracket) + + if self.at(.rightSquareBracket) { + let rsquare = self.eat(.rightSquareBracket) + return RawExprSyntax(RawArrayExprSyntax( + leftSquare: lsquare, + elements: RawArrayElementListSyntax(elements: [], arena: self.arena), + rightSquare: rsquare, + arena: self.arena)) + } + + if self.at(.colon) && self.peek().tokenKind == .rightSquareBracket { + let colon = self.eat(.colon) + let rsquare = self.eat(.rightSquareBracket) + // FIXME: We probably want a separate node for the empty case. + return RawExprSyntax(RawDictionaryExprSyntax( + leftSquare: lsquare, + content: RawSyntax(colon), + rightSquare: rsquare, arena: self.arena)) + } + + var elementKind: CollectionKind? = nil + var elements = [RawSyntax]() + do { + var collectionLoopCondition = LoopProgressCondition() + COLLECTION_LOOP: while collectionLoopCondition.evaluate(currentToken) { + elementKind = self.parseCollectionElement(elementKind) + + // Parse the ',' if exists. + let comma = self.consume(if: .comma) + + switch elementKind! 
{ + case .array(let el): + elements.append(RawSyntax(RawArrayElementSyntax( + expression: el, trailingComma: comma, arena: self.arena))) + if el.is(RawMissingExprSyntax.self) { + break COLLECTION_LOOP + } + case .dictionary(let key, let unexpectedBeforeColon, let colon, let value): + elements.append(RawSyntax(RawDictionaryElementSyntax( + keyExpression: key, + unexpectedBeforeColon, + colon: colon, + valueExpression: value, + trailingComma: comma, + arena: self.arena))) + if key.is(RawMissingExprSyntax.self), colon.isMissing, value.is(RawMissingExprSyntax.self) { + break COLLECTION_LOOP + } + } + + // If we found EOF or the closing square bracket, bail out. + if self.at(.rightSquareBracket) || self.at(.eof) { + break + } + + // If the next token is at the beginning of a new line and can never start + // an element, break. + if self.currentToken.isAtStartOfLine + && (self.at(.rightBrace) || self.at(.poundEndifKeyword) || self.lookahead().isStartOfDeclaration() || self.lookahead().isStartOfStatement()) { + break + } + } + } + + let (unexpectedBeforeRSquare, rsquare) = self.expect(.rightSquareBracket) + // `elementKind` holds the result of the last `parseCollectionElement` call + // and selects which literal node is built. + // NOTE(review): the force-unwrap assumes the loop body ran at least once - + // confirm `LoopProgressCondition.evaluate` is always true on its first call. + switch elementKind! { + case .dictionary: + return RawExprSyntax(RawDictionaryExprSyntax( + leftSquare: lsquare, + content: RawSyntax(RawDictionaryElementListSyntax(elements: elements.map { + $0.as(RawDictionaryElementSyntax.self)! + }, arena: self.arena)), + unexpectedBeforeRSquare, + rightSquare: rsquare, + arena: self.arena)) + case .array: + return RawExprSyntax(RawArrayExprSyntax( + leftSquare: lsquare, + elements: RawArrayElementListSyntax(elements: elements.map { + $0.as(RawArrayElementSyntax.self)! 
+ }, arena: self.arena), + unexpectedBeforeRSquare, + rightSquare: rsquare, + arena: self.arena)) + } + } +} + +extension Parser { + @_spi(RawSyntax) + public mutating func parseDefaultArgument() -> RawInitializerClauseSyntax { + let eq = self.eat(.equal) + let expr = self.parseExpression() + return RawInitializerClauseSyntax( + equal: eq, value: expr, arena: self.arena) + } +} + +extension Parser { + @_spi(RawSyntax) + public mutating func parseAnonymousClosureArgument() -> RawIdentifierExprSyntax { + guard self.currentToken.tokenKind == .dollarIdentifier else { + fatalError("Production invoked with non-dollar token!") + } + let ident = self.consumeAnyToken() + return RawIdentifierExprSyntax( + identifier: ident, declNameArguments: nil, arena: self.arena) + } +} + +extension Parser { + /// Parse a #selector expression. + /// + /// Grammar + /// ======= + /// + /// selector-expression → '#selector' '(' expression ) + /// selector-expression → '#selector' '(' 'getter' ':' expression ')' + /// selector-expression → '#selector' '(' 'setter' ':' expression ')' + @_spi(RawSyntax) + public mutating func parseObjectiveCSelectorLiteral() -> RawObjcSelectorExprSyntax { + // Consume '#selector'. + let selector = self.eat(.poundSelectorKeyword) + // Parse the leading '('. + let (unexpectedBeforeLParen, lparen) = self.expect(.leftParen) + + // Parse possible 'getter:' or 'setter:' modifiers, and determine + // the kind of selector we're working with. + let kind: RawTokenSyntax? + let colon: RawTokenSyntax? + if + self.peek().tokenKind == .colon, + self.currentToken.isContextualKeyword("getter") || self.currentToken.isContextualKeyword("setter") + { + // Parse the modifier. + kind = self.consume(remapping: .contextualKeyword) + colon = self.eat(.colon) + } else { + kind = nil + colon = nil + } + + // Parse the subexpression. + let subexpr = self.parseExpression() + // Parse the closing ')'. 
+ let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + return RawObjcSelectorExprSyntax( + poundSelector: selector, + unexpectedBeforeLParen, + leftParen: lparen, + kind: kind, + colon: colon, + name: subexpr, + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena) + } +} + +extension Parser { + /// Parse a closure expression. + /// + /// Grammar + /// ======= + /// + /// closure-expression → '{' attributes? closure-signature? statements? '}' + @_spi(RawSyntax) + public mutating func parseClosureExpression() -> RawClosureExprSyntax { + // Parse the opening left brace. + let lbrace = self.eat(.leftBrace) + // Parse the closure-signature, if present. + let signature = self.parseClosureSignatureIfPresent() + + // Parse the body. + var elements = [RawCodeBlockItemSyntax]() + do { + while !self.at(.eof) && !self.at(.rightBrace) { + elements.append(self.parseCodeBlockItem()) + } + } + + // Parse the closing '}'. + let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace) + return RawClosureExprSyntax( + leftBrace: lbrace, + signature: signature, + statements: RawCodeBlockItemListSyntax(elements: elements, arena: arena), + unexpectedBeforeRBrace, + rightBrace: rbrace, + arena: self.arena) + } +} + +extension Parser { + /// Parse the signature of a closure, if one is present. + /// + /// Grammar + /// ======= + /// + /// closure-signature → capture-list? closure-parameter-clause 'async'? 'throws'? function-result? 'in' + /// closure-signature → capture-list 'in' + /// + /// closure-parameter-clause → '(' ')' | '(' closure-parameter-list ')' | identifier-list + /// + /// closure-parameter-list → closure-parameter | closure-parameter , closure-parameter-list + /// closure-parameter → closure-parameter-name type-annotation? + /// closure-parameter → closure-parameter-name type-annotation '...' 
+ /// closure-parameter-name → identifier + /// + /// capture-list → '[' capture-list-items ']' + /// capture-list-items → capture-list-item | capture-list-item , capture-list-items + /// capture-list-item → capture-specifier? identifier + /// capture-list-item → capture-specifier? identifier '=' expression + /// capture-list-item → capture-specifier? self-expression + /// + /// capture-specifier → 'weak' | 'unowned' | 'unowned(safe)' | 'unowned(unsafe)' + @_spi(RawSyntax) + public mutating func parseClosureSignatureIfPresent() -> RawClosureSignatureSyntax? { + // If we have a leading token that may be part of the closure signature, do a + // speculative parse to validate it and look for 'in'. + guard self.at(.atSign) || self.at(.leftParen) || self.at(.leftSquareBracket) + || self.currentToken.isIdentifier || self.at(.wildcardKeyword) else { + // No closure signature. + return nil + } + + guard self.lookahead().canParseClosureSignature() else { + return nil + } + + let attrs = self.parseAttributeList() + + let captures: RawClosureCaptureSignatureSyntax? + if self.at(.leftSquareBracket) { + let lsquare = self.eat(.leftSquareBracket) + // At this point, we know we have a closure signature. Parse the capture list + // and parameters. + var elements = [RawClosureCaptureItemSyntax]() + do { + var keepGoing: RawTokenSyntax? = nil + repeat { + // Parse any specifiers on the capture like `weak` or `unowned` + let specifier = self.parseClosureCaptureSpecifiers() + + // The thing being capture specified is an identifier, or as an identifier + // followed by an expression. + let name: RawTokenSyntax? + let assignToken: RawTokenSyntax? + let expression: RawExprSyntax + if self.peek().tokenKind == .equal { + // The name is a new declaration. + name = self.consumeIdentifier() + assignToken = self.eat(.equal) + expression = self.parseExpression() + } else { + // This is the simple case - the identifier is both the name and + // the expression to capture. 
+ name = nil + assignToken = nil + expression = RawExprSyntax(self.parseIdentifierExpression()) + } + + keepGoing = self.consume(if: .comma) + elements.append(RawClosureCaptureItemSyntax( + specifier: specifier, + name: name, + assignToken: assignToken, + expression: expression, + trailingComma: keepGoing, + arena: self.arena)) + } while keepGoing != nil + } + let (unexpectedBeforeRSquare, rsquare) = self.expect(.rightSquareBracket) + + captures = RawClosureCaptureSignatureSyntax( + leftSquare: lsquare, + items: elements.isEmpty ? nil : RawClosureCaptureItemListSyntax(elements: elements, arena: self.arena), + unexpectedBeforeRSquare, + rightSquare: rsquare, arena: self.arena) + } else { + captures = nil + } + + var input: RawSyntax? + var asyncKeyword: RawTokenSyntax? = nil + var throwsTok: RawTokenSyntax? = nil + var output: RawReturnClauseSyntax? = nil + if !self.at(.inKeyword) { + if self.at(.leftParen) { + // Parse the closure arguments. + input = RawSyntax(self.parseParameterClause(isClosure: true)) + } else { + var params = [RawClosureParamSyntax]() + do { + // Parse identifier (',' identifier)* + var keepGoing: RawTokenSyntax? = nil + repeat { + let name: RawTokenSyntax + if self.currentToken.isIdentifier { + name = self.consumeIdentifier() + } else { + name = self.eat(.wildcardKeyword) + } + keepGoing = consume(if: .comma) + params.append(RawClosureParamSyntax( + name: name, trailingComma: keepGoing, arena: self.arena)) + } while keepGoing != nil + } + + input = RawSyntax(RawClosureParamListSyntax(elements: params, arena: self.arena)) + } + + asyncKeyword = self.parseEffectsSpecifier() + throwsTok = self.parseEffectsSpecifier() + + // Parse the optional explicit return type. + if self.at(.arrow) { + // Consume the '->'. + let arrow = self.eat(.arrow) + + // Parse the type. + let returnTy = self.parseType() + + output = RawReturnClauseSyntax( + arrow: arrow, returnType: returnTy, arena: self.arena) + } + } + + // Parse the 'in'. 
+ let (unexpectedBeforeInTok, inTok) = self.expect(.inKeyword) + return RawClosureSignatureSyntax( + attributes: attrs, + capture: captures, + input: input, + asyncKeyword: asyncKeyword, + throwsTok: throwsTok, + output: output, + unexpectedBeforeInTok, + inTok: inTok, + arena: self.arena) + } + + @_spi(RawSyntax) + public mutating func parseClosureCaptureSpecifiers() -> RawTokenListSyntax { + var specifiers = [RawTokenSyntax]() + do { + // Check for the strength specifier: "weak", "unowned", or + // "unowned(safe/unsafe)". + if self.currentToken.isContextualKeyword("weak") { + specifiers.append(self.consumeIdentifier()) + } else if self.currentToken.isContextualKeyword("unowned") { + specifiers.append(self.consumeIdentifier()) + if let lparen = self.consume(if: .leftParen) { + specifiers.append(lparen) + specifiers.append(self.expectWithoutLookahead(.identifier, "unsafe")) + specifiers.append(self.expectWithoutLookahead(.rightParen)) + } + } else if (self.currentToken.isIdentifier || self.at(.selfKeyword)) { + let next = self.peek() + // "x = 42", "x," and "x]" are all strong captures of x. + guard next.tokenKind == .equal || next.tokenKind == .comma + || next.tokenKind == .rightSquareBracket || next.tokenKind == .period + else { + // Recover from unexpected in the capture specifiers. + // + // FIXME: This is quite poor modeling in SwiftSyntax. + specifiers.append(contentsOf: self.recover()) + return RawTokenListSyntax(elements: specifiers, arena: self.arena) + } + } else { + // Recover from unexpected in the capture specifiers. + // + // FIXME: This is quite poor modeling in SwiftSyntax. + specifiers.append(contentsOf: self.recover()) + return RawTokenListSyntax(elements: specifiers, arena: self.arena) + } + + guard self.currentToken.isIdentifier || self.at(.selfKeyword) else { + // Recover from unexpected in the capture specifiers. + // + // FIXME: This is quite poor modeling in SwiftSyntax. 
+ specifiers.append(contentsOf: self.recover()) + return RawTokenListSyntax(elements: specifiers, arena: self.arena) + } + } + // Squash all tokens, if any, as the specifier of the captured item. + return RawTokenListSyntax(elements: specifiers, arena: self.arena) + } +} + +extension Parser { + /// Parse the elements of an argument list. + /// + /// This is currently the same as parsing a tuple expression. In the future, + /// this will be a dedicated argument list type. + /// + /// Grammar + /// ======= + /// + /// tuple-element → expression | identifier ':' expression + @_spi(RawSyntax) + public mutating func parseArgumentListElements() -> [RawTupleExprElementSyntax] { + guard !self.at(.rightParen) else { + return [] + } + + var result = [RawTupleExprElementSyntax]() + var keepGoing: RawTokenSyntax? = nil + repeat { + let label: RawTokenSyntax? + let colon: RawTokenSyntax? + if self.currentToken.canBeArgumentLabel && self.peek().tokenKind == .colon { + label = self.consumeAnyToken() + colon = self.eat(.colon) + } else { + label = nil + colon = nil + } + + // See if we have an operator decl ref '()'. The operator token in + // this case lexes as a binary operator because it neither leads nor + // follows a proper subexpression. + let expr: RawExprSyntax + if self.currentToken.isBinaryOperator + && (self.peek().tokenKind == .comma || self.peek().tokenKind == .rightParen || self.peek().tokenKind == .rightSquareBracket) { + let (ident, args) = self.parseDeclNameRef(.operators) + expr = RawExprSyntax(RawIdentifierExprSyntax( + identifier: ident, declNameArguments: args, arena: self.arena)) + } else { + expr = self.parseExpression() + } + keepGoing = self.consume(if: .comma) + result.append(RawTupleExprElementSyntax( + label: label, colon: colon, expression: expr, trailingComma: keepGoing, arena: self.arena)) + } while keepGoing != nil + return result + } + + /// Parse an argument list. + /// + /// This is currently the same as parsing a tuple expression. 
In the future, + /// this will be a dedicated argument list type. + /// + /// Grammar + /// ======= + /// + /// tuple-expression → '(' ')' | '(' tuple-element ',' tuple-element-list ')' + /// tuple-element-list → tuple-element | tuple-element ',' tuple-element-list + @_spi(RawSyntax) + public mutating func parseArgumentList(_ flavor: ExprFlavor) -> RawTupleExprSyntax { + let lparen = self.eat(.leftParen) + let args = self.parseArgumentListElements() + let (unexpectedBeforeRightParen, rparen) = self.expect(.rightParen) + + // FIXME: Introduce new SyntaxKind for ArgumentList (rdar://81786229) + return RawTupleExprSyntax( + leftParen: lparen, + elementList: RawTupleExprElementListSyntax(elements: args, arena: self.arena), + unexpectedBeforeRightParen, + rightParen: rparen, + arena: self.arena) + } +} + +extension Parser { + /// Parse the trailing closure(s) following a call expression. + /// + /// Grammar + /// ======= + /// + /// trailing-closures → closure-expression labeled-trailing-closures? + /// labeled-trailing-closures → labeled-trailing-closure labeled-trailing-closures? + /// labeled-trailing-closure → identifier ':' closure-expression + @_spi(RawSyntax) + public mutating func parseTrailingClosures(_ flavor: ExprFlavor) -> (RawClosureExprSyntax, RawMultipleTrailingClosureElementListSyntax?) { + // Parse the closure. + let closure = self.parseClosureExpression() + + // Parse labeled trailing closures. + var elements = [RawMultipleTrailingClosureElementSyntax]() + while self.lookahead().isStartOfLabelledTrailingClosure() { + let label = self.parseArgumentLabel() + let (unexpectedBeforeColon, colon) = self.expect(.colon) + let closure = self.parseClosureExpression() + elements.append(RawMultipleTrailingClosureElementSyntax( + label: label, + unexpectedBeforeColon, + colon: colon, + closure: closure, + arena: self.arena + )) + } + + let trailing = elements.isEmpty ? 
nil : RawMultipleTrailingClosureElementListSyntax(elements: elements, arena: self.arena) + return (closure, trailing) + } +} + +extension Parser.Lookahead { + func isStartOfLabelledTrailingClosure() -> Bool { + // Fast path: the next two tokens must be a label and a colon. + // But 'default:' is ambiguous with switch cases and we disallow it + // (unless escaped) even outside of switches. + if !self.currentToken.canBeArgumentLabel + || self.at(.defaultKeyword) + || self.peek().tokenKind != .colon { + return false + } + + // Do some tentative parsing to distinguish `label: { ... }` and + // `label: switch x { ... }`. + var backtrack = self.lookahead() + backtrack.consumeAnyToken() + if backtrack.peek().tokenKind == .leftBrace { + return true + } + + return false + } + + /// Recover invalid uses of trailing closures in a situation + /// where the parser requires an expr-basic (which does not allow them). We + /// handle this by doing some lookahead in common situations. And later, Sema + /// will emit a diagnostic with a fixit to add wrapping parens. + func isValidTrailingClosure(_ flavor: Parser.ExprFlavor) -> Bool { + assert(self.at(.leftBrace), "Couldn't be a trailing closure") + + // If this is the start of a get/set accessor, then it isn't a trailing + // closure. + guard !self.lookahead().isStartOfGetSetAccessor() else { + return false + } + + // If this is a normal expression (not an expr-basic) then trailing closures + // are allowed, so this is obviously one. + // TODO: We could handle try to disambiguate cases like: + // let x = foo + // {...}() + // by looking ahead for the ()'s, but this has been replaced by do{}, so this + // probably isn't worthwhile. + // + guard case .basic = flavor else { + return true + } + + // If this is an expr-basic, then a trailing closure is not allowed. However, + // it is very common for someone to write something like: + // + // for _ in numbers.filter {$0 > 4} { + // + // and we want to recover from this very well. 
We need to perform arbitrary + // look-ahead to disambiguate this case, so we only do this in the case where + // the token after the { is on the same line as the {. + guard !self.peek().isAtStartOfLine else { + return false + } + + // Determine if the {} goes with the expression by eating it, and looking + // to see if it is immediately followed by a token which indicates we should + // consider it part of the preceding expression + var backtrack = self.lookahead() + backtrack.eat(.leftBrace) + while !backtrack.at(.eof) && !backtrack.at(.rightBrace) { + backtrack.consumeAnyToken() + } + + guard backtrack.consume(if: .rightBrace) != nil else { + return false + } + + switch backtrack.currentToken.tokenKind { + case .leftBrace, + .whereKeyword, + .comma: + return true + case .leftSquareBracket, + .leftParen, + .period, + .prefixPeriod, + .isKeyword, + .asKeyword, + .postfixQuestionMark, + .infixQuestionMark, + .exclamationMark, + .colon, + .equal, + .postfixOperator, + .spacedBinaryOperator, + .unspacedBinaryOperator: + return !backtrack.currentToken.isAtStartOfLine + default: + return false + } + } +} + +// MARK: Lookahead + +extension Parser.Lookahead { + // Consume 'async', 'throws', and 'rethrows', but in any order. + mutating func consumeEffectsSpecifiers() { + while self.currentToken.isEffectsSpecifier && !self.currentToken.isAtStartOfLine { + self.consumeAnyToken() + } + } + + func canParseClosureSignature() -> Bool { + // Consume attributes. + var lookahead = self.lookahead() + while lookahead.at(.atSign) { + lookahead.eat(.atSign) + guard lookahead.currentToken.isIdentifier else { + break + } + _ = lookahead.canParseCustomAttribute() + } + + // Skip by a closure capture list if present. 
+ if lookahead.consume(if: .leftSquareBracket) != nil { + while !lookahead.at(.eof) && !lookahead.at(.rightSquareBracket) { + lookahead.consumeAnyToken() + } + + if lookahead.consume(if: .rightSquareBracket) == nil { + return false + } + } + + // Parse pattern-tuple func-signature-result? 'in'. + if lookahead.consume(if: .leftParen) != nil { // Consume the '('. + + // Until we reach the closing ')' (or EOF), eat balanced tokens. + while !lookahead.at(.eof) && !lookahead.at(.rightParen) { + lookahead.skipSingle() + } + + // Consume the ')', if it's there. + if lookahead.consume(if: .rightParen) != nil { + lookahead.consumeEffectsSpecifiers() + + // Parse the func-signature-result, if present. + if lookahead.consume(if: .arrow) != nil { + guard lookahead.canParseType() else { + return false + } + + lookahead.consumeEffectsSpecifiers() + } + } + // The parameter clause is done; the 'in' check below makes the final call. + } else if lookahead.currentToken.isIdentifier || lookahead.at(.wildcardKeyword) { + // Parse identifier (',' identifier)* + lookahead.consumeAnyToken() + while lookahead.consume(if: .comma) != nil { + if lookahead.currentToken.isIdentifier || lookahead.at(.wildcardKeyword) { + lookahead.consumeAnyToken() + continue + } + + return false + } + + lookahead.consumeEffectsSpecifiers() + + // Parse the func-signature-result, if present. + if lookahead.consume(if: .arrow) != nil { + guard lookahead.canParseType() else { + return false + } + + lookahead.consumeEffectsSpecifiers() + } + } + + // The signature must end with 'in'; it is only checked here, not consumed. + guard lookahead.at(.inKeyword) else { + return false + } + // Okay, we have a closure signature. + return true + } +} + +extension Parser.Lookahead { + // Helper function to see if we can parse member reference like suffixes + // inside '#if'. 
+ fileprivate func isAtStartOfPostfixExprSuffix() -> Bool { + guard self.at(.period) || self.at(.prefixPeriod) else { + return false + } + + // The current token is known to be a period here, so the member name has + // to be inspected on the token that follows it. An integer literal after + // the period is accepted (e.g. the `0` in `.0`). + if case .integerLiteral = self.peek().tokenKind { + return true + } + + // Otherwise the period must be followed by an identifier, `Self`, `self`, + // or some other keyword. + if !self.peek().isIdentifier, + self.peek().tokenKind != .capitalSelfKeyword, + self.peek().tokenKind != .selfKeyword, + !self.peek().tokenKind.isKeyword { + return false + } + return true + } +} diff --git a/Sources/SwiftParser/Lexer.swift b/Sources/SwiftParser/Lexer.swift new file mode 100644 index 00000000000..76058880ab6 --- /dev/null +++ b/Sources/SwiftParser/Lexer.swift @@ -0,0 +1,2232 @@ +//===-------------------------- Lexer.swift -------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +/// A lexical analyzer for the Swift programming language. +/// +/// - Seealso: ``Lexer/Lexeme`` +/// - Seealso: ``Lexer/Cursor`` +public struct Lexer { + /// A trivia-delimited region of source text. + /// + /// A lexeme is the fundamental output unit of lexical analysis. Each lexeme + /// represents a fully identified, meaningful part of the input text that + /// can be consumed by a ``Parser``. 
+ public struct Lexeme { + /// The kind this lexeme was classified as. + @_spi(RawSyntax) + public var tokenKind: RawTokenKind + public var isAtStartOfLine: Bool + /// Address of the first byte of the lexeme, i.e. the start of its leading trivia. + var start: UnsafePointer<UInt8> + /// Number of bytes of trivia that precede the token text. + public var leadingTriviaByteLength: Int + /// Number of bytes of the token text itself. + public var textByteLength: Int + /// Number of bytes of trivia that follow the token text. + public var trailingTriviaByteLength: Int + + @_spi(RawSyntax) + public init( + tokenKind: RawTokenKind, + isAtStartOfLine: Bool, + start: UnsafePointer<UInt8>, + leadingTriviaLength: Int, + textLength: Int, + trailingTriviaLength: Int + ) { + self.tokenKind = tokenKind + self.isAtStartOfLine = isAtStartOfLine + self.start = start + self.leadingTriviaByteLength = leadingTriviaLength + self.textByteLength = textLength + self.trailingTriviaByteLength = trailingTriviaLength + } + + /// Total size in bytes: leading trivia, token text and trailing trivia. + public var byteLength: Int { + leadingTriviaByteLength + textByteLength + trailingTriviaByteLength + } + + /// The whole lexeme, trivia included. + @_spi(RawSyntax) + public var wholeText: SyntaxText { + SyntaxText(baseAddress: start, count: byteLength) + } + + /// The range of the token text, as byte offsets relative to `start`. + @_spi(RawSyntax) + public var textRange: Range<Int> { + leadingTriviaByteLength ..< leadingTriviaByteLength + textByteLength + } + + /// The token text without any trivia. + @_spi(RawSyntax) + public var tokenText: SyntaxText { + SyntaxText(baseAddress: start.advanced(by: leadingTriviaByteLength), + count: textByteLength) + } + /// The trivia that precedes the token text. + @_spi(RawSyntax) + public var leadingTriviaText: SyntaxText { + SyntaxText(baseAddress: start, + count: leadingTriviaByteLength) + } + /// The trivia that follows the token text. + @_spi(RawSyntax) + public var trailingTriviaText: SyntaxText { + SyntaxText(baseAddress: start.advanced(by: leadingTriviaByteLength+textByteLength), + count: trailingTriviaByteLength) + } + } +} + +extension Lexer { + /// A sequence of ``Lexer/Lexeme`` tokens starting from a ``Lexer/Cursor`` + /// that points into an input buffer. 
+ public struct LexemeSequence: IteratorProtocol, Sequence { + fileprivate let start: Lexer.Cursor + fileprivate var cursor: Lexer.Cursor + fileprivate var nextToken: Lexer.Lexeme + + fileprivate init(start: Lexer.Cursor, cursor: Lexer.Cursor) { + self.start = start + self.cursor = cursor + self.nextToken = self.cursor.nextToken(self.start) + } + + /// Returns the next lexeme. + /// + /// This never returns `nil`: once the input is exhausted, further calls + /// keep yielding `.eof` lexemes. + public mutating func next() -> Lexer.Lexeme? { + return self.advance() + } + + /// Returns the buffered lexeme and lexes the one after it into the buffer. + mutating func advance() -> Lexer.Lexeme { + defer { + if self.cursor.isAtEndOfFile { + self.nextToken = Lexeme( + tokenKind: .eof, isAtStartOfLine: false, start: self.cursor.pointer, + leadingTriviaLength: 0, textLength: 0, trailingTriviaLength: 0) + } else { + self.nextToken = self.cursor.nextToken(self.start) + } + } + return self.nextToken + } + + /// Backs the cursor up by the buffered lexeme's length plus `bytes`, + /// re-lexes from that position and returns the re-lexed lexeme. When + /// `bytes` is not positive this is the same as `advance()`. + /// + /// - Warning: Do not add more usages of this function. + mutating func resetForSplit(of bytes: Int) -> Lexer.Lexeme { + guard bytes > 0 else { + return self.advance() + } + + // FIXME: This is kind of ridiculous. We shouldn't have to look backwards + // in the token stream. We should be fusing together runs of operator and + // identifier characters in the parser, not splitting and backing up + // again in the lexer. + let backUpLength = self.nextToken.byteLength + bytes + self.cursor.backUp(by: backUpLength) + self.nextToken = self.cursor.nextToken(self.start) + return self.advance() + } + + /// Returns the buffered lexeme without consuming it. + func peek() -> Lexer.Lexeme { + return self.nextToken + } + } + + /// Creates a lexeme sequence over `input` that starts lexing at byte offset + /// `startIndex`. + @_spi(RawSyntax) + public static func tokenize( + _ input: UnsafeBufferPointer<UInt8>, + from startIndex: Int = 0 + ) -> LexemeSequence { + assert(input.isEmpty || startIndex < input.endIndex) + let startChar = startIndex == input.startIndex ? 
UInt8(ascii: "\0") : input[startIndex-1] + let start = Cursor(input: input, previous: UInt8(ascii: "\0")) + let cursor = Cursor(input: UnsafeBufferPointer(rebasing: input[startIndex...]), previous: startChar) + return LexemeSequence(start: start, cursor: cursor) + } +} + +extension Lexer { + /// Returns the byte offset, measured from the start of `input`, at which + /// `Cursor.skipToEndOfInterpolatedExpression` stops. + public static func lexToEndOfInterpolatedExpression(_ input: UnsafeBufferPointer<UInt8>, _ IsMultilineString: Bool) -> Int { + let cursor = Lexer.Cursor(input: input, previous: 0) + let advancedCursor = Lexer.Cursor.skipToEndOfInterpolatedExpression(cursor, IsMultilineString) + return advancedCursor.input.baseAddress! - cursor.input.baseAddress! + } +} + +extension Lexer { + /// A pointer into the input source text. + /// + /// A ``Lexer/Cursor`` contains the lexer's scanning and analysis interface. + /// Lexing is afforded a single byte of look-behind that is carried by the + /// cursor and updated when the cursor advances. A cursor is a safe interface + /// to reading bytes from an input buffer: all accesses to its input are + /// bounds-checked. + public struct Cursor: Equatable { + /// The bytes that have not been consumed yet. + var input: UnsafeBufferPointer<UInt8> + /// The single byte of look-behind. + var previous: UInt8 + + /// Two cursors are equal when they point at the same address; `previous` + /// and the remaining length are not compared. + public static func == (lhs: Cursor, rhs: Cursor) -> Bool { + return lhs.input.baseAddress == rhs.input.baseAddress + } + + /// Returns whether the unconsumed input begins with the given bytes. + public func starts<PossiblePrefix>(with possiblePrefix: PossiblePrefix) -> Bool + where PossiblePrefix: Sequence, PossiblePrefix.Element == UInt8 + { + return self.input.starts(with: possiblePrefix) + } + + /// Address of the next unconsumed byte. Traps if the buffer has no base address. + var pointer: UnsafePointer<UInt8> { + return self.input.baseAddress! 
+ } + func distance(to other: Self) -> Int { + return self.pointer.distance(to: other.pointer) + } + + func peek(at offset: Int = 0) -> UInt8 { + assert(!self.isAtEndOfFile) + assert(offset >= 0) + assert(offset < self.input.count) + return self.input[offset] + } + + var isAtEndOfFile: Bool { + return self.input.isEmpty + } + + var isAtStartOfFile: Bool { + return !self.input.isEmpty && self.previous == UInt8(ascii: "\0") + } + } +} + +extension Lexer.Cursor { + func isLeftBound(_ bufferBegin: Lexer.Cursor) -> Bool { + // The first character in the file is not left-bound. + if self.input.baseAddress == bufferBegin.input.baseAddress { + return false + } + + // Otherwise decide from the byte immediately before this cursor. + switch self.previous { + case UInt8(ascii: " "), UInt8(ascii: "\r"), UInt8(ascii: "\n"), UInt8(ascii: "\t"), // whitespace + UInt8(ascii: "("), UInt8(ascii: "["), UInt8(ascii: "{"), // opening delimiters + UInt8(ascii: ","), UInt8(ascii: ";"), UInt8(ascii: ":"), // expression separators + 0: // whitespace / last char in file + return false + case UInt8(ascii: "/"): + if self.input.baseAddress! - 1 != bufferBegin.input.baseAddress + && self.input.baseAddress!.advanced(by: -2).pointee == UInt8(ascii: "*") { + return false; // End of a slash-star comment, so whitespace. + } else { + return true + } + case 0xA0: + if self.input.baseAddress! - 1 != bufferBegin.input.baseAddress + && self.input.baseAddress!.advanced(by: -2).pointee == 0xC2 { + return false; // Non-breaking whitespace (U+00A0), encoded as 0xC2 0xA0. 
+ } else { + return true + } + default: + return true + } + } + + /// Returns whether the token ending at this cursor is right-bound, i.e. + /// directly followed by something other than whitespace, a closing + /// delimiter, an expression separator, or a comment. + /// + /// - Parameter isLeftBound: Whether the same token is left-bound; only + /// consulted when the next byte is `.`. + func isRightBound(_ isLeftBound: Bool) -> Bool { + guard !self.isAtEndOfFile else { + return false // last char in file + } + + switch self.peek() { + case UInt8(ascii: " "), UInt8(ascii: "\r"), UInt8(ascii: "\n"), UInt8(ascii: "\t"), // whitespace + UInt8(ascii: ")"), UInt8(ascii: "]"), UInt8(ascii: "}"), // closing delimiters + UInt8(ascii: ","), UInt8(ascii: ";"), UInt8(ascii: ":"): // expression separators + return false + + case 0: + // if (tokEnd == codeCompletionPtr) { // code-completion + // return true + // } + return false; // whitespace / last char in file + + case UInt8(ascii: "."): + // Prefer the '^' in "x^.y" to be a postfix op, not binary, but the '^' in + // "^.y" to be a prefix op, not binary. + return !isLeftBound + + case UInt8(ascii: "/"): + // A following comment counts as whitespace, so this token is not right bound. + // Only look at the second byte when there is one, mirroring the 0xC2 case. + if self.input.count > 1, + self.peek(at: 1) == UInt8(ascii: "/") || self.peek(at: 1) == UInt8(ascii: "*") { + return false + } else { + return true + } + case 0xC2: + if self.input.count > 1, self.peek(at: 1) == 0xA0 { + return false; // Non-breaking whitespace (U+00A0) + } else { + return true + } + default: + return true + } + } +} + +extension Lexer.Cursor { + /// Returns the text between this cursor and `other`, which must not lie + /// before this cursor. + func textUpTo(_ other: Lexer.Cursor) -> SyntaxText { + let count = other.input.baseAddress! - self.input.baseAddress! + assert(count >= 0) + return SyntaxText(baseAddress: self.input.baseAddress, count: count) + } +} + +extension Lexer.Cursor { + /// Moves the cursor `offset` bytes backwards and reloads the look-behind byte. + // NOTE(review): `previous` is read from the byte before the new position, so + // this presumably must not back up to the very first byte of the buffer - + // confirm against the callers. + fileprivate mutating func backUp(by offset: Int) { + assert(!self.isAtStartOfFile) + self.previous = self.input.baseAddress!.advanced(by: -(offset+1)).pointee + self.input = UnsafeBufferPointer(start: self.input.baseAddress!.advanced(by: -offset), count: self.input.count + offset) + } + + mutating func advance() -> UInt8? { + var input = self.input[...] 
+ guard let c = input.popFirst() else { + return nil // end of input + } + self.previous = c + self.input = UnsafeBufferPointer(rebasing: input) + return c + } + + mutating func advance(matching: UInt8) -> UInt8? { + guard !self.input.isEmpty else { + return nil // end of input + } + guard self.peek() == matching else { + return nil + } + return self.advance() + } + + mutating func advance(while predicate: (Unicode.Scalar) -> Bool) { + var next = self + while + !next.isAtEndOfFile, + let c = next.validateUTF8CharacterAndAdvance(), + predicate(c) + { + self = next + } + } + + mutating func advance(if predicate: (Unicode.Scalar) -> Bool) -> Bool { + guard !self.isAtEndOfFile else { + return false + } + + var tmp = self + guard let c = tmp.validateUTF8CharacterAndAdvance() else { + return false + } + + guard predicate(c) else { + return false + } + + self = tmp + return true + } + + mutating func advanceToEndOfLine() -> Bool { + while true { + guard !self.isAtEndOfFile else { + return false + } + switch self.peek() { + case UInt8(ascii: "\n"): + fallthrough + case UInt8(ascii: "\r"): + return true + case _ where self.isAtEndOfFile: + return false + default: + _ = self.advance() + } + } + } + + mutating func advanceToEndOfSlashStarComment() -> Bool { + // Make sure to advance over the * so that we don't incorrectly handle /*/ as + // the beginning and end of the comment. 
+ _ = self.advance() + + var depth = 1 + var isMultiline = false + + while true { + switch self.advance() { + case UInt8(ascii: "*"): + // Check for a '*/' + if self.advance(if: { $0 == Unicode.Scalar("/") }) { + depth -= 1 + if depth == 0 { + return isMultiline + } + } + case UInt8(ascii: "/"): + // Check for a '/*' + if self.advance(if: { $0 == Unicode.Scalar("*") }) { + depth += 1 + } + + case UInt8(ascii: "\n"), UInt8(ascii: "\r"): + isMultiline = true + continue + case _ where self.isAtEndOfFile: + return isMultiline + default: + continue + } + } + } + + mutating func advanceIfCustomDelimiter() -> Int? { + assert(self.previous == UInt8(ascii: "#")) + + var clone = self + var length = 1 + clone.advance(while: { char in + let isDelimeter = (char == Unicode.Scalar("#")) + if isDelimeter { length += 1 } + return isDelimeter + }) + + guard clone.advance(matching: UInt8(ascii: #"""#)) != nil else { + return nil + } + self = clone + return length + } + + + /// advanceIfMultilineDelimiter - Centralized check for multiline delimiter. + mutating func advanceIfMultilineDelimiter(_ CustomDelimiterLen: Int, + _ IsOpening: Bool = false) -> Bool { + // Test for single-line string literals that resemble multiline delimiter. + var TmpPtr = self + _ = TmpPtr.advance() + if IsOpening && CustomDelimiterLen != 0 { + while !TmpPtr.isAtEndOfFile, TmpPtr.peek() != UInt8(ascii: "\r") && TmpPtr.peek() != UInt8(ascii: "\n") { + if TmpPtr.advance(if: { $0 == Unicode.Scalar(UInt8(ascii: #"""#)) }) { + if TmpPtr.delimiterMatches(CustomDelimiterLen) { + return false + } + continue + } + _ = TmpPtr.advance() + } + } + + TmpPtr = self + if (TmpPtr.previous == UInt8(ascii: #"""#) && + TmpPtr.advance(matching: UInt8(ascii: #"""#)) != nil && + TmpPtr.advance(matching: UInt8(ascii: #"""#)) != nil) { + self = TmpPtr + return true + } + + return false + } + + mutating func validateUTF8CharacterAndAdvance() -> Unicode.Scalar? 
{ + guard let CurByte = self.advance() else { + return nil + } + + if (CurByte < 0x80) { + return Unicode.Scalar(CurByte) + } + + // Read the number of high bits set, which indicates the number of bytes in + // the character. + let EncodedBytes = (~(UInt32(CurByte) << 24)).leadingZeroBitCount + func isStartOfUTF8Character(_ S: Unicode.Scalar) -> Bool { + // RFC 2279: The octet values FE and FF never appear. + // RFC 3629: The octet values C0, C1, F5 to FF never appear. + let C = S.value + return C <= 0x80 || (C >= 0xC2 && C < 0xF5) + } + // If this is 0b10XXXXXX, then it is a continuation character. + if (EncodedBytes == 1 || + !isStartOfUTF8Character(Unicode.Scalar(CurByte))) { + // Skip until we get the start of another character. This is guaranteed to + // at least stop at the nul at the end of the buffer. + self.advance(while: { !isStartOfUTF8Character($0) }) + return nil + } + + // Drop the high bits indicating the # bytes of the result. + var CharValue = UInt32(CurByte << EncodedBytes) >> EncodedBytes + + // Read and validate the continuation bytes. + for _ in 1..= 0xC0) { + return nil + } + + // Accumulate our result. + CharValue <<= 6 + CharValue |= UInt32(CurByte & 0x3F) + _ = self.advance() + } + + // UTF-16 surrogate pair values are not valid code points. + if (CharValue >= 0xD800 && CharValue <= 0xDFFF) { + return nil + } + + // If we got here, we read the appropriate number of accumulated bytes. + // Verify that the encoding was actually minimal. + // Number of bits in the value, ignoring leading zeros. + let NumBits = 32-CharValue.leadingZeroBitCount + if (NumBits <= 5+6) { + return EncodedBytes == 2 ? Unicode.Scalar(CharValue) : nil + } + if (NumBits <= 4+6+6) { + return EncodedBytes == 3 ? Unicode.Scalar(CharValue) : nil + } + return EncodedBytes == 4 ? 
Unicode.Scalar(CharValue) : nil + } + + mutating func maybeConsumeNewlineEscape() -> Bool { + var TmpPtr = self + while true { + switch TmpPtr.advance() { + case UInt8(ascii: " "), UInt8(ascii: "\t"): + continue + case UInt8(ascii: "\r"): + _ = TmpPtr.advance(if: { $0 == Unicode.Scalar("\n") }) + fallthrough + case UInt8(ascii: "\n"): + self = TmpPtr + return true + case 0: + return false + default: + return false + } + } + } + + /// delimiterMatches - Does custom delimiter ('#' characters surrounding quotes) + /// match the number of '#' characters after '\' inside the string? This allows + /// interpolation inside a "raw" string. Normal/cooked string processing is + /// the degenerate case of there being no '#' characters surrounding the quotes. + /// If delimiter matches, advances byte pointer passed in and returns true. + /// Also used to detect the final delimiter of a string when IsClosing == true. + mutating func delimiterMatches(_ customDelimiterLength: Int, + _ IsClosing: Bool = false) -> Bool { + guard customDelimiterLength > 0 else { + return true + } + + var TmpPtr = self + while TmpPtr.advance(matching: UInt8(ascii: "#")) != nil { + + } + + if TmpPtr.input.baseAddress! - self.input.baseAddress! < customDelimiterLength { + return false + } + + for _ in 0.. Lexer.Cursor { + var CurPtr = CurPtr + var OpenDelimiters = [UInt8]() + var AllowNewline = [ IsMultilineString ] + var CustomDelimiter = [Int]() + + let inStringLiteral = { () -> Bool in + guard let last = OpenDelimiters.last else { + return false + } + return last == UInt8(ascii: #"""#) || last == UInt8(ascii: #"'"#) + } + while true { + // This is a simple scanner, capable of recognizing nested parentheses and + // string literals but not much else. The implications of this include not + // being able to break an expression over multiple lines in an interpolated + // string. This limitation allows us to recover from common errors though. 
+ // + // On success scanning the expression body, the real lexer will be used to + // relex the body when parsing the expressions. We let it diagnose any + // issues with malformed tokens or other problems. + var CustomDelimiterLen = 0 + let Last = CurPtr + switch CurPtr.advance() { + // String literals in general cannot be split across multiple lines + // interpolated ones are no exception - unless multiline literals. + case UInt8(ascii: "\n"), UInt8(ascii: "\r"): + if AllowNewline.last! { + continue + } + // Will be diagnosed as an unterminated string literal. + return Last + case 0: + guard !Last.isAtEndOfFile else { + // CC token or random NUL character. + continue + } + // Will be diagnosed as an unterminated string literal. + return Last + + case UInt8(ascii: "#"): + guard !inStringLiteral(), let delim = CurPtr.advanceIfCustomDelimiter() else { + continue + } + CustomDelimiterLen = delim + assert(CurPtr.previous == UInt8(ascii: #"""#), + "advanceIfCustomDelimiter() must stop at after the quote") + fallthrough + + case UInt8(ascii: #"""#), UInt8(ascii: #"'"#): + if (!inStringLiteral()) { + // Open string literal. + OpenDelimiters.append(CurPtr.previous) + AllowNewline.append(CurPtr.advanceIfMultilineDelimiter(CustomDelimiterLen, true)) + CustomDelimiter.append(CustomDelimiterLen) + continue + } + + // In string literal. + + // Skip if it's an another kind of quote in string literal. e.g. "foo's". + guard OpenDelimiters.last == CurPtr.previous else { + continue + } + + // Multi-line string can only be closed by '"""'. + if (AllowNewline.last! && + !CurPtr.advanceIfMultilineDelimiter(CustomDelimiterLen)) { + continue + } + + // Check whether we have equivalent number of '#'s. + guard CurPtr.delimiterMatches(CustomDelimiter.last!, true) else { + continue + } + + // Close string literal. + _ = OpenDelimiters.popLast() + _ = AllowNewline.popLast() + _ = CustomDelimiter.popLast() + continue + case UInt8(ascii: "\\"): + // We ignore invalid escape sequence here. 
They should be diagnosed in + // the real lexer functions. + if (inStringLiteral() && + CurPtr.delimiterMatches(CustomDelimiter.last!)) { + let Last = CurPtr + switch CurPtr.advance() { + case UInt8(ascii: "("): + // Entering a recursive interpolated expression + OpenDelimiters.append(UInt8(ascii: "(")) + continue + case UInt8(ascii: "\n"), UInt8(ascii: "\r"), 0: + // Don't jump over newline/EOF due to preceding backslash. + // Let the outer switch to handle it. + CurPtr = Last + continue + default: + continue + } + } + continue + + // Paren nesting deeper to support "foo = \((a+b)-(c*d)) bar". + case UInt8(ascii: "("): + if (!inStringLiteral()) { + OpenDelimiters.append(UInt8(ascii: "(")) + } + continue + case UInt8(ascii: ")"): + if OpenDelimiters.isEmpty { + // No outstanding open delimiters; we're done. + return Last + } else if OpenDelimiters.last == UInt8(ascii: "(") { + // Pop the matching bracket and keep going. + _ = OpenDelimiters.popLast() + if OpenDelimiters.isEmpty { + // No outstanding open delimiters; we're done. + return Last + } + continue + } else { + // It's a right parenthesis in a string literal. + assert(inStringLiteral()) + continue + } + case UInt8(ascii: "/"): + if (inStringLiteral()) { + continue + } + + if !CurPtr.isAtEndOfFile, CurPtr.peek() == UInt8(ascii: "*") { + let CommentStart = Last + let isMultilineComment = CurPtr.advanceToEndOfSlashStarComment() + if isMultilineComment && !AllowNewline.last! { + // Multiline comment is prohibited in string literal. + // Return the start of the comment. + return CommentStart + } + } else if !CurPtr.isAtEndOfFile, CurPtr.peek() == UInt8(ascii: "/") { + if !AllowNewline.last! { + // '//' comment is impossible in single line string literal. + // Return the start of the comment. + return Last + } + // Advance to the end of the comment. + if CurPtr.advanceToEndOfLine() { + _ = CurPtr.advance() + } + } + continue + case nil: + return CurPtr + default: + // Normal token character. 
+ continue + } + } + } +} + +extension Lexer.Cursor { + mutating func nextToken(_ ContentStart: Lexer.Cursor) -> Lexer.Lexeme { + // Leading trivia. + let leadingTriviaStart = self + let newlineInLeadingTrivia = self.lexTrivia(.leading) + + // Token text. + let textStart = self + let kind = self.lexImpl(ContentStart: ContentStart) + + // Trailing trivia. + let trailingTriviaStart = self + let newlineInTrailingTrivia = self.lexTrivia(.trailing) + assert(newlineInTrailingTrivia == .absent, + "trailingTrivia should not have a newline") + + return .init( + tokenKind: kind, + isAtStartOfLine: newlineInLeadingTrivia == .present, + start: leadingTriviaStart.pointer, + leadingTriviaLength: leadingTriviaStart.distance(to: textStart), + textLength: textStart.distance(to: trailingTriviaStart), + trailingTriviaLength: trailingTriviaStart.distance(to: self)) + } + + private mutating func lexImpl(ContentStart: Lexer.Cursor) -> RawTokenKind { + let start = self + switch self.advance() { + case UInt8(ascii: "@"): return .atSign + case UInt8(ascii: "{"): return .leftBrace + case UInt8(ascii: "["): return .leftSquareBracket + case UInt8(ascii: "("): return .leftParen + case UInt8(ascii: "}"): return .rightBrace + case UInt8(ascii: "]"): return .rightSquareBracket + case UInt8(ascii: ")"): return .rightParen + + case UInt8(ascii: ","): return .comma + case UInt8(ascii: ";"): return .semicolon + case UInt8(ascii: ":"): return .colon + case UInt8(ascii: "\\"): return .backslash + + case UInt8(ascii: "#"): + // Try lex a raw string literal. + if let customDelimiterLength = self.advanceIfCustomDelimiter() { + return self.lexStringLiteral(start, customDelimiterLength) + } + + // Try lex a regex literal. + if let token = self.tryLexRegexLiteral(start) { + return token + } + // Otherwise try lex a magic pound literal. + return self.lexMagicPoundLiteral() + case UInt8(ascii: "/"): + // Try lex a regex literal. 
+ if let token = self.tryLexRegexLiteral(start) { + return token + } + + // Otherwise lex it as an operator. + return self.lexOperatorIdentifier(start, ContentStart) + case UInt8(ascii: "!"): + if start.isLeftBound(ContentStart) { + return .exclamationMark + } + return self.lexOperatorIdentifier(start, ContentStart) + + case UInt8(ascii: "?"): + if start.isLeftBound(ContentStart) { + return .postfixQuestionMark + } + return self.lexOperatorIdentifier(start, ContentStart) + + case UInt8(ascii: "<"): + if !self.isAtEndOfFile, self.peek() == UInt8(ascii: "#") { + return self.tryLexEditorPlaceholder(start, ContentStart) + } + return self.lexOperatorIdentifier(start, ContentStart) + case UInt8(ascii: ">"): + return self.lexOperatorIdentifier(start, ContentStart) + + case UInt8(ascii: "="), UInt8(ascii: "-"), UInt8(ascii: "+"), + UInt8(ascii: "*"), UInt8(ascii: "%"), UInt8(ascii: "&"), + UInt8(ascii: "|"), UInt8(ascii: "^"), UInt8(ascii: "~"), + UInt8(ascii: "."): + return self.lexOperatorIdentifier(start, ContentStart) + case UInt8(ascii: "A"), UInt8(ascii: "B"), UInt8(ascii: "C"), + UInt8(ascii: "D"), UInt8(ascii: "E"), UInt8(ascii: "F"), + UInt8(ascii: "G"), UInt8(ascii: "H"), UInt8(ascii: "I"), + UInt8(ascii: "J"), UInt8(ascii: "K"), UInt8(ascii: "L"), + UInt8(ascii: "M"), UInt8(ascii: "N"), UInt8(ascii: "O"), + UInt8(ascii: "P"), UInt8(ascii: "Q"), UInt8(ascii: "R"), + UInt8(ascii: "S"), UInt8(ascii: "T"), UInt8(ascii: "U"), + UInt8(ascii: "V"), UInt8(ascii: "W"), UInt8(ascii: "X"), + UInt8(ascii: "Y"), UInt8(ascii: "Z"), + UInt8(ascii: "a"), UInt8(ascii: "b"), UInt8(ascii: "c"), + UInt8(ascii: "d"), UInt8(ascii: "e"), UInt8(ascii: "f"), + UInt8(ascii: "g"), UInt8(ascii: "h"), UInt8(ascii: "i"), + UInt8(ascii: "j"), UInt8(ascii: "k"), UInt8(ascii: "l"), + UInt8(ascii: "m"), UInt8(ascii: "n"), UInt8(ascii: "o"), + UInt8(ascii: "p"), UInt8(ascii: "q"), UInt8(ascii: "r"), + UInt8(ascii: "s"), UInt8(ascii: "t"), UInt8(ascii: "u"), + UInt8(ascii: "v"),
UInt8(ascii: "w"), UInt8(ascii: "x"), + UInt8(ascii: "y"), UInt8(ascii: "z"), + UInt8(ascii: "_"): + return self.lexIdentifier(start) + + case UInt8(ascii: "$"): + return self.lexDollarIdentifier(start) + + case UInt8(ascii: "0"), UInt8(ascii: "1"), UInt8(ascii: "2"), + UInt8(ascii: "3"), UInt8(ascii: "4"), UInt8(ascii: "5"), + UInt8(ascii: "6"), UInt8(ascii: "7"), UInt8(ascii: "8"), + UInt8(ascii: "9"): + return self.lexNumber(start, ContentStart) + case UInt8(ascii: #"'"#), UInt8(ascii: #"""#): + return self.lexStringLiteral(start) + + case UInt8(ascii: "`"): + return self.lexEscapedIdentifier(start) + case nil: + return .eof + default: + var Tmp = start + if Tmp.advance(if: { Unicode.Scalar($0).isValidIdentifierStartCodePoint }) { + return self.lexIdentifier(start) + } + + if Tmp.advance(if: { Unicode.Scalar($0).isOperatorStartCodePoint }) { + return self.lexOperatorIdentifier(start, ContentStart) + } + + let shouldTokenize = self.lexUnknown(start) + assert(shouldTokenize, "Invalid UTF-8 sequence should be eaten by lexTrivia as LeadingTrivia") + return .unknown + } + } +} + +// MARK: - Trivia + +extension Lexer.Cursor { + fileprivate enum NewlinePresence { + case absent + case present + } + + /// Advances the cursor over trivia and reports whether a newline was + /// consumed along the way. When `position` is `.trailing`, newlines and + /// comments are not consumed, so they are left for the next token's + /// leading trivia. On return the cursor sits on the first character that + /// is not trivia. + fileprivate mutating func lexTrivia(_ position: TriviaPosition) -> NewlinePresence { + var hasNewline = false + while true { + let start = self + + switch self.advance() { + // 'continue' - the character is a part of the trivia. + // 'break' - the character should be a part of token text.
+ case nil: + break + case UInt8(ascii: "\n"): + if case .trailing = position { + break + } + hasNewline = true + continue + case UInt8(ascii: "\r"): + if case .trailing = position { + break + } + hasNewline = true + continue + + case UInt8(ascii: " "): + continue + case UInt8(ascii: "\t"): + continue + case UInt8(ascii: "\u{000B}"): + continue + case UInt8(ascii: "\u{000C}"): + continue + case UInt8(ascii: "/"): + guard !self.isAtEndOfFile else { + break + } + if case .trailing = position { + // Don't lex comments as trailing trivia (for now). + break + } + + switch self.peek() { + case UInt8(ascii: "/"): + _ = self.advanceToEndOfLine() + continue + case UInt8(ascii: "*"): + self = start + _ = self.advanceToEndOfSlashStarComment() + continue + default: + break + } + case UInt8(ascii: "#"): + guard start.isAtStartOfFile, self.advance(if: { $0 == "!" }) else { + break + } + _ = self.advanceToEndOfLine() + continue + case UInt8(ascii: "<"), UInt8(ascii: ">"): + guard self.tryLexConflictMarker(start: start) else { + break + } + continue + // Start character of tokens. +// case (char)-1: case (char)-2: + case + // Punctuation. + UInt8(ascii: "{"), UInt8(ascii: "["), UInt8(ascii: "("), + UInt8(ascii: "}"), UInt8(ascii: "]"), UInt8(ascii: ")"), + UInt8(ascii: "@"), UInt8(ascii: ","), UInt8(ascii: ";"), + UInt8(ascii: ":"), UInt8(ascii: "\\"), UInt8(ascii: "$"), + + // Start of integer/hex/float literals. + UInt8(ascii: "0"), UInt8(ascii: "1"), UInt8(ascii: "2"), + UInt8(ascii: "3"), UInt8(ascii: "4"), UInt8(ascii: "5"), + UInt8(ascii: "6"), UInt8(ascii: "7"), UInt8(ascii: "8"), + UInt8(ascii: "9"), + + // Start of literals. + UInt8(ascii: #"""#), UInt8(ascii: #"'"#), UInt8(ascii: "`"), + + // Start of identifiers. 
+ UInt8(ascii: "A"), UInt8(ascii: "B"), UInt8(ascii: "C"), + UInt8(ascii: "D"), UInt8(ascii: "E"), UInt8(ascii: "F"), + UInt8(ascii: "G"), UInt8(ascii: "H"), UInt8(ascii: "I"), + UInt8(ascii: "J"), UInt8(ascii: "K"), UInt8(ascii: "L"), + UInt8(ascii: "M"), UInt8(ascii: "N"), UInt8(ascii: "O"), + UInt8(ascii: "P"), UInt8(ascii: "Q"), UInt8(ascii: "R"), + UInt8(ascii: "S"), UInt8(ascii: "T"), UInt8(ascii: "U"), + UInt8(ascii: "V"), UInt8(ascii: "W"), UInt8(ascii: "X"), + UInt8(ascii: "Y"), UInt8(ascii: "Z"), + UInt8(ascii: "a"), UInt8(ascii: "b"), UInt8(ascii: "c"), + UInt8(ascii: "d"), UInt8(ascii: "e"), UInt8(ascii: "f"), + UInt8(ascii: "g"), UInt8(ascii: "h"), UInt8(ascii: "i"), + UInt8(ascii: "j"), UInt8(ascii: "k"), UInt8(ascii: "l"), + UInt8(ascii: "m"), UInt8(ascii: "n"), UInt8(ascii: "o"), + UInt8(ascii: "p"), UInt8(ascii: "q"), UInt8(ascii: "r"), + UInt8(ascii: "s"), UInt8(ascii: "t"), UInt8(ascii: "u"), + UInt8(ascii: "v"), UInt8(ascii: "w"), UInt8(ascii: "x"), + UInt8(ascii: "y"), UInt8(ascii: "z"), + UInt8(ascii: "_"), + + // Start of operators. + UInt8(ascii: "%"), UInt8(ascii: "!"), UInt8(ascii: "?"), + UInt8(ascii: "="), UInt8(ascii: "-"), UInt8(ascii: "+"), + UInt8(ascii: "*"), UInt8(ascii: "&"), UInt8(ascii: "|"), + UInt8(ascii: "^"), UInt8(ascii: "~"), UInt8(ascii: "."): + break + case 0xEF: + if self.input.count > 2, + self.peek(at: 0) == 0xBB, self.peek(at: 1) == 0xBF { + // BOM marker. + _ = self.advance() + _ = self.advance() + continue + } + + fallthrough + default: + var Tmp = start + if Tmp.advance(if: { Unicode.Scalar($0).isValidIdentifierStartCodePoint }) { + break + } + if Tmp.advance(if: { Unicode.Scalar($0).isOperatorStartCodePoint }) { + break + } + + guard self.lexUnknown(start) else { + continue + } + + break + } + + // `break` means the character was not a trivia. Reset the cursor and + // return the result. + self = start + return hasNewline ? 
.present : .absent + } + } +} + +// MARK: - Literals + +extension Lexer.Cursor { + /// lexStringLiteral: + /// string_literal ::= ["]([^"\\\n\r]|character_escape)*["] + /// string_literal ::= ["]["]["].*["]["]["] - approximately + /// string_literal ::= (#+)("")?".*"(\2\1) - "raw" strings + mutating func lexStringLiteral(_ start: Lexer.Cursor, _ customDelimiterLength: Int = 0) -> RawTokenKind { + assert(self.previous == UInt8(ascii: #"""#) || self.previous == UInt8(ascii: #"'"#)) + + let QuoteChar = self.previous + let IsMultilineString = self.advanceIfMultilineDelimiter(customDelimiterLength, true) + /* + if IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r' { + diagnose(CurPtr, diag::lex_illegal_multiline_string_start) + .fixItInsert(Lexer::getSourceLoc(CurPtr), "\n") + } +*/ + + var wasErroneous = false + DELIMITLOOP: while true { + // Handle string interpolation. + var TmpPtr = self + guard TmpPtr.advance() != nil else { + // This is the end of string, we are done. + break DELIMITLOOP + } + if !self.isAtEndOfFile, self.peek() == UInt8(ascii: "\\") && + TmpPtr.delimiterMatches(customDelimiterLength) && + TmpPtr.advance() == UInt8(ascii: "(") { + // Consume tokens until we hit the corresponding ')'. + self = Self.skipToEndOfInterpolatedExpression(TmpPtr, IsMultilineString) + if self.advance(if: { $0 == Unicode.Scalar(")") }) { + // Successfully scanned the body of the expression literal. + continue + } else if !self.isAtEndOfFile, ((self.peek() == UInt8(ascii: "\r") || self.peek() == UInt8(ascii: "\n")) && IsMultilineString) { + // The only case we reach here is unterminated single line string in the + // interpolation. For better recovery, go on after emitting an error. +// diagnose(CurPtr, diag::lex_unterminated_string) + wasErroneous = true + continue + } else { +// diagnose(TokStart, diag::lex_unterminated_string) + return .unknown + } + } + + // String literals cannot have \n or \r in them (unless multiline). 
+ if !self.isAtEndOfFile, ((self.peek() == UInt8(ascii: "\r") || self.peek() == UInt8(ascii: "\n")) && !IsMultilineString) + || self.isAtEndOfFile { +// diagnose(TokStart, diag::lex_unterminated_string) + return .unknown + } + + let CharValue = self.lexCharacter(QuoteChar, IsMultilineString, customDelimiterLength) + switch CharValue { + case .endOfString: + // This is the end of string, we are done. + break DELIMITLOOP + case .error: + // Remember we had already-diagnosed invalid characters. + wasErroneous = true + default: + break + } + } + +// if QuoteChar == UInt8(ascii: #"'"#) { +// assert(!IsMultilineString && customDelimiterLength == 0, +// "Single quoted string cannot have custom delimitor, nor multiline") +// diagnoseSingleQuoteStringLiteral(TokStart, CurPtr) +// } + + if wasErroneous { + return .unknown + } + + return .stringLiteral + } +} + +extension Lexer.Cursor { + enum CharacterLex { + case endOfString + case error + case success(Unicode.Scalar) + case validated(Character) + } + /// lexCharacter - Read one character of a string literal. If this is the + /// end of the enclosing string (i.e. the character is equal to 'StopQuote' + /// and any required multiline or custom delimiter also matches), this + /// returns `.endOfString` with the cursor advanced past the closing + /// delimiter. If this is a malformed character sequence (an invalid escape + /// or invalid UTF-8), this returns `.error`; the diagnostics are still + /// commented out. Otherwise it returns `.success` with the scalar that was + /// read, or `.validated` for a successfully decoded escape. + /// + /// character_escape ::= [\][\] | [\]t | [\]n | [\]r | [\]" | [\]' | [\]0 + /// character_escape ::= unicode_character_escape + mutating func lexCharacter(_ StopQuote: UInt8, _ IsMultilineString: Bool, + _ CustomDelimiterLen: Int) -> CharacterLex { + let CharStart = self + + switch self.advance() { + case UInt8(ascii: #"""#), UInt8(ascii: #"'"#): + if self.previous == StopQuote { + // Multiline and custom escaping are only enabled for " quote.
+ if StopQuote != UInt8(ascii: #"""#) { + return .endOfString + } + if !IsMultilineString && CustomDelimiterLen == 0 { + return .endOfString + } + + var TmpPtr = self + if IsMultilineString && + !TmpPtr.advanceIfMultilineDelimiter(CustomDelimiterLen) { + return .success(Unicode.Scalar(UInt8(ascii: #"""#))) + } + if CustomDelimiterLen > 0 && + !TmpPtr.delimiterMatches(CustomDelimiterLen, /*IsClosing=*/true) { + return .success(Unicode.Scalar(UInt8(ascii: #"""#))) + } + self = TmpPtr + return .endOfString + } + // Otherwise, this is just a character. + return .success(Unicode.Scalar(self.previous)) + + case 0: + // assert(CurPtr - 1 != BufferEnd && "Caller must handle EOF") + // if (EmitDiagnostics) + // diagnose(CurPtr-1, diag::lex_nul_character) + return .success(Unicode.Scalar(self.previous)) + case UInt8(ascii: "\n"), UInt8(ascii: "\r"): // String literals cannot have \n or \r in them. + assert(IsMultilineString, "Caller must handle newlines in non-multiline") + return .success(Unicode.Scalar(self.previous)) + + case UInt8(ascii: "\\"): // Escapes. + if !self.delimiterMatches(CustomDelimiterLen) { + return .success(Unicode.Scalar("\\")) + } + guard + let c = self.lexEscapedCharacter(IsMultilineString), + // Check to see if the encoding is valid. + let cv = Unicode.Scalar(c) + else { + return .error + } + + return .validated(Character(cv)) + default: + // Normal characters are part of the string. + // If this is a "high" UTF-8 character, validate it. 
+ // if ((signed char)(CurPtr[-1]) >= 0) { + // if (isPrintable(CurPtr[-1]) == 0) + // if (!(IsMultilineString && (CurPtr[-1] == '\t'))) + // if (EmitDiagnostics) + // diagnose(CharStart, diag::lex_unprintable_ascii_character) + // return CurPtr[-1] + // } + self = CharStart + guard let CharValue = self.validateUTF8CharacterAndAdvance() else { + // if (EmitDiagnostics) + // diagnose(CharStart, diag::lex_invalid_utf8) + return .error + } + return .success(CharValue) + } + } + + + fileprivate mutating func lexEscapedCharacter(_ IsMultilineString: Bool) -> UInt32? { + guard !self.isAtEndOfFile else { + return nil + } + + // Escape processing. We already ate the "\". + switch self.peek() { + // Simple single-character escapes. + case UInt8(ascii: "0"): _ = self.advance(); return UInt32(UInt8(ascii: "\0")) + case UInt8(ascii: "n"): _ = self.advance(); return UInt32(UInt8(ascii: "\n")) + case UInt8(ascii: "r"): _ = self.advance(); return UInt32(UInt8(ascii: "\r")) + case UInt8(ascii: "t"): _ = self.advance(); return UInt32(UInt8(ascii: "\t")) + case UInt8(ascii: #"""#): _ = self.advance(); return UInt32(UInt8(ascii: #"""#)) + case UInt8(ascii: #"'"#): _ = self.advance(); return UInt32(UInt8(ascii: #"'"#)) + case UInt8(ascii: "\\"): _ = self.advance(); return UInt32(UInt8(ascii: "\\")) + + case UInt8(ascii: "u"): // \u HEX HEX HEX HEX + _ = self.advance() + guard !self.isAtEndOfFile, self.peek() == UInt8(ascii: "{") else { + // if (EmitDiagnostics) + // diagnose(CurPtr-1, diag::lex_unicode_escape_braces) + return nil + } + + guard let cv = self.lexUnicodeEscape() else { + return nil + } + return cv + + case UInt8(ascii: " "), UInt8(ascii: "\t"), UInt8(ascii: "\n"), UInt8(ascii: "\r"): + if IsMultilineString && self.maybeConsumeNewlineEscape() { + return UInt32(UInt8(ascii: "\n")) + } + fallthrough + default: // Invalid escape. 
+ // if (EmitDiagnostics) + // diagnose(CurPtr, diag::lex_invalid_escape) + // If this looks like a plausible escape character, recover as though this + // is an invalid escape. + let c = Unicode.Scalar(self.peek()) + if c.isDigit || c.isLetter { + _ = self.advance() + } + return nil + } + } + + fileprivate mutating func lexUnicodeEscape() -> UInt32? { + assert(self.peek() == UInt8(ascii: "{"), "Invalid unicode escape") + _ = self.advance() + + let DigitStart = self + var NumDigits = 0; + while self.advance(if: { $0.isHexDigit }) { + NumDigits += 1 + } + + if !self.isAtEndOfFile, self.peek() != UInt8(ascii: "}") { +// if (Diags) +// Diags->diagnose(CurPtr, diag::lex_invalid_u_escape_rbrace) + return nil + } + _ = self.advance() + + if (NumDigits < 1 || NumDigits > 8) { +// if (Diags) +// Diags->diagnose(CurPtr, diag::lex_invalid_u_escape) + return nil + } + + return UInt32(String(decoding: DigitStart.input[0.. RawTokenKind { + assert((Unicode.Scalar(self.previous).isDigit || self.previous == UInt8(ascii: ".")), + "Unexpected start") + +// let expected_int_digit = { (loc: Lexer.Cursor, kind: ExpectedDigitKind) -> TokenKind in +// diagnose(loc, diag::lex_invalid_digit_in_int_literal, StringRef(loc, 1), +// (unsigned)kind) +// loc.advance(while: { $0.isValidIdentifierContinuationCodePoint }) +// return .unknown(TokStart.adoptRange(upTo: loc) { stringBuf in +// String(decoding: stringBuf, as: UTF8.self) +// }) +// } + + if !self.isAtEndOfFile && self.previous == UInt8(ascii: "0") && self.peek() == UInt8(ascii: "x") { + return self.lexHexNumber(TokStart) + } + + if !self.isAtEndOfFile && self.previous == UInt8(ascii: "0") && self.peek() == UInt8(ascii: "o") { + // 0o[0-7][0-7_]* + _ = self.advance() + if !self.isAtEndOfFile, self.peek() < UInt8(ascii: "0") || self.peek() > UInt8(ascii: "7") { +// return expected_int_digit(self, .octal) + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + return .unknown + } + + self.advance(while: { + ($0 >= 
Unicode.Scalar("0") && $0 <= Unicode.Scalar("7")) || $0 == Unicode.Scalar("_") + }) + +// let tmp = self + if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) { +// return expected_int_digit(tmp, .octal) + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + return .unknown + } + + return .integerLiteral + } + + if !self.isAtEndOfFile && TokStart.peek() == UInt8(ascii: "0") && self.peek() == UInt8(ascii: "b") { + // 0b[01][01_]* + _ = self.advance() + if !self.isAtEndOfFile, self.peek() != UInt8(ascii: "0") && self.peek() != UInt8(ascii: "1") { +// return expected_int_digit(self, .binary) + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + return .unknown + } + + self.advance(while: { + $0 == Unicode.Scalar("0") || $0 == Unicode.Scalar("1") || $0 == Unicode.Scalar("_") + }) + +// let tmp = self + if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) { +// return expected_int_digit(tmp, .binary) + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + return .unknown + } + + return .integerLiteral + } + + // Handle a leading [0-9]+, lexing an integer or falling through if we have a + // floating point value. + self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") }) + + // Lex things like 4.x as '4' followed by a tok::period. + if !self.isAtEndOfFile, self.peek() == UInt8(ascii: ".") { + // NextToken is the soon to be previous token + // Therefore: x.0.1 is sub-tuple access, not x.float_literal + if self.input.count > 1, !Unicode.Scalar(self.peek(at: 1)).isDigit || TokStart.previous == UInt8(ascii: ".") { + return .integerLiteral + } + } else { + // Floating literals must have '.', 'e', or 'E' after digits. If it is + // something else, then this is the end of the token. 
+ if self.isAtEndOfFile || (self.peek() != UInt8(ascii: "e") && self.peek() != UInt8(ascii: "E")) { + var tmp = self + if tmp.advance(if: { $0.isValidIdentifierContinuationCodePoint }) { +// return expected_int_digit(tmp, .decimal) + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + return .unknown + } + + return .integerLiteral + } + } + + // Lex decimal point. + if self.advance(if: { $0 == Unicode.Scalar(".") }) { + // Lex any digits after the decimal point. + self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") }) + } + + // Lex exponent. + if self.advance(if: { $0 == Unicode.Scalar("e") || $0 == Unicode.Scalar("E") }) { + _ = self.advance(if: { $0 == Unicode.Scalar("-") || $0 == Unicode.Scalar("+") }) + + guard !self.isAtEndOfFile, Unicode.Scalar(self.peek()).isDigit else { + // There are 3 cases to diagnose if the exponent starts with a non-digit: + // identifier (invalid character), underscore (invalid first character), + // non-identifier (empty exponent) +// let tmp = self +// if (advanceIfValidContinuationOfIdentifier(CurPtr, BufferEnd)) +// diagnose(tmp, diag::lex_invalid_digit_in_fp_exponent, StringRef(tmp, 1), +// *tmp == '_') +// else +// diagnose(CurPtr, diag::lex_expected_digit_in_fp_exponent) + + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + return .unknown + } + + self.advance(while: { char in + char.isDigit || char == Unicode.Scalar("_") + }) + + if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) { +// diagnose(tmp, diag::lex_invalid_digit_in_fp_exponent, StringRef(tmp, 1), +// false) + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + return .unknown + } + } + + return .floatingLiteral + } + + mutating func lexHexNumber(_ TokStart: Lexer.Cursor) -> RawTokenKind { + // We assume we're starting from the 'x' in a '0x...' floating-point literal. 
+ assert(self.peek() == UInt8(ascii: "x"), "not a hex literal") + assert(self.previous == UInt8(ascii: "0"), "not a hex literal") + + let expected_digit = { (end: Lexer.Cursor) -> RawTokenKind in + var end = end + end.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + return .unknown + } + + let expected_hex_digit = { (loc: Lexer.Cursor) -> RawTokenKind in +// diagnose(loc, diag::lex_invalid_digit_in_hex_literal, StringRef(loc, 1), +// (unsigned)kind) + return expected_digit(loc) + } + + // 0x[0-9a-fA-F][0-9a-fA-F_]* + _ = self.advance() + guard !self.isAtEndOfFile, Unicode.Scalar(self.peek()).isHexDigit else { + return expected_hex_digit(self) + } + + self.advance(while: { $0.isHexDigit || $0 == Unicode.Scalar("_") }) + + guard !self.isAtEndOfFile else { + return .integerLiteral + } + + if self.peek() != UInt8(ascii: ".") && self.peek() != UInt8(ascii: "p") && self.peek() != UInt8(ascii: "P") { + let tmp = self + if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) { + return expected_hex_digit(tmp) + } else { + return .integerLiteral + } + } + + // (\.[0-9A-Fa-f][0-9A-Fa-f_]*)? + var PtrOnDot: Lexer.Cursor? = self + if self.advance(if: { $0 == Unicode.Scalar(".") }) { + // If the character after the '.' is not a digit, assume we have an int + // literal followed by a dot expression. + if !self.isAtEndOfFile, !Unicode.Scalar(self.peek()).isHexDigit { + self = PtrOnDot! + return .integerLiteral + } + + self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") }) + + if !self.isAtEndOfFile, self.peek() != UInt8(ascii: "p") && self.peek() != UInt8(ascii: "P") { + if !Unicode.Scalar(PtrOnDot!.peek(at: 1)).isDigit { + // e.g: 0xff.description + self = PtrOnDot! 
+ return .integerLiteral + } +// diagnose(CurPtr, diag::lex_expected_binary_exponent_in_hex_float_literal) + return .unknown + } + } else { + PtrOnDot = nil + } + + // [pP][+-]?[0-9][0-9_]* + assert(self.isAtEndOfFile || self.peek() == UInt8(ascii: "p") || self.peek() == UInt8(ascii: "P"), "not at a hex float exponent?!") + _ = self.advance() + + var signedExponent = false + if self.advance(if: { $0 == Unicode.Scalar("+") || $0 == Unicode.Scalar("-") }) { + // Eat the sign. + signedExponent = true + } + + if !self.isAtEndOfFile, !Unicode.Scalar(self.peek()).isDigit { + if let PtrOnDot = PtrOnDot, !Unicode.Scalar(PtrOnDot.peek(at: 1)).isDigit && !signedExponent { + // e.g: 0xff.fpValue, 0xff.fp + self = PtrOnDot + return .integerLiteral + } + // Note: 0xff.fp+otherExpr can be valid expression. But we don't accept it. + + // There are 3 cases to diagnose if the exponent starts with a non-digit: + // identifier (invalid character), underscore (invalid first character), + // non-identifier (empty exponent) + if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) { +// diagnose(tmp, diag::lex_invalid_digit_in_fp_exponent, StringRef(tmp, 1), +// *tmp == '_') + } else { +// diagnose(CurPtr, diag::lex_expected_digit_in_fp_exponent) + } + return expected_digit(self) + } + + self.advance(while: { $0.isDigit || $0 == Unicode.Scalar("_") }) + + if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) { +// diagnose(tmp, diag::lex_invalid_digit_in_fp_exponent, StringRef(tmp, 1), +// false) + return expected_digit(self) + } + + return .floatingLiteral + } +} + +extension Lexer.Cursor { + mutating func lexMagicPoundLiteral() -> RawTokenKind { + let start = self + var clone = self + // Scan for [a-zA-Z]+ to see what we match. 
+ if !clone.isAtEndOfFile && Unicode.Scalar(clone.peek()).isAsciiIdentifierStart { + repeat { + _ = clone.advance() + } while !clone.isAtEndOfFile && Unicode.Scalar(clone.peek()).isAsciiIdentifierContinue + } + + let literal = start.textUpTo(clone) + + let kind: RawTokenKind + switch literal { + case "keyPath": kind = .poundKeyPathKeyword + case "line": kind = .poundLineKeyword + case "selector": kind = .poundSelectorKeyword + case "file": kind = .poundFileKeyword + case "fileID": kind = .poundFileIDKeyword + case "filePath": kind = .poundFilePathKeyword + case "column": kind = .poundColumnKeyword + case "function": kind = .poundFunctionKeyword + case "dsohandle": kind = .poundDsohandleKeyword + case "assert": kind = .poundAssertKeyword + case "sourceLocation": kind = .poundSourceLocationKeyword + case "warning": kind = .poundWarningKeyword + case "error": kind = .poundErrorKeyword + case "if": kind = .poundIfKeyword + case "else": kind = .poundElseKeyword + case "elseif": kind = .poundElseifKeyword + case "endif": kind = .poundEndifKeyword + case "available": kind = .poundAvailableKeyword + case "unavailable": kind = .poundUnavailableKeyword + case "fileLiteral": kind = .poundFileLiteralKeyword + case "imageLiteral": kind = .poundImageLiteralKeyword + case "colorLiteral": kind = .poundColorLiteralKeyword + default: + // If we didn't find a match, then just return `.pound`. This is highly + // dubious in terms of error recovery, but is useful for code completion and + // SIL parsing. + return .pound + } + + // If we found something specific, return it. 
+ self = clone + return kind + } +} + +// MARK: - Identifiers + +extension Lexer.Cursor { + /// lexIdentifier - Match [a-zA-Z_][a-zA-Z_$0-9]* + mutating func lexIdentifier(_ tokStart: Lexer.Cursor) -> RawTokenKind { + self = tokStart + let didStart = self.advance(if: { $0.isValidIdentifierStartCodePoint }) + assert(didStart, "Unexpected start") + + // Lex [a-zA-Z_$0-9[[:XID_Continue:]]]* + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + + let text = tokStart.textUpTo(self) + return RawTokenKind(keyword: text) ?? .identifier + } + + mutating func lexEscapedIdentifier(_ Quote: Lexer.Cursor) -> RawTokenKind { + assert(self.previous == UInt8(ascii: "`"), "Unexpected start of escaped identifier") + + // Check whether we have an identifier followed by another backtick, in which + // case this is an escaped identifier. + let IdentifierStart = self + if self.advance(if: { $0.isValidIdentifierStartCodePoint }) { + // Keep continuing the identifier. + self.advance(while: { $0.isValidIdentifierContinuationCodePoint }) + + // If we have the terminating "`", it's an escaped identifier. + if self.advance(if: { $0 == Unicode.Scalar("`") }) { + return .identifier + } + } + + // Special case; allow '`$`'. + if Quote.starts(with: "`$`".utf8) { + self = Quote + _ = self.advance() + _ = self.advance() + _ = self.advance() + return .identifier + } + + // The backtick is punctuation. + self = IdentifierStart + return .backtick + } + + mutating func lexOperatorIdentifier(_ TokStart: Lexer.Cursor, _ ContentStart: Lexer.Cursor) -> RawTokenKind { + self = TokStart + let didStart = self.advance(if: { $0.isOperatorStartCodePoint }) + assert(didStart, "unexpected operator start") + + repeat { + // '.' cannot appear in the middle of an operator unless the operator + // started with a '.'. 
+ if !self.isAtEndOfFile, self.peek() == UInt8(ascii: ".") && TokStart.peek() != UInt8(ascii: ".") { + break + } +// if (Identifier::isEditorPlaceholder(StringRef(CurPtr, BufferEnd-CurPtr)) && +// rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) { +// break +// } + +// // If we are lexing a `/.../` regex literal, we don't consider `/` to be an +// // operator character. +// if ForwardSlashRegexMode != LexerForwardSlashRegexMode::None && +// CurPtr.peek() == UInt8(ascii: "/") { +// break +// } + } while self.advance(if: { $0.isOperatorContinuationCodePoint }) + + if (self.input.baseAddress!-TokStart.input.baseAddress! > 2) { + // If there is a "//" or "/*" in the middle of an identifier token, + // it starts a comment. + var Ptr = TokStart + _ = Ptr.advance() + while Ptr.input.baseAddress! < self.input.baseAddress! { + defer { _ = Ptr.advance() } + if self.input.count > 1, (Ptr.peek() == UInt8(ascii: "/") && (Ptr.peek(at: 1) == UInt8(ascii: "/") || Ptr.peek(at: 1) == UInt8(ascii: "*"))) { + self = Ptr + break + } + } + } + + // Decide between the binary, prefix, and postfix cases. + // It's binary if either both sides are bound or both sides are not bound. + // Otherwise, it's postfix if left-bound and prefix if right-bound. + let leftBound = TokStart.isLeftBound(ContentStart) + let rightBound = self.isRightBound(leftBound) + + // Match various reserved words. + if (self.input.baseAddress! - TokStart.input.baseAddress! == 1) { + switch (TokStart.peek()) { + case UInt8(ascii: "="): + // Refrain from emitting this message in operator name position. 
+// if (NextToken.isNot(tok::kw_operator) && leftBound != rightBound) { +// auto d = diagnose(TokStart, diag::lex_unary_equal) +// if (leftBound) +// d.fixItInsert(getSourceLoc(TokStart), " ") +// else +// d.fixItInsert(getSourceLoc(TokStart+1), " ") +// } + // always emit 'tok::equal' to avoid trickle down parse errors + return .equal + case UInt8(ascii: "&"): + if (leftBound == rightBound || leftBound) { + break + } + return .prefixAmpersand + case UInt8(ascii: "."): + if leftBound == rightBound { + return .period + } + + if (rightBound) { + return .prefixPeriod + } + + // If left bound but not right bound, handle some likely situations. + + // If there is just some horizontal whitespace before the next token, its + // addition is probably incorrect. + var AfterHorzWhitespace = self + while !AfterHorzWhitespace.isAtEndOfFile, AfterHorzWhitespace.peek() == UInt8(ascii: " ") || AfterHorzWhitespace.peek() == UInt8(ascii: "\t") { + _ = AfterHorzWhitespace.advance() + } + +// // First, when we are code completing "x. ", then make sure to return +// // a tok::period, since that is what the user is wanting to know about. +// if (AfterHorzWhitespace.peek() == 0 && +// AfterHorzWhitespace == CodeCompletionPtr) { +// diagnose(TokStart, diag::expected_member_name) +// return formToken(tok::period, TokStart) +// } + +// if AfterHorzWhitespace.isRightBound(leftBound) && +// // Don't consider comments to be this. A leading slash is probably +// // either // or /* and most likely occurs just in our testsuite for +// // expected-error lines. +// AfterHorzWhitespace.peek() != UInt8(ascii: "/") { +// diagnose(TokStart, diag::extra_whitespace_period) +// .fixItRemoveChars(getSourceLoc(CurPtr), +// getSourceLoc(AfterHorzWhitespace)) +// return formToken(tok::period, TokStart) +// } + + // Otherwise, it is probably a missing member. 
+// diagnose(TokStart, diag::expected_member_name) + return .unknown + case UInt8(ascii: "?"): + if (leftBound) { + return .postfixQuestionMark + } + return .infixQuestionMark + default: + break + } + } else if (self.input.baseAddress! - TokStart.input.baseAddress! == 2) { + switch (TokStart.peek(), TokStart.peek(at: 1)) { + case (UInt8(ascii: "-"), UInt8(ascii: ">")): // -> + return .arrow + case (UInt8(ascii: "*"), UInt8(ascii: "/")): // */ +// diagnose(TokStart, diag::lex_unexpected_block_comment_end) + return .unknown + default: + break + } + } else { + // Verify there is no "*/" in the middle of the identifier token, we reject + // it as potentially ending a block comment. + if TokStart.textUpTo(self).contains("*/") { +// diagnose(TokStart+Pos, diag::lex_unexpected_block_comment_end) + return .unknown + } + } + + if leftBound == rightBound { + if leftBound { + return .unspacedBinaryOperator + } else { + return .spacedBinaryOperator + } + } else if leftBound { + return .postfixOperator + } else { + return .prefixOperator + } + } + + mutating func lexDollarIdentifier(_ tokStart: Lexer.Cursor) -> RawTokenKind { + assert(self.previous == UInt8(ascii: "$")) + + var isAllDigits = true + while true { + if !self.isAtEndOfFile, Unicode.Scalar(self.peek()).isDigit { + _ = self.advance() + continue + } else if self.advance(if: { $0.isValidIdentifierContinuationCodePoint }) { + isAllDigits = false + continue + } + break + } + + // If there is a standalone '$', treat it like an identifier. 
// MARK: - Unknown Syntax

extension Lexer.Cursor {
  /// Scans forward from the cursor for the end of a string literal that was
  /// opened with a curly quote.
  ///
  /// - Returns: The cursor just past the closing straight quote or closing
  ///   curly quote (U+201D), or `nil` if a `\(` interpolation, the end of the
  ///   line, the end of the file, or a character that cannot be lexed is
  ///   reached first.
  private func findEndOfCurlyQuoteStringLiteral() -> Lexer.Cursor? {
    var Body = self
    while true {
      // Don't bother with string interpolations.
      if !Body.isAtEndOfFile, Body.peek(at: 0) == UInt8(ascii: "\\") && Body.peek(at: 1) == UInt8(ascii: "(") {
        return nil
      }

      // We didn't find the end of the string literal if we ran to end of line.
      if Body.isAtEndOfFile || Body.peek() == UInt8(ascii: "\r") || Body.peek() == UInt8(ascii: "\n") {
        return nil
      }

      // Get the next character.
      switch Body.lexCharacter(0, false, 0) {
      case .error, .endOfString:
        // If the character was incorrectly encoded, give up.
        return nil
      case .success(let CharValue) where CharValue == Unicode.Scalar(UInt8(ascii: #"""#)):
        // If we found a straight-quote, then we're done. Just return the spot
        // to continue.
        return Body
      case .validated(let CharValue) where CharValue == Character(Unicode.Scalar(0x0000201D)!):
        // If we found an ending curly quote (common since this thing started with
        // an opening curly quote) we are done as well.
        // TODO: diagnose `lex_invalid_curly_quote` with a fix-it to `"`.
        return Body
      default:
        continue
      }
    }
  }

  /// Recovers from a character at `start` that cannot begin any known token
  /// and moves the cursor past the text that was consumed.
  ///
  /// - Parameter start: Cursor positioned on the offending character.
  /// - Returns: `true` after consuming a run of identifier-continuation
  ///   characters, a closing curly quote, or an opening curly quote (together
  ///   with the fuzzy-matched literal that follows it, if any). `false` when
  ///   the consumed bytes are skipped as presumed whitespace: invalid UTF-8,
  ///   non-breaking spaces, or any other character that is not allowed.
  mutating func lexUnknown(_ start: Lexer.Cursor) -> Bool {
    var Tmp = start
    if Tmp.advance(if: { Unicode.Scalar($0).isValidIdentifierContinuationCodePoint }) {
      // If this is a valid identifier continuation, but not a valid identifier
      // start, attempt to recover by eating more continuation characters.
      // TODO: diagnose `lex_invalid_identifier_start_character`.
      while Tmp.advance(if: { Unicode.Scalar($0).isValidIdentifierContinuationCodePoint }) {
      }
      self = Tmp
      return true
    }

    // This character isn't allowed in Swift source.
    guard let Codepoint = Tmp.validateUTF8CharacterAndAdvance() else {
      // TODO: diagnose `lex_invalid_utf8` with a fix-it to " ".
      self = Tmp
      return false // Skip presumed whitespace.
    }
    if Codepoint.value == 0x000000A0 {
      // Non-breaking whitespace (U+00A0): swallow the whole run of them
      // (0xC2 0xA0 is the UTF-8 encoding of U+00A0).
      while Tmp.peek(at: 0) == 0xC2 && Tmp.peek(at: 1) == 0xA0 {
        _ = Tmp.advance()
        _ = Tmp.advance()
      }

      // TODO: diagnose `lex_nonbreaking_space` with a fix-it replacing the run
      // with ordinary spaces.
      self = Tmp
      return false
    } else if Codepoint.value == 0x0000201D {
      // If this is an end curly quote, just diagnose it with a fixit hint.
      // TODO: diagnose `lex_invalid_curly_quote`.
      self = Tmp
      return true
    } else if Codepoint.value == 0x0000201C {
      // If this is a start curly quote, do a fuzzy match of a string literal
      // to improve recovery.
      if let Tmp2 = Tmp.findEndOfCurlyQuoteStringLiteral() {
        Tmp = Tmp2
      }

      // Note, we intentionally diagnose the end quote before the start quote,
      // so that the IDE suggests fixing the end quote before the start quote.
      // This, in turn, works better with our error recovery because we won't
      // diagnose an end curly quote in the middle of a straight quoted
      // literal.
      // TODO: diagnose `lex_invalid_curly_quote`.
      self = Tmp
      return true
    }

    // TODO: diagnose `lex_invalid_character`, and `lex_confusable_character`
    // when the code point is confusable with an ASCII character.
    self = Tmp
    return false // Skip presumed whitespace.
  }

  /// The flavors of version-control conflict markers the lexer recognizes.
  enum ConflictMarker {
    case normal
    case perforce

    /// The text that opens a conflict region.
    var introducer: String {
      switch self {
      case .perforce:
        return ">>>> "
      case .normal:
        return "<<<<<<< "
      }
    }

    /// The text that closes a conflict region.
    var terminator: String {
      switch self {
      case .perforce:
        return "<<<<\n"
      case .normal:
        return ">>>>>>> "
      }
    }
  }

  /// Attempts to skip a version-control conflict region that begins at
  /// `start`.
  ///
  /// - Parameter start: Cursor positioned where the marker would begin.
  /// - Returns: `true` if a complete conflict region was found, in which case
  ///   the cursor has been moved past its terminator line; `false` if `start`
  ///   is not at the beginning of a line, does not begin with an introducer,
  ///   or no matching terminator exists.
  mutating func tryLexConflictMarker(start: Lexer.Cursor) -> Bool {
    // Only a conflict marker if it starts at the beginning of a line.
    guard start.previous == UInt8(ascii: "\n") || start.previous == UInt8(ascii: "\r") else {
      return false
    }

    // Check to see if we have <<<<<<< or >>>>.
    guard start.starts(with: "<<<<<<< ".utf8) || start.starts(with: ">>>> ".utf8) else {
      return false
    }

    let kind = start.peek() == UInt8(ascii: "<") ? ConflictMarker.normal : .perforce
    guard let End = Self.findConflictEnd(start, kind) else {
      // No end of conflict marker found.
      return false
    }

    // Jump ahead to the end of the conflict region.
    // TODO: diagnose `lex_conflict_marker_in_file` at the marker.
    self = End

    // Skip ahead to the end of the marker.
    if !self.isAtEndOfFile {
      _ = self.advanceToEndOfLine()
    }
    return true
  }

  /// Find the end of a version control conflict marker.
  ///
  /// - Parameters:
  ///   - CurPtr: Cursor positioned on the introducer of the conflict region.
  ///   - CMK: The kind of marker that opened the region.
  /// - Returns: A cursor positioned just past the terminator text, or `nil`
  ///   if no terminator that starts a line is found before the end of the
  ///   buffer.
  static func findConflictEnd(_ CurPtr: Lexer.Cursor, _ CMK: ConflictMarker) -> Lexer.Cursor? {
    // Get a reference to the rest of the buffer minus the length of the start
    // of the conflict marker.
    let introducerLength = CMK.introducer.utf8.count
    let terminatorLength = CMK.terminator.utf8.count
    let terminatorStart = CMK.terminator.utf8.first!

    let advanced = CurPtr.input.baseAddress?.advanced(by: introducerLength)
    var restOfBuffer = Lexer.Cursor(input: .init(start: advanced, count: CurPtr.input.count - introducerLength),
                                    previous: CurPtr.input[introducerLength - 1])
    while !restOfBuffer.isAtEndOfFile {
      restOfBuffer.advance(while: { byte in
        byte != Unicode.Scalar(terminatorStart)
      })

      guard restOfBuffer.starts(with: CMK.terminator.utf8) else {
        _ = restOfBuffer.advance()
        continue
      }

      // Must occur at start of line.
      guard restOfBuffer.previous == UInt8(ascii: "\n") || restOfBuffer.previous == UInt8(ascii: "\r") else {
        // Step past this mid-line occurrence. Without the advance the next
        // iteration would match the very same position again and never
        // terminate.
        _ = restOfBuffer.advance()
        continue
      }

      let advanced = restOfBuffer.input.baseAddress?.advanced(by: terminatorLength)
      return Lexer.Cursor(input: .init(start: advanced, count: restOfBuffer.input.count - terminatorLength),
                          previous: restOfBuffer.input[terminatorLength - 1])
    }
    return nil
  }
}
|| (c >= 0x2E80 && c <= 0x2FFF) + + || (c >= 0x3004 && c <= 0x3007) + || (c >= 0x3021 && c <= 0x302F) + || (c >= 0x3031 && c <= 0x303F) + + || (c >= 0x3040 && c <= 0xD7FF) + + || (c >= 0xF900 && c <= 0xFD3D) + || (c >= 0xFD40 && c <= 0xFDCF) + || (c >= 0xFDF0 && c <= 0xFE44) + || (c >= 0xFE47 && c <= 0xFFF8) + + || (c >= 0x10000 && c <= 0x1FFFD) + || (c >= 0x20000 && c <= 0x2FFFD) + || (c >= 0x30000 && c <= 0x3FFFD) + || (c >= 0x40000 && c <= 0x4FFFD) + || (c >= 0x50000 && c <= 0x5FFFD) + || (c >= 0x60000 && c <= 0x6FFFD) + || (c >= 0x70000 && c <= 0x7FFFD) + || (c >= 0x80000 && c <= 0x8FFFD) + || (c >= 0x90000 && c <= 0x9FFFD) + || (c >= 0xA0000 && c <= 0xAFFFD) + || (c >= 0xB0000 && c <= 0xBFFFD) + || (c >= 0xC0000 && c <= 0xCFFFD) + || (c >= 0xD0000 && c <= 0xDFFFD) + || (c >= 0xE0000 && c <= 0xEFFFD) + } + + var isValidIdentifierStartCodePoint: Bool { + guard self.isValidIdentifierContinuationCodePoint else { + return false + } + + let c = self.value + if c < 0x80 && (self.isDigit || c == UInt8(ascii: "$")) { + return false + } + + // N1518: Recommendations for extended identifier characters for C and C++ + // Proposed Annex X.2: Ranges of characters disallowed initially + if ((c >= 0x0300 && c <= 0x036F) || + (c >= 0x1DC0 && c <= 0x1DFF) || + (c >= 0x20D0 && c <= 0x20FF) || + (c >= 0xFE20 && c <= 0xFE2F)) { + return false + } + + return true + } + + /// isOperatorStartCodePoint - Return true if the specified code point is a + /// valid start of an operator. + var isOperatorStartCodePoint: Bool { + // ASCII operator chars. 
extension Unicode.Scalar {
  /// `true` when the scalar lies in the printable ASCII range, i.e. from the
  /// space character U+0020 up to and including `~` (U+007E). Used for the
  /// purposes of pattern parsing.
  public var isPrintableASCII: Bool {
    // Control characters below U+0020 are rejected, as is everything from
    // DEL (U+007F) upward.
    let printable: Range<UInt32> = 0x20..<0x7F
    return printable.contains(value)
  }
}
// MARK: Skipping Tokens

extension Parser.Lookahead {
  /// Skips the name of an attribute (the token after `@`) together with the
  /// arguments that belong to it.
  ///
  /// Nothing is consumed unless the current token is an identifier, `in` or
  /// `inout`. For a known type attribute the name is consumed, and for
  /// `convention` the `(identifier)` argument as well. For a known declaration
  /// attribute the name is consumed and a following parenthesized list is
  /// skipped, unless the token after that list is `->`, `throws`, `rethrows`
  /// or `throw`, in which case the parentheses are likely a function type's
  /// parameters and are left alone. Anything else is handed to
  /// `canParseCustomAttribute()`, whose result is ignored.
  mutating func skipTypeAttribute() {
    // These are keywords that we accept as attribute names.
    guard self.currentToken.isIdentifier || self.at(.inKeyword) || self.at(.inoutKeyword) else {
      return
    }

    // Determine which attribute it is.
    if let attr = Parser.TypeAttribute(rawValue: self.currentToken.tokenText) {
      // Ok, it is a valid attribute, eat it, and then process it.
      self.consumeAnyToken()
      if case .convention = attr {
        guard
          self.consume(if: .leftParen) != nil,
          (self.currentToken.isIdentifier ? self.consumeIdentifier() : nil) != nil,
          self.consume(if: .rightParen) != nil
        else {
          return
        }
      }
      return
    }

    if Parser.DeclarationAttribute(rawValue: self.currentToken.tokenText) != nil {
      // This is a valid decl attribute so they should have put it on the decl
      // instead of the type.
      //
      // Recover by eating @foo(...)
      self.consumeAnyToken()
      if self.at(.leftParen) {
        var backtrack = self.lookahead()
        backtrack.skipSingle()
        // If we found '->', or 'throws' after paren, it's likely a parameter
        // of function type.
        guard backtrack.at(.arrow) || backtrack.at(.throwsKeyword) || backtrack.at(.rethrowsKeyword) || backtrack.at(.throwKeyword) else {
          self.skipSingle()
          return
        }
      }
      return
    }

    _ = self.canParseCustomAttribute()
    return
  }

  /// Consumes a run of attributes of the form `@name` or `@name(...)`.
  ///
  /// - Returns: `false` without consuming anything if the current token is
  ///   not `@`; `true` once every consecutive attribute has been consumed.
  mutating func eatParseAttributeList() -> Bool {
    guard self.at(.atSign) else {
      return false
    }

    repeat {
      self.eat(.atSign)
      self.consumeIdentifier()
      if self.consume(if: .leftParen) != nil {
        // Skip the arguments one balanced element at a time.
        while !self.at(.eof), !self.at(.rightParen), !self.at(.poundEndifKeyword) {
          self.skipSingle()
        }
        // The loop above stops *on* the closing paren, so it has to be
        // consumed here; otherwise a following `@attr` is never seen.
        self.consume(if: .rightParen)
      }
    } while self.at(.atSign)
    return true
  }
}
self.currentToken.isKeywordPossibleDeclStart else { + // If this is obviously not the start of a decl, then we're done. + return false + } + + /* + // When 'init' appears inside another 'init', it's likely the user wants to + // invoke an initializer but forgets to prefix it with 'self.' or 'super.' + // Otherwise, expect 'init' to be the start of a declaration (and complain + // when the expectation is not fulfilled). + if (Tok.is(tok::kw_init)) { + return !isa(CurDeclContext); + } + */ + + // Similarly, when 'case' appears inside a function, it's probably a switch + // case, not an enum case declaration. + if self.at(.caseKeyword) { + return false + } + + /* + // The protocol keyword needs more checking to reject "protocol". + if (Tok.is(tok::kw_protocol)) { + const Token &Tok2 = peekToken(); + return !Tok2.isAnyOperator() || !Tok2.getText().equals("<"); + } + + // The 'try' case is only for simple local recovery, so we only bother to + // check 'let' and 'var' right now. + if (Tok.is(tok::kw_try)) + return peekToken().isAny(tok::kw_let, tok::kw_var); + */ + + // Skip an attribute, since it might be a type attribute. This can't + // happen at the top level of a scope, but we do use isStartOfSwiftDecl() + // in positions like generic argument lists. + if self.at(.atSign) { + var subparser = self.lookahead() + _ = subparser.eatParseAttributeList() + // If this attribute is the last element in the block, + // consider it is a start of incomplete decl. + if subparser.at(.rightBrace) || subparser.at(.eof) || subparser.at(.poundEndifKeyword) { + return true + } + return subparser.isStartOfDeclaration() + } + + // If we have a decl modifying keyword, check if the next token is a valid + // decl start. This is necessary to correctly handle Swift keywords that are + // shared by SIL, e.g 'private' in 'sil private @foo :'. We need to make sure + // this isn't considered a valid Swift decl start. 
+ if self.currentToken.tokenKind.isKeyword { + if Self.declAttributeNames.contains(self.currentToken.tokenText) { + var subparser = self.lookahead() + subparser.consumeAnyToken() + + // Eat paren after modifier name; e.g. private(set) + if subparser.consume(if: .leftParen) != nil { + while !subparser.at(.eof) && !subparser.at(.rightBrace) && !subparser.at(.poundEndifKeyword) { + if subparser.consume(if: .rightParen) != nil { + break + } + + // If we found the start of a decl while trying to skip over the + // paren, then we have something incomplete like 'private('. Return + // true for better recovery. + if subparser.isStartOfDeclaration() { + return true + } + + subparser.consumeAnyToken() + } + } + return subparser.isStartOfDeclaration() + } + } + + // Otherwise, the only hard case left is the identifier case. + guard self.currentToken.isIdentifier else { + return true + } + + // If this is an operator declaration, handle it. + if case .operatorKeyword = self.peek().tokenKind, + (self.currentToken.isContextualKeyword("prefix") || + self.currentToken.isContextualKeyword("postfix") || + self.currentToken.isContextualKeyword("infix")) { + return true + } + + // If this can't possibly be a contextual keyword, then this identifier is + // not interesting. Bail out. + guard self.currentToken.isContextualDeclKeyword() else { + return false + } + + // If it might be, we do some more digging. + + // If this is 'unowned', check to see if it is valid. + let tok2 = self.peek() + if self.currentToken.tokenText == "unowned" && tok2.tokenKind == .leftParen && + self.isParenthesizedUnowned() { + var lookahead = self.lookahead() + lookahead.consumeIdentifier() + lookahead.eat(.leftParen) + lookahead.consumeIdentifier() + lookahead.eat(.rightParen) + return lookahead.isStartOfDeclaration() + } + + if self.currentToken.isContextualKeyword("actor") { + if tok2.isIdentifier { + return true + } + // actor may be somewhere in the modifier list. 
Eat the tokens until we get + // to something that isn't the start of a decl. If that is an identifier, + // it's an actor declaration, otherwise, it isn't. + var lookahead = self.lookahead() + repeat { + lookahead.consumeAnyToken() + } while lookahead.isStartOfDeclaration() + return lookahead.currentToken.isIdentifier + } + + // If the next token is obviously not the start of a decl, bail early. + guard tok2.isKeywordPossibleDeclStart else { + return false + } + + // Otherwise, do a recursive parse. + var next = self.lookahead() + next.consumeIdentifier() + return next.isStartOfDeclaration() + } + + func isParenthesizedUnowned() -> Bool { + assert(self.currentToken.tokenText == "unowned" && self.peek().tokenKind == .leftParen, + "Invariant violated") + + // Look ahead to parse the parenthesized expression. + var lookahead = self.lookahead() + lookahead.consumeIdentifier() + guard lookahead.consume(if: .leftParen) != nil else { + return false + } + return lookahead.currentToken.isIdentifier + && lookahead.peek().tokenKind == .rightParen + && (lookahead.currentToken.tokenText == "safe" || lookahead.currentToken.tokenText == "unsafe") + } +} + +extension Parser.Lookahead { + func isStartOfGetSetAccessor() -> Bool { + assert(self.at(.leftBrace), "not checking a brace?") + + // The only case this can happen is if the accessor label is immediately after + // a brace (possibly preceded by attributes). "get" is implicit, so it can't + // be checked for. Conveniently however, get/set properties are not allowed + // to have initializers, so we don't have an ambiguity, we just have to check + // for observing accessors. + // + // If we have a 'didSet' or a 'willSet' label, disambiguate immediately as + // an accessor block. + let nextToken = self.peek() + if nextToken.isContextualKeyword("didSet") || nextToken.isContextualKeyword("willSet") { + return true + } + + // If we don't have attributes, then it cannot be an accessor block. 
// MARK: Skipping Tokens

extension Parser.Lookahead {
  /// Skips tokens until the lookahead is positioned at `t1`, `t2`, the end of
  /// the file, or one of `#else`, `#elseif` and `#endif`.
  ///
  /// The token that stops the skip is left unconsumed. Tokens are skipped with
  /// `skipSingle()`, so bracketed groups are stepped over as a unit.
  mutating func skipUntil(_ t1: RawTokenKind, _ t2: RawTokenKind) {
    while !self.at(.eof) && !self.at(t1) && !self.at(t2)
            && !self.at(.poundEndifKeyword) && !self.at(.poundElseKeyword) && !self.at(.poundElseifKeyword) {
      self.skipSingle()
    }
  }

  /// Skips tokens until the current token is the first token on its line or
  /// the end of the file is reached. Skips nothing if the current token
  /// already starts a line.
  mutating func skipUntilEndOfLine() {
    while !self.at(.eof) && !self.currentToken.isAtStartOfLine {
      self.skipSingle()
    }
  }

  /// Skips a single token, or a whole group when the current token opens one.
  ///
  /// - `(`, `{` and `[` skip forward to their closing counterpart and consume
  ///   it if present. A `(` group additionally stops at a `}` without
  ///   consuming it.
  /// - `#if`, `#else` and `#elseif` skip the clause they introduce, then
  ///   continue through any following `#else`/`#elseif` clauses and finally
  ///   consume the terminating `#endif` if present.
  /// - Any other token is simply consumed.
  mutating func skipSingle() {
    switch self.currentToken.tokenKind {
    case .leftParen:
      self.consumeAnyToken()
      self.skipUntil(.rightParen, .rightBrace)
      self.consume(if: .rightParen)
    case .leftBrace:
      self.consumeAnyToken()
      self.skipUntil(.rightBrace, .rightBrace)
      self.consume(if: .rightBrace)
    case .leftSquareBracket:
      self.consumeAnyToken()
      self.skipUntil(.rightSquareBracket, .rightSquareBracket)
      self.consume(if: .rightSquareBracket)
    case .poundIfKeyword,
         .poundElseKeyword,
         .poundElseifKeyword:
      self.consumeAnyToken()
      // skipUntil also implicitly stops at '#endif'.
      self.skipUntil(.poundElseKeyword, .poundElseifKeyword)

      if self.at(.poundElseKeyword) || self.at(.poundElseifKeyword) {
        // Another clause of the same conditional block follows; skip it too.
        self.skipSingle()
      } else {
        // The clause chain is over: swallow the closing '#endif'. (This used
        // to ask for '#elseif', which can never match in this branch and left
        // the '#endif' behind.)
        self.consume(if: .poundEndifKeyword)
      }
    default:
      self.consumeAnyToken()
    }
  }
}
/// Parses a run of declaration modifiers starting at the current token.
///
/// Handles the access-level keywords (optionally followed by a parenthesized
/// detail such as `(set)`), `static`, `class` when it is used as a modifier
/// rather than as a class introducer, and the contextual modifiers listed in
/// `DeclModifier`.
///
/// - Returns: The parsed modifiers, or `nil` if the current token does not
///   start a modifier.
@_spi(RawSyntax)
public mutating func parseModifierList() -> RawModifierListSyntax? {
  var elements = [RawDeclModifierSyntax]()
  var modifierLoopCondition = LoopProgressCondition()
  MODIFIER_LOOP: while modifierLoopCondition.evaluate(currentToken) {
    switch self.currentToken.tokenKind {
    case .privateKeyword, .fileprivateKeyword, .internalKeyword, .publicKeyword:
      let name = self.consumeAnyToken()
      let details: RawDeclModifierDetailSyntax?
      if self.at(.leftParen) {
        let lparen = self.eat(.leftParen)
        // Do not assert that the detail is 'set': malformed input such as
        // 'private(get)' or 'private()' must not trap the parser.
        // `consumeIdentifier()` yields a missing identifier when no identifier
        // is present, and `expect(.rightParen)` recovers a missing ')'.
        let detail = self.consumeIdentifier()
        let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen)
        details = RawDeclModifierDetailSyntax(
          leftParen: lparen,
          detail: detail,
          unexpectedBeforeRParen,
          rightParen: rparen,
          arena: self.arena
        )
      } else {
        details = nil
      }

      elements.append(RawDeclModifierSyntax(
        name: name, detail: details, arena: self.arena))
    case .staticKeyword:
      let staticKeyword = self.eat(.staticKeyword)
      elements.append(RawDeclModifierSyntax(
        name: staticKeyword, detail: nil, arena: self.arena))
    case .classKeyword:
      // If 'class' is a modifier on another decl kind, like var or func,
      // then treat it as a modifier.
      do {
        var lookahead = self.lookahead()
        lookahead.eat(.classKeyword)
        // When followed by an 'override' or CC token inside a class,
        // treat 'class' as a modifier in the case of a following CC
        // token, we cannot be sure there is no intention to override
        // or witness something static.
        guard lookahead.isStartOfDeclaration() || lookahead.currentToken.isContextualKeyword("override") else {
          // This 'class' is a real ClassDecl introducer.
          break MODIFIER_LOOP
        }
      }
      let classKeyword = self.eat(.classKeyword)
      elements.append(RawDeclModifierSyntax(
        name: classKeyword, detail: nil, arena: self.arena))
    case .identifier:
      // Context sensitive keywords.
      // FIXME: Sink this into the GYB
      switch DeclModifier(rawValue: self.currentToken.tokenText) {
      case .unowned:
        elements.append(self.parseUnownedModifier())
      case .final,
           .required,
           .optional,
           .lazy,
           .dynamic,
           .infix,
           .prefix,
           .postfix,
           .compilerInitialized,
           .consuming,
           .mutating,
           .nonmutating,
           .convenience,
           .override,
           .open,
           .weak,
           .indirect,
           .isolated,
           .async,
           .nonisolated,
           .distributed,
           .const,
           .local:
        elements.append(self.parseSimpleModifier())
      default:
        // Not a known contextual modifier; the modifier list ends here.
        break MODIFIER_LOOP
      }

    default:
      break MODIFIER_LOOP
    }
  }
  return elements.isEmpty ? nil : RawModifierListSyntax(elements: elements, arena: arena)
}
/// Consumes the current token when it is an identifier or any kind of
/// operator. Otherwise nothing is consumed and a missing identifier token is
/// returned in its place.
mutating func parseAnyIdentifier() -> RawTokenSyntax {
  let candidate = self.currentToken
  guard candidate.isIdentifier || candidate.isAnyOperator else {
    return RawTokenSyntax(missing: .identifier, arena: arena)
  }
  return self.consumeAnyToken()
}
/// Parses the base name of a declaration reference followed by an optional
/// argument label list.
///
/// - Parameter flags: Controls which tokens are accepted as the base name
///   (`.operators`, `.keywords`) and is forwarded to `parseArgLabelList(_:)`
///   to decide whether an argument label list may follow.
/// - Returns: The base name token and the argument labels, if any were parsed.
mutating func parseDeclNameRef(_ flags: DeclNameOptions = []) -> (RawTokenSyntax, RawDeclNameArgumentsSyntax?) {
  // A plain identifier, 'self' or 'Self' always wins over the flag-driven
  // remapping below.
  let startsWithPlainName = self.currentToken.isIdentifier
    || self.at(.selfKeyword)
    || self.at(.capitalSelfKeyword)
  let isAllowedOperator = flags.contains(.operators) && self.currentToken.isAnyOperator
  let isAllowedKeyword = flags.contains(.keywords) && self.currentToken.tokenKind.isKeyword

  // Consume the base name.
  let baseName: RawTokenSyntax
  if !startsWithPlainName && (isAllowedOperator || isAllowedKeyword) {
    // Operators and keywords standing in for a name are re-labelled as identifiers.
    baseName = self.consume(remapping: .identifier)
  } else {
    baseName = self.consumeIdentifier()
  }

  // Parse an argument list, if the flags allow it and it's present.
  return (baseName, self.parseArgLabelList(flags))
}
extension Parser.Lookahead {
  /// Checks, without consuming anything from this lookahead, whether the
  /// upcoming tokens form a parenthesized argument label list: `(`, zero or
  /// more `label:` pairs, then `)`.
  func canParseArgumentLabelList() -> Bool {
    var scout = self.lookahead()
    if scout.consume(if: .leftParen) == nil {
      return false
    }

    while !(scout.at(.eof) || scout.at(.rightParen)) {
      // Every entry must be an argument label immediately followed by ':'.
      let startsLabel = scout.currentToken.canBeArgumentLabel
        && scout.peek().tokenKind == .colon
      if !startsLabel {
        return false
      }

      scout.consumeAnyToken()
      if scout.consume(if: .colon) == nil {
        return false
      }
    }

    return scout.consume(if: .rightParen) != nil
  }
}
/// Whether this lexeme can act as an effects specifier.
///
/// `async`, `reasync`, `throws` and `rethrows` always qualify. `await`,
/// `throw` and `try` qualify only when they are not the first token on
/// their line.
var isEffectsSpecifier: Bool {
  // NOTE: If this returns 'true', that token must be handled in
  //       'parseEffectsSpecifiers()'.
  switch self.tokenKind {
  case .throwsKeyword, .rethrowsKeyword:
    // 'throws' and 'rethrows' are always valid effects specifiers.
    return true
  case .throwKeyword, .tryKeyword:
    // We'll take 'throw' and 'try' too but they have to be on the same
    // line as the declaration they're modifying.
    return !self.isAtStartOfLine
  case .identifier, .contextualKeyword:
    // Contextual keywords are recognized by their spelling.
    switch self.tokenText {
    case "async", "reasync":
      return true
    case "await":
      return !self.isAtStartOfLine
    default:
      return false
    }
  default:
    return false
  }
}
extension TokenConsumer {
  /// Consumes the current token if it is an identifier, `self`, `Self` or
  /// `Any`. Otherwise nothing is consumed and a missing identifier token is
  /// produced.
  mutating func consumeIdentifier() -> Token {
    let kind = self.currentToken.tokenKind
    let isIdentifierLike = kind == .identifier
      || kind == .selfKeyword
      || kind == .capitalSelfKeyword
      || kind == .anyKeyword
    guard isIdentifierLike else {
      return self.missingToken(.identifier)
    }
    return self.consumeAnyToken()
  }

  /// Consumes the current token if it is an integer literal. Otherwise
  /// nothing is consumed and a missing integer literal token is produced.
  mutating func consumeInteger() -> Token {
    guard self.currentToken.tokenKind == .integerLiteral else {
      return self.missingToken(.integerLiteral)
    }
    return self.consumeAnyToken()
  }
}
= nil + ) throws -> SourceFileSyntax { + var source = source + source.makeContiguousUTF8() + return try source.withUTF8 { buffer in + return try parse(source: buffer, + parseTransition: parseTransition, + filenameForDiagnostics: filenameForDiagnostics, + languageVersion: languageVersion, + enableBareSlashRegexLiteral: enableBareSlashRegexLiteral) + } + } + + public static func parse( + source: UnsafeBufferPointer, + parseTransition: IncrementalParseTransition? = nil, + filenameForDiagnostics: String = "", + languageVersion: String? = nil, + enableBareSlashRegexLiteral: Bool? = nil + ) throws -> SourceFileSyntax { + var parser = Parser(source) + // Extended lifetime is required because `SyntaxArena` in the parser must + // be alive until `Syntax(raw:)` retains the arena. + return withExtendedLifetime(parser) { + let rawSourceFile = parser.parseSourceFile() + return Syntax(raw: rawSourceFile.raw).as(SourceFileSyntax.self)! + } + } +} + +/// A parser for the Swift programming language. +/// +/// `Parser` implements a recursive descent parser that produces a SwiftSyntax +/// tree. Its implementation is divided among a set of files named for the +/// class of syntax nodes they parse. For example, declaration parsing happens +/// in `Declaration.swift`, and expression parsing happens in `Expression.swift`. +/// +/// Parsing Swift +/// ============= +/// +/// Broadly, the parser has a one-to-one correspondence between syntax nodes +/// and parsing functions. If a function consumes input from the token stream, +/// it must be marked `mutating` to do so. Thus, parsing functions that only +/// serve e.g. to read the current token and disambiguate the parse, or recover +/// from a bad parse should be left `nonmutating` to indicate that they do not +/// consume tokens. +/// +/// Token consumption is generally conditional via ``TokenConsumer/consume(if:)`` +/// or unconditional via `consumeAnyToken()`. 
During parsing, it is +/// also useful to assert that the current token matches some expected structure +/// via ``TokenConsumer/eat(_:)``, which acts like ``TokenConsumer/consume(if:)``, +/// but asserts if the parsed token did not match the expected kind. +/// +/// It can also be useful to expect the presence of certain structural elements. +/// For example, a function that parses the content of code items might expect +/// an opening brace, its items, and also expect a closing brace: +/// +/// let lbrace = self.eat(.leftBrace) +/// /* */ +/// let rbrace = self.expect(.rightBrace) +/// +/// Unlike ``TokenConsumer/eat(_:)``, `expect(_:)` does not assert: when the +/// expected token is absent it returns a `missing` token of the given kind. This allows the tree to remain +/// well-formed even when the input text is not, all without affecting +/// source fidelity. +/// +/// For compound syntactic structures, parsing loops are often required. The general +/// structure of a source-preserving loop is +/// +/// var keepGoing: RawTokenSyntax? = nil +/// repeat { +/// // Parse an element +/// let element = self.parseElement() +/// +/// // Consume the delimiter +/// keepGoing = self.consume(if: .delimiter) +/// elements.append(RawElementSyntax(element, ..., keepGoing)) +/// } while keepGoing != nil +/// +/// Such parsing loops are kept enclosed in `do` blocks to avoid having their +/// state leak into surrounding scopes. +/// +/// Lookahead +/// ========= +/// +/// This parser provides at most one token worth of lookahead via +/// `peek()`. If more tokens are required to disambiguate a parse, a +/// ``Parser/Lookahead`` instance should be constructed instead with +/// ``Parser/lookahead()``. +/// +/// Source Fidelity +/// =============== +/// +/// The syntax trees produced by this parser are required to faithfully +/// represent the input source text. As such, there are no utilities that allow +/// for skipping tokens in the parser. In addition, consumed tokens must become +/// part of at least one syntax node.
+/// +/// The exception to this is parser lookahead, which is allowed to skip as many +/// tokens as needed to disambiguate a parse. However, because lookahead +/// operates on a copy of the lexical stream, no input tokens are lost.. +public struct Parser: TokenConsumer { + let arena: SyntaxArena + /// A view of the sequence of lexemes in the input. + var lexemes: Lexer.LexemeSequence + /// The current token. If there was no input, this token will have a kind of `.eof`. + @_spi(RawSyntax) + public var currentToken: Lexer.Lexeme + + /// Initializes a Parser from the given input buffer. + /// + /// The lexer will copy any string data it needs from the resulting buffer + /// so it is the caller's responsibility to free it. + /// + /// - Parameters + /// - input: An input buffer containing Swift source text. + /// - arena: Arena the parsing syntax are made into. If it's `nil`, a new + /// arena is created automatically, and `input` copied into the + /// arena. If non-`nil`, `input` must be the registered source + /// buffer of `arena` or a slice of the source buffer. + @_spi(Testing) + public init(_ input: UnsafeBufferPointer, arena: SyntaxArena? = nil) { + var sourceBuffer: UnsafeBufferPointer + if let arena = arena { + self.arena = arena + sourceBuffer = input + assert(arena.contains(text: SyntaxText(baseAddress: input.baseAddress, count: input.count))) + } else { + self.arena = SyntaxArena( + parseTriviaFunction: TriviaParser.parseTrivia(_:position:)) + sourceBuffer = self.arena.internSourceBuffer(input) + } + self.lexemes = Lexer.tokenize(sourceBuffer) + self.currentToken = self.lexemes.advance() + } +} + +// MARK: Inspecting Tokens + +extension Parser { + /// Retrieves the token following the current token without consuming it. 
+ @_spi(RawSyntax) + public func peek() -> Lexer.Lexeme { + return self.lexemes.peek() + } +} + +// MARK: Consuming Tokens + +extension Parser { + @_spi(RawSyntax) + public mutating func missingToken(_ kind: RawTokenKind) -> RawTokenSyntax { + return RawTokenSyntax(missing: kind, arena: arena) + } + /// Consumes the current token and advances the lexer to the next token. + /// + /// - Returns: The token that was consumed. + @_spi(RawSyntax) + public mutating func consumeAnyToken() -> RawTokenSyntax { + let tok = self.currentToken + self.currentToken = self.lexemes.advance() + return RawTokenSyntax( + kind: tok.tokenKind, wholeText: tok.wholeText, textRange: tok.textRange, + arena: arena) + } + + /// Consumes the current token and sets its kind to the given `TokenKind`, + /// then advances the lexer to the next token. + /// + /// - Parameter kind: The kind to reset the consumed token to. + /// - Returns: The token that was consumed with its kind re-mapped to the + /// given `TokenKind`. + @_spi(RawSyntax) + public mutating func consume(remapping kind: RawTokenKind) -> RawTokenSyntax { + self.currentToken.tokenKind = kind + return self.consumeAnyToken() + } + + /// Attempts to consume a token of the given kind. + /// If it cannot be found, the parser tries + /// 1. To each unexpected tokens that have lower ``TokenPrecedence`` than the + /// expected token and see if the token occurs after that unexpected. + /// 2. If the token couldn't be found after skipping unexpected, it synthesizes + /// a missing token of the requested kind. + @_spi(RawSyntax) + public mutating func expect(_ kind: RawTokenKind) -> (unexpected: RawUnexpectedNodesSyntax?, token: RawTokenSyntax) { + if let tok = self.consume(if: kind) { + return (nil, tok) + } + var lookahead = self.lookahead() + if lookahead.canRecoverTo(kind) { + var unexpectedNodes = [RawSyntax]() + for _ in 0.. 
(unexpected: RawUnexpectedNodesSyntax?, token: RawTokenSyntax) { + for kind in kinds { + if let tok = self.consume(if: kind) { + return (nil, tok) + } + } + var lookahead = self.lookahead() + if lookahead.canRecoverTo(kinds) { + var unexpectedNodes = [RawSyntax]() + for _ in 0.. RawTokenSyntax { + if self.currentToken.tokenKind == kind { + if let text = text { + if self.currentToken.tokenText == text { + return self.consumeAnyToken() + } + } else { + return self.consumeAnyToken() + } + } + return RawTokenSyntax(missing: kind, arena: self.arena) + } +} + +// MARK: Spliting Tokens + +extension Parser { + /// Consumes a given token, or splits the current token into a leading token + /// matching the given `prefix` and a trailing token and consumes the leading + /// token. + /// + /// ... -> consume(, as: kind) -> [ ] ... + mutating func consumePrefix( + _ prefix: SyntaxText, + as tokenKind: RawTokenKind + ) -> RawTokenSyntax { + let current = self.currentToken + // Current token can be either one-character token we want to consume... + let tokenText = current.tokenText + + if tokenText == prefix { + return self.consume(remapping: tokenKind) + } + assert(tokenText.hasPrefix(prefix)) + + let endIndex = current.textRange.lowerBound.advanced(by: prefix.count) + let tok = RawTokenSyntax( + kind: tokenKind, + wholeText: SyntaxText(rebasing: current.wholeText[..> ... 
-> > + // + // The current calculation is: + // + // <TOKEN> + // CURSOR ^ + // + trailing trivia length + // + // <TOKEN> + // CURSOR ^ + // + content length + // + // <TOKEN> + // CURSOR ^ + // - split point length + // + // <TOKEN> + // CURSOR ^ + let offset = (self.currentToken.trailingTriviaByteLength + + tokenText.count + - prefix.count) + self.currentToken = self.lexemes.resetForSplit(of: offset) + return tok + } +} + +extension SyntaxText { + func withBuffer(_ body: (UnsafeBufferPointer) throws -> Result) rethrows -> Result { + try body(UnsafeBufferPointer(start: self.baseAddress, count: self.count)) + } +} diff --git a/Sources/SwiftParser/Patterns.swift b/Sources/SwiftParser/Patterns.swift new file mode 100644 index 00000000000..c5d65ee76e7 --- /dev/null +++ b/Sources/SwiftParser/Patterns.swift @@ -0,0 +1,265 @@ +//===------------------------- Patterns.swift -----------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +extension Parser { + /// Parse a pattern. + /// + /// Grammar + /// ======= + /// + /// pattern → wildcard-pattern type-annotation? + /// pattern → identifier-pattern type-annotation? + /// pattern → value-binding-pattern + /// pattern → tuple-pattern type-annotation? 
/// Parse a pattern.
///
/// Dispatches on the current token: `(` starts a tuple pattern, `_` a
/// wildcard pattern, an identifier an identifier pattern, and `let`/`var` a
/// value-binding pattern wrapping a nested pattern. For any other token
/// nothing is consumed and a missing pattern is returned.
mutating func parsePattern() -> RawPatternSyntax {
  let kind = self.currentToken.tokenKind
  switch kind {
  case .leftParen:
    let openParen = self.eat(.leftParen)
    let tupleElements = self.parsePatternTupleElements()
    let (unexpectedBeforeCloseParen, closeParen) = self.expect(.rightParen)
    let tuple = RawTuplePatternSyntax(
      leftParen: openParen,
      elements: tupleElements,
      unexpectedBeforeCloseParen,
      rightParen: closeParen,
      arena: self.arena
    )
    return RawPatternSyntax(tuple)

  case .wildcardKeyword:
    let underscore = self.eat(.wildcardKeyword)
    let wildcardPattern = RawWildcardPatternSyntax(
      wildcard: underscore, typeAnnotation: nil, arena: self.arena)
    return RawPatternSyntax(wildcardPattern)

  case .identifier:
    let name = self.eat(kind)
    let identifierPattern = RawIdentifierPatternSyntax(
      identifier: name, arena: self.arena)
    return RawPatternSyntax(identifierPattern)

  case .letKeyword, .varKeyword:
    // The case label guarantees the current token is 'let' or 'var', so
    // eating the current kind consumes exactly that keyword.
    let bindingKeyword = self.eat(kind)
    let boundPattern = self.parsePattern()
    let binding = RawValueBindingPatternSyntax(
      letOrVarKeyword: bindingKeyword, valuePattern: boundPattern, arena: self.arena)
    return RawPatternSyntax(binding)

  default:
    return RawPatternSyntax(RawMissingPatternSyntax(arena: self.arena))
  }
}
/// Parse the elements of a tuple pattern.
///
/// Grammar
/// =======
///
///     tuple-pattern-element-list → tuple-pattern-element | tuple-pattern-element ',' tuple-pattern-element-list
///     tuple-pattern-element → pattern | identifier ':' pattern
///
/// Parsing stops at `)`, at the end of the file, or after the first element
/// that is not followed by a comma. The closing paren is not consumed.
mutating func parsePatternTupleElements() -> RawTuplePatternElementListSyntax {
  var parsed: [RawTuplePatternElementSyntax] = []
  while !self.at(.eof) && !self.at(.rightParen) {
    // If the tuple element has a label, parse it.
    let hasLabel = self.currentToken.tokenKind == .identifier
      && self.peek().tokenKind == .colon
    let labelName: RawTokenSyntax? = hasLabel ? self.consumeAnyToken() : nil
    let labelColon: RawTokenSyntax? = hasLabel ? self.eat(.colon) : nil

    let elementPattern = self.parsePattern()
    let comma = self.consume(if: .comma)
    parsed.append(RawTuplePatternElementSyntax(
      labelName: labelName,
      labelColon: labelColon,
      pattern: elementPattern,
      trailingComma: comma,
      arena: self.arena))

    // Without a separating comma there cannot be another element.
    if comma == nil {
      break
    }
  }
  return RawTuplePatternElementListSyntax(elements: parsed, arena: self.arena)
}
+ if self.at(.varKeyword) || self.at(.letKeyword) { + let letOrVar = self.consumeAnyToken() + let value = self.parseMatchingPattern() + return RawPatternSyntax(RawValueBindingPatternSyntax( + letOrVarKeyword: letOrVar, valuePattern: value, arena: self.arena)) + } else if self.at(.isKeyword) { + // matching-pattern ::= 'is' type + let isKeyword = self.eat(.isKeyword) + let type = self.parseType() + return RawPatternSyntax(RawIsTypePatternSyntax( + isKeyword: isKeyword, type: type, arena: self.arena)) + } else { + // matching-pattern ::= expr + // Fall back to expression parsing for ambiguous forms. Name lookup will + // disambiguate. + let expr = RawExprSyntax(self.parseSequenceExpression(.basic, inVarOrLet: true)) + return RawPatternSyntax(RawExpressionPatternSyntax(expression: expr, arena: self.arena)) + } + } +} + +// MARK: Lookahead + +extension Parser.Lookahead { + /// pattern ::= identifier + /// pattern ::= '_' + /// pattern ::= pattern-tuple + /// pattern ::= 'var' pattern + /// pattern ::= 'let' pattern + mutating func canParsePattern() -> Bool { + switch self.currentToken.tokenKind { + case .identifier, .wildcardKeyword: + self.consumeAnyToken() + return true + case .letKeyword, .varKeyword: + self.consumeAnyToken() + return self.canParsePattern() + case .leftParen: + return self.canParsePatternTuple() + default: + return false + } + } + + private mutating func canParsePatternTuple() -> Bool { + guard self.consume(if: .leftParen) != nil else { + return false + } + + if !self.at(.rightParen) { + repeat { + guard self.canParsePattern() else { + return false + } + } while self.consume(if: .comma) != nil + } + + return self.consume(if: .rightParen) != nil + } + + /// typed-pattern ::= pattern (':' type)? 
+ mutating func canParseTypedPattern() -> Bool { + guard self.canParsePattern() else { + return false + } + + if self.consume(if: .colon) != nil { + return self.canParseType() + } + return true + } + + /// Determine whether we are at the start of a parameter name when + /// parsing a parameter. + /// + /// - Parameter isClosure: Whether the parameter belongs to a closure. It + /// decides the answer when the tokens alone are ambiguous. + /// - Returns: `true` if the current token should be treated as a parameter + /// name rather than as the start of a type. + func startsParameterName(_ isClosure: Bool) -> Bool { + // To have a parameter name here, we need a name. + guard self.currentToken.canBeArgumentLabel else { + return false + } + + // If the next token is ':', this is a name. + let nextTok = self.peek() + if nextTok.tokenKind == .colon { + return true + } + + // If the next token can be an argument label, we might have a name. + if nextTok.canBeArgumentLabel { + // If the first name wasn't "isolated", "some", or "any", we're done. + if !self.currentToken.isContextualKeyword("isolated") && + !self.currentToken.isContextualKeyword("some") && + !self.currentToken.isContextualKeyword("any") { + return true + } + + // "isolated" can be an argument label, but it's also a contextual keyword, + // so look ahead one more token (two total) to see if we have a ':' that + // would indicate that this is an argument label. The check must run on + // the lookahead *after* both candidate names were skipped: the current + // token and `nextTok` are already known not to be ':' at this point. + do { + var backtrack = self.lookahead() + backtrack.consumeAnyToken() + backtrack.consumeAnyToken() + return backtrack.at(.colon) // isolated x : + } + } + + if nextTok.isOptionalToken || nextTok.isImplicitlyUnwrappedOptionalToken { + return false + } + + // The identifier could be a name or it could be a type. In a closure, we + // assume it's a name, because the type can be inferred. Elsewhere, we + // assume it's a type.
+ return isClosure + } +} diff --git a/Sources/SwiftParser/README.md b/Sources/SwiftParser/README.md new file mode 100644 index 00000000000..f7b1a693e7c --- /dev/null +++ b/Sources/SwiftParser/README.md @@ -0,0 +1,70 @@ +## Overview + +The `SwiftParser` framework implements a parser that accepts Swift source text +as input and produces a SwiftSyntax syntax tree. This module is under active development and is not yet ready to completely replace `SwiftSyntaxParser`. For more information about the design of this module, please see [the module documentation](SwiftParser.docc/SwiftParser.md). + +## Quickstart + +The easiest way to parse Swift source code is to call the `Parser.parse` method, providing it with a string containing the source code: + +```swift +import SwiftParser +import SwiftSyntax + +let sourceText = +""" +func greeting(name: String) { + print("Hello, \(name)!") +} +""" + +// Parse the source code in sourceText into a syntax tree +let sourceFile: SourceFileSyntax = Parser.parse(source: sourceText) + +// The "description" of the source tree is the source-accurate view of what was parsed. +assert(sourceFile.description == sourceText) + +// Visualize the complete syntax tree. +dump(sourceFile) +``` + +## Implementation Status + +There are a number of implementation tasks involved in realizing the parser, which include: + +* [ ] Parsing + * [x] Rough parse of full grammar + * [x] Recovery from unexpected syntax + * [x] Insert missing syntax where needed + * [ ] SIL syntax + * [ ] [Regular expression literals](https://github.com/apple/swift-experimental-string-processing) + * [x] Parse into raw syntax nodes +* [ ] Diagnostics + * [ ] Render diagnostics to a terminal with color, highlights, etc. 
+ * [ ] Fix-Its to add/remove/replace syntax + * [ ] Emit diagnostics in the same binary format that the Swift compiler/driver do + * [x] Syntax visitor to emit diagnostics for all missing and unexpected syntax +* [ ] Tooling and automation + * [ ] Improve test harness to easily check the expected parse trees + diagnostics + * [ ] Migrate [attribute definitions](https://github.com/apple/swift/blob/main/include/swift/AST/Attr.def) from C++ to Swift + * [ ] Migrate [grammar definition](https://github.com/apple/swift/tree/main/utils/gyb_syntax_support) to the swift-syntax repository + * [ ] Migrate grammar definition from Python to Swift + * [ ] Replace uses of gyb with SwiftSyntax-based generation +* [ ] Auditing the grammar to ensure that it accurately represents the Swift grammar + * [ ] Attributes + * [ ] Declarations + * [ ] Statements + * [ ] Expressions +* [ ] AST generation within the Swift compiler itself + +### Testing methodology + +We'll employ a number of different techniques to help test the new parser at each phase, including: + +* [x] Unit tests for specific parser behaviors, added as we implement pieces and uncover bugs. + +* [ ] Self-parse test to ensure that all Swift files in the package can be parsed, that the syntax tree is well-formed, and that it always reproduces the input source. + +* [ ] Parsing tests from the Swift compiler repository to check that the parsers accept the same code. + +* [ ] Fuzzing tests that ensure that the parser does not crash, and always reproduces the input source. diff --git a/Sources/SwiftParser/Recovery.swift b/Sources/SwiftParser/Recovery.swift new file mode 100644 index 00000000000..83ccc9f0ec6 --- /dev/null +++ b/Sources/SwiftParser/Recovery.swift @@ -0,0 +1,210 @@ +//===------------------------- Recovery.swift -----------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +extension Parser { + /// Implements general-purpose, balanced delimiter parser recovery. + /// + /// This function implements a very coarse recovery algorithm that tries to + /// skip over as much balanced token structure as it can before yielding back + /// to the parser. For example, this enables the parser to recover from + /// + /// ```swift + /// case: { ("Hello World") } + /// ``` + /// + /// Item parsing runs recovery in a loop until it encounters a + /// token that could be the start of a new item. After eating the erroneous + /// `case` at the top level, the parser calls `recover()` to eat the colon, + /// then calls it again to eat the braces. The final time around, this + /// routine will eat not just the braces, but also the parenthesized + /// `"Hello World"` string token contained therein and its paren tokens. + /// + /// Using Recovery + /// ============== + /// + /// Parser recovery is a method of last resort. It indicates that the parser + /// has encountered some part of the input that it believes to be so erroneous + /// it cannot possibly assign it any structure. The resulting syntax tree + /// thus contains a sequence of unparsed tokens. As such, this function should + /// be considered only when the parser would otherwise be unable to make + /// forward progress, such as when an entire missing syntax node is + /// encountered. For simple cases like a missing token, it is more appropriate + /// to use ``Parser/expect(_:)``, which will correctly handle looking ahead + /// to try to reach the desired point in the token stream. 
+ /// + /// - Returns: A non-empty list of tokens that were eaten during the recovery process. + mutating func recover() -> [RawTokenSyntax] { + var tokens = [RawTokenSyntax]() + switch self.currentToken.tokenKind { + case .leftParen: + tokens.append(self.consumeAnyToken()) + while !self.at(.eof) && !self.at(.rightParen) + && !self.at(.poundEndifKeyword) && !self.at(.poundElseKeyword) && !self.at(.poundElseifKeyword) { + tokens.append(contentsOf: self.recover()) + } + if let token = self.consume(if: .rightParen) { + tokens.append(token) + } + return tokens + case .leftBrace: + tokens.append(self.consumeAnyToken()) + while !self.at(.eof) && !self.at(.rightBrace) + && !self.at(.poundEndifKeyword) && !self.at(.poundElseKeyword) && !self.at(.poundElseifKeyword) { + tokens.append(contentsOf: self.recover()) + } + if let token = self.consume(if: .rightBrace) { + tokens.append(token) + } + return tokens + case .leftSquareBracket: + tokens.append(self.consumeAnyToken()) + while !self.at(.eof) && !self.at(.rightSquareBracket) + && !self.at(.poundEndifKeyword) && !self.at(.poundElseKeyword) && !self.at(.poundElseifKeyword) { + tokens.append(contentsOf: self.recover()) + } + if let token = self.consume(if: .rightSquareBracket) { + tokens.append(token) + } + return tokens + case .poundIfKeyword, + .poundElseKeyword, + .poundElseifKeyword: + tokens.append(self.consumeAnyToken()) + // skipUntil also implicitly stops at tok::pound_endif. 
+ while !self.at(.eof) && !self.at(.poundElseKeyword) && !self.at(.poundElseifKeyword) + && !self.at(.poundEndifKeyword) { + tokens.append(contentsOf: self.recover()) + } + + // The loop above only stops at end-of-file or at a '#else', '#elseif', or + // '#endif' directive, so those are the only tokens left to handle here. + if self.at(.poundElseKeyword) || self.at(.poundElseifKeyword) { + // Recurse to skip the next clause of the same conditional block. + tokens.append(contentsOf: self.recover()) + } else if let token = self.consume(if: .poundEndifKeyword) { + tokens.append(token) + } + return tokens + + default: + tokens.append(self.consumeAnyToken()) + return tokens + } + } +} + +extension Parser { + /// A recovery function that recovers from a number of special cases for syntax + /// elements that cannot possibly be the start of items. + /// + /// This function is intended to be called at the start of item parsing so + /// that future calls to item parsing will have a better shot at succeeding + /// without necessarily invoking the general purpose recovery + /// mechanism. + /// + /// - Returns: A syntax node capturing the result of recovering from a bad + /// item parse, or `nil` if recovery did not occur. + mutating func recoverFromBadItem() -> RawCodeBlockItemSyntax? { + if let extraRightBrace = self.consume(if: .rightBrace) { + // If we see an extraneous right brace, we need to make progress by + // eating it. The legacy parser forms an unknown stmt kind here, so + // we match it. + let missingStmt = RawMissingStmtSyntax(arena: self.arena) + return RawCodeBlockItemSyntax( + item: RawSyntax(missingStmt), + semicolon: nil, + errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: [ extraRightBrace ], arena: self.arena)), + arena: self.arena) + } else if self.at(.caseKeyword) || self.at(.defaultKeyword) { + // If there's a case or default label at the top level then the user + // has tried to write one outside of the scope of a switch statement. + // Recover up to the next braced block.
+ let missingStmt = RawMissingStmtSyntax(arena: self.arena) + let extraTokens = self.recover() + return RawCodeBlockItemSyntax( + item: RawSyntax(missingStmt), + semicolon: nil, + errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: extraTokens, arena: self.arena)), + arena: self.arena) + } else if self.at(.poundElseKeyword) || self.at(.poundElseifKeyword) + || self.at(.poundEndifKeyword) { + // In the case of a catastrophic parse error, consume any trailing + // #else, #elseif, or #endif and move on to the next statement or + // declaration block. + let token = self.consumeAnyToken() + // Create 'MissingDecl' for orphan directives. + return RawCodeBlockItemSyntax( + item: RawSyntax(RawMissingDeclSyntax(attributes: nil, modifiers: nil, arena: self.arena)), + semicolon: nil, + errorTokens: RawSyntax(RawNonEmptyTokenListSyntax(elements: [ token ], arena: self.arena)), + arena: self.arena) + } else { + return nil + } + } +} + +// MARK: Lookahead + +extension Parser.Lookahead { + /// Tries eating tokens until it finds a token of `kind` without skipping any + /// higher precedence tokens. If it found a token of `kind` in this way, + /// returns `true`, otherwise `false`. + /// If this method returns `true`, the parser probably wants to consume the + /// tokens this lookahead skipped over to find `kind` by consuming + /// `lookahead.tokensConsumed` as unexpected. + mutating func canRecoverTo(_ kind: RawTokenKind) -> Bool { + // This simply forwards to the array-based overload. If that turns out to + // have noticeable performance overhead, we could provide a matching + // implementation for a single `RawTokenKind` here. + return canRecoverTo([kind]) + } + + /// Tries eating tokens until it finds a token whose kind is in `kinds` + /// without skipping tokens that have a precedence that's higher than the + /// lowest precedence in `kinds`. If it found such a token in this way, + /// returns `true`, otherwise `false`.
+ /// If this method returns `true`, the parser probably wants to consume the + /// tokens this lookahead skipped over to find `kind` by consuming + /// `lookahead.tokensConsumed` as unexpected. + mutating func canRecoverTo(_ kinds: [RawTokenKind]) -> Bool { + assert(!kinds.isEmpty) + let recoveryPrecedence = kinds.map(TokenPrecedence.init).min()! + while !self.at(.eof) { + if !recoveryPrecedence.shouldSkipOverNewlines, + self.currentToken.isAtStartOfLine { + break + } + if self.atAny(kinds) { + return true + } + let currentTokenPrecedence = TokenPrecedence(self.currentToken.tokenKind) + if currentTokenPrecedence >= recoveryPrecedence { + break + } + self.consumeAnyToken() + if let closingDelimiter = currentTokenPrecedence.closingTokenKind { + guard self.canRecoverTo(closingDelimiter) else { + break + } + self.eat(closingDelimiter) + } + } + + return false + } +} + diff --git a/Sources/SwiftParser/Statements.swift b/Sources/SwiftParser/Statements.swift new file mode 100644 index 00000000000..ea7420e9d52 --- /dev/null +++ b/Sources/SwiftParser/Statements.swift @@ -0,0 +1,1108 @@ +//===------------------------ Statements.swift ----------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +extension Parser { + /// Parse a statement. + /// + /// This function is meant to be invoked as part of parsing an item. As such + /// it does not deal with parsing expressions, declarations, or consuming + /// any trailing semicolons. 
+ /// + /// - Note: This function must be kept in sync with ``Parser/Lookahead/isStartOfStatement()`` + /// - Seealso: ``Parser/Lookahead/isStartOfStatement()`` + /// + /// Grammar + /// ======= + /// + /// statement → loop-statement ';'? + /// statement → branch-statement ';'? + /// statement → labeled-statement ';'? + /// statement → control-transfer-statement ';'? + /// statement → defer-statement ';'? + /// statement → do-statement ';'? + /// + /// loop-statement → for-in-statement + /// loop-statement → while-statement + /// loop-statement → repeat-while-statement + /// + /// branch-statement → if-statement + /// branch-statement → guard-statement + /// branch-statement → switch-statement + /// + /// labeled-statement → statement-label loop-statement + /// labeled-statement → statement-label if-statement + /// labeled-statement → statement-label switch-statement + /// labeled-statement → statement-label do-statement + /// + /// control-transfer-statement → break-statement + /// control-transfer-statement → continue-statement + /// control-transfer-statement → fallthrough-statement + /// control-transfer-statement → return-statement + /// control-transfer-statement → throw-statement + @_spi(RawSyntax) + public mutating func parseStatement() -> RawStmtSyntax { + // If this is a label on a loop/switch statement, consume it and pass it into + // parsing logic below. + func label(_ stmt: S, with label: Parser.StatementLabel?) 
-> RawStmtSyntax { + // Without a label the statement is returned as-is; otherwise it is + // wrapped in a labeled statement that owns the label tokens. + guard let label = label else { + return RawStmtSyntax(stmt) + } + return RawStmtSyntax(RawLabeledStmtSyntax( + labelName: label.label, labelColon: label.colon, statement: RawStmtSyntax(stmt), + arena: self.arena)) + } + + let optLabel = self.parseOptionalStatementLabel() + switch self.currentToken.tokenKind { + case .forKeyword: + return label(self.parseForEachStatement(), with: optLabel) + case .whileKeyword: + return label(self.parseWhileStatement(), with: optLabel) + case .repeatKeyword: + return label(self.parseRepeatWhileStatement(), with: optLabel) + + case .ifKeyword: + return label(self.parseIfStatement(), with: optLabel) + case .guardKeyword: + return label(self.parseGuardStatement(), with: optLabel) + case .switchKeyword: + return label(self.parseSwitchStatement(), with: optLabel) + + case .breakKeyword: + return label(self.parseBreakStatement(), with: optLabel) + case .continueKeyword: + return label(self.parseContinueStatement(), with: optLabel) + case .fallthroughKeyword: + return label(self.parseFallthroughStatement(), with: optLabel) + case .returnKeyword: + return label(self.parseReturnStatement(), with: optLabel) + case .throwKeyword: + return label(self.parseThrowStatement(), with: optLabel) + case .deferKeyword: + return label(self.parseDeferStatement(), with: optLabel) + case .doKeyword: + return label(self.parseDoStatement(), with: optLabel) + + // The two cases below are wrapped like every other statement so that a + // label that was already consumed stays in the tree instead of being + // dropped together with its tokens. + case .poundAssertKeyword: + return label(self.parsePoundAssertStatement(), with: optLabel) + case _ where self.currentToken.isContextualKeyword("yield"): + fallthrough + case .yield: + return label(self.parseYieldStatement(), with: optLabel) + default: + let missingStmt = RawStmtSyntax(RawMissingStmtSyntax(arena: self.arena)) + return label(missingStmt, with: optLabel) + } + } +} + +// MARK: Conditional Statements + +extension Parser { + /// Parse an if statement.
+ /// + /// Grammar + /// ======= + /// + /// if-statement → 'if' condition-list code-block else-clause? + /// else-clause → 'else' code-block | else if-statement + @_spi(RawSyntax) + public mutating func parseIfStatement() -> RawIfStmtSyntax { + let ifKeyword = self.eat(.ifKeyword) + // A scope encloses the condition and true branch for any variables bound + // by a conditional binding. The else branch does *not* see these variables. + let conditions = self.parseConditionList() + let body = self.parseCodeBlock() + + // The else branch, if any, is outside of the scope of the condition. + let elseKeyword = self.consume(if: .elseKeyword) + let elseBody: RawSyntax? + if elseKeyword != nil { + if self.at(.ifKeyword) { + elseBody = RawSyntax(self.parseIfStatement()) + } else { + elseBody = RawSyntax(self.parseCodeBlock()) + } + } else { + elseBody = nil + } + + return RawIfStmtSyntax( + ifKeyword: ifKeyword, + conditions: conditions, + body: body, + elseKeyword: elseKeyword, elseBody: elseBody, + arena: self.arena) + } +} + +extension Parser { + /// Parse a guard statement. + /// + /// Grammar + /// ======= + /// + /// guard-statement → 'guard' condition-list 'else' code-block + @_spi(RawSyntax) + public mutating func parseGuardStatement() -> RawGuardStmtSyntax { + let guardKeyword = self.eat(.guardKeyword) + let conditions = self.parseConditionList() + let (unexpectedBeforeElseKeyword, elseKeyword) = self.expect(.elseKeyword) + let body = self.parseCodeBlock() + return RawGuardStmtSyntax( + guardKeyword: guardKeyword, + conditions: conditions, + unexpectedBeforeElseKeyword, + elseKeyword: elseKeyword, + body: body, + arena: self.arena) + } +} + + +extension Parser { + /// Parse a list of condition elements. 
+ /// + /// Grammar + /// ======= + /// + /// condition-list → condition | condition ',' condition-list + /// + /// - Returns: The list of parsed condition elements. It always contains at + /// least one element because the first condition is parsed unconditionally. + @_spi(RawSyntax) + public mutating func parseConditionList() -> RawConditionElementListSyntax { + // We have a simple comma-separated list of clauses, but also need to handle + // a variety of common error situations (including migrating from Swift 2 + // syntax). + var elements = [RawConditionElementSyntax]() + var keepGoing: RawTokenSyntax? = nil + repeat { + let condition = self.parseConditionElement() + keepGoing = self.consume(if: .comma) + elements.append(RawConditionElementSyntax( + condition: RawSyntax(condition), trailingComma: keepGoing, + arena: self.arena)) + } while keepGoing != nil + + return RawConditionElementListSyntax(elements: elements, arena: self.arena) + } + + /// Parse a condition element. + /// + /// Grammar + /// ======= + /// + /// condition → expression | availability-condition | case-condition | optional-binding-condition + /// + /// case-condition → 'case' pattern initializer + /// optional-binding-condition → 'let' pattern initializer? | 'var' pattern initializer? + @_spi(RawSyntax) + public mutating func parseConditionElement() -> RawSyntax { + // Parse a leading #available/#unavailable condition if present. + if self.at(.poundAvailableKeyword) || self.at(.poundUnavailableKeyword) { + return self.parsePoundAvailableConditionElement() + } + + // Parse the basic expression case. If we have a leading let/var/case + // keyword or an assignment, then we know this is a binding. + if !self.at(.letKeyword) && !self.at(.varKeyword) && !self.at(.caseKeyword) { + // If we lack it, then this is theoretically a boolean condition. + // However, we also need to handle migrating from Swift 2 syntax, in + // which a comma followed by an expression could actually be a pattern + // clause followed by a binding. Determine what we have by checking for a + // syntactically valid pattern followed by an '=', which can never be a + // boolean condition.
+ // + // However, if this is the first clause, and we see "x = y", then this is + // almost certainly a typo for '==' and definitely not a continuation of + // another clause, so parse it as an expression. This also avoids + // lookahead + backtracking on simple if conditions that are obviously + // boolean conditions. + // + // NOTE(review): no pattern lookahead is performed at this point; every + // clause without a leading let/var/case is parsed as an expression. + return RawSyntax(self.parseExpression(.basic)) + } + + // We're parsing a conditional binding. + assert(self.at(.letKeyword) || self.at(.varKeyword) || self.at(.caseKeyword)) + // Remembers which keyword introduced the binding, together with the + // pattern parsed after it, until the final node is built below. + enum BindingKind { + case pattern(RawTokenSyntax, RawPatternSyntax) + case optional(RawTokenSyntax, RawPatternSyntax) + } + + let kind: BindingKind + if let caseKeyword = self.consume(if: .caseKeyword) { + let pattern = self.parseMatchingPattern() + kind = .pattern(caseKeyword, pattern) + } else { + let letOrVar = self.consumeAnyToken() + let pattern = self.parseMatchingPattern() + kind = .optional(letOrVar, pattern) + } + + // Now parse an optional type annotation. + let annotation: RawTypeAnnotationSyntax? + if self.at(.colon) { + let colon = self.eat(.colon) + let type = self.parseType() + annotation = RawTypeAnnotationSyntax( + colon: colon, type: type, + arena: self.arena) + } else { + annotation = nil + } + + // Conditional bindings can have the format: + // `let newBinding = expression`, or + // `let newBinding`, which is shorthand for `let newBinding = newBinding` + let initializer: RawInitializerClauseSyntax?
+ if self.at(.equal) { + let eq = self.eat(.equal) + let value = self.parseExpression(.basic) + initializer = RawInitializerClauseSyntax( + equal: eq, value: value, + arena: self.arena) + } else { + initializer = nil + } + + switch kind { + case let .optional(letOrVar, pattern): + return RawSyntax(RawOptionalBindingConditionSyntax( + letOrVarKeyword: letOrVar, + pattern: pattern, + typeAnnotation: annotation, + initializer: initializer, + arena: self.arena)) + case let .pattern(caseKeyword, pattern): + return RawSyntax(RawMatchingPatternConditionSyntax( + caseKeyword: caseKeyword, + pattern: pattern, + typeAnnotation: annotation, + initializer: initializer ?? RawInitializerClauseSyntax( + equal: RawTokenSyntax(missing: .equal, arena: self.arena), + value: RawExprSyntax(RawMissingExprSyntax(arena: self.arena)), + arena: self.arena + ), + arena: self.arena)) + } + } + + /// Parse an availability condition. + /// + /// Grammar + /// ======= + /// + /// availability-condition → '#available' '(' availability-arguments ')' + /// availability-condition → '#unavailable' '(' availability-arguments ')' + @_spi(RawSyntax) + public mutating func parsePoundAvailableConditionElement() -> RawSyntax { + assert(self.at(.poundAvailableKeyword) || self.at(.poundUnavailableKeyword)) + let kind: AvailabilitySpecSource = self.at(.poundAvailableKeyword) ? 
.available : .unavailable + let keyword = self.consumeAnyToken() + let (unexpectedBeforeLParen, lparen) = self.expect(.leftParen) + let spec = self.parseAvailabilitySpecList(from: kind) + let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + switch kind { + case .available: + return RawSyntax(RawAvailabilityConditionSyntax( + poundAvailableKeyword: keyword, + unexpectedBeforeLParen, + leftParen: lparen, + availabilitySpec: spec, + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena)) + case .unavailable: + return RawSyntax(RawUnavailabilityConditionSyntax( + poundUnavailableKeyword: keyword, + unexpectedBeforeLParen, + leftParen: lparen, + availabilitySpec: spec, + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena)) + case .macro: + fatalError("Macros are not allowed in this position!") + } + } +} + +// MARK: Throw Statements + +extension Parser { + /// Parse a throw statement. + /// + /// Grammar + /// ======= + /// + /// throw-statement → 'throw' expression + @_spi(RawSyntax) + public mutating func parseThrowStatement() -> RawThrowStmtSyntax { + let throwKeyword = self.eat(.throwKeyword) + let expr = self.parseExpression() + return RawThrowStmtSyntax( + throwKeyword: throwKeyword, expression: expr, + arena: self.arena) + } +} + +// MARK: Defer Statements + +extension Parser { + /// Parse a defer statement. + /// + /// Grammar + /// ======= + /// + /// defer-statement → 'defer' code-block + @_spi(RawSyntax) + public mutating func parseDeferStatement() -> RawDeferStmtSyntax { + let deferKeyword = self.eat(.deferKeyword) + let items = self.parseCodeBlock() + return RawDeferStmtSyntax( + deferKeyword: deferKeyword, body: items, + arena: self.arena) + } +} + +// MARK: Do-Catch Statements + +extension Parser { + /// Parse a do statement. + /// + /// Grammar + /// ======= + /// + /// do-statement → 'do' code-block catch-clauses? 
+ @_spi(RawSyntax) + public mutating func parseDoStatement() -> RawDoStmtSyntax { + let doKeyword = self.eat(.doKeyword) + let body = self.parseCodeBlock() + + // Every 'catch' keyword that directly follows the body starts another + // clause of this 'do' statement. + var clauses = [RawCatchClauseSyntax]() + while self.at(.catchKeyword) { + clauses.append(self.parseCatchClause()) + } + + // A plain 'do' block carries no clause list at all rather than an empty one. + let catchClauses: RawCatchClauseListSyntax? + if clauses.isEmpty { + catchClauses = nil + } else { + catchClauses = RawCatchClauseListSyntax(elements: clauses, arena: self.arena) + } + + return RawDoStmtSyntax( + doKeyword: doKeyword, + body: body, + catchClauses: catchClauses, + arena: self.arena) + } + + /// Parse a single catch clause. + /// + /// - Note: A catch clause is not a statement in its own right; it can only + /// appear following a 'do' statement. + /// + /// Grammar + /// ======= + /// + /// catch-clauses → catch-clause catch-clauses? + /// catch-clause → catch catch-pattern-list? code-block + /// catch-pattern-list → catch-pattern | catch-pattern ',' catch-pattern-list + @_spi(RawSyntax) + public mutating func parseCatchClause() -> RawCatchClauseSyntax { + let catchKeyword = self.eat(.catchKeyword) + + // A '{' right after 'catch' means there is no pattern list to parse. + var items = [RawCatchItemSyntax]() + if !self.at(.leftBrace) { + while true { + let (pattern, whereClause) = self.parseGuardedPattern(.catch) + let comma = self.consume(if: .comma) + items.append(RawCatchItemSyntax( + pattern: pattern, whereClause: whereClause, trailingComma: comma, + arena: self.arena)) + // The list continues only while items are separated by commas. + guard comma != nil else { + break + } + } + } + + let body = self.parseCodeBlock() + let catchItems: RawCatchItemListSyntax? + if items.isEmpty { + catchItems = nil + } else { + catchItems = RawCatchItemListSyntax(elements: items, arena: self.arena) + } + return RawCatchClauseSyntax( + catchKeyword: catchKeyword, + catchItems: catchItems, + body: body, + arena: self.arena) + } +} + +// MARK: Iteration Statements + +extension Parser { + /// Parse a while statement.
+ /// + /// Grammar + /// ======= + /// + /// while-statement → 'while' condition-list code-block + @_spi(RawSyntax) + public mutating func parseWhileStatement() -> RawWhileStmtSyntax { + let whileKeyword = self.eat(.whileKeyword) + let conditions = self.parseConditionList() + let body = self.parseCodeBlock() + return RawWhileStmtSyntax( + whileKeyword: whileKeyword, conditions: conditions, body: body, + arena: self.arena) + } +} + +extension Parser { + /// Parse a repeat-while statement. + /// + /// Grammar + /// ======= + /// + /// repeat-while-statement → 'repeat' code-block 'while' expression + @_spi(RawSyntax) + public mutating func parseRepeatWhileStatement() -> RawRepeatWhileStmtSyntax { + let repeatKeyword = self.eat(.repeatKeyword) + let body = self.parseCodeBlock() + // A 'while' that is not found is reported through the unexpected/missing + // results of `expect` rather than by aborting the statement. + let (unexpectedBeforeWhileKeyword, whileKeyword) = self.expect(.whileKeyword) + let condition = self.parseExpression() + return RawRepeatWhileStmtSyntax( + repeatKeyword: repeatKeyword, + body: body, + unexpectedBeforeWhileKeyword, + whileKeyword: whileKeyword, + condition: condition, + arena: self.arena) + } +} + +// MARK: For-Each Statements + +extension Parser { + /// Parse a for-in statement. + /// + /// Grammar + /// ======= + /// + /// for-in-statement → 'for' 'case'? pattern 'in' expression where-clause? code-block + /// + /// In addition to the grammar above, an optional `try` keyword and an + /// optional contextual `await` keyword are accepted directly after `for`. + @_spi(RawSyntax) + public mutating func parseForEachStatement() -> RawForInStmtSyntax { + let forKeyword = self.eat(.forKeyword) + let tryKeyword = self.consume(if: .tryKeyword) + + // 'await' is a contextual keyword, so it is matched by its spelling. + let awaitKeyword: RawTokenSyntax? + if self.currentToken.isContextualKeyword("await") { + awaitKeyword = self.consumeAnyToken() + } else { + awaitKeyword = nil + } + + // Parse the pattern. This is either 'case' followed by a matching + // pattern or just a normal (typed) pattern. + let caseKeyword = self.consume(if: .caseKeyword) + let pattern: RawPatternSyntax + let type: RawTypeAnnotationSyntax? + if caseKeyword != nil { + pattern = self.parseMatchingPattern() + // Now parse an optional type annotation.
+ if self.at(.colon) { + let colon = self.eat(.colon) + let resultType = self.parseType() + type = RawTypeAnnotationSyntax( + colon: colon, type: resultType, + arena: self.arena) + } else { + type = nil + } + } else { + (pattern, type) = self.parseTypedPattern() + } + + let (unexpectedBeforeInKeyword, inKeyword) = self.expect(.inKeyword) + + let expr = self.parseExpression(.basic) + + // Parse the 'where' expression if present. + let whereClause: RawWhereClauseSyntax? + if self.at(.whereKeyword) { + let whereKeyword = self.eat(.whereKeyword) + let guardExpr = self.parseExpression(.basic) + whereClause = RawWhereClauseSyntax( + whereKeyword: whereKeyword, guardResult: guardExpr, + arena: self.arena) + } else { + whereClause = nil + } + + // stmt-brace + let body = self.parseCodeBlock() + return RawForInStmtSyntax( + forKeyword: forKeyword, + tryKeyword: tryKeyword, + awaitKeyword: awaitKeyword, + caseKeyword: caseKeyword, + pattern: pattern, + typeAnnotation: type, + unexpectedBeforeInKeyword, + inKeyword: inKeyword, + sequenceExpr: expr, + whereClause: whereClause, + body: body, + arena: self.arena + ) + } +} + +// MARK: Switch Statements + +extension Parser { + /// Parse a switch statement. + /// + /// Grammar + /// ======= + /// + /// switch-statement → 'switch' expression '{' switch-cases? '}' + /// switch-cases → switch-case switch-cases? + @_spi(RawSyntax) + public mutating func parseSwitchStatement() -> RawSwitchStmtSyntax { + let switchKeyword = self.eat(.switchKeyword) + + let subject = self.parseExpression(.basic) + let (unexpectedBeforeLBrace, lbrace) = self.expect(.leftBrace) + + let cases = self.parseSwitchCases() + + let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace) + return RawSwitchStmtSyntax( + switchKeyword: switchKeyword, + expression: subject, + unexpectedBeforeLBrace, + leftBrace: lbrace, + cases: cases, + unexpectedBeforeRBrace, + rightBrace: rbrace, + arena: self.arena) + } + + /// Parse a list of switch case clauses. 
+ /// + /// Grammar + /// ======= + /// + /// switch-cases → switch-case switch-cases? + @_spi(RawSyntax) + public mutating func parseSwitchCases() -> RawSwitchCaseListSyntax { + var elements = [RawSyntax]() + while !self.at(.eof) && !self.at(.rightBrace) + && !self.at(.poundEndifKeyword) + && !self.at(.poundElseifKeyword) && !self.at(.poundElseKeyword) { + if self.lookahead().isAtStartOfSwitchCase() { + elements.append(RawSyntax(self.parseSwitchCase())) + } else if self.at(.poundIfKeyword) { + // '#if' in 'case' position can enclose zero or more 'case' or 'default' + // clauses. + elements.append(RawSyntax(self.parsePoundIfDirective { + $0.parseSwitchCases() + } + syntax: { parser, cases in + guard cases.count == 1, let firstCase = cases.first else { + assert(cases.isEmpty) + return RawSyntax(RawSwitchCaseListSyntax(elements: [], arena: parser.arena)) + } + return RawSyntax(firstCase) + })) + } else { + var tokenList = [RawTokenSyntax]() + while !self.at(.eof) && !self.at(.rightBrace) + && !self.at(.poundElseifKeyword) + && !self.at(.poundElseKeyword) && !self.at(.poundEndifKeyword) + && !self.lookahead().isStartOfConditionalSwitchCases() { + let tokens = self.recover() + guard !tokens.isEmpty else { + break + } + tokenList.append(contentsOf: tokens) + } + elements.append(RawSyntax(RawNonEmptyTokenListSyntax(elements: tokenList, arena: self.arena))) + } + } + return RawSwitchCaseListSyntax(elements: elements, arena: self.arena) + } + + /// Parse a single switch case clause. + /// + /// Grammar + /// ======= + /// + /// switch-case → case-label statements + /// switch-case → default-label statements + /// switch-case → conditional-switch-case + @_spi(RawSyntax) + public mutating func parseSwitchCase() -> RawSwitchCaseSyntax { + var unknownAttr: RawAttributeSyntax? 
+ if self.at(.atSign) { + let at = self.eat(.atSign) + let ident = self.consumeIdentifier() + + var tokenList = [RawTokenSyntax]() + while self.at(.atSign) { + tokenList.append(self.eat(.atSign)) + tokenList.append(self.consumeIdentifier()) + + if self.at(.leftParen) { + tokenList.append(contentsOf: self.recover()) + } + } + + unknownAttr = RawAttributeSyntax( + atSignToken: at, attributeName: ident, + leftParen: nil, argument: nil, rightParen: nil, + tokenList: tokenList.isEmpty ? nil : RawTokenListSyntax(elements: tokenList, arena: self.arena), + arena: self.arena) + } else { + unknownAttr = nil + } + + let label: RawSyntax + if self.at(.caseKeyword) { + label = RawSyntax(self.parseSwitchCaseLabel()) + } else { + label = RawSyntax(self.parseSwitchDefaultLabel()) + } + + + // Parse the body. + let statements: RawCodeBlockItemListSyntax + do { + var items = [RawCodeBlockItemSyntax]() + while !self.at(.eof) && + !self.at(.rightBrace) && + !self.at(.poundEndifKeyword) && + !self.at(.poundElseifKeyword) && + !self.at(.poundElseKeyword) && + !self.lookahead().isStartOfConditionalSwitchCases() { + items.append(self.parseCodeBlockItem()) + } + statements = RawCodeBlockItemListSyntax(elements: items, arena: self.arena) + } + + return RawSwitchCaseSyntax( + unknownAttr: unknownAttr, label: label, statements: statements, + arena: self.arena) + } + + /// Parse a switch case with a 'case' label. + /// + /// Grammar + /// ======= + /// + /// case-label → attributes? case case-item-list ':' + /// case-item-list → pattern where-clause? | pattern where-clause? ',' case-item-list + @_spi(RawSyntax) + public mutating func parseSwitchCaseLabel() -> RawSwitchCaseLabelSyntax { + let caseKeyword = self.eat(.caseKeyword) + var caseItems = [RawCaseItemSyntax]() + do { + var keepGoing: RawTokenSyntax? 
= nil + repeat { + let (pattern, whereClause) = self.parseGuardedPattern(.case) + keepGoing = self.consume(if: .comma) + caseItems.append(RawCaseItemSyntax( + pattern: pattern, whereClause: whereClause, trailingComma: keepGoing, + arena: self.arena)) + } while keepGoing != nil + } + let (unexpectedBeforeColon, colon) = self.expect(.colon) + return RawSwitchCaseLabelSyntax( + caseKeyword: caseKeyword, + caseItems: RawCaseItemListSyntax(elements: caseItems, arena: self.arena), + unexpectedBeforeColon, + colon: colon, + arena: self.arena) + } + + /// Parse a switch case with a 'default' label. + /// + /// Grammar + /// ======= + /// + /// default-label → attributes? 'default' ':' + @_spi(RawSyntax) + public mutating func parseSwitchDefaultLabel() -> RawSwitchDefaultLabelSyntax { + let defaultKeyword = self.eat(.defaultKeyword) + let (unexpectedBeforeColon, colon) = self.expect(.colon) + return RawSwitchDefaultLabelSyntax( + defaultKeyword: defaultKeyword, + unexpectedBeforeColon, + colon: colon, + arena: self.arena + ) + } + + enum GuardedPatternContext { + case `case` + case `catch` + } + + /// Parse a pattern-matching clause for a case or catch statement, + /// including the guard expression. + /// + /// Grammar + /// ======= + /// + /// case-item → pattern where-clause? + /// catch-pattern → pattern where-clause? + mutating func parseGuardedPattern( + _ context: GuardedPatternContext + ) -> (RawPatternSyntax, RawWhereClauseSyntax?) { + let flavor: ExprFlavor + switch context { + // 'case' is terminated with a colon and so allows a trailing closure. + case .`case`: + flavor = .trailingClosure + // 'catch' is terminated with a brace and so cannot. + case .`catch`: + flavor = .basic + } + + let pattern = self.parseMatchingPattern() + + // Parse the optional 'where' guard, with this particular pattern's bound + // vars in scope. + let whereClause: RawWhereClauseSyntax? 
+ if self.at(.whereKeyword) { + let whereKeyword = self.eat(.whereKeyword) + let guardExpr = self.parseExpression(flavor) + whereClause = RawWhereClauseSyntax( + whereKeyword: whereKeyword, guardResult: guardExpr, + arena: self.arena) + } else { + whereClause = nil + } + return (pattern, whereClause) + } +} + +// MARK: Control Transfer Statements + +extension Parser { + /// Parse a return statement + /// + /// Grammar + /// ======= + /// + /// return-statement → 'return' expression? + @_spi(RawSyntax) + public mutating func parseReturnStatement() -> RawReturnStmtSyntax { + let ret = self.eat(.returnKeyword) + + // Handle the ambiguity between consuming the expression and allowing the + // enclosing stmt-brace to get it by eagerly eating it unless the return is + // followed by a '}', '', statement or decl start keyword sequence. + let expr: RawExprSyntax? + if + [ + RawTokenKind.rightBrace, .semicolon, .eof, + .poundIfKeyword, .poundErrorKeyword, .poundWarningKeyword, + .poundEndifKeyword, .poundElseKeyword, .poundElseifKeyword + ].firstIndex(of: self.currentToken.tokenKind) == nil + && !self.lookahead().isStartOfStatement() && !self.lookahead().isStartOfDeclaration() { + expr = self.parseExpression() + } else { + expr = nil + } + return RawReturnStmtSyntax( + returnKeyword: ret, expression: expr, + arena: self.arena) + } +} + +extension Parser { + /// Parse a yield statement. + /// + /// Yield statements are not formally a part of the Swift language yet. + /// + /// Grammar + /// ======= + /// + /// yield-statement → 'yield' '('? expr-list? ')'? 
+ @_spi(RawSyntax) + public mutating func parseYieldStatement() -> RawYieldStmtSyntax { + assert(self.currentToken.tokenText == "yield") + let yield = self.consume(remapping: .yield) + + let yields: RawSyntax + if self.at(.leftParen) { + let lparen = self.eat(.leftParen) + let exprList: RawExprListSyntax + do { + var keepGoing = true + var elementList = [RawExprSyntax]() + while !self.at(.eof) && !self.at(.rightParen) && keepGoing { + elementList.append(self.parseExpression()) + // FIXME: Need explicit syntax for yield lists or we'll drop this comma! + keepGoing = self.consume(if: .comma) != nil + } + exprList = RawExprListSyntax(elements: elementList, arena: self.arena) + } + let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + yields = RawSyntax(RawYieldListSyntax( + leftParen: lparen, + elementList: exprList, trailingComma: nil, + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena)) + } else { + yields = RawSyntax(self.parseExpression()) + } + + return RawYieldStmtSyntax( + yieldKeyword: yield, yields: yields, + arena: self.arena) + } +} + +extension Parser { + @_spi(RawSyntax) + public struct StatementLabel { + public var label: RawTokenSyntax + public var colon: RawTokenSyntax + + public init(label: RawTokenSyntax, colon: RawTokenSyntax) { + self.label = label + self.colon = colon + } + } + + /// Parse an optional label that defines a named control flow point. + /// + /// Grammar + /// ======= + /// + /// statement-label → label-name ':' + /// label-name → identifier + @_spi(RawSyntax) + public mutating func parseOptionalStatementLabel() -> StatementLabel? { + guard self.currentToken.isIdentifier && self.peek().tokenKind == .colon else { + return nil + } + + let label = self.consumeIdentifier() + let colon = self.eat(.colon) + return StatementLabel(label: label, colon: colon) + } +} + +extension Parser { + /// Parse a break statement. + /// + /// Grammar + /// ======= + /// + /// break-statement → 'break' label-name? 
+ @_spi(RawSyntax) + public mutating func parseBreakStatement() -> RawBreakStmtSyntax { + let breakKeyword = self.eat(.breakKeyword) + let label = self.parseOptionalControlTransferTarget() + return RawBreakStmtSyntax( + breakKeyword: breakKeyword, label: label, + arena: self.arena) + } + + /// Parse a continue statement. + /// + /// Grammar + /// ======= + /// + /// continue-statement → 'continue' label-name? + @_spi(RawSyntax) + public mutating func parseContinueStatement() -> RawContinueStmtSyntax { + let continueKeyword = self.eat(.continueKeyword) + let label = self.parseOptionalControlTransferTarget() + return RawContinueStmtSyntax( + continueKeyword: continueKeyword, label: label, + arena: self.arena) + } + + /// Parse a fallthrough statement. + /// + /// Grammar + /// ======= + /// + /// fallthrough-statement → 'fallthrough' + @_spi(RawSyntax) + public mutating func parseFallthroughStatement() -> RawFallthroughStmtSyntax { + let fallthroughKeyword = self.eat(.fallthroughKeyword) + return RawFallthroughStmtSyntax( + fallthroughKeyword: fallthroughKeyword, + arena: self.arena) + } + + // label-name → identifier + @_spi(RawSyntax) + public mutating func parseOptionalControlTransferTarget() -> RawTokenSyntax? { + guard !self.currentToken.isAtStartOfLine else { + return nil + } + + guard + self.currentToken.isIdentifier && + !self.lookahead().isStartOfStatement() && + !self.lookahead().isStartOfDeclaration() + else { + return nil + } + + return self.consumeIdentifier() + } +} + +extension Parser { + @_spi(RawSyntax) + public mutating func parsePoundAssertStatement() -> RawPoundAssertStmtSyntax { + let poundAssert = self.eat(.poundAssertKeyword) + let (unexpectedBeforeLParen, lparen) = self.expect(.leftParen) + let condition = self.parseExpression() + let comma = self.consume(if: .comma) + let message: RawTokenSyntax? 
+ if comma != nil { + message = self.consumeAnyToken() + } else { + message = nil + } + let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + return RawPoundAssertStmtSyntax( + poundAssert: poundAssert, + unexpectedBeforeLParen, + leftParen: lparen, + condition: condition, + comma: comma, + message: message, + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena) + } +} + +// MARK: Lookahead + +extension Parser.Lookahead { + /// Returns `true` if the current token represents the start of a statement + /// item. + /// + /// - Note: This function must be kept in sync with `parseStatement()`. + /// - Seealso: ``Parser/parseStatement()`` + public func isStartOfStatement() -> Bool { + switch self.currentToken.tokenKind { + case .returnKeyword, + .throwKeyword, + .deferKeyword, + .ifKeyword, + .guardKeyword, + .whileKeyword, + .doKeyword, + .repeatKeyword, + .forKeyword, + .breakKeyword, + .continueKeyword, + .fallthroughKeyword, + .switchKeyword, + .caseKeyword, + .defaultKeyword, + .yield, + .poundAssertKeyword, + .poundIfKeyword, + .poundWarningKeyword, + .poundErrorKeyword, + .poundSourceLocationKeyword: + return true + + case .poundLineKeyword: + // #line at the start of a line is a directive, when within, it is an expr. + return self.currentToken.isAtStartOfLine + + case .identifier: + // "identifier ':' for/while/do/switch" is a label on a loop/switch. + guard self.peek().tokenKind == .colon else { + // "yield" in the right context begins a yield statement. + if self.currentToken.isContextualKeyword("yield") { + return true + } + return false + } + + // To disambiguate other cases of "identifier :", which might be part of a + // question colon expression or something else, we look ahead to the second + // token. + var backtrack = self.lookahead() + backtrack.consumeIdentifier() + backtrack.eat(.colon) + + // We treating IDENTIFIER: { as start of statement to provide missed 'do' + // diagnostics. This case will be handled in parseStmt(). 
+ if self.at(.leftBrace) { + return true + } + // For better recovery, we just accept a label on any statement. We reject + // putting a label on something inappropriate in parseStmt(). + return backtrack.isStartOfStatement() + + case .atSign: + // Might be a statement or case attribute. The only one of these we have + // right now is `@unknown default`, so hardcode a check for an attribute + // without any parens. + guard self.peek().tokenKind == .identifier else { + return false + } + var backtrack = self.lookahead() + backtrack.eat(.atSign) + backtrack.consumeIdentifier() + return backtrack.isStartOfStatement() + default: + return false + } + } + + func isBooleanExpr() -> Bool { + var lookahead = self.lookahead() + return !lookahead.canParseTypedPattern() || lookahead.currentToken.tokenKind != .equal + } + + /// Returns whether the parser's current position is the start of a switch case, + /// given that we're in the middle of a switch already. + func isAtStartOfSwitchCase() -> Bool { + // Check for and consume attributes. The only valid attribute is `@unknown` + // but that's a semantic restriction. 
+ var lookahead = self.lookahead() + while lookahead.at(.atSign) { + guard lookahead.peek().isIdentifier else { + return false + } + + lookahead.eat(.atSign) + lookahead.consumeIdentifier() + } + + return lookahead.at(.caseKeyword) || lookahead.at(.defaultKeyword) + } + + func isStartOfConditionalSwitchCases() -> Bool { + guard self.at(.poundIfKeyword) else { + return self.isAtStartOfSwitchCase() + } + + var lookahead = self.lookahead() + repeat { + lookahead.consumeAnyToken() + // just find the end of the line + lookahead.skipUntilEndOfLine() + } while lookahead.at(.poundIfKeyword) || lookahead.at(.poundElseifKeyword) || lookahead.at(.poundElseKeyword) + return lookahead.isAtStartOfSwitchCase() + } +} diff --git a/Sources/SwiftParser/SwiftParser.docc/FilingBugReports.md b/Sources/SwiftParser/SwiftParser.docc/FilingBugReports.md new file mode 100644 index 00000000000..3c407348593 --- /dev/null +++ b/Sources/SwiftParser/SwiftParser.docc/FilingBugReports.md @@ -0,0 +1,37 @@ +# Filing Parser Bug Reports + +Guide to provide steps for filing actionable bug reports for parser failures. + +Reducing a test case requires the `swift-parser-test` utility that you can build by checking out `swift-syntax` and running `swift build --product swift-parser-test` or building the `swift-parser-test` target in Xcode. + +## Round-Trip Failure or Parser Crash + +If you encounter a case where printing the parsed syntax tree does not reproduce the original source file, that’s a round-tripping failure and is considered a serious bug in SwiftSyntax – preserving the original source is a core principle of this library. To reproduce and reduce the failure, perform the following steps + +1. Reduce the failure by running + ``` + swift-parser-test reduce /pth/to/file/that/does/not/roundtrip.swift + ``` +2. File a bug report on with the reduced source code or fix the issue yourself by following the steps in . 
+ +## Parse of Valid Source Failed or Bad Diagnostic Produced + +If you have source code that is parsed correctly by the current Swift compiler (and thus the old parser written in C++), but that is rejected by the new parser or if you think that a diagnostic produced by the new parser is not very helpful, perform the following steps to reduce the failure: + +1. Run the following command to see the diagnostics produced by the parser + ``` + swift-parser-test print-diags /path/to/file.swift + ``` +2. Remove as much code as possible from your test file and check if it still produces the same diagnostic. +3. File a bug report with the reduced source code or fix the issue yourself by following the steps in <doc:FixingBugs>. + +## Parse of Valid Source Code Produced an Invalid Syntax Tree + +If you have valid source code that produced a syntax tree which doesn’t represent the source code’s semantics correctly, perform the following steps: + +1. Run the following command to print the parsed syntax tree + ``` + swift-parser-test dump-tree /path/to/file.swift + ``` +2. Remove as much code as possible from your test file and check if it still produces the same invalid tree. +3. File a bug report with the reduced source code or fix the issue yourself by following the steps in <doc:FixingBugs>. diff --git a/Sources/SwiftParser/SwiftParser.docc/FixingBugs.md b/Sources/SwiftParser/SwiftParser.docc/FixingBugs.md new file mode 100644 index 00000000000..71dde081d9d --- /dev/null +++ b/Sources/SwiftParser/SwiftParser.docc/FixingBugs.md @@ -0,0 +1,81 @@ +# Fixing Bugs + +Guide to write test cases in the parser’s test suite and how to debug failures. + +The general approach to fixing bugs in the parser is to first write an automated test case that reproduces the failure in isolation. This allows you to invoke the parser with minimal dependencies and allows you to set breakpoints inside of it.
+ +Once you’ve written a test case (see below), set a breakpoint in `Parser.parseSourceFile` and navigate the debugger to the place where the parser behaves unexpectedly. While the debugger is stopped at an instance function in , `po self.currentToken` can show you the next token that will be parsed. + +## Round-Trip Failure or Parser Crash + +1. Add a new test case in `SwiftParserTest` that looks like the following + ```swift + try AssertParse({ $0.parseSourceFile() }) { + """ + <#your code that does not round trip#> + """ + } + ``` +2. Run the test case, read the error message to figure out which part of the source file does not round-trip +3. Optional: Reduce the test case even further by deleting more source code and calling into a specific production of the parser instead of `Parser.parseSourceFile` + + +## Parse of Valid Source Failed + +Diagnostics are produced when the parsed syntax tree contains missing or unexpected nodes. The test case you should start with is identical to the one described in [Round-Trip Failure](#Round-Trip-Failure-or-Parser-Crash). You want to navigate the debugger to the place that incorrectly produces a missing or unexpected node. + +## Parse of Valid Source Code Produced an Invalid Syntax Tree + +1. Add a test case in `SwiftParserTest` that looks like the following + ```swift + let source = """ + <#your code that produces an invalid syntax tree#> + """ + + let tree = withParser(source: source) { + Syntax(raw: $0.parseSourceFile().raw) + } + XCTAssertHasSubstructure( + tree, + <#create a syntax node that you expect the tree to have#> + ) + ``` +2. Optional: Reduce the test case even further by deleting more source code and calling into a specific production of the parser instead of `Parser.parseSourceFile` +3. Run the test case and navigate the debugger to the place that produced the invalid syntax node. + +## Unhelpful Diagnostic Produced + +Unhelpful diagnostics can result from two reasons: +1. 
The parser does a bad job at recovering from invalid source code and produces a syntax tree that does not match what the developer meant to write +2. The parser recovers reasonably well from the parse failure but complains about missing and unexpected text where a more contextual error would be more helpful. + +To distinguish these cases run the following command and look at the dumped syntax tree. Use your own judgment to decide whether this models the intended meaning of the source code reasonably well. +``` +swift-parser-test print-tree /path/to/file/with/bad/diagnostic +``` + +Fixing the first case where the parser does not recover according to the user’s intent is similar to [Parse of Valid Source Code Produced an Invalid Syntax Tree](#Parse-of-Valid-Source-Code-Produced-an-Invalid-Syntax-Tree). See <doc:ParserRecovery> for documentation on how parser recovery works and determine how to recover better from the invalid source code. + +To add a new, more contextual diagnostic, perform the following steps. + +1. Add a test case to `DiagnosticTests.swift` like the following: + + ```swift + let source = """ + <#your code that produces a bad diagnostic#> + } + """ + let tree = withParser(source: source) { + Syntax(raw: $0.parseSourceFile().raw) + } + ``` +2. Optional: Call a more specific production than `parseSourceFile` in the test case. +3. Determine which node encompasses all information that is necessary to produce the improved diagnostic – for example `FunctionSignatureSyntax` contains all information to diagnose if the `throws` keyword was written after the `->` instead of in front of it. +4. If the diagnostic message you want to emit does not exist yet, add a case to for the new diagnostic. +5. If the function does not already exist, write a new visit method on . +6. In that visitation method, detect the pattern for which the improved diagnostic should be emitted and emit it using `diagnostics.append`. +7.
Mark the missing or garbage nodes that are covered by the new diagnostic as handled by adding their `SyntaxIdentifier`s to `handledNodes`. +8. Assert that the new diagnostic is emitted by adding the following to your test case: + ```swift + XCTAssertSingleDiagnostic(in: tree, line: <#expected line#>, column: <#expected column#>, expectedKind: .<#expected diagnostic kind#>) + ``` diff --git a/Sources/SwiftParser/SwiftParser.docc/Info.plist b/Sources/SwiftParser/SwiftParser.docc/Info.plist new file mode 100644 index 00000000000..121382683cf --- /dev/null +++ b/Sources/SwiftParser/SwiftParser.docc/Info.plist @@ -0,0 +1,38 @@ + + + + + CFBundleName + SwiftParser + CFBundleDisplayName + SwiftParser + CFBundleIdentifier + com.apple.swift-parser + CFBundleDevelopmentRegion + en + CFBundleIconFile + DocumentationIcon + CFBundleIconName + DocumentationIcon + CFBundlePackageType + DOCS + CFBundleShortVersionString + 0.1.0 + CDDefaultCodeListingLanguage + swift + CFBundleVersion + 0.1.0 + CDAppleDefaultAvailability + + SwiftParser + + + name + macOS + version + 10.15 + + + + + diff --git a/Sources/SwiftParser/SwiftParser.docc/ParserDesign.md b/Sources/SwiftParser/SwiftParser.docc/ParserDesign.md new file mode 100644 index 00000000000..1ee1dc449df --- /dev/null +++ b/Sources/SwiftParser/SwiftParser.docc/ParserDesign.md @@ -0,0 +1,191 @@ +# Parser Design + +Learn the details of the techniques and decisions guiding the parser project. + +## Overview + +The design of the Swift parser and its supporting infrastructure is influenced +heavily by the [Roslyn][roslyn] project and its family of parsers. We +adopted a number of core design tenets: + +- Source Fidelity +- Resilience +- Parsing with Minimal Context + +## Source Fidelity + +The overriding principle of the SwiftParser and SwiftSyntax libraries is the +efficient representation of the entirety of the text of source code.
This +includes whitespace, comments, and `#!` directives, but also includes broken +UTF-8 bytes, the [Unicode byte order mark][BOM], and other textual oddities. +The output of the parser should remain faithful to the textual content of its +input. This implies a number of important technical points: + +### Faithful Parsing + +In order to faithfully represent input text, the parser's methods must be +implemented with a mindset that can be summarized as: never drop things on the +floor. Every byte of the input buffer must find its way into some part of the +resulting syntax tree. + +The most common kind of source fidelity bugs occur when early-exiting from a +parsing function without considering the state that has been built up during +the parse. For example, a loop that early-exits in response to an unexpected +token must either construct a syntax node with missing elements +or drain its tokens into a token list syntax node as appropriate. + +When porting productions from C++ to Swift, be mindful of whether the conditions +in the legacy code involve diagnostics and recovery, and how those conditions +have to change to maintain source fidelity. + +### Source Fidelity Goes Hand-in-Glove With Tooling + +Source fidelity as a goal optimizes for source-aware tools like formatters, +linters, and editors. These tools accept user-provided text +in a variety of malformed or partial states and, in the case of formatters, +often need to reproduce that text with some modifications. Even when these +tools don't need to produce output, they still need to take advantage of the +maximum amount of structure they can get. + +By contrast, parsers for programming languages are usually lossy. Most +compilers do not process whitespace and comments in a meaningful sense, so their +ASTs drop this information after lexical analysis completes. This leaves +tooling authors left to pick up the pieces by reimplementing whitespace-aware +ASTs. 
+ +### `String` is Rarely Appropriate in the Syntax Tree + +Swift's `String` type provides a vocabulary type for safe, unicode-correct +handling of text. Unfortunately, unicode correctness implies that instances of +`String` do not maintain fidelity with respect to all inputs. In order to +guarantee its internal invariants around UTF-8 correctness, invalid inputs are +healed with a special replacement character. This makes `String` unsuitable not +just as a backing data structure, but also as a vocabulary type for operations +in this library that interact with source code. SwiftSyntax instead provides the +`SyntaxText` type that behaves as a safe buffer of raw bytes that does no +interpolation or healing. + +The parser should not be made to convert `SyntaxText` instances to `String` +instances without verifying that this will have no impact on the textual +content of the syntax node. For example, Swift's keywords are all ASCII +identifiers that will round-trip between `SyntaxText` and `String`. + +### Complete Insulation from Raw APIs + +In order to efficiently represent source text, SwiftSyntax provides a set of +"raw" APIs for syntax nodes. These raw nodes provide extremely low-level access +to the content of the syntax tree. However, the lifetimes of raw syntax nodes +are tightly scoped to an enclosing `SyntaxArena`, which makes escaping these +nodes an unsafe operation. In order to prevent this, the parser consumes the +raw node creation interface as SPI and does not provide external clients a way +to view the raw syntax it constructs directly. Instead, clients are provided +with a high-level, safe view of raw syntax that avoids the ownership +problem cited above while also providing a much nicer view of common operations +on the syntax tree. + +The rule of thumb is that any operation that traffics in or stores `Raw` +data should not be escaped via a public API to clients of this library. 
If +access to raw data is needed, the operation should be tagged `@_spi(RawSyntax)`. + +## Resilience + +Resilience refers to the extent to which the SwiftParser API can accept +ill-formed inputs and still produce a well-formed output. Our metric for +"well-formedness" is structure. The more syntactic structure the parser is able +to output, the better chance a client will have of being able to interpret +that structure and accomplish its task. As part of that resilience story, the +parser includes a number of facilities for recovering from invalid, unexpected, +and missing tokens in its input stream. The syntax tree also affords us the +ability to insert these "missing" syntax elements as explicit nodes in the tree. +To complement this, SwiftSyntax provides multiple different "views" of the +syntax tree that can include these missing or unexpected nodes on demand. + +For more information on parser recovery and resilience, see <doc:ParserRecovery>. + +## Parsing with Minimal Context + +The parser takes another unusual stance in its construction: it is mostly +stateless. It is not a hard and fast rule that the parse be stateless, but +engineering it with that mindset has enabled a relatively clean, testable +parser design to emerge. Swift is not a context-free language, so there will +inevitably need to be some state involved during the parse. The parser is +designed to sequester those stateful decisions +(e.g. ) away from the main parse productions. + +What ambient state is present occurs as arguments to certain productions that +are required to enable or disable their child productions in response to some +condition. One common case is a "mode bit" where one parsing API can be +configured by its callers to accept or reject many kinds of productions. It is +preferable to keep as few parameters on productions as possible to minimize +the surface area of the parser's APIs and prevent context from creeping in.
+ +Minimizing ambient state isn't just about hygiene, it also helps keep +incremental parsing practical. The more state that accumulates in the parser and +its productions, the more state must be tracked and restored across incremental +compilation sessions. The addition of state in the parse also affects the +parser's ability to reuse nodes during an incremental parse. + +### Diagnostics + +The parser library separates identifying invalid syntax from reporting invalid +syntax to users in the form of diagnostics. By restricting the parser to just +the identification of missing and unexpected syntax, the implementation becomes +far easier to reason about. + +Not emitting diagnostics is also a key component of the incremental parse. One +way to think of a diagnostic is as the outcome of a set of parsing decisions +that led to its emission. An arbitrary edit to the input can lead to a different +set of decisions being made, which means the parser would have to keep track +of the exact set of conditions that led to a particular diagnostic so it could +be invalidated. Alternatively, the parser could be made to compromise the +incremental parse by invalidating far more state than it needs to in order to +clear a set of stale diagnostics. + +## Performance Goals + +The SwiftParser library aims to replace the C++ parser that currently resides +in the [Swift compiler][Swiftc]. As such, minimizing the latency involved in +parsing is an extremely important goal. We aim to be competitive with the +performance of the C++ implementation of the Swift parser _without compromising +safety_. + +## Additional Design Considerations + +The remaining sections discuss loose design principles to bear in mind while +exploring or hacking on this codebase. + +### Types are Cheap + +The Swift programming language provides a rich feature set in its types +including enums with associated values, robust generics, and language facilities +for lifting raw bits and bytes into proper APIs.
Prefer to make domain-specific +types rather than using e.g. `Bool` and `String`. Domain-specific types can +help an author maintain a safety contract with the rest of the parser and its +clients that is far stronger than the underlying value type would otherwise +provide. Importantly, custom types also provide an excellent opportunity to +shape the way that a particular idea or concept is conveyed to users and +developers of this library. + +### Prefer The Least Visibility Possible + +To keep types well-encapsulated, prefer to minimize the visibility of types and +methods unless there is a compelling reason to promote them to SPI or API. For +testability, prefer `@_spi` annotations to `@testable import`s of libraries. + +### Make Liberal Use of Extensions + +Extensions can help to break up the structure of a long stretch of related +functionality. Consider which methods and nested types are related and organize +them under a common extension. One common pattern is to define a type and the +methods that return that type under the same extension block. + +### Design Testable APIs + +There is no one-size-fits-all solution to designing a testable library or +testable APIs. A helpful mindset to adopt is that of an API designer, even +for internal methods and state. Consider the API you'd want to adopt as an +end user. + +[roslyn]: https://github.com/dotnet/roslyn/ +[BOM]: https://unicode.org/faq/utf_bom.html +[Swiftc]: https://github.com/apple/swift diff --git a/Sources/SwiftParser/SwiftParser.docc/ParserRecovery.md b/Sources/SwiftParser/SwiftParser.docc/ParserRecovery.md new file mode 100644 index 00000000000..97e529b26d0 --- /dev/null +++ b/Sources/SwiftParser/SwiftParser.docc/ParserRecovery.md @@ -0,0 +1,109 @@ +# Parser Recovery + +Recover from errors in parsing input. + +## Recovery + +Parser recovery is the process of identifying and resolving issues that arise +while parsing malformed Swift code.
When this occurs, the Swift parser must +look forward in the input stream to a point at which parsing of valid syntax +can continue. Good parser recovery strikes the right balance between skipping +ahead in the input stream and producing as much structure from the malformed +input stream as possible. + +The Swift parser tries to represent as much structure in the input stream as +possible. To that end, the parser tries to recover by looking through the +minimum number of tokens. This optimizes for clients like formatters and editors +that often receive malformed, partial, and unstructured inputs and must produce +sensible outputs in response. + +The Swift parser differs from many other language parsers by intentionally +focusing on two broad classes of parsing errors: + +- Unexpected Syntax +- Missing Syntax + +Many parsers for programming languages go to great lengths to pattern match +against their inputs to produce targeted diagnostics that identify specific +user mistakes. By focusing the Swift parser on just missing and unexpected +syntax, the core of the parser is kept free of the clutter associated +with these diagnostics. + +## Recovering from Unexpected Syntax + +When the parser expects a token but can’t find it, it looks ahead to see if it +can find the expected token by using a token precedence model. Tokens are +divided into precedence groups (see ``TokenPrecedence``), +depending on how strongly they mark the structure of the source code. For +example, keywords that start a declaration, like `func`, have a higher +precedence than expression keywords, like `self`. + +Consider a for-in loop like the following, where the `in` keyword has been +exchanged with the identifier `ys`: + +```swift +for x ys in { } +``` + +To reach the `in` keyword, the parser looks past the identifier token +because it has the same token precedence.
Having found the `in` keyword, the +parser yields both the unexpected identifier token `ys` and the token for the +`in` keyword for placement in the tree. + +## Recovering from Missing Tokens + +When looking past unexpected tokens fails, the parser declines to consume the +current token and produces a missing syntax element instead. Missing tokens have +no textual content associated with them. They exist to fulfill our goal of +maintaining as much structure in the parse as possible, and have the added +benefit of being easily identifiable by the diagnostic post-pass. + +Consider a nameless function like the following: + +```swift +func () { } +``` + +The parser expects to find an identifier token after the `func` keyword, but +instead finds the opening left parenthesis token for the argument list. In this +case, looking forward in the input would be fruitless since there are no +identifiers that could be the name of this function. In practice, the +precedence of an identifier token is lower than that of a delimiter like +parentheses, so no lookahead occurs and a missing identifier is synthesized +into the function declaration. + +## Recovering from Missing Syntax + +In some cases, the parse stream is so malformed that entire syntax nodes cannot +be identified. For these cases, the parser yields `MissingSyntax` nodes of the +appropriate type. The presence of these missing nodes in the parse is the +impetus for many of the parser's recovery mechanisms to kick in, since they +signal a catastrophic parsing failure and no progress being made in parsing +the input stream. + +One prominent example of this kind of failure is when parsing declarations. The +parser detects the presence of attributes and modifiers as a signal that it +should begin parsing a declaration. But if no declaration is present, the +parser will consume the attributes and modifiers and produce a +`MissingDeclSyntax` node. 
+ +```swift +@inlinable public /*missing function*/ +``` + +Another is when parsing labeled statements where the label is present but no +statement follows: + +```swift +label: + struct Foo {} +``` + +This results in a `LabeledStmtSyntax` node that has a `MissingStmtSyntax` node +as its associated statement. + +Missing syntax nodes, like missing tokens, help to ensure that the parser is +able to yield the maximum amount of structure back to clients. In the case of +`MissingDeclSyntax`, it also aids in keeping the textual content of the tree +faithful to the original input text, as there would otherwise be nowhere to +stick the parsed attributes and modifiers without dropping them on the floor. diff --git a/Sources/SwiftParser/SwiftParser.docc/ParsingBasics.md b/Sources/SwiftParser/SwiftParser.docc/ParsingBasics.md new file mode 100644 index 00000000000..d973b113f89 --- /dev/null +++ b/Sources/SwiftParser/SwiftParser.docc/ParsingBasics.md @@ -0,0 +1,202 @@ +# Parsing Basics + +Discover the basics of parsing the Swift grammar + +## Overview + +The Swift programming language follows a regular structure called a _grammar_ +that is composed of rules that direct a conforming implementation on what +inputs constitute valid Swift programs. These rules also come with a set of +conditions under which they can be executed. We refer to a rule that has syntax +as its output as a _production_. The production rule for parsing an optional +type from the Swift Book is reproduced below: + +``` +optional-type → type '?' +``` + +The `optional-type` production directs us to first parse a `type` production, +then parse a `?` token. Productions may be recursive - as in the reference to +`type` which contains `optional-type` as one of its child productions - +or conditional. This is usually denoted with a `|` as in: + +``` +metatype-type → type '.' 'Type' | type '.' 'Protocol' +``` + +This production directs us to first parse a type, then a '.' 
character, +and finally attempt to parse the identifier 'Type'. If 'Type' cannot be found, +we are to reset and try the right-side rule, which directs us to instead parse +'Protocol'. + +- Note: Because the left and right conditions of the `metatype-type` production +share the same structure up to the last identifier, it is natural to implement +this rule as first parsing a type, then a dot, then checking to see if 'Type' or +'Protocol' is present, rather than backing up and trying again when the first +rule fails. + +A production can also account for syntactic elements that are allowed to be +absent from the input stream - denoted by `?`. Here, the `class-body` production +represents a class body that may contain zero or more `class-member` elements +surrounded by curly braces: + +``` +class-body → '{' class-members? '}' +``` + +Finally, a production can describe a sequence of syntactic elements by making +recursive reference to itself. The `class-members` production directs us to +parse a `class-member`, then to recursively parse another `class-member` if +possible: + +``` +class-members → class-member class-members? +``` + +## Productions as Syntax Nodes + +The Swift Parser is built atop a framework that faithfully represents Swift +source code called SwiftSyntax. SwiftSyntax can be viewed as an encoding of the +production rules of the Swift grammar in a tree-shaped data structure called a +syntax tree. + +To take the examples above, `RawOptionalTypeSyntax` stands in for the output of the +`optional-type` production, and `RawMetatypeTypeSyntax` stands in for the output +of the `metatype-type` production.
The structure of these nodes reflects the +structure of the productions that define them: + +- `RawOptionalTypeSyntax` has a `wrappedType` property to retrieve its child `RawTypeSyntax` +- `RawMetatypeTypeSyntax` has a `baseType` property to retrieve its child `RawTypeSyntax` + +These nodes also have accessors for associated tokens like the `?` +token or the `.`, `Type`, and `Protocol` tokens. + +For sequences of syntax elements, SwiftSyntax provides corresponding +`SyntaxCollection` types. + +- Note: Many sources refer to this structure as an "Abstract Syntax Tree" or + AST. Abstract, in these contexts, refers to the tree dropping some + amount of input structure that is not needed for analysis or compilation + to proceed. Because the syntax trees in SwiftSyntax are designed to + faithfully represent source text, they are more accurately referred to + as "Concrete Syntax Trees" or just "Syntax Trees". + +## Parsing Source Code into Syntax Nodes + +Parsing is a fruitful sub-field of programming language research, with a wide +variety of techniques for dealing with many classes of inputs. One of the +simplest approaches to parsing languages like Swift is parsing by +_recursive descent_. Just as SwiftSyntax encodes the results of productions, +a recursive descent parser encodes the content of production rules as a set of +mutually-recursive functions. + +```swift +extension Parser { + // optional-type → type '?' + public mutating func parseOptionalType() -> RawOptionalTypeSyntax { + // First, recursively parse a type + let base = self.parseType() + // Then, parse a postfix question mark token + let mark = self.eat(.postfixQuestionMark) + // Finally, yield the optional type syntax node. + return RawOptionalTypeSyntax( + wrappedType: base, questionMark: mark, arena: self.arena) + } +} +``` + +This simple function introduces many of the basic concepts that form the +backbone of the parser's implementation.
The ``Parser/eat(_:)`` method +provides a function to examine the input stream and advance one step if the +provided token kind is present. To form the node, a call to the initializer +is made, which acts to wire up all of the sub-nodes into a single +`RawOptionalTypeSyntax`. + +### Unconditional Parsing + +The ``Parser/eat(_:)`` method unconditionally consumes a token of the given +type, and an assertion is raised if the input token's kind does not match. +This function is most appropriate for encoding structural invariants during +the parse. For example, the decision to parse a `FunctionDeclSyntax` node is +made by examining the input stream for the `func` keyword. It is reasonable to +expect that the production implementing function parsing would `eat` its `func` +keyword. This ensures that callers always check for the `func` keyword before +calling the function parsing method. + +### Conditional Parsing + +To model conditional productions, the syntax tree uses `Optional`-typed +syntax nodes, and the parser uses the ``Parser/consume(if:)`` method. +For a Swift declaration item, a trailing semicolon is optional: + +```swift +extension Parser { + mutating func parseDeclarationItem() -> RawMemberDeclListItemSyntax { + // First, recursively parse a declaration + let parsedDecl = self.parseDeclaration() + // Next, consume the semicolon - if there is one. + let semicolon = self.consume(if: .semicolon) + return RawMemberDeclListItemSyntax( + decl: parsedDecl, + semicolon: semicolon, + arena: self.arena) + } +} +``` + +Unlike ``Parser/eat(_:)``, if the parser does not encounter a token of the +given type, a `nil` token is returned and the input is left unconsumed. + +### Sequence Parsing + +To consume a sequence of syntax elements, a loop and a condition are needed. +Many sequences of elements in Swift are delimited by an inter-element token +like a comma or a period.
A type's inheritance clause is a prime example of +a comma-delimited sequence of type syntax elements: + +```swift +extension Parser { + /// type-inheritance-clause → ':' type-inheritance-list + /// type-inheritance-list → attributes? type-identifier | attributes? type-identifier ',' type-inheritance-list + public mutating func parseInheritance() -> RawTypeInheritanceClauseSyntax { + // Eat the colon character. + let colon = self.eat(.colon) + // Start parsing a list of inherited types. + var elements = [RawInheritedTypeSyntax]() + do { + var keepGoing: RawTokenSyntax? = nil + repeat { + let type = self.parseType() + keepGoing = self.consume(if: .comma) + elements.append(RawInheritedTypeSyntax( + typeName: type, trailingComma: keepGoing, arena: self.arena)) + } while keepGoing != nil + } + // Construct the syntax for the list of inherited types. + let inheritedTypes = RawInheritedTypeListSyntax( + elements: elements, arena: self.arena) + return RawTypeInheritanceClauseSyntax( + colon: colon, + inheritedTypeCollection: inheritedTypes, + arena: self.arena) + } +} +``` + +This function populates an array of `RawInheritedTypeSyntax` elements separated +by commas. Since the commas must be present in the syntax tree, the `keepGoing` +variable plays double duty both as syntax and as the loop condition. It also +composes all of the parsing elements we have seen thus far, using both +unconditional parsing to assert an invariant, and conditional parsing to +define the loop condition. + +### Putting It All Together + +Recursive descent parsing applies these three parsing techniques at scale. The +resulting parser very closely mirrors the structure of the grammar that it is +parsing, is relatively fast, and easy to tweak. When adding methods to +parse new productions, it can often be helpful to work backwards from the +call to the initializer of the returned syntax node type. 
Each input to the +initializer can be obtained by recursively calling functions to parse nodes of +the appropriate type and applying the parsing techniques above to fill in +the rest. diff --git a/Sources/SwiftParser/SwiftParser.docc/SwiftParser.md b/Sources/SwiftParser/SwiftParser.docc/SwiftParser.md new file mode 100644 index 00000000000..2f837fcf5fc --- /dev/null +++ b/Sources/SwiftParser/SwiftParser.docc/SwiftParser.md @@ -0,0 +1,66 @@ +# ``SwiftParser`` + +A parser for the Swift programming language. + +## Overview + +The `SwiftParser` framework implements a parser that accepts Swift source text +as input and produces a SwiftSyntax syntax tree. This module is under active development and is not yet ready to completely replace `SwiftSyntaxParser`. + +## Quickstart + +The easiest way to parse Swift source code is to call the `Parser.parse` method, providing it with a string containing the source code: + +```swift +import SwiftParser +import SwiftSyntax + +let sourceText = +""" +func greeting(name: String) { + print("Hello, \(name)!") +} +""" + +// Parse the source code in sourceText into a syntax tree +let sourceFile: SourceFileSyntax = Parser.parse(source: sourceText) + +// The "description" of the source tree is the source-accurate view of what was parsed. +assert(sourceFile.description == sourceText) + +// Visualize the complete syntax tree. +dump(sourceFile) +``` + +## Design principles + +There are several design principles that govern the parser: + +* **Resilient**: The parser will attempt to recover from syntax errors, maintaining as much of the program structure as is feasible. It has no side effects, and in particular produces no errors regardless of how ill-formed the input source text is. Instead, all errors are described in the syntax tree itself, and can be diagnosed by a separate pass that identifies such errors. 
These errors come in one of two forms: + +* - *Unexpected nodes*: syntax that doesn't match any part of the Swift grammar is kept in “unexpected” child nodes, which are placed in the syntax tree and can be queried by clients. + - *Missing tokens*: syntax that is required by the Swift grammar but isn't present in the source code will be recorded in the resulting tree as "missing" tokens, which the parser will introduce. For example, a missing ')' in a function declaration will be inserted by the parser as a missing token. Such tokens will be skipped when rendering back to the original source code, but can also be used by tools to provide fixes for the source code. + +* **Efficient**: The parser should provide similar parsing performance to the existing C++ parser implementation that it seeks to replace. + +- **Source-preserving**: SwiftSyntax is designed to maintain all “trivia” (including whitespace, comments, etc.) precisely as it occurs in the source text, so that a syntax tree can be rendered back into text that is byte-for-byte identical to the original source. The parser must maintain this property, regardless of whether the input text was well-formed Swift code. +- **Minimal context**: The parser requires minimal context to parse Swift code, which consists of only those things required to handle a suitable Swift dialect, e.g., whether [regex literals](https://github.com/apple/swift-evolution/blob/main/proposals/0354-regex-literals.md) are supported. The parser can be invoked on any input source code, starting at any major production in the grammar (e.g., full source file, an individual type, an individual expression). +- **Incremental**: A parse tree produced for a source file can be incrementally updated for a new version of that source file, reusing syntax nodes where possible to reduce computation overhead and memory.
+ +### Lexical Analysis + +- +- +- +- + +### Parsing + +- +- +- + +### Development + +- +- diff --git a/Sources/SwiftParser/TokenConsumer.swift b/Sources/SwiftParser/TokenConsumer.swift new file mode 100644 index 00000000000..5e92df3eef9 --- /dev/null +++ b/Sources/SwiftParser/TokenConsumer.swift @@ -0,0 +1,85 @@ +//===---------------------- TokenConsumer.swift ---------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +import SwiftSyntax + +/// A type that consumes instances of `TokenSyntax`. +public protocol TokenConsumer { + associatedtype Token + /// The current token syntax being examined by the consumer + @_spi(RawSyntax) + var currentToken: Lexer.Lexeme { get } + /// Consumes the current token, regardless of its kind, and returns it. + mutating func consumeAnyToken() -> Token + /// Produces a token of the given kind to stand in for one that is absent + /// from the input. NOTE(review): the exact semantics are defined by the + /// conforming types - confirm against them. + mutating func missingToken(_ kind: RawTokenKind) -> Token +} + +// MARK: Consuming Tokens + +extension TokenConsumer { + /// Returns whether the kind of the current token matches the given + /// kind without consuming the current token. + /// + /// - Parameter kind: The kind to test for. + /// - Returns: `true` if the given `kind` matches the current token's kind. + public func at(_ kind: RawTokenKind) -> Bool { + return self.currentToken.tokenKind == kind + } + + /// Returns whether the kind of the current token is any of the given + /// kinds without consuming the current token. + /// + /// - Parameter kind: The kinds to test for. + /// - Returns: `true` if the current token's kind is contained in `kind`.
+ public func atAny(_ kind: [RawTokenKind]) -> Bool { + return kind.contains(self.currentToken.tokenKind) + } + + /// Examines the current token and consumes it if its kind matches the + /// given `RawTokenKind`. If a token was consumed, the result is that token, + /// else the result is `nil`. + /// + /// - Parameter kind: The kind of token to consume. + /// - Returns: A token of the given kind if one was consumed, else `nil`. + public mutating func consume(if kind: RawTokenKind) -> Token? { + guard self.at(kind) else { + return nil + } + return self.consumeAnyToken() + } + + /// Examines the current token and consumes it if its kind is in `kinds`. If a + /// token was consumed, the result is that token, else the result is `nil`. + /// + /// - Parameter kinds: The kinds of token to consume. + /// - Returns: A token of one of the given kinds if one was consumed, else `nil`. + public mutating func consume(ifAny kinds: RawTokenKind...) -> Token? { + for kind in kinds { + if let consumed = self.consume(if: kind) { + return consumed + } + } + return nil + } + + /// Consumes the current token, and asserts that the kind of token that was + /// consumed matches the given kind. + /// + /// If the token kind did not match, nothing is consumed and this function + /// traps, because the `nil` result of `consume(if:)` is force-unwrapped. It + /// is useful for enforcing structural invariants during parsing. + /// + /// - Parameter kind: The kind of token to consume. + /// - Returns: A token of the given kind. + public mutating func eat(_ kind: RawTokenKind) -> Token { + return self.consume(if: kind)! + } +} diff --git a/Sources/SwiftParser/TokenPrecedence.swift b/Sources/SwiftParser/TokenPrecedence.swift new file mode 100644 index 00000000000..5fafc9a367f --- /dev/null +++ b/Sources/SwiftParser/TokenPrecedence.swift @@ -0,0 +1,188 @@ +//===--- TokenPrecedence.swift --------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc.
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +import SwiftSyntax + +/// Describes how distinctive a token is for parser recovery. When expecting a +/// token, tokens with a lower token precedence may be skipped and considered +/// unexpected. +public enum TokenPrecedence: Comparable { + /// Tokens that can be used similarly to variable names or literals + case identifierLike + /// Keywords and operators that can occur in the middle of an expression + case exprKeyword + /// A token that starts a bracketed expression which typically occurs inside + /// a statement. + /// `closingDelimiter` must have precedence `weakPunctuator` or `weakBracketed` + case weakBracketed(closingDelimiter: RawTokenKind) + /// A punctuator that can occur inside a statement + case weakPunctuator + /// Keywords that start a new statement. + case stmtKeyword + /// A punctuator that is a strong indicator that it separates two distinct parts of the source code, like two statements + case strongPunctuator + /// The '{' token, because it typically marks the body of a declaration, and the `#if`/`#elseif`/`#else` directives. + /// `closingDelimiter` must have precedence `strongPunctuator` + case strongBracketet(closingDelimiter: RawTokenKind) + /// Tokens that start a new declaration + case declKeyword + + /// If the precedence is `weakBracketed` or `strongBracketet`, the closing delimiter of the bracketed group. + var closingTokenKind: RawTokenKind?
{ + switch self { + case .weakBracketed(closingDelimiter: let closingDelimiter): + return closingDelimiter + case .strongBracketet(closingDelimiter: let closingDelimiter): + return closingDelimiter + default: + return nil + } + } + + public static func < (lhs: TokenPrecedence, rhs: TokenPrecedence) -> Bool { + func precedence(_ precedence: TokenPrecedence) -> Int { + /// Should match the order of the cases in the enum. Associated closing + /// delimiters are ignored, so only the case itself takes part in the ordering. + switch precedence { + case .identifierLike: + return 0 + case .exprKeyword: + return 1 + case .weakBracketed: + return 2 + case .weakPunctuator: + return 3 + case .stmtKeyword: + return 4 + case .strongPunctuator: + return 5 + case .strongBracketet: + return 6 + case .declKeyword: + return 7 + } + } + + return precedence(lhs) < precedence(rhs) + } + + /// When expecting a token with `stmtKeyword` precedence or higher, newlines may be skipped to find that token. + /// For lower precedence groups, we consider newlines the end of the lookahead scope. + var shouldSkipOverNewlines: Bool { + return self >= .stmtKeyword + } + + /// Classifies `tokenKind` into its recovery precedence group. + init(_ tokenKind: RawTokenKind) { + switch tokenKind { + // MARK: Identifier like + case + // Literals + .capitalSelfKeyword, .falseKeyword, .floatingLiteral, .integerLiteral, .nilKeyword, .regexLiteral, .selfKeyword, .stringLiteral, .superKeyword, .trueKeyword, + // Legacy literals + .__column__Keyword, .__dso_handle__Keyword, .__file__Keyword, .__function__Keyword, .__line__Keyword, + // Pound literals + .poundAssertKeyword, .poundAvailableKeyword, .poundColorLiteralKeyword, .poundColumnKeyword, .poundDsohandleKeyword, .poundFileIDKeyword, .poundFileKeyword, .poundFileLiteralKeyword, .poundFilePathKeyword, .poundFunctionKeyword, .poundImageLiteralKeyword, .poundKeyPathKeyword, .poundLineKeyword, .poundSelectorKeyword, .poundSourceLocationKeyword, .poundUnavailableKeyword, + // Identifiers + .dollarIdentifier, .identifier, + // '_' can occur in types to replace a type identifier + .wildcardKeyword, + // String segment, string
interpolation anchor and pound don't really fit anywhere else + .pound, .stringInterpolationAnchor, .stringSegment, + // Give unknown tokens the lowest priority so they can be eaten as unexpected if necessary + .unknown: + self = .identifierLike + + // MARK: Expr keyword + case + // Keywords + .asKeyword, .isKeyword, .tryKeyword, + // We don't know much about which contextual keyword it is, so be conservative and allow considering it as unexpected. + .contextualKeyword, + // Keywords in function types (we should be allowed to skip them inside parentheses) + .rethrowsKeyword, .throwsKeyword, + // Operators can occur inside expressions + .postfixOperator, .prefixOperator, .spacedBinaryOperator, .unspacedBinaryOperator, + // Consider 'any' and 'inout' like a prefix operator to a type and a type is expression-like. + .anyKeyword, .inoutKeyword, + // 'where' can only occur in the signature of declarations. Consider the signature expression-like. + .whereKeyword, + // 'in' occurs in closure input/output definitions and for loops. Consider both constructs expression-like.
+ .inKeyword: + self = .exprKeyword + + // MARK: Weak bracketed + case .leftParen: + self = .weakBracketed(closingDelimiter: .rightParen) + case .leftSquareBracket: + self = .weakBracketed(closingDelimiter: .rightSquareBracket) + case .leftAngle: + self = .weakBracketed(closingDelimiter: .rightAngle) + case .multilineStringQuote, .rawStringDelimiter, .singleQuote, .stringQuote: + // Quote-like tokens are closed by a token of the same kind. + self = .weakBracketed(closingDelimiter: tokenKind) + + // MARK: Weak punctuator + case + // Chaining punctuators + .infixQuestionMark, .period, .postfixQuestionMark, .prefixPeriod,.exclamationMark, + // Misc + .backslash, .backtick, .colon, .comma, .ellipsis, .equal, .prefixAmpersand, + // Closing delimiters of the weak brackets + .rightAngle, .rightParen, .rightSquareBracket: + self = .weakPunctuator + + // MARK: Statement keyword + case + // Control-flow constructs + .deferKeyword, .doKeyword, .forKeyword, .guardKeyword, .ifKeyword, .repeatKeyword, .switchKeyword, .whileKeyword, + // Secondary parts of control-flow constructs + .caseKeyword, .catchKeyword, .defaultKeyword, .elseKeyword, + // Return-like statements + .breakKeyword, .continueKeyword, .fallthroughKeyword, .returnKeyword, .throwKeyword, .yield, + // Misc + .importKeyword, + // #error and #warning are statement-like + .poundErrorKeyword, .poundWarningKeyword: + self = .stmtKeyword + + // MARK: Strong bracketet + case .leftBrace: + self = .strongBracketet(closingDelimiter: .rightBrace) + case .poundElseifKeyword, .poundElseKeyword, .poundIfKeyword: + self = .strongBracketet(closingDelimiter: .poundEndifKeyword) + + // MARK: Strong punctuator + case + // Semicolon separates two statements + .semicolon, + // Arrow is a strong indicator in a function type that we are now in the return type + .arrow, + // '@' typically occurs at the start of declarations + .atSign, + // '}' and '#endif' are the closing delimiters of the strongBracketet tokens + .poundEndifKeyword, .rightBrace, + // EOF is here because it is a very strong marker and doesn't belong anywhere else + .eof: + self = .strongPunctuator + + //
MARK: Decl keywords + case + // Types + .associatedtypeKeyword, .classKeyword, .enumKeyword, .extensionKeyword, .protocolKeyword, .structKeyword, .typealiasKeyword, + // Access and declaration modifiers + .fileprivateKeyword, .internalKeyword, .privateKeyword, .publicKeyword, .staticKeyword, + // Functions + .deinitKeyword, .funcKeyword, .initKeyword, .subscriptKeyword, + // Variables + .letKeyword, .varKeyword, + // Operator stuff + .operatorKeyword, .precedencegroupKeyword: + self = .declKeyword + } + } +} diff --git a/Sources/SwiftParser/TopLevel.swift b/Sources/SwiftParser/TopLevel.swift new file mode 100644 index 00000000000..538fb0b2ecc --- /dev/null +++ b/Sources/SwiftParser/TopLevel.swift @@ -0,0 +1,171 @@ +//===------------------------- TopLevel.swift -----------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +extension Parser { + /// Parse the top level items in a file into a source file. + /// + /// This function is the true parsing entrypoint that the high-level + /// ``Parser/parse(source:parseTransition:filenameForDiagnostics:languageVersion:enableBareSlashRegexLiteral:)-7tndx`` + /// API calls. + /// + /// Grammar + /// ======= + /// + /// source-file → top-level-declaration? + @_spi(RawSyntax) + public mutating func parseSourceFile() -> RawSourceFileSyntax { + let items = self.parseTopLevelCodeBlockItems() + let eof = self.eat(.eof) + return .init(statements: items, eofToken: eof, arena: self.arena) + } +} + +extension Parser { + /// Parse the top level items in a source file.
+ /// + /// Grammar + /// ======= + /// + /// top-level-declaration → statements? + mutating func parseTopLevelCodeBlockItems() -> RawCodeBlockItemListSyntax { + var elements = [RawCodeBlockItemSyntax]() + // Keep parsing items until the end of the input is reached. + while !self.at(.eof) { + elements.append(self.parseCodeBlockItem()) + } + return .init(elements: elements, arena: self.arena) + } + + /// The optional form of `parseCodeBlock` that checks to see if the parser has + /// encountered a left brace before proceeding. + /// + /// This function is used when parsing places where function bodies are + /// optional - like the function requirements in protocol declarations. + /// + /// - Returns: The parsed code block, or `nil` if the current token is not a left brace. + mutating func parseOptionalCodeBlock() -> RawCodeBlockSyntax? { + guard self.at(.leftBrace) else { + return nil + } + return self.parseCodeBlock() + } + + /// Parse a code block. + /// + /// Grammar + /// ======= + /// + /// code-block → '{' statements? '}' + mutating func parseCodeBlock() -> RawCodeBlockSyntax { + let (unexpectedBeforeLBrace, lbrace) = self.expect(.leftBrace) + var items = [RawCodeBlockItemSyntax]() + // Parse items until the closing brace or the end of input, whichever comes first. + while !self.at(.eof) && !self.at(.rightBrace) { + items.append(self.parseCodeBlockItem()) + } + let (unexpectedBeforeRBrace, rbrace) = self.expect(.rightBrace) + + // NOTE(review): both branches build the same list - when `items` is empty, + // `[]` and `items` are identical - so this conditional looks redundant; + // confirm before simplifying. + let itemList: RawCodeBlockItemListSyntax + if items.isEmpty && (lbrace.isMissing || rbrace.isMissing) { + itemList = .init(elements: [], arena: self.arena) + } else { + itemList = .init(elements: items, arena: self.arena) + } + return .init( + unexpectedBeforeLBrace, + leftBrace: lbrace, + statements: itemList, + unexpectedBeforeRBrace, + rightBrace: rbrace, + arena: self.arena) + } + + /// Parse an individual item - either in a code block or at the top level. + /// + /// This function performs the majority of recovery because it + /// is both the first and last opportunity the parser has to examine the + /// input stream before encountering a closing delimiter or the end of input. + /// + /// Grammar + /// ======= + /// + /// statement → expression ';'?
+ /// statement → declaration ';'? + /// statement → loop-statement ';'? + /// statement → branch-statement ';'? + /// statement → labeled-statement ';'? + /// statement → control-transfer-statement ';'? + /// statement → defer-statement ';'? + /// statement → do-statement ';'? + /// statement → compiler-control-statement + /// statements → statement statements? + @_spi(RawSyntax) + public mutating func parseCodeBlockItem() -> RawCodeBlockItemSyntax { + // FIXME: It is unfortunate that the Swift book refers to these as + // "statements" and not "items". + if let recovery = self.recoverFromBadItem() { + return recovery + } + + let item = self.parseItem() + let semi = self.consume(if: .semicolon) + + // A missing expression or statement means no item could be parsed here; + // gather the offending tokens so that they are kept in the tree. + let errorTokens: RawSyntax? + if item.is(RawMissingExprSyntax.self) || item.is(RawMissingStmtSyntax.self) { + var elements = [RawTokenSyntax]() + if self.at(.atSign) { + // Recover from erroneously placed attribute. + elements.append(self.eat(.atSign)) + if self.currentToken.isIdentifier { + elements.append(self.consumeAnyToken()) + } + } + + // Collect recovered tokens until the end of input, a closing brace, a + // `#if`/`#else`/`#elseif` directive, or the start of a statement or + // declaration is reached. + while + !self.at(.eof), + !self.at(.rightBrace), + !self.at(.poundIfKeyword), !self.at(.poundElseKeyword), + !self.at(.poundElseifKeyword), + !self.lookahead().isStartOfStatement(), + !self.lookahead().isStartOfDeclaration() + { + let tokens = self.recover() + // Stop once recovery yields nothing - presumably so that the loop + // cannot spin without making progress. + guard !tokens.isEmpty else { + break + } + elements.append(contentsOf: tokens) + } + // NOTE(review): `elements` can still be empty here (e.g. when the loop + // never runs), yet it is wrapped in a `RawNonEmptyTokenListSyntax` - + // confirm that this is intended. + errorTokens = RawSyntax(RawNonEmptyTokenListSyntax(elements: elements, arena: self.arena)) + } else { + errorTokens = nil + } + return .init(item: item, semicolon: semi, errorTokens: errorTokens, arena: self.arena) + } + + /// Parse a single item, trying the `#if`, `#line`, and `#sourceLocation` + /// directives first, then a declaration, then a statement, and falling back + /// to an expression. + private mutating func parseItem() -> RawSyntax { + if self.at(.poundIfKeyword) { + return RawSyntax(self.parsePoundIfDirective { + $0.parseCodeBlockItem() + } syntax: { parser, items in + return RawSyntax(RawCodeBlockItemListSyntax(elements: items, arena: parser.arena)) + }) + } else if self.at(.poundLineKeyword) { + return RawSyntax(self.parsePoundLineDirective()) + } else
if self.at(.poundSourceLocationKeyword) { + return RawSyntax(self.parsePoundSourceLocationDirective()) + } else if self.lookahead().isStartOfDeclaration() { + return RawSyntax(self.parseDeclaration()) + } else if self.lookahead().isStartOfStatement() { + return RawSyntax(self.parseStatement()) + } else { + return RawSyntax(self.parseExpression()) + } + } +} diff --git a/Sources/SwiftParser/TriviaParser.swift b/Sources/SwiftParser/TriviaParser.swift new file mode 100644 index 00000000000..973171ad186 --- /dev/null +++ b/Sources/SwiftParser/TriviaParser.swift @@ -0,0 +1,189 @@ +//===----------------------- TriviaParser.swift ---------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +public struct TriviaParser { + @_spi(RawSyntax) + public static func parseTrivia(_ source: SyntaxText, position: TriviaPosition) -> [RawTriviaPiece] { + if source.isEmpty { return [] } + + var pieces: [RawTriviaPiece] = [] + var cursor = Lexer.Cursor( + input: UnsafeBufferPointer(start: source.baseAddress, count: source.count), + previous: 0) + + while true { + let start = cursor + switch cursor.advance() { + case nil: + // Finished. + assert(cursor.isAtEndOfFile) + return pieces + + case UInt8(ascii: "\n"): + // "\n"/0A/LF: .newlines. + cursor.advance(while: { $0 == "\n" }) + pieces.append(.newlines(start.distance(to: cursor))) + continue + + case UInt8(ascii: "\r"): + // "\r\n"/0D0A/CRLF: .carriageReturnLineFeeds. + // "\r"/0D/CR: .carriageReturns.
+ pieces.append(cursor.lexCarriageReturn(start: start)) + continue + + case UInt8(ascii: " "): + // " "/20/Space: .spaces. + cursor.advance(while: { $0 == " " }) + pieces.append(.spaces(start.distance(to: cursor))) + continue + + case UInt8(ascii: "\t"): + // "\t"/09/HT: .tabs. + cursor.advance(while: { $0 == "\t" }) + pieces.append(.tabs(start.distance(to: cursor))) + continue + + case UInt8(ascii: "\u{000B}"): + // "\v"/0B/VT: .verticalTabs + cursor.advance(while: { $0 == "\u{000B}" }) + pieces.append(.verticalTabs(start.distance(to: cursor))) + continue + + case UInt8(ascii: "\u{000C}"): + // "\f"/0C/FF: .formfeeds + cursor.advance(while: { $0 == "\u{000C}" }) + pieces.append(.formfeeds(start.distance(to: cursor))) + continue + + case UInt8(ascii: "/"): + if !cursor.isAtEndOfFile { + switch cursor.peek() { + case UInt8(ascii: "/"): + pieces.append(cursor.lexLineComment(start: start)) + continue + case UInt8(ascii: "*"): + pieces.append(cursor.lexBlockComment(start: start)) + continue + default: + break + } + } + + case UInt8(ascii: "#"): + // "#!...": .shebang + // NOTE: .shebang appears only if this trivia is at the start of the + // file. We don't know if this trivia is at the start of the file, but + // we believe that the lexer lexed it accordingly. + if position == .leading && pieces.isEmpty && cursor.advance(if: { $0 == "!" }) { + _ = cursor.advanceToEndOfLine() + pieces.append(.shebang(start.textUpTo(cursor))) + continue + } + + case UInt8(ascii: "<"), UInt8(ascii: ">"): + // SCM conflict markers. + if cursor.tryLexConflictMarker(start: start) { + pieces.append(.unexpectedText(start.textUpTo(cursor))) + continue + } + + default: + break + } + + // Default handling for anything that didn't 'continue' in the above + // switch statement. + + // Other characters must be "unexpected text". Advance to next trivia + // piece start. 
+ cursor.advance(while: { char in + switch char { + case "\n", "\r", "\t", "\u{000B}", "\u{000C}", "/", "#", "<", ">": + return false + default: + return true + } + }) + + // If the last piece was `.unexpectedText` (e.g `/` was in an + // unexpected text trivia piece and were not a comment), merge it to + // the last piece. + if case .unexpectedText(let preUnexpected) = pieces.last { + assert(start.pointer == preUnexpected.baseAddress! + preUnexpected.count) + let mergedText = SyntaxText( + baseAddress: preUnexpected.baseAddress, + count: preUnexpected.count + start.distance(to: cursor)) + pieces[pieces.count - 1] = .unexpectedText(mergedText) + } else { + pieces.append(.unexpectedText(start.textUpTo(cursor))) + } + } + + return pieces + } + +} + +extension Lexer.Cursor { + fileprivate mutating func lexCarriageReturn(start: Lexer.Cursor) -> RawTriviaPiece { + assert(self.previous == UInt8(ascii: "\r")) + if self.advance(if: { $0 == "\n" }) { + var mark = self + while true { + if self.advance(if: { $0 == "\r"}), + self.advance(if: { $0 == "\n"}) { + mark = self + continue + } else { + self = mark + break + } + } + return .carriageReturnLineFeeds(start.distance(to: self) / 2) + } else { + var mark = self + while true { + if self.advance(if: { $0 == "\r"}), !self.advance(if: { $0 == "\n"}) { + mark = self + continue + } else { + self = mark + break + } + } + return .carriageReturns(start.distance(to: self)) + } + } + + fileprivate mutating func lexLineComment(start: Lexer.Cursor) -> RawTriviaPiece { + // "///...": .docLineComment. + // "//...": .lineComment. + assert(self.previous == UInt8(ascii: "/") && self.peek() == UInt8(ascii: "/")) + let isDocComment = self.input.count > 1 && self.peek(at: 1) == UInt8(ascii: "/") + _ = self.advanceToEndOfLine() + let contents = start.textUpTo(self) + return isDocComment ? 
.docLineComment(contents) : .lineComment(contents) + } + + fileprivate mutating func lexBlockComment(start: Lexer.Cursor) -> RawTriviaPiece { + // "/**...*/": .docBlockComment. + // "/*...*/": .blockComment. + // "/**/": .blockComment. + assert(self.previous == UInt8(ascii: "/") && self.peek() == UInt8(ascii: "*")) + let isDocComment = self.input.count > 2 && self.peek(at: 1) == UInt8(ascii: "*") && self.peek(at: 2) != UInt8(ascii: "/") + _ = self.advanceToEndOfSlashStarComment() + let contents = start.textUpTo(self) + return isDocComment ? .docBlockComment(contents) : .blockComment(contents) + } +} diff --git a/Sources/SwiftParser/Types.swift b/Sources/SwiftParser/Types.swift new file mode 100644 index 00000000000..df181d1ad66 --- /dev/null +++ b/Sources/SwiftParser/Types.swift @@ -0,0 +1,879 @@ +//===-------------------------- Types.swift -------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +@_spi(RawSyntax) import SwiftSyntax + +extension Parser { + /// Parse a type. 
+ /// + /// Grammar + /// ======= + /// + /// type → function-type + /// type → array-type + /// type → dictionary-type + /// type → type-identifier + /// type → tuple-type + /// type → optional-type + /// type → implicitly-unwrapped-optional-type + /// type → protocol-composition-type + /// type → opaque-type + /// type → metatype-type + /// type → any-type + /// type → self-type + /// type → '(' type ')' + @_spi(RawSyntax) + public mutating func parseType() -> RawTypeSyntax { + let (specifier, attrList) = self.parseTypeAttributeList() + var base = RawTypeSyntax(self.parseSimpleOrCompositionType()) + if self.lookahead().isAtFunctionTypeArrow() { + let firstEffect = self.parseEffectsSpecifier() + let secondEffect = self.parseEffectsSpecifier() + let arrow = self.eat(.arrow) + let returnTy = self.parseType() + + let leftParen: RawTokenSyntax + let arguments: RawTupleTypeElementListSyntax + let rightParen: RawTokenSyntax + if let input = base.as(RawTupleTypeSyntax.self) { + leftParen = input.leftParen + arguments = input.elements + rightParen = input.rightParen + } else { + leftParen = RawTokenSyntax(missing: .leftParen, arena: self.arena) + arguments = RawTupleTypeElementListSyntax(elements: [ + RawTupleTypeElementSyntax( + inOut: nil, name: nil, secondName: nil, colon: nil, type: base, + ellipsis: nil, initializer: nil, trailingComma: nil, arena: self.arena) + ], arena: self.arena) + rightParen = RawTokenSyntax(missing: .rightParen, arena: self.arena) + } + + base = RawTypeSyntax(RawFunctionTypeSyntax( + leftParen: leftParen, + arguments: arguments, + rightParen: rightParen, + asyncKeyword: firstEffect, + throwsOrRethrowsKeyword: secondEffect, + arrow: arrow, + returnType: returnTy, + arena: self.arena)) + } + + if attrList != nil || specifier != nil { + return RawTypeSyntax(RawAttributedTypeSyntax( + specifier: specifier, + attributes: attrList, + baseType: base, arena: self.arena)) + } else { + return RawTypeSyntax(base) + } + } + + /// Parse a protocol 
composition involving at least one element. + /// + /// Grammar + /// ======= + /// + /// type-identifier → type-name generic-argument-clause? | type-name generic-argument-clause? '.' type-identifier + /// type-name → identifier + /// + /// protocol-composition-type → type-identifier '&' protocol-composition-continuation + /// protocol-composition-continuation → type-identifier | protocol-composition-type + @_spi(RawSyntax) + public mutating func parseSimpleOrCompositionType() -> RawTypeSyntax { + let someOrAny: RawTokenSyntax? + if self.currentToken.isContextualKeyword("some") || self.currentToken.isContextualKeyword("any") { + someOrAny = self.consumeAnyToken() + } else { + someOrAny = nil + } + + var base = self.parseSimpleType() + guard self.currentToken.isContextualPunctuator("&") else { + if let someOrAny = someOrAny { + return RawTypeSyntax(RawConstrainedSugarTypeSyntax( + someOrAnySpecifier: someOrAny, baseType: base, arena: self.arena)) + } else { + return base + } + } + + var elements = [RawCompositionTypeElementSyntax]() + if self.currentToken.isContextualPunctuator("&") { + let firstAmpersand = self.consumeAnyToken() + elements.append(RawCompositionTypeElementSyntax( + type: base, ampersand: firstAmpersand, arena: self.arena)) + + var keepGoing = false + repeat { + let elementType = self.parseSimpleType() + keepGoing = self.currentToken.isContextualPunctuator("&") + let ampersand: RawTokenSyntax? 
+        if keepGoing {
+          ampersand = self.consumeAnyToken()
+        } else {
+          ampersand = nil
+        }
+        elements.append(RawCompositionTypeElementSyntax(
+          type: elementType, ampersand: ampersand, arena: self.arena))
+      } while keepGoing
+
+      base = RawTypeSyntax(RawCompositionTypeSyntax(
+        elements: RawCompositionTypeElementListSyntax(elements: elements, arena: self.arena),
+        arena: self.arena))
+    }
+
+    if let someOrAny = someOrAny {
+      return RawTypeSyntax(RawConstrainedSugarTypeSyntax(
+        someOrAnySpecifier: someOrAny, baseType: base, arena: self.arena))
+    } else {
+      return base
+    }
+  }
+
+  /// Parse a "simple" type
+  ///
+  /// The base type is chosen from the current token kind; any number of
+  /// trailing `.Type` / `.Protocol`, `?` and `!` suffixes are then folded
+  /// into the result. `?` and `!` are only consumed when they do not start a
+  /// new line. If the current token cannot start a simple type, a
+  /// `RawMissingTypeSyntax` is returned and nothing is consumed.
+  ///
+  /// Grammar
+  /// =======
+  ///
+  ///     type → type-identifier
+  ///     type → tuple-type
+  ///     type → array-type
+  ///     type → dictionary-type
+  ///     type → metatype-type
+  ///
+  ///     metatype-type → type '.' 'Type' | type '.' 'Protocol'
+  @_spi(RawSyntax)
+  public mutating func parseSimpleType() -> RawTypeSyntax {
+    var base: RawTypeSyntax
+    switch self.currentToken.tokenKind {
+    case .capitalSelfKeyword,
+         .anyKeyword,
+         .identifier:
+      base = self.parseTypeIdentifier()
+    case .leftParen:
+      base = RawTypeSyntax(self.parseTupleTypeBody())
+    case .leftSquareBracket:
+      base = RawTypeSyntax(self.parseCollectionType())
+    case .wildcardKeyword:
+      base = RawTypeSyntax(self.parsePlaceholderType())
+    default:
+      return RawTypeSyntax(RawMissingTypeSyntax(arena: self.arena))
+    }
+
+    // '.Type', '.Protocol', '?', and '!' still leave us with type-simple.
+    var loopCondition = LoopProgressCondition()
+    while loopCondition.evaluate(currentToken) {
+      if self.at(.period) || self.at(.prefixPeriod) {
+        if self.peek().isContextualKeyword("Type") || self.peek().isContextualKeyword("Protocol") {
+          let period = self.consumeAnyToken()
+          let type = self.consumeIdentifier()
+          base = RawTypeSyntax(RawMetatypeTypeSyntax(
+            baseType: base, period: period, typeOrProtocol: type, arena: self.arena))
+          // Keep looking for further suffixes (e.g. `T.Type.Type`), the same
+          // way the lookahead in `canParseType` does. Without this the loop
+          // fell through to `break` after a single metatype suffix.
+          continue
+        }
+      }
+
+      if !self.currentToken.isAtStartOfLine {
+        if self.currentToken.isOptionalToken {
+          base = RawTypeSyntax(self.parseOptionalType(base))
+          continue
+        }
+        if self.currentToken.isImplicitlyUnwrappedOptionalToken {
+          base = RawTypeSyntax(self.parseImplicitlyUnwrappedOptionalType(base))
+          continue
+        }
+      }
+      break
+    }
+    return base
+  }
+
+  /// Parse an optional type.
+  ///
+  /// Grammar
+  /// =======
+  ///
+  ///     optional-type → type '?'
+  ///
+  /// - Parameter base: The already-parsed type that the '?' applies to.
+  @_spi(RawSyntax)
+  public mutating func parseOptionalType(_ base: RawTypeSyntax) -> RawOptionalTypeSyntax {
+    // NOTE(review): `parseSimpleType` gates this call on `isOptionalToken`,
+    // which also accepts operator tokens that merely begin with '?'; the
+    // lookahead uses `consumePrefix("?", ...)` for that case. Confirm that
+    // `eat(.postfixQuestionMark)` copes with such tokens.
+    let mark = self.eat(.postfixQuestionMark)
+    return RawOptionalTypeSyntax(
+      wrappedType: base, questionMark: mark, arena: self.arena)
+  }
+
+  /// Parse an implicitly unwrapped optional type.
+  ///
+  /// Grammar
+  /// =======
+  ///
+  ///     implicitly-unwrapped-optional-type → type '!'
+  ///
+  /// - Parameter base: The already-parsed type that the '!' applies to.
+  @_spi(RawSyntax)
+  public mutating func parseImplicitlyUnwrappedOptionalType(_ base: RawTypeSyntax) -> RawImplicitlyUnwrappedOptionalTypeSyntax {
+    let mark = self.eat(.exclamationMark)
+    return RawImplicitlyUnwrappedOptionalTypeSyntax(
+      wrappedType: base, exclamationMark: mark, arena: self.arena)
+  }
+
+  @_spi(RawSyntax)
+  public mutating func parseTypeIdentifier() -> RawTypeSyntax {
+    if self.currentToken.tokenKind == .anyKeyword {
+      return RawTypeSyntax(self.parseAnyType())
+    }
+
+    var result: RawTypeSyntax?
+    var keepGoing: RawTokenSyntax? = nil
+    repeat {
+      let (name, _) = self.parseDeclNameRef()
+      let generics: RawGenericArgumentClauseSyntax?
+ if self.currentToken.isAnyOperator && self.currentToken.tokenText == "<" { + generics = self.parseGenericArguments() + } else { + generics = nil + } + if let keepGoing = keepGoing { + result = RawTypeSyntax(RawMemberTypeIdentifierSyntax( + baseType: result!, + period: keepGoing, + name: name, + genericArgumentClause: generics, + arena: self.arena)) + } else { + result = RawTypeSyntax(RawSimpleTypeIdentifierSyntax( + name: name, genericArgumentClause: generics, arena: self.arena)) + } + keepGoing = self.consume(if: .period) ?? self.consume(if: .prefixPeriod) + } while keepGoing != nil + + return result! + } + + /// Parse the existential `Any` type. + /// + /// Grammar + /// ======= + /// + /// any-type → Any + @_spi(RawSyntax) + public mutating func parseAnyType() -> RawSimpleTypeIdentifierSyntax { + let name = self.eat(.anyKeyword) + return RawSimpleTypeIdentifierSyntax( + name: name, genericArgumentClause: nil, arena: self.arena) + } + + /// Parse a type placeholder. + /// + /// Grammar + /// ======= + /// + /// placeholder-type → wildcard + @_spi(RawSyntax) + public mutating func parsePlaceholderType() -> RawSimpleTypeIdentifierSyntax { + let name = self.eat(.wildcardKeyword) + // FIXME: Need a better syntax node than this + return RawSimpleTypeIdentifierSyntax( + name: name, genericArgumentClause: nil, arena: self.arena) + } +} + +extension Parser { + /// Parse the generic arguments applied to a type. + /// + /// Grammar + /// ======= + /// + /// generic-argument-clause → '<' generic-argument-list '>' + /// generic-argument-list → generic-argument | generic-argument ',' generic-argument-list + /// generic-argument → type + @_spi(RawSyntax) + public mutating func parseGenericArguments() -> RawGenericArgumentClauseSyntax { + assert(self.currentToken.starts(with: "<")) + let langle = self.consumePrefix("<", as: .leftAngle) + var arguments = [RawGenericArgumentSyntax]() + do { + var keepGoing: RawTokenSyntax? 
= nil + repeat { + let type = self.parseType() + if arguments.isEmpty && type.is(RawMissingTypeSyntax.self) { + break + } + keepGoing = self.consume(if: .comma) + arguments.append(RawGenericArgumentSyntax( + argumentType: type, trailingComma: keepGoing, arena: self.arena)) + } while keepGoing != nil + } + + let rangle: RawTokenSyntax + if self.currentToken.starts(with: ">") { + rangle = self.consumePrefix(">", as: .rightAngle) + } else { + rangle = RawTokenSyntax(missing: .rightAngle, arena: self.arena) + } + + let args: RawGenericArgumentListSyntax + if arguments.isEmpty && rangle.isMissing { + args = RawGenericArgumentListSyntax(elements: [], arena: self.arena) + } else { + args = RawGenericArgumentListSyntax(elements: arguments, arena: self.arena) + } + return RawGenericArgumentClauseSyntax( + leftAngleBracket: langle, + arguments: args, + rightAngleBracket: rangle, + arena: self.arena) + } +} + +extension Parser { + /// Parse a tuple type. + /// + /// Grammar + /// ======= + /// + /// tuple-type → '(' ')' | '(' tuple-type-element ',' tuple-type-element-list ')' + /// tuple-type-element-list → tuple-type-element | tuple-type-element ',' tuple-type-element-list + /// tuple-type-element → element-name type-annotation | type + /// element-name → identifier + @_spi(RawSyntax) + public mutating func parseTupleTypeBody() -> RawTupleTypeSyntax { + let lparen = self.eat(.leftParen) + var elements = [RawTupleTypeElementSyntax]() + do { + var keepGoing = true + while !self.at(.eof) && !self.at(.rightParen) && keepGoing { + let first: RawTokenSyntax? + let second: RawTokenSyntax? + let unexpectedBeforeColon: RawUnexpectedNodesSyntax? + let colon: RawTokenSyntax? 
+ if self.lookahead().startsParameterName(false) { + first = self.parseArgumentLabel() + if self.currentToken.tokenKind == .colon { + (unexpectedBeforeColon, colon) = self.expect(.colon) + second = nil + } else if self.currentToken.canBeArgumentLabel && self.peek().tokenKind == .colon { + second = self.parseArgumentLabel() + (unexpectedBeforeColon, colon) = self.expect(.colon) + } else { + second = nil + unexpectedBeforeColon = nil + colon = RawTokenSyntax(missing: .colon, arena: self.arena) + } + } else { + first = nil + second = nil + unexpectedBeforeColon = nil + colon = nil + } + // Parse the type annotation. + let type = self.parseType() + let trailingComma = self.consume(if: .comma) + keepGoing = trailingComma != nil + elements.append(RawTupleTypeElementSyntax( + inOut: nil, + name: first, + secondName: second, + unexpectedBeforeColon, + colon: colon, + type: type, + ellipsis: nil, + initializer: nil, + trailingComma: trailingComma, + arena: self.arena + )) + } + } + let (unexpectedBeforeRParen, rparen) = self.expect(.rightParen) + return RawTupleTypeSyntax( + leftParen: lparen, + elements: RawTupleTypeElementListSyntax(elements: elements, arena: self.arena), + unexpectedBeforeRParen, + rightParen: rparen, + arena: self.arena) + } +} + +extension Parser { + /// Parse an array or dictionary type. 
+ /// + /// Grammar + /// ======= + /// + /// array-type → '[' type ']' + /// + /// dictionary-type → '[' type ':' type ']' + /// + /// Produces a dictionary type when a ':' follows the first element type, + /// and an array type otherwise. + @_spi(RawSyntax) + public mutating func parseCollectionType() -> RawTypeSyntax { + let lsquare = self.eat(.leftSquareBracket) + let firstType = self.parseType() + if self.at(.colon) { + let colon = self.eat(.colon) + let secondType = self.parseType() + let (unexpectedBeforeRSquareBracket, rSquareBracket) = self.expect(.rightSquareBracket) + return RawTypeSyntax(RawDictionaryTypeSyntax( + leftSquareBracket: lsquare, + keyType: firstType, + colon: colon, + valueType: secondType, + unexpectedBeforeRSquareBracket, + rightSquareBracket: rSquareBracket, + arena: self.arena + )) + } else { + let (unexpectedBeforeRSquareBracket, rSquareBracket) = self.expect(.rightSquareBracket) + return RawTypeSyntax(RawArrayTypeSyntax( + leftSquareBracket: lsquare, + elementType: firstType, + unexpectedBeforeRSquareBracket, + rightSquareBracket: rSquareBracket, + arena: self.arena + )) + } + } +} + +extension Parser.Lookahead { + mutating func canParseType() -> Bool { + // Accept 'inout' for better recovery. 
+ _ = self.consume(if: .inoutKeyword) + + if self.currentToken.isContextualKeyword("some") { + self.consumeAnyToken() + } else if self.currentToken.isContextualKeyword("any") { + self.consumeAnyToken() + } + + switch self.currentToken.tokenKind { + case .capitalSelfKeyword, .anyKeyword: + guard self.canParseTypeIdentifier() else { + return false + } + case .protocolKeyword, // Deprecated composition syntax + .identifier: + guard self.canParseIdentifierTypeOrCompositionType() else { + return false + } + case .leftParen: + self.consumeAnyToken() + guard self.canParseTupleBodyType() else { + return false + } + case .atSign: + self.consumeAnyToken() + self.skipTypeAttribute() + return self.canParseType() + case .leftSquareBracket: + self.consumeAnyToken() + guard self.canParseType() else { + return false + } + if self.consume(if: .colon) != nil { + guard self.canParseType() else { + return false + } + } + guard self.consume(if: .rightSquareBracket) != nil else { + return false + } + case .wildcardKeyword: + self.consumeAnyToken() + default: + return false + } + + // '.Type', '.Protocol', '?', and '!' still leave us with type-simple. + var loopCondition = LoopProgressCondition() + while loopCondition.evaluate(currentToken) { + if (self.at(.period) || self.at(.prefixPeriod)) && + (self.peek().isContextualKeyword("Type") + || self.peek().isContextualKeyword("Protocol")) { + self.consumeAnyToken() + self.consumeIdentifier() + continue + } + if self.currentToken.isOptionalToken { + self.consumePrefix("?", as: .postfixQuestionMark) + continue + } + if self.currentToken.isImplicitlyUnwrappedOptionalToken { + self.consumePrefix("!", as: .exclamationMark) + continue + } + break + } + + guard self.isAtFunctionTypeArrow() else { + return true + } + + // Handle type-function if we have an '->' with optional + // 'async' and/or 'throws'. 
+ while self.currentToken.isEffectsSpecifier { + self.consumeAnyToken() + } + + guard self.consume(if: .arrow) != nil else { + return false + } + + return self.canParseType() + } + + mutating func canParseTupleBodyType() -> Bool { + guard + !self.at(.rightParen) && + !self.at(.rightBrace) && + !self.currentToken.isEllipsis && + !self.isStartOfDeclaration() + else { + return self.consume(if: .rightParen) != nil + } + + repeat { + // The contextual inout marker is part of argument lists. + _ = self.consume(if: .inoutKeyword) + + // If the tuple element starts with "ident :", then it is followed + // by a type annotation. + if self.startsParameterName(/*isClosure=*/false) { + self.consumeAnyToken() + if self.currentToken.canBeArgumentLabel { + self.consumeAnyToken() + guard self.at(.colon) else { + return false + } + } + self.eat(.colon) + + // Parse a type. + guard self.canParseType() else { + return false + } + + // Parse default values. These aren't actually allowed, but we recover + // better if we skip over them. + if self.consume(if: .equal) != nil { + while !self.at(.eof) && !self.at(.rightParen) + && !self.at(.rightBrace) && !self.currentToken.isEllipsis + && !self.at(.comma) && !self.isStartOfDeclaration() { + self.skipSingle() + } + } + + continue + } + + // Otherwise, this has to be a type. + guard self.canParseType() else { + return false + } + + if self.currentToken.isEllipsis { + self.consumeAnyToken() + } + } while self.consume(if: .comma) != nil + return self.consume(if: .rightParen) != nil + } + + mutating func canParseTypeIdentifier() -> Bool { + var loopCondition = LoopProgressCondition() + while loopCondition.evaluate(currentToken) { + guard self.canParseSimpleTypeIdentifier() else { + return false + } + + // Treat 'Foo.' as an attempt to write a dotted type + // unless it is followed by 'Type' or 'Protocol'. 
+ if (self.at(.period) || self.at(.prefixPeriod)) && + !self.peek().isContextualKeyword("Type") && + !self.peek().isContextualKeyword("Protocol") { + self.consumeAnyToken() + } else { + return true + } + } + preconditionFailure("Should return from inside the loop") + } + + func isAtFunctionTypeArrow() -> Bool { + if self.at(.arrow) { + return true + } + + if self.currentToken.isEffectsSpecifier { + if self.peek().tokenKind == .arrow { + return true + } + + if self.peek().isEffectsSpecifier { + var backtrack = self.lookahead() + backtrack.consumeAnyToken() + backtrack.consumeAnyToken() + return backtrack.isAtFunctionTypeArrow() + } + + return false + } + + return false + } + + mutating func canParseIdentifierTypeOrCompositionType() -> Bool { + if self.at(.protocolKeyword) { + return self.canParseOldStyleProtocolComposition() + } + + var loopCondition = LoopProgressCondition() + while loopCondition.evaluate(currentToken) { + guard self.canParseTypeIdentifier() else { + return false + } + + if self.currentToken.isContextualPunctuator("&") { + self.consumeAnyToken() + continue + } else { + return true + } + } + preconditionFailure("Should return from inside the loop") + } + + mutating func canParseOldStyleProtocolComposition() -> Bool { + self.eat(.protocolKeyword) + + // Check for the starting '<'. + guard self.currentToken.starts(with: "<") else { + return false + } + + self.consumePrefix("<", as: .leftAngle) + + // Check for empty protocol composition. + if self.currentToken.starts(with: ">") { + self.consumePrefix(">", as: .rightAngle) + return true + } + + // Parse the type-composition-list. + repeat { + guard self.canParseTypeIdentifier() else { + return false; + } + } while self.consume(if: .comma) != nil + + // Check for the terminating '>'. + guard self.currentToken.starts(with: ">") else { + return false + } + self.consumePrefix(">", as: .rightAngle) + + return true + } + + mutating func canParseSimpleTypeIdentifier() -> Bool { + // Parse an identifier. 
+ guard self.currentToken.isIdentifier || self.at(.capitalSelfKeyword) || self.at(.anyKeyword) else { + return false + } + self.consumeAnyToken() + + // Parse an optional generic argument list. + if self.currentToken.starts(with: "<") && !self.canParseGenericArguments() { + return false + } + + return true + } + + func canParseAsGenericArgumentList() -> Bool { + guard self.currentToken.isAnyOperator && self.currentToken.tokenText == "<" else { + return false + } + + var lookahead = self.lookahead() + guard lookahead.canParseGenericArguments() else { + return false + } + return lookahead.currentToken.isGenericTypeDisambiguatingToken + } + + mutating func canParseGenericArguments() -> Bool { + // Parse the opening '<'. + guard self.currentToken.starts(with: "<") else { + return false + } + + self.consumePrefix("<", as: .leftAngle) + repeat { + guard self.canParseType() else { + return false + } + // Parse the comma, if the list continues. + } while self.consume(if: .comma) != nil + + + guard self.currentToken.starts(with: ">") else { + return false + } + + self.consumePrefix(">", as: .rightAngle) + return true + } +} + +extension Parser { + @_spi(RawSyntax) + public mutating func parseTypeAttributeList() -> (RawTokenSyntax?, RawAttributeListSyntax?) { + let specifier: RawTokenSyntax? 
+ if self.at(.inoutKeyword) { + specifier = self.eat(.inoutKeyword) + } else if self.currentToken.isIdentifier { + if self.currentToken.tokenText == "__shared" { + specifier = self.consumeAnyToken() + } else if self.currentToken.tokenText == "__owned" { + specifier = self.consumeAnyToken() + } else { + specifier = nil + } + } else { + specifier = nil + } + + if self.at(.atSign) || self.at(.inoutKeyword) { + return (specifier, self.parseTypeAttributeListPresent()) + } + + if self.currentToken.isIdentifier { + if self.currentToken.tokenText == "__shared" + || self.currentToken.tokenText == "__owned" + || self.currentToken.isContextualKeyword("isolated") + || self.currentToken.isContextualKeyword("_const") { + return (specifier, self.parseTypeAttributeListPresent()) + + } + } + return (specifier, nil) + } + + @_spi(RawSyntax) + public mutating func parseTypeAttributeListPresent() -> RawAttributeListSyntax { + var elements = [RawSyntax]() + while self.at(.inoutKeyword) + || self.currentToken.isContextualKeyword("__shared") + || self.currentToken.isContextualKeyword("__owned") + || self.currentToken.isContextualKeyword("isolated") + || self.currentToken.isContextualKeyword("_const") { + if self.at(.inoutKeyword) { + let inoutKeyword = self.eat(.inoutKeyword) + elements.append(RawSyntax(inoutKeyword)) + } else { + let ident = self.consumeIdentifier() + elements.append(RawSyntax(ident)) + } + } + + while self.at(.atSign) { + elements.append(RawSyntax(self.parseTypeAttribute())) + } + return RawAttributeListSyntax(elements: elements, arena: self.arena) + } + + @_spi(RawSyntax) + public mutating func parseTypeAttribute() -> RawAttributeSyntax { + let at = self.eat(.atSign) + let ident = self.consumeIdentifier() + if let attr = Parser.TypeAttribute(rawValue: ident.tokenText) { + // Ok, it is a valid attribute, eat it, and then process it. 
+ if case .convention = attr { + let (unexpectedBeforeLeftParen, leftParen) = self.expect(.leftParen) + let convention = self.consumeIdentifier() + let (unexpectedBeforeRightParen, rightParen) = self.expect(.rightParen) + return RawAttributeSyntax( + atSignToken: at, + attributeName: ident, + unexpectedBeforeLeftParen, + leftParen: leftParen, + argument: RawSyntax(convention), + unexpectedBeforeRightParen, + rightParen: rightParen, + tokenList: nil, + arena: self.arena) + } + } + return RawAttributeSyntax( + atSignToken: at, + attributeName: ident, + leftParen: nil, + argument: nil, + rightParen: nil, + tokenList: nil, + arena: self.arena) + } +} + +extension Lexer.Lexeme { + var isOptionalToken: Bool { + // A postfix '?' by itself is obviously optional. + if self.tokenKind == .postfixQuestionMark { + return true + } + // A postfix or bound infix operator token that begins with '?' can be + // optional too. + if self.tokenKind == .postfixOperator || self.tokenKind == .unspacedBinaryOperator { + return self.tokenText.first == UInt8(ascii: "?") + } + + return false + } + + var isImplicitlyUnwrappedOptionalToken: Bool { + // A postfix '!' by itself is obviously an implicitly unwrapped optional. + if self.tokenKind == .exclamationMark { + return true + } + // A postfix or bound infix operator token that begins with '!' can be + // an implicitly unwrapped optional too. 
+ if self.tokenKind == .postfixOperator || self.tokenKind == .unspacedBinaryOperator { + return self.tokenText.first == UInt8(ascii: "!") + } + + return false + } + + var isGenericTypeDisambiguatingToken: Bool { + switch self.tokenKind { + case .rightParen, + .rightSquareBracket, + .leftBrace, + .rightBrace, + .period, + .prefixPeriod, + .comma, + .semicolon, + .eof, + .exclamationMark, + .postfixQuestionMark, + .colon: + return true + case .spacedBinaryOperator: + return self.tokenText == "&" + case .unspacedBinaryOperator, + .postfixOperator: + return self.isOptionalToken || self.isImplicitlyUnwrappedOptionalToken + case .leftParen, .leftSquareBracket: + // These only apply to the generic type if they don't start a new line. + return !self.isAtStartOfLine + default: + return false + } + } +} diff --git a/Sources/swift-parser-test/swift-parser-test.swift b/Sources/swift-parser-test/swift-parser-test.swift new file mode 100644 index 00000000000..46e4af45aa6 --- /dev/null +++ b/Sources/swift-parser-test/swift-parser-test.swift @@ -0,0 +1,319 @@ +//===------------ main.swift - Entry point for swift-parser-test ----------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2022 Apple Inc. 
and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+
+import SwiftSyntax
+import SwiftParser
+import Foundation
+import ArgumentParser
+#if os(Windows)
+import WinSDK
+#endif
+
+/// Errors shared by the subcommands of this tool.
+///
+/// Conforms to `CustomStringConvertible` so that `description` is the text
+/// shown when the error is printed, matching `VerifyRoundTrip.Error`.
+enum CommonError: Swift.Error, CustomStringConvertible {
+  /// The contents of the file at the given URL could not be decoded as UTF-8.
+  case readingSourceFileFailed(URL)
+
+  public var description: String {
+    switch self {
+    case .readingSourceFileFailed(let url):
+      return "Reading the source file at \(url) failed"
+    }
+  }
+}
+
+/// Print the given message to stderr
+///
+/// - Parameters:
+///   - message: The text to write.
+///   - terminator: Text appended after `message`; a newline by default.
+func printerr(_ message: String, terminator: String = "\n") {
+  // Encoding a Swift `String` through its UTF-8 view cannot fail, so no
+  // force unwrap is needed.
+  FileHandle.standardError.write(Data((message + terminator).utf8))
+}
+
+/// Write `contents` to a uniquely named file in the temporary directory, run
+/// `body` with the file's URL, and return its result.
+///
+/// The file is removed when this function exits, whether `body` returns or
+/// throws; a failure to remove it is ignored.
+///
+/// - Parameters:
+///   - contents: The text to store in the temporary file (UTF-8 encoded).
+///   - body: The closure that receives the URL of the temporary file.
+/// - Returns: Whatever `body` returns.
+/// - Throws: Any error from writing the file or from `body`.
+private func withTemporaryFile<T>(contents: String, body: (URL) throws -> T) throws -> T {
+  var tempFileURL = FileManager.default.temporaryDirectory
+  tempFileURL.appendPathComponent("swift-parser-test-\(UUID().uuidString).swift")
+  try contents.write(to: tempFileURL, atomically: false, encoding: .utf8)
+  defer {
+    try? FileManager.default.removeItem(at: tempFileURL)
+  }
+  return try body(tempFileURL)
+}
+
+/// Read the file at `path` and return its contents as a string.
+///
+/// - Parameter path: File-system path of the source file.
+/// - Throws: Any error from reading the file, or
+///   `CommonError.readingSourceFileFailed` if the data is not valid UTF-8.
+private func getContentsOfSourceFile(at path: String) throws -> String {
+  let sourceURL = URL(fileURLWithPath: path)
+  guard let source = try String(data: Data(contentsOf: sourceURL), encoding: .utf8) else {
+    throw CommonError.readingSourceFileFailed(sourceURL)
+  }
+  return source
+}
+
+@main
+class SwiftParserTest: ParsableCommand {
+  required init() {}
+
+  static var configuration = CommandConfiguration(
+    abstract: "Utility to test SwiftSyntax syntax tree creation.",
+    subcommands: [VerifyRoundTrip.self, DumpTree.self, PrintDiags.self, Reduce.self]
+  )
+}
+
+class VerifyRoundTrip: ParsableCommand {
+  required init() {}
+
+  init(sourceFile: String, swiftVersion: String?, enableBareSlashRegex: Bool?)
{ + self.sourceFile = sourceFile + self.swiftVersion = swiftVersion + self.enableBareSlashRegex = enableBareSlashRegex + } + + @Argument(help: "The source file that should be parsed") + var sourceFile: String + + @Option(name: .long, help: "Interpret input according to a specific Swift language version number") + var swiftVersion: String? + + @Option(name: .long, help: "Enable or disable the use of forward slash regular-expression literal syntax") + var enableBareSlashRegex: Bool? + + enum Error: Swift.Error, CustomStringConvertible { + case roundTripFailed + + public var description: String { + switch self { + case .roundTripFailed: + return "Round-tripping the source file failed" + } + } + } + + func run() throws { + let source = try getContentsOfSourceFile(at: sourceFile) + + try Self.run(source: source, swiftVersion: swiftVersion, enableBareSlashRegex: enableBareSlashRegex) + } + + static func run(source: String, swiftVersion: String?, enableBareSlashRegex: Bool?) throws { + let tree = try Parser.parse( + source: source, + languageVersion: swiftVersion, + enableBareSlashRegexLiteral: enableBareSlashRegex + ) + if tree.description != source { + throw Error.roundTripFailed + } + } +} + +class PrintDiags: ParsableCommand { + required init() {} + + @Argument(help: "The source file that should be parsed") + var sourceFile: String + + @Option(name: .long, help: "Interpret input according to a specific Swift language version number") + var swiftVersion: String? + + @Option(name: .long, help: "Enable or disable the use of forward slash regular-expression literal syntax") + var enableBareSlashRegex: Bool? 
+ + func run() throws { + let source = try getContentsOfSourceFile(at: sourceFile) + + let tree = try Parser.parse( + source: source, + languageVersion: swiftVersion, + enableBareSlashRegexLiteral: enableBareSlashRegex + ) + let diags = ParseDiagnosticsGenerator.diagnostics(for: tree) + let locationConverter = SourceLocationConverter(file: sourceFile, tree: tree) + if diags.isEmpty { + print("No diagnostics produced") + } + for diag in diags { + let location = diag.location(converter: locationConverter) + let message = diag.message + print("\(location): \(message)") + } + } +} + +class DumpTree: ParsableCommand { + required init() {} + + @Argument(help: "The source file that should be parsed") + var sourceFile: String + + @Option(name: .long, help: "Interpret input according to a specific Swift language version number") + var swiftVersion: String? + + @Option(name: .long, help: "Enable or disable the use of forward slash regular-expression literal syntax") + var enableBareSlashRegex: Bool? + + func run() throws { + let source = try getContentsOfSourceFile(at: sourceFile) + + let tree = try Parser.parse( + source: source, + languageVersion: swiftVersion, + enableBareSlashRegexLiteral: enableBareSlashRegex + ) + print(tree.recursiveDescription) + } +} + +class Reduce: ParsableCommand { + required init() {} + + @Argument(help: "The test case that should be reduced") + var sourceFile: String + + @Option(name: .long, help: "Interpret input according to a specific Swift language version number") + var swiftVersion: String? + + @Option(name: .long, help: "Enable or disable the use of forward slash regular-expression literal syntax") + var enableBareSlashRegex: Bool? 
+ + @Flag(help: "Print status updates while reducing the test case") + var verbose: Bool = false + + enum Error: Swift.Error, CustomStringConvertible { + case testDoesNotFail + + public var description: String { + switch self { + case .testDoesNotFail: + return "Source file passed to reduce subcommand does not fail to roundtrip" + } + } + } + + enum ProcessExit { + /// The process finished successfully. + case success + /// The process finished with an exit code of 1, indicating that it failed but did not crash + case failure + /// Running the process didn't finish in a specified timeout + case timeout + /// The process exited with an exit code that was neither 0 nor 1 and might have been a crash. + case potentialCrash + } + + /// Invoke `swift-parser-test verify-round-trip` with the same arguments as this `reduce` subcommand. + /// Returns the exit code of the invocation. + private func runVerifyRoundTripInSeparateProcess(source: String) throws -> ProcessExit { + return try withTemporaryFile(contents: source) { tempFileURL in + let process = Process() + process.executableURL = URL(fileURLWithPath: ProcessInfo.processInfo.arguments[0]) + process.arguments = [ + "verify-round-trip", tempFileURL.path, + ] + if let enableBareSlashRegex = enableBareSlashRegex { + process.arguments! += [ + "--enable-bare-slash-regex", enableBareSlashRegex ? "true" : "false" + ] + } + if let swiftVersion = swiftVersion { + process.arguments! 
+= [ + "--swift-version", swiftVersion + ] + } + let sema = DispatchSemaphore(value: 0) + process.standardOutput = FileHandle.nullDevice + process.standardError = FileHandle.nullDevice + process.terminationHandler = { process in + sema.signal() + } + + try process.run() + if sema.wait(timeout: DispatchTime.now() + .seconds(1)) == .timedOut { +#if os(Windows) + _ = TerminateProcess(process.processHandle, 0) +#else + kill(pid_t(process.processIdentifier), SIGKILL) +#endif + return .timeout + } + switch process.terminationStatus { + case 0: + return .success + case 1: + return .failure + default: + return .potentialCrash + } + } + } + + /// Runs the `verify-round-trip` subcommand in process. + /// Returns `true` if `source` round-tripped successfully, `false` otherwise. + private func runVerifyRoundTripInCurrentProcess(source: String) throws -> Bool { + do { + try VerifyRoundTrip.run(source: source, swiftVersion: self.swiftVersion, enableBareSlashRegex: self.enableBareSlashRegex) + } catch { + return false + } + return true + } + + private func reduce(source: String, testPasses: (String) throws -> Bool) throws -> String { + var reduced = source + var chunkSize = source.count / 4 + while chunkSize > 0 { + if verbose { + printerr("Current source size \(reduced.count), reducing with chunk size \(chunkSize)") + } + reduced = try reduceImpl(source: reduced, chunkSize: chunkSize, testPasses: testPasses) + chunkSize = min(reduced.count / 2, chunkSize / 2) + } + return reduced + } + + /// Reduces a test case with `source` by iteratively attempting to remove `chunkSize` characters - ie. removing the chunk if `testPasses` returns `false`. + private func reduceImpl(source: String, chunkSize: Int, testPasses: (String) throws -> Bool) rethrows -> String { + var reduced = "" + // Characters that still need to be checked whether they can be removed. 
+ var remaining = source + while !remaining.isEmpty { + let index = remaining.index(remaining.startIndex, offsetBy: chunkSize, limitedBy: remaining.endIndex) ?? remaining.endIndex + let testChunk = String(remaining[.. Bool + switch try runVerifyRoundTripInSeparateProcess(source: source) { + case .success: + throw Error.testDoesNotFail + case .failure: + // Round-tripping did not crash. We can run the checks in-process + testPasses = self.runVerifyRoundTripInCurrentProcess + case .potentialCrash, .timeout: + // Invoking verify-round-trip might have crashed. We don’t want to crash this process, so run in a separate process. + testPasses = { try self.runVerifyRoundTripInSeparateProcess(source: $0) == .success } + } + + var checks = 0 + let reduced = try reduce(source: source) { reducedSource in + checks += 1 + return try testPasses(reducedSource) + } + if verbose { + printerr("Reduced from \(source.count) to \(reduced.count) characters in \(checks) iterations") + } + print(reduced) + } +} diff --git a/Tests/PerformanceTest/ParsingPerformanceTests.swift b/Tests/PerformanceTest/ParsingPerformanceTests.swift index f4685a355e1..e6ee4477355 100644 --- a/Tests/PerformanceTest/ParsingPerformanceTests.swift +++ b/Tests/PerformanceTest/ParsingPerformanceTests.swift @@ -1,6 +1,7 @@ import XCTest import SwiftSyntax import SwiftSyntaxParser +import SwiftParser public class ParsingPerformanceTests: XCTestCase { @@ -20,4 +21,15 @@ public class ParsingPerformanceTests: XCTestCase { } } } + + func testNativeParsingPerformance() throws { + measure { + do { + let source = try String(contentsOf: inputFile) + _ = try SwiftParser.Parser.parse(source: source) + } catch { + XCTFail(error.localizedDescription) + } + } + } } diff --git a/Tests/SwiftParserTest/Assertions.swift b/Tests/SwiftParserTest/Assertions.swift new file mode 100644 index 00000000000..9b9a8c01d5d --- /dev/null +++ b/Tests/SwiftParserTest/Assertions.swift @@ -0,0 +1,158 @@ +import XCTest +@_spi(RawSyntax) import 
SwiftSyntax +@_spi(Testing) @_spi(RawSyntax) import SwiftParser + +// MARK: Lexing Assertions + +/// Asserts that two lexical streams are structurally equal, including their trivia and any +/// text. +/// +/// - Parameters: +/// - lhs: The actual lexeme stream. +/// - rhs: The expected lexeme stream. +/// - file: The file in which failure occurred. Defaults to the file name of the test case in +/// which this function was called. +/// - line: The line number on which failure occurred. Defaults to the line number on which this +/// function was called. +func AssertEqualTokens(_ actual: [Lexer.Lexeme], _ expected: [Lexer.Lexeme], file: StaticString = #file, line: UInt = #line) { + guard actual.count == expected.count else { + return XCTFail("Number of tokens does not match! \(actual.count) != \(expected.count)", file: file, line: line) + } + + for (idx, (l, r)) in zip(actual, expected).enumerated() { + guard l.tokenKind == r.tokenKind else { + return XCTFail("Token at index \(idx) does not match! \(l.tokenKind) != \(r.tokenKind)", file: file, line: line) + } + + guard l.leadingTriviaText == r.leadingTriviaText else { + return XCTFail(""" + Token at index \(idx) does not have matching leading trivia! \ + \(l.leadingTriviaText.debugDescription) != \(r.leadingTriviaText.debugDescription) + """, file: file, line: line) + } + + guard l.tokenText == r.tokenText else { + return XCTFail(""" + Text at index \(idx) does not have matching text! \ + \(l.tokenText.debugDescription) != \(r.tokenText.debugDescription)" + """, file: file, line: line) + } + + guard l.trailingTriviaText == r.trailingTriviaText else { + return XCTFail(""" + Token at index \(idx) does not have matching trailing trivia! 
\ + \(l.trailingTriviaText.debugDescription) != \(r.trailingTriviaText.debugDescription) + """, file: file, line: line) + } + } +} + +// MARK: Parsing Assertions + +func AssertParse( + _ parseSyntax: (inout Parser) -> Node, + allowErrors: Bool = true, + file: StaticString = #file, + line: UInt = #line, + _ source: () -> String +) throws { + // Verify the parser can round-trip the source + let src = source() + var source = src + source.withUTF8 { buf in + var parser = Parser(buf) + withExtendedLifetime(parser) { + let parse = Syntax(raw: parseSyntax(&parser).raw) + AssertStringsEqualWithDiff("\(parse)", src, additionalInfo: """ + Actual syntax tree: + \(parse.recursiveDescription) + """, file: file, line: line) + if !allowErrors { + let diagnostics = ParseDiagnosticsGenerator.diagnostics(for: Syntax(raw: parse.raw)) + XCTAssertEqual( + diagnostics.count, 0, + """ + Received the following diagnostics while parsing the source code: + \(diagnostics) + """, + file: file, line: line) + } + } + } +} + +/// Asserts that the two strings are equal, providing Unix `diff`-style output if they are not. +/// +/// - Parameters: +/// - actual: The actual string. +/// - expected: The expected string. +/// - message: An optional description of the failure. +/// - additionalInfo: Additional information about the failed test case that will be printed after the diff +/// - file: The file in which failure occurred. Defaults to the file name of the test case in +/// which this function was called. +/// - line: The line number on which failure occurred. Defaults to the line number on which this +/// function was called. +func AssertStringsEqualWithDiff( + _ actual: String, + _ expected: String, + _ message: String = "", + additionalInfo: @autoclosure () -> String? = nil, + file: StaticString = #file, + line: UInt = #line +) { + // Use `CollectionDifference` on supported platforms to get `diff`-like line-based output. On + // older platforms, fall back to simple string comparison. 
+ if #available(macOS 10.15, *) { + let actualLines = actual.components(separatedBy: .newlines) + let expectedLines = expected.components(separatedBy: .newlines) + + let difference = actualLines.difference(from: expectedLines) + if difference.isEmpty { return } + + var result = "" + + var insertions = [Int: String]() + var removals = [Int: String]() + + for change in difference { + switch change { + case .insert(let offset, let element, _): + insertions[offset] = element + case .remove(let offset, let element, _): + removals[offset] = element + } + } + + var expectedLine = 0 + var actualLine = 0 + + while expectedLine < expectedLines.count || actualLine < actualLines.count { + if let removal = removals[expectedLine] { + result += "-\(removal)\n" + expectedLine += 1 + } else if let insertion = insertions[actualLine] { + result += "+\(insertion)\n" + actualLine += 1 + } else { + result += " \(expectedLines[expectedLine])\n" + expectedLine += 1 + actualLine += 1 + } + } + + let failureMessage = "Actual output (+) differed from expected output (-):\n\(result)" + var fullMessage = message.isEmpty ? failureMessage : "\(message) - \(failureMessage)" + if let additionalInfo = additionalInfo() { + fullMessage = """ + \(fullMessage) + \(additionalInfo) + """ + } + XCTFail(fullMessage, file: file, line: line) + } else { + // Fall back to simple string comparison on platforms that don't support CollectionDifference. + let failureMessage = "Actual output differed from expected output:" + let fullMessage = message.isEmpty ? 
failureMessage : "\(message) - \(failureMessage)" + XCTAssertEqual(actual, expected, fullMessage, file: file, line: line) + } +} diff --git a/Tests/SwiftParserTest/Availability.swift b/Tests/SwiftParserTest/Availability.swift new file mode 100644 index 00000000000..534253e62b1 --- /dev/null +++ b/Tests/SwiftParserTest/Availability.swift @@ -0,0 +1,52 @@ +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser +import XCTest + +final class AvailabilityTests: XCTestCase { + func testAvailableMember() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + @available(OSX 10.0, introduced: 10.12) + // expected-error@-1 {{'introduced' can't be combined with shorthand specification 'OSX 10.0'}} + // expected-error@-2 {{expected declaration}} + func shorthandFollowedByIntroduced() {} + + @available(iOS 6.0, OSX 10.8, *) + func availableOnMultiplePlatforms() {} + """ + } + + try AssertParse({ $0.parseClassDeclaration(.empty) }) { + """ + class IncrementalParseTransition { + @available(*, deprecated, message: "Use the initializer taking 'ConcurrentEdits' instead") + public convenience init() {} + } + """ + } + + try AssertParse({ $0.parseSourceFile() }) { + """ + extension String { + @available(macOS 10.15.4, iOS 13.4, watchOS 6.2, tvOS 13.4, *) + public func fiddle() { } + + @available(SwiftStdlib 5.2, *) + public func fiddle() { } + } + """ + } + + try AssertParse({ $0.parseSourceFile() }) { + """ + @available( + iOSApplicationExtension, + introduced: 10.0, + deprecated: 11.0, + message: + "Use something else because this is definitely deprecated.") + func f2() {} + """ + } + } +} diff --git a/Tests/SwiftParserTest/Declarations.swift b/Tests/SwiftParserTest/Declarations.swift new file mode 100644 index 00000000000..2d224283312 --- /dev/null +++ b/Tests/SwiftParserTest/Declarations.swift @@ -0,0 +1,425 @@ +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser +import XCTest + +final class DeclarationTests: XCTestCase { + func 
testImports() throws { + try AssertParse({ $0.parseImportDeclaration(.empty) }) { + "import Foundation" + } + + try AssertParse({ $0.parseDeclaration() }) { + "@_spi(Private) import SwiftUI" + } + + try AssertParse({ $0.parseDeclaration() }) { + "@_exported import class Foundation.Thread" + } + + try AssertParse({ $0.parseDeclaration() }) { + """ + @_private(sourceFile: "YetAnotherFile.swift") import Foundation + """ + } + } + func testStructParsing() throws { + try AssertParse({ $0.parseStructDeclaration(.empty) }) { + """ + struct Foo { + } + """ + } + } + + func testFuncParsing() throws { + try AssertParse({ $0.parseFuncDeclaration(.empty) }) { + """ + func foo() { + } + """ + } + + try AssertParse({ $0.parseFuncDeclaration(.empty) }) { + """ + func foo() -> Slice> { + } + """ + } + + try AssertParse({ $0.parseSourceFile() }) { + """ + func onEscapingAutoclosure(_ fn: @Sendable @autoclosure @escaping () -> Int) { } + func onEscapingAutoclosure2(_ fn: @escaping @autoclosure @Sendable () -> Int) { } + func bar(_ : String) async throws -> [[String]: Array] {} + func tupleMembersFunc() -> (Type.Inner, Type2.Inner2) {} + func myFun(var1: S) { + // do stuff + } + """ + } + } + + func testClassParsing() throws { + try AssertParse({ $0.parseClassDeclaration(.empty) }) { + """ + class Foo { + } + """ + } + + try AssertParse({ $0.parseSourceFile() }) { + """ + @dynamicMemberLookup @available(swift 4.0) + public class MyClass { + let A: Int + let B: Double + } + """ + } + + try AssertParse({ $0.parseGenericParameters() }) { + "<@NSApplicationMain T: AnyObject>" + } + } + + func testActorParsing() throws { + try AssertParse({ $0.parseActorDeclaration(.empty) }) { + """ + actor Foo { + } + """ + } + + try AssertParse({ $0.parseActorDeclaration(.empty) }) { + """ + actor Foo { + nonisolated init?() throws { + for (x, y, z) in self.triples { + assert(isSafe) + } + } + subscript(_ param: String) -> Int { + return 42 + } + } + """ + } + } + + func testProtocolParsing() throws { 
+ try AssertParse({ $0.parseProtocolDeclaration(.empty) }) { + """ + protocol Foo { + } + """ + } + + try AssertParse({ $0.parseProtocolDeclaration(.empty) }) { + """ + protocol P { init() } + """ + } + + try AssertParse({ $0.parseProtocolDeclaration(.empty) }) { + """ + protocol P { + associatedtype Foo: Bar where X.Y == Z.W.W.Self + + var foo: Bool { get set } + subscript(index: Int) -> R + } + """ + } + } + + func testVariableDeclarations() throws { + try AssertParse({ $0.parseDeclaration() }) { + """ + private unowned(unsafe) var foo: Int + """ + } + + try AssertParse({ $0.parseSourceFile() }) { + "_ = foo/* */?.description" + } + + try AssertParse({ $0.parseLetOrVarDeclaration(.empty) }) { + "var a = Array?(from: decoder)" + } + + try AssertParse({ $0.parseSourceFile() }) { + "@Wrapper var café = 42" + } + + try AssertParse({ $0.parseLetOrVarDeclaration(.empty) }) { + """ + var x: T { + get async throws { + foo() + bar() + } + } + """ + } + + try AssertParse({ $0.parseLetOrVarDeclaration(.empty) }) { + """ + var foo: Int { + _read { + yield 1234567890 + } + _modify { + var someLongVariable = 0 + yield &someLongVariable + } + } + """ + } + + try AssertParse({ $0.parseSourceFile() }) { + """ + async let a = fetch("1.jpg") + async let b: Image = fetch("2.jpg") + async let secondPhotoToFetch = fetch("3.jpg") + async let theVeryLastPhotoWeWant = fetch("4.jpg") + """ + } + } + + func testTypealias() throws { + try AssertParse({ $0.parseTypealiasDeclaration(.empty) }) { + """ + typealias Foo = Int + """ + } + + try AssertParse({ $0.parseTypealiasDeclaration(.empty) }) { + """ + typealias MyAlias = (_ a: Int, _ b: Double, _ c: Bool, _ d: String) -> Bool + """ + } + + try AssertParse({ $0.parseSourceFile() }) { + """ + typealias A = @attr1 @attr2(hello) (Int) -> Void + """ + } + } + + func testPrecedenceGroup() throws { + try AssertParse({ $0.parsePrecedenceGroupDeclaration(.empty) }) { + """ + precedencegroup FooGroup { + higherThan: Group1, Group2 + lowerThan: 
Group3, Group4 + associativity: left + assignment: false + } + """ + } + + try AssertParse({ $0.parsePrecedenceGroupDeclaration(.empty) }) { + """ + precedencegroup FunnyPrecedence { + associativity: left + higherThan: MultiplicationPrecedence + } + """ + } + } + + func testOperators() throws { + try AssertParse({ $0.parseDeclaration() }) { + """ + infix operator *-* : FunnyPrecedence + """ + } + } + + func testObjCAttribute() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + @objc( + thisMethodHasAVeryLongName: + foo: + bar: + ) + func f() {} + """ + } + } + + func testDifferentiableAttribute() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + @differentiable(wrt: x where T: D) + func foo(_ x: T) -> T {} + + @differentiable(wrt: x where T: Differentiable) + func foo(_ x: T) -> T {} + + @differentiable(wrt: theVariableNamedX where T: Differentiable) + func foo(_ theVariableNamedX: T) -> T {} + + @differentiable(wrt: (x, y)) + func foo(_ x: T) -> T {} + """ + } + } + + func testParsePoundError() throws { + try AssertParse({ $0.parsePoundDiagnosticDeclaration() }) { + #"#error("Unsupported platform")"# + } + } + + func testParsePoundWarning() throws { + try AssertParse({ $0.parsePoundDiagnosticDeclaration() }) { + #"#warning("Unsupported platform")"# + } + } + + func testParseSpecializeAttribute() throws { + try AssertParse({ $0.parseSourceFile() }) { + #""" + @_specialize(where T == Int, U == Float) + mutating func exchangeSecond(_ u: U, _ t: T) -> (U, T) { + x = t + return (u, x) + } + + @_specialize(exported: true, kind: full, where K == Int, V == Int) + @_specialize(exported: false, kind: partial, where K: _Trivial64) + func dictFunction(dict: Dictionary) { + } + + @_specialize(where T == Int) + public func play() { + for _ in 0...100_000_000 { t = t.ping() } + } + + @_specialize(exported: true, + spi: SwiftSpecialization, + target: copy(), + where Key == AnyHashable, Value == Any) + @_specialize(exported: true, + spi: 
SwiftSpecialization, + target: copy(), + where Key == AnyHashable, Value == String) + @_specialize(exported: true, + spi: SwiftSpecialization, + target: copy(), + where Key == String, Value == Any) + @_specialize(exported: true, + spi: SwiftSpecialization, + target: copy(), + where Key == String, Value == AnyHashable) + @_specialize(exported: true, + spi: SwiftSpecialization, + target: copy(), + where Key == String, Value == String) + @available(SwiftStdlib 5.5, *) + @usableFromInline + mutating func __specialize_copy() { Builtin.unreachable() } + + @_specializeExtension + extension Sequence { + @_specialize(exported: true, + spi: SwiftSpecialization, + target: _copyContents(initializing:), + where Self == [String]) + @_specialize(exported: true, + spi: SwiftSpecialization, + target: _copyContents(initializing:), + where Self == Set) + @available(SwiftStdlib 5.5, *) + @usableFromInline + __consuming func __specialize__copyContents(initializing: Swift.UnsafeMutableBufferPointer) -> (Iterator, Int) { Builtin.unreachable() } + } + """# + } + + try AssertParse({ $0.parseSourceFile() }) { + """ + @_specialize(where T: _Trivial(32), T: _Trivial(64), T: _Trivial, T: _RefCountedObject) + @_specialize(where T: _Trivial, T: _Trivial(64)) + @_specialize(where T: _RefCountedObject, T: _NativeRefCountedObject) + @_specialize(where Array == Int) + @_specialize(where T.Element == Int) + public func funcWithComplexSpecializeRequirements(t: T) -> Int { + return 55555 + } + """ + } + } + + func testParseDynamicReplacement() throws { + try AssertParse({ $0.parseDeclaration() }) { + """ + @_dynamicReplacement(for: dynamic_replaceable()) + func replacement() { + dynamic_replaceable() + } + """ + } + + try AssertParse({ $0.parseDeclaration() }) { + """ + @_dynamicReplacement(for: subscript(_:)) + subscript(x y: Int) -> Int { + get { + return self[y] + } + set { + self[y] = newValue + } + } + """ + } + + try AssertParse({ $0.parseDeclaration() }) { + """ + @_dynamicReplacement(for: 
dynamic_replaceable_var) + var r : Int { + return 0 + } + """ + } + + try AssertParse({ $0.parseDeclaration() }) { + """ + @_dynamicReplacement(for: init(x:)) + init(y: Int) { + self.init(x: y + 1) + } + """ + } + } + + func testEnumParsing() throws { + try AssertParse({ $0.parseEnumDeclaration(.empty) }) { + """ + enum Content { + case keyPath(KeyPath) + case keyPath(KeyPath?>) + case value(Value?) + } + """ + } + } + + func testStandaloneModifier() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + struct a { + public + } + """ + } + } +} + +extension Parser.DeclAttributes { + static let empty = Parser.DeclAttributes(attributes: nil, modifiers: nil) +} + diff --git a/Tests/SwiftParserTest/DiagnosticAssertions.swift b/Tests/SwiftParserTest/DiagnosticAssertions.swift new file mode 100644 index 00000000000..4f62e424ed2 --- /dev/null +++ b/Tests/SwiftParserTest/DiagnosticAssertions.swift @@ -0,0 +1,51 @@ +import SwiftSyntax +import SwiftParser +import XCTest +import _SwiftSyntaxTestSupport + +/// Asserts that the diagnostics `diag` inside `tree` occurs at `line` and +/// `column`. +/// If `message` is not `nil`, assert that the diagnostic has the given message. +/// If `id` is not `nil`, assert that the diagnostic has the given message. +func XCTAssertDiagnostic( + _ diag: Diagnostic, + in tree: T, + line: Int, + column: Int, + id: DiagnosticMessageID? = nil, + message: String? = nil, + testFile: StaticString = #filePath, + testLine: UInt = #line +) { + let locationConverter = SourceLocationConverter(file: "/test.swift", source: tree.description) + let location = diag.location(converter: locationConverter) + XCTAssertEqual(location.line, line, "Expected diagnostic on line \(line) but got \(location.line ?? -1)", file: testFile, line: testLine) + XCTAssertEqual(location.column, column, "Expected diagnostic on column \(column) but got \(location.column ?? 
-1)", file: testFile, line: testLine) + if let id = id { + XCTAssertEqual(diag.diagnosticID, id, file: testFile, line: testLine) + } + if let message = message { + XCTAssertEqual(diag.message, message, file: testFile, line: testLine) + } +} + +/// Assert that producing diagnostics for `tree` generates a single diagnostic +/// at `line` and `column`. +/// If `message` is not `nil`, assert that the diagnostic has the given message. +/// If `id` is not `nil`, assert that the diagnostic has the given diagnostic ID. +func XCTAssertSingleDiagnostic( + in tree: T, + line: Int, + column: Int, + id: DiagnosticMessageID? = nil, + message: String? = nil, + testFile: StaticString = #filePath, + testLine: UInt = #line +) { + let diags = ParseDiagnosticsGenerator.diagnostics(for: tree) + guard diags.count == 1 else { + XCTFail("Received \(diags.count) diagnostics but expected excatly one: \(diags)", file: testFile, line: testLine) + return + } + XCTAssertDiagnostic(diags.first!, in: tree, line: line, column: column, id: id, message: message, testFile: testFile, testLine: testLine) +} diff --git a/Tests/SwiftParserTest/DiagnosticInfrastructureTests.swift b/Tests/SwiftParserTest/DiagnosticInfrastructureTests.swift new file mode 100644 index 00000000000..85b8c9e9991 --- /dev/null +++ b/Tests/SwiftParserTest/DiagnosticInfrastructureTests.swift @@ -0,0 +1,14 @@ +import XCTest +import SwiftParser + +public class DiagnosticInfrastructureTests: XCTestCase { + public func testDiagnosticID() throws { + struct TestDiagnostic: TypedDiagnosticMessage { + let message: String = "My test diagnostic" + } + + let diag = TestDiagnostic() + XCTAssertEqual(diag.diagnosticID, DiagnosticMessageID("TestDiagnostic")) + XCTAssertEqual(diag.message, "My test diagnostic") + } +} diff --git a/Tests/SwiftParserTest/DiagnosticTests.swift b/Tests/SwiftParserTest/DiagnosticTests.swift new file mode 100644 index 00000000000..7890c9982d6 --- /dev/null +++ b/Tests/SwiftParserTest/DiagnosticTests.swift @@ -0,0 +1,70 @@ 
+import XCTest +@_spi(RawSyntax) import SwiftSyntax +@_spi(Testing) @_spi(RawSyntax) import SwiftParser +import _SwiftSyntaxTestSupport + +public class DiagnosticTests: XCTestCase { + public func testMissingTokenDiags() throws { + let source = """ + (first second Int) + """ + let signature = withParser(source: source) { Syntax(raw: $0.parseFunctionSignature().raw) } + + XCTAssertSingleDiagnostic(in: signature, line: 1, column: 15, id: MissingTokenDiagnostic.diagnosticID, message: "Expected ':' in function parameter") + } + + public func testUnexpectedDiags() throws { + let source = """ + (first second third fourth: Int) + """ + let signature = withParser(source: source) { Syntax(raw: $0.parseFunctionSignature().raw) } + + XCTAssertSingleDiagnostic(in: signature, line: 1, column: 15, message: "Unexpected text 'third fourth' found in function parameter") + } + + public func testCStyleForLoop() throws { + let source = """ + for let x = 0; x < 10; x += 1, y += 1 { + } + """ + let loop = withParser(source: source) { + Syntax(raw: $0.parseForEachStatement().raw).as(ForInStmtSyntax.self)! + } + + XCTAssertSingleDiagnostic(in: loop, line: 1, column: 1, message: "C-style for statement has been removed in Swift 3") + } + + public func testMissingClosingParen() throws { + let source = """ + (first second: Int + """ + let signature = withParser(source: source) { + Syntax(raw: $0.parseFunctionSignature().raw).as(FunctionSignatureSyntax.self)! + } + + XCTAssertSingleDiagnostic(in: signature, line: 1, column: 19, message: "Expected ')' to end parameter clause") + } + + public func testMissingOpeningParen() throws { + let source = """ + first second: Int) + """ + let signature = withParser(source: source) { + Syntax(raw: $0.parseFunctionSignature().raw).as(FunctionSignatureSyntax.self)! 
+ } + + XCTAssertSingleDiagnostic(in: signature, line: 1, column: 1, message: "Expected '(' to start parameter clause") + } + + public func testThrowsInWrongLocation() throws { + let source = """ + () -> throws Int + """ + + let signature = withParser(source: source) { + Syntax(raw: $0.parseFunctionSignature().raw).as(FunctionSignatureSyntax.self)! + } + + XCTAssertSingleDiagnostic(in: signature, line: 1, column: 7, message: "'throws' may only occur before '->'") + } +} diff --git a/Tests/SwiftParserTest/Directives.swift b/Tests/SwiftParserTest/Directives.swift new file mode 100644 index 00000000000..ce7356c589b --- /dev/null +++ b/Tests/SwiftParserTest/Directives.swift @@ -0,0 +1,73 @@ +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser +import XCTest + +final class DirectiveTests: XCTestCase { + func testSwitchIfConfig() throws { + try AssertParse({ $0.parseStatement() }) { + """ + switch x { + case 1: fallthrough + #if FOO + case 2: fallthrough + case 3: print(3) + case 4: print(4) + #endif + case 5: fallthrough + case 6: print(6) + #if BAR + #if BAZ + case 7: print(7) + case 8: fallthrough + #endif + case 9: fallthrough + #endif + case 10: print(10) + } + """ + } + } + + func testPostfixIfConfigExpression() throws { + try AssertParse({ $0.parseExpression() }) { + """ + foo + .bar() + .baz() + #if CONFIG1 + .quux + .garp + #if CONFIG2 + .quux + #if CONFIG3 + #if INNER1 + .quux + .garp + #endif + #elseif CONFIG3 + .quux + .garp + #else + .gorp + #endif + .garp + #endif + #endif + """ + } + } + + func testSourceLocation() throws { + try AssertParse({ $0.parsePoundSourceLocationDirective() }) { + """ + #sourceLocation() + """ + } + + try AssertParse({ $0.parsePoundSourceLocationDirective() }) { + """ + #sourceLocation(file: "foo", line: 42) + """ + } + } +} diff --git a/Tests/SwiftParserTest/Expressions.swift b/Tests/SwiftParserTest/Expressions.swift new file mode 100644 index 00000000000..9755ba0e91e --- /dev/null +++ 
b/Tests/SwiftParserTest/Expressions.swift @@ -0,0 +1,286 @@ +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser +import XCTest + +final class ExpressionTests: XCTestCase { + func testTernary() throws { + try AssertParse({ $0.parseSourceFile() }) { + "let a =" + } + + try AssertParse({ $0.parseExpression() }) { + """ + a ? b : c ? d : e + """ + } + } + + func testClosureLiterals() throws { + try AssertParse({ $0.parseClosureExpression() }) { + #""" + { @MainActor (a: Int) async -> Int in print("hi") } + """# + } + + try AssertParse({ $0.parseClosureExpression() }) { + """ + { [weak self, weak weakB = b] foo in + return 0 + } + """ + } + } + + func testTrailingClosures() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + var button = View.Button[5, 4, 3 + ] { + // comment #0 + Text("ABC") + } + """ + } + + try AssertParse({ $0.parseExpression() }) { + """ + compactMap { (parserDiag) in } + """ + } + } + + func testSequenceExpressions() throws { + try AssertParse({ $0.parseSequenceExpressionElement(.basic) }) { + """ + await a() + """ + } + } + + func testNestedTypeSpecialization() throws { + try AssertParse({ $0.parseExpression() }) { + """ + Swift.Array>() + """ + } + } + + func testObjectLiterals() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + #colorLiteral() + #colorLiteral(red: 1.0) + #colorLiteral(red: 1.0, green: 1.0) + #colorLiteral(red: 1.0, green: 1.0, blue: 1.0, alpha: 1.0) + """ + } + + try AssertParse({ $0.parseSourceFile() }) { + """ + #imageLiteral() + #imageLiteral(resourceName: "foo.png") + #imageLiteral(resourceName: "foo/bar/baz/qux.png") + #imageLiteral(resourceName: "foo/bar/baz/quux.png") + """ + } + } + + func testKeypathExpression() throws { + try AssertParse({ $0.parseExpression() }) { + #""" + children.filter(\.type.defaultInitialization.isEmpty) + """# + } + } + + func testBasicLiterals() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + #file + (#line) + #column + #function + 
#dsohandle + __FILE__ + __LINE__ + __COLUMN__ + __FUNCTION__ + __DSO_HANDLE__ + """ + } + } + + func testRegexLiteral() throws { + try AssertParse({ $0.parseExpression() }) { + #""" + /(?[[:alpha:]]\w*) = (?[0-9A-F]+)/ + """# + } + } + + func testInitializerExpression() throws { + try AssertParse({ $0.parseExpression() }) { + """ + Lexer.Cursor(input: input, previous: 0) + """ + } + } + + func testCollectionLiterals() throws { + try AssertParse({ $0.parseExpression() }) { + "[Dictionary: Int]()" + } + + try AssertParse({ $0.parseExpression() }) { + "[(Int, Double) -> Bool]()" + } + + try AssertParse({ $0.parseExpression() }) { + "[(Int, Double) throws -> Bool]()" + } + + try AssertParse({ $0.parseExpression() }) { + "_ = [@convention(block) () -> Int]().count" + } + + try AssertParse({ $0.parseExpression() }) { + "A<@convention(c) () -> Int32>.c()" + } + + try AssertParse({ $0.parseExpression() }) { + "A<(@autoclosure @escaping () -> Int, Int) -> Void>.c()" + } + + try AssertParse({ $0.parseExpression() }) { + "_ = [String: (@escaping (A) -> Int) -> Void]().keys" + } + + try AssertParse({ $0.parseExpression() }) { + """ + [ + condition ? 
firstOption : secondOption, + bar(), + ] + """ + } + + try AssertParse({ $0.parseExpression() }) { + "[," + } + + try AssertParse({ $0.parseExpression() }) { + """ + ([1:) + """ + } + } + + func testInterpolatedStringLiterals() throws { + try AssertParse({ $0.parseSourceFile() }) { + #""" + return "Fixit: \(range.debugDescription) Text: \"\(text)\"" + """# + } + } + + func testStringLiterals() throws { + try AssertParse({ $0.parseExpression() }) { + #""" + "" + """# + } + + try AssertParse({ $0.parseExpression() }) { + #""" + """ + """ + """# + } + + try AssertParse({ $0.parseExpression() }) { + #"" >> \( abc } ) << ""# + } + + try AssertParse({ $0.parseSourceFile() }) { + ##""" + + + #"Hello World"# + + "Hello World" + + + """## + } + + try AssertParse({ $0.parseExpression() }) { + #""\","# + } + + try AssertParse({ $0.parseExpression() }) { + #""" + "(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)" + + "(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*" + + "\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))" + """# + } + + try AssertParse({ $0.parseExpression() }) { + #""" + """ + Custom(custom: \(interval),\ + Expr: \(pause?.debugDescription ?? 
"–"), \ + PlainWithContinuation: \(countdown), \ + Plain: \(units))" + """ + """# + } + + try AssertParse({ $0.parseExpression() }) { + #""" + "Founded: \(Date.appleFounding, format: 📆)" + """# + } + + try AssertParse({ $0.parseExpression()}) { + """ + + "" + """ + } + + try AssertParse({ $0.parseExpression() }, allowErrors: false) { + ##""" + #"""# + """## + } + + try AssertParse({ $0.parseExpression() }, allowErrors: false) { + ##""" + #"""""# + """## + } + + try AssertParse({ $0.parseExpression() }, allowErrors: false) { + ##""" + #""" + multiline raw + """# + """## + } + + try AssertParse({ $0.parseExpression() }, allowErrors: false) { + #""" + "\(x)" + """# + } + } + + func testRangeSubscript() throws { + try AssertParse({ $0.parseExpression() }, allowErrors: false) { + """ + text[...] + """ + } + } +} diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift new file mode 100644 index 00000000000..3caf91637b1 --- /dev/null +++ b/Tests/SwiftParserTest/LexerTests.swift @@ -0,0 +1,494 @@ +import XCTest +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser + + +private func lexeme( + _ kind: RawTokenKind, + _ wholeText: SyntaxText, + leading: Int = 0, + trailing: Int = 0 +) -> Lexer.Lexeme { + return Lexer.Lexeme( + tokenKind: kind, isAtStartOfLine: true, + start: wholeText.baseAddress!, + leadingTriviaLength: leading, + textLength: wholeText.count - leading - trailing, + trailingTriviaLength: trailing) +} + +public class LexerTests: XCTestCase { + func testIdentifiers() throws { + var data = + """ + Hello World + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.identifier, "Hello ", trailing: 1), + lexeme(.identifier, "World"), + lexeme(.eof, ""), + ]) + } + } + + func testEscapedIdentifiers() throws { + var data = + """ + `Hello` `World` `$` + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.identifier, 
"`Hello` ", trailing: 1), + lexeme(.identifier, "`World` ", trailing: 1), + lexeme(.identifier, "`$`"), + lexeme(.eof, ""), + ]) + } + } + + func testBlockComments() throws { + var data = + """ + /* */ + /**/ + /* /* */ */ + """ + + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.eof, "/* */\n/**/\n/* /* */ */", leading: 22), + ]) + } + } + + func testDeepTupleAccess() throws { + var data = + #""" + x.1.0 + """# + + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.identifier, "x"), + lexeme(.period, "."), + lexeme(.integerLiteral, "1"), + lexeme(.period, "."), + lexeme(.integerLiteral, "0"), + lexeme(.eof, ""), + ]) + } + } + + func testUnicodeLiteral() throws { + do { + var data = + #""" + "\u{1234}" + """# + + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.stringLiteral, #""\u{1234}""#), + lexeme(.eof, ""), + ]) + } + } + + do { + var data = + #""" + "\u{12341234}" + """# + + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.unknown, #""\u{12341234}""#), + lexeme(.eof, ""), + ]) + } + } + } + + + func testNumberLiterals() throws { + var data = + """ + 1234567890 + 0b1010101 + 0xABC + 1.0 + 1.0e10 + 1.0E10 + 0xfeed_beef + 0xff.0p2 + -0xff.0p2 + +0xff.0p2 + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.integerLiteral, "1234567890"), + lexeme(.integerLiteral, "\n0b1010101", leading: 1), + lexeme(.integerLiteral, "\n0xABC", leading: 1), + lexeme(.floatingLiteral, "\n1.0", leading: 1), + lexeme(.floatingLiteral, "\n1.0e10", leading: 1), + lexeme(.floatingLiteral, "\n1.0E10", leading: 1), + lexeme(.integerLiteral, "\n0xfeed_beef", leading: 1), + lexeme(.floatingLiteral, "\n0xff.0p2", leading: 1), + lexeme(.prefixOperator, "\n-", leading: 1), + lexeme(.floatingLiteral, "0xff.0p2"), + lexeme(.prefixOperator, "\n+", leading: 1), + 
lexeme(.floatingLiteral, "0xff.0p2"), + lexeme(.eof, ""), + ]) + } + } + + func testRawStringLiterals() throws { + do { + var data = + """ + ###"this is a ##"raw"## string"### + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.stringLiteral, ####"###"this is a ##"raw"## string"###"####), + lexeme(.eof, ""), + ]) + } + } + + do { + var data = + """ + #"#"abc"# + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.stringLiteral, ####"#"#"abc"#"####), + lexeme(.eof, ""), + ]) + } + } + + do { + var data = + """ + ###"##"abc"### + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.stringLiteral, ####"###"##"abc"###"####), + lexeme(.eof, ""), + ]) + } + } + + do { + var data = + #####""" + ##"""abc"#### + """##### + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.stringLiteral, ###"##"""abc"##"###), + lexeme(.pound, "#"), + lexeme(.pound, "#"), + lexeme(.eof, ""), + ]) + } + } + } + + func testShebang() throws { + var data = + """ + #!/usr/bin/swiftc + let x = 42 + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.letKeyword, "#!/usr/bin/swiftc\nlet ", leading: 18, trailing: 1), + lexeme(.identifier, "x ", trailing: 1), + lexeme(.equal, "= ", trailing: 1), + lexeme(.integerLiteral, "42"), + lexeme(.eof, ""), + ]) + } + } + + func testDocComment() throws { + var data = + """ + /** hello */ + var x: Int + /* regular comment */ + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.varKeyword, "/** hello */\nvar ", leading: 13, trailing: 1), + lexeme(.identifier, "x"), + lexeme(.colon, ": ", trailing: 1), + lexeme(.identifier, "Int"), + lexeme(.eof, "\n/* regular comment */", leading: 22), + ]) + } + } + + func testMain() throws { + var data = + """ + /* TestApp */ + @main struct 
TestApp { + static func main() { + print("Hello World") + } + } + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.atSign, "/* TestApp */\n@", leading: 14), + lexeme(.identifier, "main ", trailing: 1), + lexeme(.structKeyword, "struct ", trailing: 1), + lexeme(.identifier, "TestApp ", trailing: 1), + lexeme(.leftBrace, "{"), + lexeme(.staticKeyword, "\n static ", leading: 3, trailing: 1), + lexeme(.funcKeyword, "func ", trailing: 1), + lexeme(.identifier, "main"), + lexeme(.leftParen, "("), + lexeme(.rightParen, ") ", trailing: 1), + lexeme(.leftBrace, "{"), + lexeme(.identifier, "\n print", leading: 5), + lexeme(.leftParen, "("), + lexeme(.stringLiteral, "\"Hello World\""), + lexeme(.rightParen, ")"), + lexeme(.rightBrace, "\n }", leading: 3), + lexeme(.rightBrace, "\n}", leading: 1), + lexeme(.eof, ""), + ]) + } + } + + func testRegexLexing() throws { + let fixtures: [(String, [Lexer.Lexeme])] = [ + ("/abc/", [ + lexeme(.regexLiteral, "/abc/"), + lexeme(.eof, ""), + ]), + ("#/abc/#", [ + lexeme(.regexLiteral, "#/abc/#"), + lexeme(.eof, ""), + ]), + ("###/abc/###", [ + lexeme(.regexLiteral, "###/abc/###"), + lexeme(.eof, ""), + ]), + (""" + #/ + a + b + /# + """, [ + lexeme(.regexLiteral, "#/\na\nb\n/#"), + lexeme(.eof, ""), + ]), + ("#/ \na\nb\n /#", [ + lexeme(.regexLiteral, "#/ \na\nb\n /#"), + lexeme(.eof, ""), + ]), + ("##/ \na\nb\n /##", [ + lexeme(.regexLiteral, "##/ \na\nb\n /##"), + lexeme(.eof, ""), + ]), + ("#/abc/def/#", [ + lexeme(.regexLiteral, "#/abc/def/#"), + lexeme(.eof, ""), + ]), + ("#/abc\\/#def/#", [ + lexeme(.regexLiteral, "#/abc\\/#def/#"), + lexeme(.eof, ""), + ]), + ("#/abc|#def/#", [ + lexeme(.regexLiteral, "#/abc|#def/#"), + lexeme(.eof, ""), + ]), + ("#/abc|#def/", [ + lexeme(.pound, "#"), + lexeme(.regexLiteral, "/abc|#def/"), + lexeme(.eof, ""), + ]), + ("#/abc\n/#", [ + lexeme(.pound, "#"), + lexeme(.unspacedBinaryOperator, "/"), + lexeme(.identifier, "abc"), + 
lexeme(.prefixOperator, "\n/", leading: 1), + lexeme(.pound, "#"), + lexeme(.eof, ""), + ]), + ("#/abc\r/#", [ + lexeme(.pound, "#"), + lexeme(.unspacedBinaryOperator, "/"), + lexeme(.identifier, "abc"), + lexeme(.prefixOperator, "\r/", leading: 1), + lexeme(.pound, "#"), + lexeme(.eof, ""), + ]), + ] + for (fixture, expectation) in fixtures { + var fixture = fixture + fixture.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, expectation) + } + } + } + + func testUnexpectedLexing() { + var data = "static func �() {}" + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.staticKeyword, "static ", trailing: 1), + lexeme(.funcKeyword, "func �", trailing: 4), + lexeme(.leftParen, "("), + lexeme(.rightParen, ") ", trailing: 1), + lexeme(.leftBrace, "{"), + lexeme(.rightBrace, "}"), + lexeme(.eof, ""), + ]) + } + } + + func testBOMLexing() { + let bom: Unicode.Scalar = "\u{feff}" + var data = + """ + \(bom)Hello + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.identifier, "\u{feff}Hello", leading: 3), + lexeme(.eof, "") + ]) + } + } + + func testConflictLexing() { + do { + var data = """ + // diff3-style conflict markers + + <<<<<<< HEAD:conflict_markers.swift // expected-error {{source control conflict marker in source file}} + var a : String = "A" + var b : String = "b" + ======= + var a : String = "a" + var b : String = "B" + >>>>>>> 18844bc65229786b96b89a9fc7739c0fc897905e:conflict_markers.swift + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.eof, """ + // diff3-style conflict markers + + <<<<<<< HEAD:conflict_markers.swift // expected-error {{source control conflict marker in source file}} + var a : String = "A" + var b : String = "b" + ======= + var a : String = "a" + var b : String = "B" + >>>>>>> 18844bc65229786b96b89a9fc7739c0fc897905e:conflict_markers.swift + """, leading: 300) + ]) + 
} + + } + + do { + var data = """ + // Perforce-style conflict markers + + >>>> ORIGINAL + var a : String = "A" + var b : String = "B" + ==== THEIRS + var a : String = "A" + var b : String = "b" + ==== YOURS + var a : String = "a" + var b : String = "B" + <<<< + + """ + data.withUTF8 { buf in + let lexemes = Lexer.lex(buf) + AssertEqualTokens(lexemes, [ + lexeme(.eof, """ + // Perforce-style conflict markers + + >>>> ORIGINAL + var a : String = "A" + var b : String = "B" + ==== THEIRS + var a : String = "A" + var b : String = "b" + ==== YOURS + var a : String = "a" + var b : String = "B" + <<<< + + """, leading: 204), + ]) + } + } + } + + func testUnicodeStringLiteralLexing() { + do { + var data = + #""" + "\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))" + """# + let lexemes = data.withUTF8 { buf in + Lexer.lex(buf) + } + AssertEqualTokens(lexemes, [ + lexeme(.stringLiteral, #""\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))""#), + lexeme(.eof, ""), + ]) + } + } +} + +extension Lexer { + fileprivate static func lex( // Collects the lexemes produced by `Lexer.tokenize`, stopping after the first `.eof` lexeme (which is included). + _ input: UnsafeBufferPointer<UInt8>, + from startIndex: Int = 0 + ) -> [Self.Lexeme] { + var lexemes = [Lexeme]() + for token in Lexer.tokenize(input, from: startIndex) { + lexemes.append(token) + + guard token.tokenKind != .eof else { + break + } + } + return lexemes + } +} diff --git a/Tests/SwiftParserTest/ParserTests.swift b/Tests/SwiftParserTest/ParserTests.swift new file mode 100644 index 00000000000..02af3e44b9c --- /dev/null +++ b/Tests/SwiftParserTest/ParserTests.swift @@ -0,0 +1,24 @@ +import XCTest +import SwiftSyntax +import SwiftParser + +public class ParserTests: XCTestCase { + func testSelfParse() throws { + let currentDir = URL(fileURLWithPath: #file) + .deletingLastPathComponent() + .deletingLastPathComponent() + .deletingLastPathComponent() + .appendingPathComponent("Sources") + let fileURLs = FileManager.default + .enumerator(at: currentDir, includingPropertiesForKeys: nil)! + .compactMap({ $0 as? 
URL }) + .filter({$0.pathExtension == "swift"}) + for fileURL in fileURLs { + XCTAssertNoThrow(try { + let fileContents = try String(contentsOf: fileURL) + let parsed = try Parser.parse(source: fileContents) + AssertStringsEqualWithDiff("\(parsed)", fileContents) + }()) + } + } +} diff --git a/Tests/SwiftParserTest/RecoveryTests.swift b/Tests/SwiftParserTest/RecoveryTests.swift new file mode 100644 index 00000000000..56092e85b00 --- /dev/null +++ b/Tests/SwiftParserTest/RecoveryTests.swift @@ -0,0 +1,352 @@ +import XCTest +@_spi(RawSyntax) import SwiftSyntax +@_spi(Testing) @_spi(RawSyntax) import SwiftParser +import _SwiftSyntaxTestSupport + +public class RecoveryTests: XCTestCase { + func testTopLevelCaseRecovery() throws { + try AssertParse({ $0.parseSourceFile() }) { + "/*#-editable-code Swift Platground editable area*/default/*#-end-editable-code*/" + } + + try AssertParse({ $0.parseSourceFile() }) { + "case:" + } + + try AssertParse({ $0.parseSourceFile() }) { + #"case: { ("Hello World") }"# + } + } + + func testBogusKeypathBaseRecovery() throws { + try AssertParse({ $0.parseSourceFile() }) { + "func nestThoseIfs() {\\n if false != true {\\n print \"\\(i)\"\\n" + } + } + + func testExtraneousRightBraceRecovery() throws { + try AssertParse({ $0.parseSourceFile() }) { + "class ABC { let def = ghi(jkl: mno) } }" + } + } + + func testMissingIfClauseIntroducer() throws { + try AssertParse({ $0.parseSourceFile() }) { + "if _ = 42 {}" + } + } + + func testMissingSubscriptReturnClause() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + struct Foo { + subscript(x: String) {} + } + """ + } + } + + func testSingleQuoteStringLiteral() throws { + try AssertParse({ $0.parseExpression() }) { + #""" + 'red' + """# + } + } + + func testClassWithLeadingNumber() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + class 23class { + // expected-error@-1 {{class name can only start with a letter or underscore, not a number}} + // expected-error@-2 {{'c' is 
not a valid digit in integer literal}} + func 24method() {} + // expected-error@-1 {{function name can only start with a letter or underscore, not a number}} + // expected-error@-2 {{'m' is not a valid digit in integer literal}} + } + """ + } + } + + func testAttributesOnStatements() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + func test1() { + @s return + } + func test2() { + @unknown return + } + """ + } + } + + func testMissingArrowInArrowExpr() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + [(Int) -> throws Int]() + let _ = [Int throws Int]() + """ + } + } + + func testBogusSwitchStatement() throws { + try AssertParse({ $0.parseStatement() }) { + """ + switch x { + print() + #if true + print() + #endif + case .A, .B: + break + } + """ + } + + try AssertParse({ $0.parseStatement() }) { + """ + switch x { + print() + #if ENABLE_C + case .NOT_EXIST: + break + case .C: + break + #endif + case .A, .B: + break + } + """ + } + } + + func testBogusLineLabel() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + LABEL: + """ + } + } + + func testStringBogusClosingDelimiters() throws { + try AssertParse({ $0.parseSourceFile() }) { + #"\\("# + } + + try AssertParse({ $0.parseExpression() }) { + ##""" + #"\\("# + """## + } + + try AssertParse({ $0.parseStringLiteral() }) { + #""" + " + """# + } + + try AssertParse({ $0.parseStringLiteral() }) { + #""" + "' + """# + } + } + + func testMissingArgumentToAttribute() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + @_dynamicReplacement( + func test_dynamic_replacement_for2() { + } + """ + } + } + + func testBogusThrowingTernary() throws { + try AssertParse({ $0.parseStatement() }) { + """ + do { + true ? 
() : throw opaque_error() + } catch _ { + } + """ + } + } + + func testAccessors() throws { + try AssertParse({ $0.parseDeclaration() }) { + """ + var bad1 : Int { + _read async { 0 } + } + """ + } + + try AssertParse({ $0.parseDeclaration() }) { + """ + var bad2 : Int { + get reasync { 0 } + } + """ + } + } + + func testExpressionMember() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + struct S { + / ###line 25 "line-directive.swift" + } + """ + } + } + + func testBogusProtocolRequirements() throws { + try AssertParse({ $0.parseDeclaration() }) { + """ + protocol P { + var prop : Int { get bogus rethrows set } + } + """ + } + } + + func testExtraSyntaxInDirective() throws { + try AssertParse({ $0.parseDeclaration() }) { + """ + #if os(iOS) + func foo() {} + } // expected-error{{unexpected '}' in conditional compilation block}} + #else + func bar() {} + func baz() {} + } // expected-error{{unexpected '}' in conditional compilation block}} + #endif + """ + } + } + + func testRecoverOneExtraLabel() throws { + try XCTAssertHasSubstructure( + "(first second third: Int)", + parse: { withParser(source: $0) { Syntax(raw: $0.parseFunctionSignature().raw) } }, + FunctionParameterSyntax( + attributes: nil, + firstName: TokenSyntax.identifier("first"), + secondName: TokenSyntax.identifier("second"), + UnexpectedNodesSyntax([Syntax(TokenSyntax.identifier("third"))]), + colon: TokenSyntax.colonToken(), + type: TypeSyntax(SimpleTypeIdentifierSyntax(name: TokenSyntax.identifier("Int"), genericArgumentClause: nil)), + ellipsis: nil, + defaultArgument: nil, + trailingComma: nil + ) + ) + } + + func testRecoverTwoExtraLabels() throws { + try XCTAssertHasSubstructure( + "(first second third fourth: Int)", + parse: { withParser(source: $0) { Syntax(raw: $0.parseFunctionSignature().raw) } }, + FunctionParameterSyntax( + attributes: nil, + firstName: TokenSyntax.identifier("first"), + secondName: TokenSyntax.identifier("second"), + 
UnexpectedNodesSyntax([Syntax(TokenSyntax.identifier("third")), Syntax(TokenSyntax.identifier("fourth"))]), + colon: TokenSyntax.colonToken(), + type: TypeSyntax(SimpleTypeIdentifierSyntax(name: TokenSyntax.identifier("Int"), genericArgumentClause: nil)), + ellipsis: nil, + defaultArgument: nil, + trailingComma: nil + ) + ) + } + + func testDontRecoverFromDeclKeyword() throws { + var source = """ + (first second third struct: Int) + """ + let (_, currentToken) = source.withUTF8 { buffer in + var parser = Parser(buffer) + return (parser.parseFunctionSignature(), parser.currentToken) + } + + // The 'struct' keyword should be taken as an indicator that a new decl + // starts here, so `parseFunctionSignature` shouldn't eat it. + XCTAssertEqual(currentToken.tokenKind, .structKeyword) + } + + func testRecoverFromParens() throws { + try XCTAssertHasSubstructure( + "(first second [third fourth]: Int)", + parse: { withParser(source: $0) { Syntax(raw: $0.parseFunctionSignature().raw) } }, + FunctionParameterSyntax( + attributes: nil, + firstName: TokenSyntax.identifier("first"), + secondName: TokenSyntax.identifier("second"), + UnexpectedNodesSyntax([ + Syntax(TokenSyntax.leftSquareBracketToken()), + Syntax(TokenSyntax.identifier("third")), + Syntax(TokenSyntax.identifier("fourth")), + Syntax(TokenSyntax.rightSquareBracketToken()) + ]), + colon: TokenSyntax.colonToken(), + type: TypeSyntax(SimpleTypeIdentifierSyntax(name: TokenSyntax.identifier("Int"), genericArgumentClause: nil)), + ellipsis: nil, + defaultArgument: nil, + trailingComma: nil + ) + ) + } + + func testDontRecoverFromUnbalancedParens() throws { + let source = """ + (first second [third fourth: Int) + """ + try withParser(source: source) { parser in + let signature = Syntax(raw: parser.parseFunctionSignature().raw) + let currentToken = parser.currentToken + XCTAssertEqual(currentToken.tokenKind, .identifier) + XCTAssertEqual(currentToken.tokenText, "fourth") + try XCTAssertHasSubstructure( + signature, + 
FunctionParameterSyntax( + attributes: nil, + firstName: TokenSyntax.identifier("first"), + secondName: TokenSyntax.identifier("second"), + colon: TokenSyntax(.colon, presence: .missing), + type: TypeSyntax(ArrayTypeSyntax( + leftSquareBracket: TokenSyntax.leftSquareBracketToken(), + elementType: TypeSyntax(SimpleTypeIdentifierSyntax(name: TokenSyntax.identifier("third"), genericArgumentClause: nil)), + rightSquareBracket: TokenSyntax(.rightSquareBracket, presence: .missing) + )), + ellipsis: nil, + defaultArgument: nil, + trailingComma: nil + ) + ) + } + } + + func testDontRecoverIfNewlineIsBeforeColon() throws { + var source = """ + (first second third + : Int) + """ + let (_, currentToken) = source.withUTF8 { buffer in + var parser = Parser(buffer) + return (parser.parseFunctionSignature(), parser.currentToken) + } + + XCTAssertEqual(currentToken.tokenKind, .colon) + } + + func testTextRecovery() throws { + try AssertParse({ $0.parseSourceFile() }) { + """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + """ + } + } +} diff --git a/Tests/SwiftParserTest/Statements.swift b/Tests/SwiftParserTest/Statements.swift new file mode 100644 index 00000000000..9758fa4e278 --- /dev/null +++ b/Tests/SwiftParserTest/Statements.swift @@ -0,0 +1,114 @@ +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser +import XCTest + +final class StatementTests: XCTestCase { + func testIf() throws { + try AssertParse({ $0.parseIfStatement() }) { + """ + if let x { } + """ + } + + try AssertParse({ $0.parseIfStatement() }) { + """ + if case* ! 
= x { + bar() + } + """ + } + } + + func testNestedIfs() throws { + try AssertParse({ $0.parseDeclaration() }) { + let nest = 22 + var example = "func nestThoseIfs() {\n" + for index in (0...nest) { + let indent = String(repeating: " ", count: index + 1) + example += indent + "if false != true {\n" + example += indent + " print \"\\(i)\"\n" + } + + for index in (0...nest).reversed() { + let indent = String(repeating: " ", count: index + 1) + example += indent + "}\n" + } + example += "}" + return example + } + } + + func testDo() throws { + try AssertParse({ $0.parseDoStatement() }) { + """ + do { + + } + """ + } + } + + func testDoCatch() throws { + try AssertParse({ $0.parseDoStatement() }) { + """ + do { + + } catch { + + } + """ + } + } + + func testReturn() throws { + try AssertParse({ $0.parseReturnStatement() }) { + "return" + } + + try AssertParse({ $0.parseReturnStatement() }) { + #""" + return "assert(\(assertChoices.joined(separator: " || ")))" + """# + } + + try AssertParse({ $0.parseReturnStatement() }) { + "return true ? 
nil : nil" + } + } + + func testSwitch() throws { + try AssertParse({ $0.parseStatement() }) { + """ + switch x { + case .A, .B: + break + } + """ + } + + try AssertParse({ $0.parseStatement() }) { + """ + switch 0 { + @$dollar case _: + break + } + """ + } + + try AssertParse({ $0.parseStatement() }) { + """ + switch x { + case .A: + break + case .B: + break + #if NEVER + #elseif ENABLE_C + case .C: + break + #endif + } + """ + } + } +} diff --git a/Tests/SwiftParserTest/TriviaParserTests.swift b/Tests/SwiftParserTest/TriviaParserTests.swift new file mode 100644 index 00000000000..5c46bb9ca08 --- /dev/null +++ b/Tests/SwiftParserTest/TriviaParserTests.swift @@ -0,0 +1,166 @@ +import XCTest +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser + +final class TriviaParserTests: XCTestCase { + + func testTriviaParsing() throws { + + XCTAssertEqual( + TriviaParser.parseTrivia(""" + /* */ + /**/ + /* /** */ */ + /** /* */ */ + """, position: .leading), + [ + .blockComment("/* */"), + .newlines(1), + .blockComment("/**/"), + .newlines(1), + .blockComment("/* /** */ */"), + .newlines(1), + .docBlockComment("/** /* */ */"), + ]) + + XCTAssertEqual( + TriviaParser.parseTrivia(""" + #!/bin/env swift + + + """, position: .leading), + [ + .shebang("#!/bin/env swift"), + .newlines(2), + ]) + + XCTAssertEqual( + TriviaParser.parseTrivia(""" + #!/bin/env swift + """, position: .trailing), + [ + .unexpectedText("#!/bin/env swift"), + ]) + + XCTAssertEqual( + TriviaParser.parseTrivia("\r\n\r\r\n\r\n\n", position: .leading), + [ + .carriageReturnLineFeeds(1), + .carriageReturns(1), + .carriageReturnLineFeeds(2), + .newlines(1) + ]) + + let bom: Unicode.Scalar = "\u{feff}" + var bomStr = "\(bom)/// Foo\n" + bomStr.withSyntaxText { triviaText in + XCTAssertEqual(TriviaParser.parseTrivia(triviaText, position: .leading), [ + .unexpectedText("\u{feff}"), + .docLineComment("/// Foo"), + .newlines(1) + ]) + } + + XCTAssertEqual( + TriviaParser.parseTrivia(""" + // 
diff3-style conflict markers + + <<<<<<< HEAD:conflict_markers.swift // expected-error {{source control conflict marker in source file}} + var a : String = "A" + var b : String = "b" + ======= + var a : String = "a" + var b : String = "B" + >>>>>>> 18844bc65229786b96b89a9fc7739c0fc897905e:conflict_markers.swift + + """, position: .leading), + [ + .lineComment("// diff3-style conflict markers"), + .newlines(2), + .unexpectedText(""" + <<<<<<< HEAD:conflict_markers.swift // expected-error {{source control conflict marker in source file}} + var a : String = "A" + var b : String = "b" + ======= + var a : String = "a" + var b : String = "B" + >>>>>>> 18844bc65229786b96b89a9fc7739c0fc897905e:conflict_markers.swift + """), + .newlines(1) + ]) + + XCTAssertEqual( + TriviaParser.parseTrivia(""" + // Perforce-style conflict markers + + >>>> ORIGINAL + var a : String = "A" + var b : String = "B" + ==== THEIRS + var a : String = "A" + var b : String = "b" + ==== YOURS + var a : String = "a" + var b : String = "B" + <<<< + + """, position: .leading), + [ + .lineComment("// Perforce-style conflict markers"), + .newlines(2), + .unexpectedText(""" + >>>> ORIGINAL + var a : String = "A" + var b : String = "B" + ==== THEIRS + var a : String = "A" + var b : String = "b" + ==== YOURS + var a : String = "a" + var b : String = "B" + <<<< + + """), + ]) + } + + func testRawSyntaxLazyTriviaPieces() throws { + withParser(source: """ + /// Foo. + func foo() { + } + """) { parser in + let fn = parser.parseDeclaration().as(RawFunctionDeclSyntax.self)! 
+ + XCTAssertEqual(fn.funcKeyword.leadingTriviaPieces, [ + .docLineComment("/// Foo."), + .newlines(1), + ]) + XCTAssertEqual(fn.funcKeyword.trailingTriviaPieces, [ + .spaces(1), + ]) + + XCTAssertEqual(fn.body!.leftBrace.leadingTriviaPieces, []) + XCTAssertEqual(fn.body!.leftBrace.trailingTriviaPieces, []) + + XCTAssertEqual(fn.body!.rightBrace.leadingTriviaPieces, [ + .newlines(1), + ]) + XCTAssertEqual(fn.body!.rightBrace.trailingTriviaPieces, []) + } + + } + + + func testSyntaxLazyTrivia() throws { + let source = """ + /* comment only */ + + """ + let sourceFileSyntax = try Parser.parse(source: source) + XCTAssertEqual(sourceFileSyntax.leadingTrivia, [ + .blockComment("/* comment only */"), + .newlines(1) + ]) + } +} diff --git a/Tests/SwiftParserTest/Types.swift b/Tests/SwiftParserTest/Types.swift new file mode 100644 index 00000000000..815d11e7bbe --- /dev/null +++ b/Tests/SwiftParserTest/Types.swift @@ -0,0 +1,26 @@ +@_spi(RawSyntax) import SwiftSyntax +@_spi(RawSyntax) import SwiftParser +import XCTest + +final class TypeTests: XCTestCase { + func testClosureParsing() throws { + try AssertParse({ $0.parseType() }) { + "(a, b) -> c" + } + + try AssertParse({ $0.parseType() }) { + "@MainActor (a, b) async throws -> c" + } + } + + func testGenericTypeWithTrivia() throws { + // N.B. Whitespace is significant here. + try AssertParse({ $0.parseType() }) { + """ + Foo + >> + """ + } + } +} diff --git a/Tests/SwiftParserTest/Utils.swift b/Tests/SwiftParserTest/Utils.swift new file mode 100644 index 00000000000..982c1e01e05 --- /dev/null +++ b/Tests/SwiftParserTest/Utils.swift @@ -0,0 +1,10 @@ +@_spi(Testing) import SwiftParser + +func withParser<T>(source: String, _ body: (inout Parser) throws -> T) rethrows -> T { // Builds a `Parser` over the UTF-8 bytes of a mutable copy of `source`, passes it `inout` to `body`, and returns (or rethrows) whatever `body` produces. + var source = source + return try source.withUTF8 { buffer in + var parser = Parser(buffer) + return try body(&parser) + } +} +