完善 w2 解析器

This commit is contained in:
chai2010
2025-10-04 04:43:03 +08:00
parent c3bc85c516
commit 02d5b7f982
10 changed files with 534 additions and 44 deletions

View File

@@ -260,4 +260,6 @@ func 任意底的对数 {
- nil: 未定义
- true/false: 真/假
- init: 准备
- main: 主控
- this: 自身

View File

@@ -23,7 +23,6 @@ import (
// If src != nil, readSource converts src to a []byte if possible;
// otherwise it returns an error. If src == nil, readSource returns
// the result of reading the file specified by filename.
//
func readSource(vfs fs.FS, filename string, src interface{}) ([]byte, error) {
if src != nil {
switch s := src.(type) {
@@ -50,7 +49,6 @@ func readSource(vfs fs.FS, filename string, src interface{}) ([]byte, error) {
// A Mode value is a set of flags (or 0).
// They control the amount of source code parsed and other optional
// parser functionality.
//
type Mode uint
const (
@@ -81,7 +79,6 @@ const (
// errors were found, the result is a partial AST (with ast.Bad* nodes
// representing the fragments of erroneous source code). Multiple errors
// are returned via a scanner.ErrorList which is sorted by file position.
//
func ParseFile(vfs fs.FS, fset *token.FileSet, filename string, src interface{}, mode Mode) (f *ast.File, err error) {
if fset == nil {
panic("parser.ParseFile: no token.FileSet provided (fset == nil)")
@@ -119,7 +116,11 @@ func ParseFile(vfs fs.FS, fset *token.FileSet, filename string, src interface{},
// parse source
p.init(fset, filename, text, mode)
f = p.parseFile()
if p.w2Mode {
f = p.parseFile_zh()
} else {
f = p.parseFile()
}
return
}
@@ -136,7 +137,6 @@ func ParseFile(vfs fs.FS, fset *token.FileSet, filename string, src interface{},
// If the directory couldn't be read, a nil map and the respective error are
// returned. If a parse error occurred, a non-nil but incomplete map and the
// first error encountered are returned.
//
func ParseDir(vfs fs.FS, fset *token.FileSet, path string, filter func(os.FileInfo) bool, mode Mode) (pkgs map[string]*ast.Package, first error) {
fd, err := os.Open(path)
if err != nil {
@@ -151,7 +151,7 @@ func ParseDir(vfs fs.FS, fset *token.FileSet, path string, filter func(os.FileIn
pkgs = make(map[string]*ast.Package)
for _, d := range list {
if strHasSuffix(d.Name(), ".wa") {
if strHasSuffix(d.Name(), ".wa", ".w2") {
if filter == nil || filter(d) {
filename := filepath.Join(path, d.Name())
if src, err := ParseFile(vfs, fset, filename, nil, mode); err == nil {
@@ -188,7 +188,6 @@ func strHasSuffix(s string, ext ...string) bool {
// The arguments have the same meaning as for ParseFile, but the source must
// be a valid Go (type or value) expression. Specifically, fset must not
// be nil.
//
func ParseExprFrom(fset *token.FileSet, filename string, src interface{}, mode Mode) (ast.Expr, error) {
if fset == nil {
panic("parser.ParseExprFrom: no token.FileSet provided (fset == nil)")
@@ -242,7 +241,6 @@ func ParseExprFrom(fset *token.FileSet, filename string, src interface{}, mode M
// ParseExpr is a convenience function for obtaining the AST of an expression x.
// The position information recorded in the AST is undefined. The filename used
// in error messages is the empty string.
//
func ParseExpr(x string) (ast.Expr, error) {
	// A fresh, throwaway file set and an empty filename are handed to
	// ParseExprFrom; mode 0 requests no optional parser behavior.
	fset := token.NewFileSet()
	src := []byte(x)
	return ParseExprFrom(fset, "", src, 0)
}

View File

@@ -776,12 +776,7 @@ func (p *parser) parseStructType(keyword token.Token) *ast.StructType {
// (parseFieldDecl will check and complain if necessary)
list = append(list, p.parseFieldDecl(scope))
}
rbrace := token.NoPos
if keyword == token.STRUCT {
rbrace = p.expect(token.RBRACE)
} else {
rbrace = p.expect(token.Zh_类终)
}
rbrace := p.expect(token.RBRACE)
return &ast.StructType{
TokPos: pos,
@@ -1017,7 +1012,7 @@ func (p *parser) parseMethodSpec(scope *ast.Scope) *ast.Field {
}
var keyword token.Token
if p.tok == token.FUNC || p.tok == token.Zh_函始 {
if p.tok == token.FUNC {
keyword = p.tok
p.next()
}
@@ -1118,11 +1113,11 @@ func (p *parser) tryIdentOrType() ast.Expr {
return typ
case token.LBRACK:
return p.parseArrayType()
case token.STRUCT, token.Zh_类始:
case token.STRUCT:
return p.parseStructType(p.tok)
case token.MUL:
return p.parsePointerType()
case token.FUNC, token.Zh_算始, token.Zh_函始:
case token.FUNC:
typ, _ := p.parseFuncType(p.tok)
return typ
case token.INTERFACE:
@@ -1245,7 +1240,7 @@ func (p *parser) parseOperand(lhs bool) ast.Expr {
rparen := p.expect(token.RPAREN)
return &ast.ParenExpr{Lparen: lparen, X: x, Rparen: rparen}
case token.FUNC, token.Zh_函始:
case token.FUNC:
return p.parseFuncTypeOrLit(p.tok)
}
@@ -1999,7 +1994,7 @@ func (p *parser) parseIfStmt(keyword token.Token) *ast.IfStmt {
if p.tok == token.ELSE {
p.next()
switch p.tok {
case token.IF, token.Zh_若始:
case token.IF:
else_ = p.parseIfStmt(p.tok)
case token.LBRACE:
else_ = p.parseBlockStmt()
@@ -2037,7 +2032,7 @@ func (p *parser) parseCaseClause(typeSwitch bool) *ast.CaseClause {
pos := p.pos
var list []ast.Expr
var keyword token.Token
if p.tok == token.CASE || p.tok == token.Zh_岔道 {
if p.tok == token.CASE {
keyword = p.tok
p.next()
if typeSwitch {
@@ -2047,11 +2042,7 @@ func (p *parser) parseCaseClause(typeSwitch bool) *ast.CaseClause {
}
} else {
keyword = p.tok
if p.tok == token.DEFAULT {
p.expect(token.DEFAULT)
} else {
p.expect(token.Zh_主道)
}
p.expect(token.DEFAULT)
}
colon := p.expect(token.COLON)
@@ -2239,7 +2230,7 @@ func (p *parser) parseStmt() (s ast.Stmt) {
switch p.tok {
case token.CONST, token.TYPE, token.VAR:
// TODO(chai2010): var declaration not allowed in func body
s = &ast.DeclStmt{Decl: p.parseDecl(stmtStart, false)}
s = &ast.DeclStmt{Decl: p.parseDecl(stmtStart)}
case
// tokens that may start an expression
token.IDENT, token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING, token.FUNC, token.LPAREN, // operands
@@ -2252,20 +2243,20 @@ func (p *parser) parseStmt() (s ast.Stmt) {
if _, isLabeledStmt := s.(*ast.LabeledStmt); !isLabeledStmt {
p.expectSemi()
}
case token.DEFER, token.Zh_押后:
case token.DEFER:
s = p.parseDeferStmt(p.tok)
case token.RETURN, token.Zh_返回:
case token.RETURN:
s = p.parseReturnStmt(p.tok)
case token.BREAK, token.CONTINUE:
s = p.parseBranchStmt(p.tok)
case token.LBRACE:
s = p.parseBlockStmt()
p.expectSemi()
case token.IF, token.Zh_若始:
case token.IF:
s = p.parseIfStmt(p.tok)
case token.SWITCH, token.Zh_岔始:
case token.SWITCH:
s = p.parseSwitchStmt(p.tok)
case token.FOR, token.Zh_当始:
case token.FOR:
s = p.parseForStmt(p.tok)
case token.SEMICOLON:
// Is it ever possible to have an implicit semicolon
@@ -2393,11 +2384,11 @@ func (p *parser) parseValueSpec(doc *ast.CommentGroup, keyword token.Token, iota
p.expectSemi() // call before accessing p.linecomment
switch keyword {
case token.VAR, token.GLOBAL, token.Zh_全局:
case token.VAR, token.GLOBAL:
if typ == nil && values == nil {
p.error(pos, "missing variable type or initialization")
}
case token.CONST, token.Zh_常量, token.Zh_定义:
case token.CONST:
if typ == nil && values == nil && iota == 0 {
p.error(pos, "missing const type or initialization")
}
@@ -2560,7 +2551,7 @@ func (p *parser) parseFuncDecl(keyword token.Token) *ast.FuncDecl {
return decl
}
func (p *parser) parseDecl(sync map[token.Token]bool, isPkgScope bool) ast.Decl {
func (p *parser) parseDecl(sync map[token.Token]bool) ast.Decl {
if p.trace {
defer un(trace(p, "Declaration"))
}
@@ -2570,21 +2561,15 @@ func (p *parser) parseDecl(sync map[token.Token]bool, isPkgScope bool) ast.Decl
case token.CONST, token.VAR, token.GLOBAL:
f = p.parseValueSpec
case token.Zh_常量, token.Zh_定义, token.Zh_全局:
f = p.parseValueSpec
case token.TYPE:
f = p.parseTypeSpec
case token.FUNC:
return p.parseFuncDecl(p.tok)
case token.Zh_算始, token.Zh_函始:
return p.parseFuncDecl(p.tok)
default:
pos := p.pos
p.errorExpected(pos, "declaration")
p.errorExpected(pos, "declaration:"+p.lit+p.tok.String())
p.advance(sync)
return &ast.BadDecl{From: pos, To: p.pos}
}
@@ -2635,14 +2620,14 @@ func (p *parser) parseFile() *ast.File {
var decls []ast.Decl
if p.mode&PackageClauseOnly == 0 {
// import decls
for p.tok == token.IMPORT || p.tok == token.Zh_引入 {
for p.tok == token.IMPORT {
decls = append(decls, p.parseGenDecl(p.tok, p.parseImportSpec))
}
if p.mode&ImportsOnly == 0 {
// rest of package body
for p.tok != token.EOF {
decls = append(decls, p.parseDecl(declStart, true))
decls = append(decls, p.parseDecl(declStart))
}
}
}

View File

@@ -0,0 +1,99 @@
// Copyright (C) 2025 武汉凹语言科技有限公司
// SPDX-License-Identifier: AGPL-3.0-or-later
package parser
import (
"wa-lang.org/wa/internal/ast"
"wa-lang.org/wa/internal/token"
)
// parseFile_zh parses a whole w2 (Chinese-keyword) source file and returns
// its AST.
//
// It returns nil when errors were already recorded before parsing starts.
// No package clause is consumed here: the resulting file's Name is an empty
// identifier and Package is the parser position at entry.
func (p *parser) parseFile_zh() *ast.File {
	if p.trace {
		defer un(trace(p, "文件"))
	}

	// Nothing useful can be produced once errors have been reported.
	if p.errors.Len() != 0 {
		return nil
	}

	fileDoc := p.leadComment
	startPos := p.pos
	pkgName := &ast.Ident{}

	// The outermost scope opened here becomes the package scope.
	p.openScope()
	p.pkgScope = p.topScope

	var decls []ast.Decl
	if p.mode&PackageClauseOnly == 0 {
		// Leading import declarations.
		for p.tok == token.Zh_引入 {
			decls = append(decls, p.parseGenDecl_zh(p.tok, p.parseImportSpec_zh))
		}

		// Remaining top-level declarations, unless only imports were requested.
		if p.mode&ImportsOnly == 0 {
			for p.tok != token.EOF {
				decls = append(decls, p.parseDecl_zh(declStart))
			}
		}
	}

	p.closeScope()
	assert(p.topScope == nil, "unbalanced scopes")
	assert(p.labelScope == nil, "unbalanced label scopes")

	// Resolve identifiers against the package scope. Identifiers that are
	// still unknown are compacted to the front of p.unresolved (the slice
	// is filtered in place, reusing its backing array).
	pending := p.unresolved[:0]
	for _, id := range p.unresolved {
		assert(id.Obj == unresolved, "object already resolved")
		id.Obj = p.pkgScope.Lookup(id.Name) // also removes unresolved sentinel
		if id.Obj == nil {
			pending = append(pending, id)
		}
	}

	return &ast.File{
		Doc:        fileDoc,
		Package:    startPos,
		Name:       pkgName,
		Decls:      decls,
		Scope:      p.pkgScope,
		Imports:    p.imports,
		Unresolved: pending,
		Comments:   p.comments,
	}
}
// expectSemi_zh consumes the semicolon that terminates a w2 construct.
//
// Nothing is consumed and nothing is reported when the current token is
// ')' or '}', where the semicolon is optional. A ',' is accepted in place
// of ';' but reported as an error. Any other token is reported and the
// parser then skips ahead via p.advance(stmtStart).
//
// Diagnostics are emitted through errorExpected_zh so that w2 sources get
// the Chinese "期望 …, 实获 …" wording instead of the English message
// produced by errorExpected.
func (p *parser) expectSemi_zh() {
	switch p.tok {
	case token.RPAREN, token.RBRACE:
		// semicolon is optional before a closing ')' or '}'
	case token.COMMA:
		// permit a ',' instead of a ';' but complain
		p.errorExpected_zh(p.pos, "';'")
		p.next()
	case token.SEMICOLON:
		p.next()
	default:
		p.errorExpected_zh(p.pos, "';'")
		p.advance(stmtStart)
	}
}
// errorExpected_zh reports an "expected …" error at pos using Chinese
// wording ("期望 <msg>").
//
// When pos is the parser's current position, the message is extended with
// what was actually found ("实获 …"): a newline marker for an automatically
// inserted semicolon, the literal text for literal tokens, or the quoted
// token name otherwise.
func (p *parser) errorExpected_zh(pos token.Pos, msg string) {
	text := "期望 " + msg

	// Only the current token can be described; for any other position the
	// bare expectation is reported.
	if pos == p.pos {
		if p.tok == token.SEMICOLON && p.lit == "\n" {
			text += ", 实获 换行符"
		} else if p.tok.IsLiteral() {
			// print 123 rather than 'INT', etc.
			text += ", 实获 " + p.lit
		} else {
			text += ", 实获 '" + p.tok.String() + "'"
		}
	}

	p.error(pos, text)
}

View File

@@ -0,0 +1,34 @@
// Copyright (C) 2025 武汉凹语言科技有限公司
// SPDX-License-Identifier: AGPL-3.0-or-later
package parser
import (
"wa-lang.org/wa/internal/ast"
"wa-lang.org/wa/internal/token"
)
// parseBody_zh parses the statements of a w2 function body and the keyword
// that closes it.
//
// keyword is the token that opened the function (Zh_算始 or Zh_函始); it
// selects the matching closing token (Zh_算终 or Zh_函终). Any other keyword
// panics. scope becomes the current scope while the statements are parsed.
// The returned block has no opening position (Lbrace is token.NoPos); Rbrace
// is the position returned by expecting the closing keyword.
func (p *parser) parseBody_zh(keyword token.Token, scope *ast.Scope) *ast.BlockStmt {
	if p.trace {
		defer un(trace(p, keyword.String()))
	}

	p.topScope = scope // open function scope
	p.openLabelScope()
	stmts := p.parseStmtList_zh()
	p.closeLabelScope()
	p.closeScope()

	// Pick the terminator that pairs with the opening keyword.
	var closer token.Token
	switch keyword {
	case token.Zh_算始:
		closer = token.Zh_算终
	case token.Zh_函始:
		closer = token.Zh_函终
	default:
		panic("unreachable")
	}
	end := p.expect(closer)

	return &ast.BlockStmt{Lbrace: token.NoPos, List: stmts, Rbrace: end}
}

View File

@@ -0,0 +1,131 @@
// Copyright (C) 2025 武汉凹语言科技有限公司
// SPDX-License-Identifier: AGPL-3.0-or-later
package parser
import (
"wa-lang.org/wa/internal/ast"
"wa-lang.org/wa/internal/token"
)
// parseDecl_zh parses one top-level w2 declaration, dispatching on the
// current token:
//
//   - Zh_常量, Zh_定义, Zh_全局: value declarations (parseValueSpec_zh)
//   - Zh_类型: type declarations (parseTypeSpec_zh)
//   - Zh_算始, Zh_函始: function declarations (parseFuncDecl_zh)
//
// Any other token is reported as an error; the parser then skips ahead to a
// token in sync and an *ast.BadDecl covering the skipped range is returned.
//
// Generic declarations are parsed by parseGenDecl_zh, the same routine the
// w2 file parser uses for imports, so every w2 declaration goes through the
// _zh code path.
func (p *parser) parseDecl_zh(sync map[token.Token]bool) ast.Decl {
	if p.trace {
		defer un(trace(p, "Declaration"))
	}

	var f parseSpecFunction
	switch p.tok {
	case token.Zh_常量, token.Zh_定义, token.Zh_全局:
		f = p.parseValueSpec_zh

	case token.Zh_类型:
		f = p.parseTypeSpec_zh

	case token.Zh_算始, token.Zh_函始:
		return p.parseFuncDecl_zh(p.tok)

	default:
		pos := p.pos
		p.errorExpected_zh(pos, "宣告:"+p.lit+p.tok.String())
		p.advance(sync)
		return &ast.BadDecl{From: pos, To: p.pos}
	}

	return p.parseGenDecl_zh(p.tok, f)
}
// parseGenDecl_zh parses a generic declaration introduced by keyword.
//
// Two forms are accepted: a single spec directly after the keyword, or a
// parenthesized group of specs. f parses each spec; it receives the lead
// comment (nil for the single-spec form), the keyword, and the zero-based
// index of the spec within the group. A parenthesized group must be followed
// by a semicolon.
func (p *parser) parseGenDecl_zh(keyword token.Token, f parseSpecFunction) *ast.GenDecl {
	if p.trace {
		defer un(trace(p, "GenDecl("+keyword.String()+")"))
	}

	// The lead comment must be captured before the keyword is consumed.
	decl := &ast.GenDecl{
		Doc: p.leadComment,
		Tok: keyword,
	}
	decl.TokPos = p.expect(keyword)

	// Single-spec form: no parentheses, no spec-level doc comment.
	if p.tok != token.LPAREN {
		decl.Specs = append(decl.Specs, f(nil, keyword, 0))
		return decl
	}

	// Grouped form: "( spec; spec; ... )".
	decl.Lparen = p.pos
	p.next()
	for index := 0; p.tok != token.RPAREN && p.tok != token.EOF; index++ {
		decl.Specs = append(decl.Specs, f(p.leadComment, keyword, index))
	}
	decl.Rparen = p.expect(token.RPAREN)
	p.expectSemi()

	return decl
}
// parseFuncDecl_zh parses a w2 function or method declaration that starts
// with keyword.
//
// A plain function is written "<keyword> name signature body"; a method is
// written "<keyword> Type.name signature body". For a method, a receiver
// named "自身" of type *Type is synthesized and declared in the function
// scope. The body is parsed by parseBody_zh and must be followed by a
// semicolon.
//
// Plain functions are declared in the package scope, except those named
// "准备".
func (p *parser) parseFuncDecl_zh(keyword token.Token) *ast.FuncDecl {
	if p.trace {
		defer un(trace(p, "FunctionDecl"))
	}

	doc := p.leadComment
	keywordPos := p.expect(keyword)
	funcScope := ast.NewScope(p.topScope) // function scope

	name := p.parseIdent()

	// "Type.method": the identifier just parsed is the receiver type and
	// the real function name follows the period.
	var recv *ast.FieldList
	if p.tok == token.PERIOD {
		recvIdent := &ast.Ident{Name: "自身"}
		recvField := &ast.Field{
			Names: []*ast.Ident{recvIdent},
			Type:  &ast.StarExpr{X: name},
		}
		recv = &ast.FieldList{List: []*ast.Field{recvField}}
		p.declare(recvField, nil, funcScope, ast.Var, recvIdent)

		p.next()
		name = p.parseIdent()
	}

	params, results, arrowPos := p.parseSignature(funcScope)
	body := p.parseBody_zh(keyword, funcScope)
	p.expectSemi_zh()

	fn := &ast.FuncDecl{
		Doc:  doc,
		Recv: recv,
		Name: name,
		Type: &ast.FuncType{
			TokPos:   keywordPos,
			Tok:      keyword,
			Params:   params,
			ArrowPos: arrowPos,
			Results:  results,
		},
		Body: body,
	}

	// Wa spec: the scope of a function (but not a method) declared at top
	// level is the package block. 准备() functions cannot be referred to
	// and there may be more than one, so they are kept out of pkgScope.
	if recv == nil && name.Name != "准备" {
		p.declare(fn, nil, p.pkgScope, ast.Fun, name)
	}

	return fn
}

View File

@@ -0,0 +1,59 @@
// Copyright (C) 2025 武汉凹语言科技有限公司
// SPDX-License-Identifier: AGPL-3.0-or-later
package parser
import (
"wa-lang.org/wa/internal/ast"
"wa-lang.org/wa/internal/token"
)
// parseImportSpec_zh parses one w2 import spec: a string-literal path,
// optionally followed by an arrow token and either "." or an identifier
// naming the import, then a semicolon.
//
// An invalid path is reported but parsing continues. The resulting spec is
// appended to p.imports and returned. The keyword and index arguments
// required by parseSpecFunction are unused.
func (p *parser) parseImportSpec_zh(doc *ast.CommentGroup, _ token.Token, _ int) ast.Spec {
	if p.trace {
		defer un(trace(p, "ImportSpec"))
	}

	pathPos := p.pos
	var importPath string
	if p.tok != token.STRING {
		p.expect(token.STRING) // use expect() error handling
	} else {
		importPath = p.lit
		if !isValidImport(importPath) {
			p.error(pathPos, "invalid import path: "+importPath)
		}
		p.next()
	}

	// Optional "=> asname" suffix.
	var (
		alias    *ast.Ident
		arrowPos token.Pos
	)
	if p.tok == token.ARROW {
		arrowPos = p.pos
		p.next() // skip =>

		if p.tok == token.PERIOD {
			alias = &ast.Ident{NamePos: p.pos, Name: "."}
			p.next()
		} else if p.tok == token.IDENT {
			alias = p.parseIdent()
		} else {
			p.expect(token.IDENT)
		}
	}

	p.expectSemi_zh() // call before accessing p.linecomment

	// Record the import both in the returned spec and on the parser.
	spec := &ast.ImportSpec{
		Doc:      doc,
		Name:     alias,
		Path:     &ast.BasicLit{ValuePos: pathPos, Kind: token.STRING, Value: importPath},
		ArrowPos: arrowPos,
		Comment:  p.lineComment,
	}
	p.imports = append(p.imports, spec)

	return spec
}

View File

@@ -0,0 +1,77 @@
// Copyright (C) 2025 武汉凹语言科技有限公司
// SPDX-License-Identifier: AGPL-3.0-or-later
package parser
import (
"wa-lang.org/wa/internal/ast"
"wa-lang.org/wa/internal/token"
)
// parseStmtList_zh parses statements until a token that ends a w2 statement
// list is reached: a case/default keyword (Zh_岔道, Zh_主道), a function
// terminator (Zh_函终, Zh_算终), a closing '}', or EOF.
//
// Stopping at '}' is required for termination: parseStmt_zh yields an empty
// statement for '}' without consuming it, so continuing the loop on a stray
// '}' would never make progress. The caller is left to report the
// unexpected token.
func (p *parser) parseStmtList_zh() (list []ast.Stmt) {
	if p.trace {
		defer un(trace(p, "StatementList"))
	}

	for p.tok != token.Zh_岔道 && p.tok != token.Zh_主道 &&
		p.tok != token.Zh_函终 && p.tok != token.Zh_算终 &&
		p.tok != token.RBRACE && p.tok != token.EOF {
		list = append(list, p.parseStmt_zh())
	}

	return
}
// parseStmt_zh parses a single statement inside a w2 function body,
// dispatching on the current token. Both the English and the Chinese
// keywords for defer, return, if, switch and for are accepted.
//
// A ';' produces an empty statement and is consumed; a '}' produces an
// implicit empty statement and is NOT consumed. Tokens that cannot start a
// statement are reported, skipped via p.advance(stmtStart), and returned as
// an *ast.BadStmt.
func (p *parser) parseStmt_zh() (s ast.Stmt) {
	if p.trace {
		defer un(trace(p, "Statement"))
	}

	switch p.tok {
	case token.SEMICOLON:
		// Is it ever possible to have an implicit semicolon
		// producing an empty statement in a valid program?
		// (handle correctly anyway)
		empty := &ast.EmptyStmt{Semicolon: p.pos, Implicit: p.lit == "\n"}
		p.next()
		return empty

	case token.RBRACE:
		// a semicolon may be omitted before a closing "}"
		return &ast.EmptyStmt{Semicolon: p.pos, Implicit: true}

	case token.LBRACE:
		block := p.parseBlockStmt()
		p.expectSemi()
		return block

	case token.DEFER, token.Zh_押后:
		return p.parseDeferStmt(p.tok)

	case token.RETURN, token.Zh_返回:
		return p.parseReturnStmt(p.tok)

	case token.BREAK, token.CONTINUE:
		return p.parseBranchStmt(p.tok)

	case token.IF, token.Zh_若始:
		return p.parseIfStmt(p.tok)

	case token.SWITCH, token.Zh_岔始:
		return p.parseSwitchStmt(p.tok)

	case token.FOR, token.Zh_当始:
		return p.parseForStmt(p.tok)

	case token.CONST, token.TYPE, token.VAR:
		// TODO(chai2010): var declaration not allowed in func body
		// NOTE(review): parseDecl_zh only recognizes the Zh_* declaration
		// keywords, so these tokens appear to end in its error branch -
		// confirm whether the Zh_* keywords were intended here.
		return &ast.DeclStmt{Decl: p.parseDecl_zh(stmtStart)}

	case
		// tokens that may start an expression
		token.IDENT, token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING, token.FUNC, token.LPAREN, // operands
		token.LBRACK, token.STRUCT, token.MAP, token.INTERFACE, // composite types
		token.ADD, token.SUB, token.MUL, token.AND, token.XOR, token.NOT: // unary operators
		stmt, _ := p.parseSimpleStmt(labelOk)
		// Labeled statements are parsed by parseSimpleStmt because of the
		// required look-ahead; no semicolon is expected after them.
		if _, labeled := stmt.(*ast.LabeledStmt); !labeled {
			p.expectSemi()
		}
		return stmt
	}

	// no statement found
	start := p.pos
	p.errorExpected(start, "statement")
	p.advance(stmtStart)
	return &ast.BadStmt{From: start, To: p.pos}
}

View File

@@ -0,0 +1,33 @@
// Copyright (C) 2025 武汉凹语言科技有限公司
// SPDX-License-Identifier: AGPL-3.0-or-later
package parser
import (
"wa-lang.org/wa/internal/ast"
"wa-lang.org/wa/internal/token"
)
// parseTypeSpec_zh parses one w2 type spec: a name, an optional ':', the
// type itself, and a terminating semicolon. The keyword and index arguments
// required by parseSpecFunction are unused.
func (p *parser) parseTypeSpec_zh(doc *ast.CommentGroup, _ token.Token, _ int) ast.Spec {
	if p.trace {
		defer un(trace(p, "TypeSpec"))
	}

	name := p.parseIdent()
	ts := &ast.TypeSpec{Doc: doc, Name: name}

	// The name is declared in the current scope before its type is parsed:
	// per the Go spec, the scope of a type identifier begins at the
	// identifier in the TypeSpec. (Global identifiers are resolved in a
	// separate phase after parsing.)
	p.declare(ts, nil, p.topScope, ast.Typ, name)

	// The ':' between the name and the type is optional.
	if p.tok == token.COLON {
		ts.ColonPos = p.pos
		p.next()
	}

	ts.Type = p.parseType()

	p.expectSemi() // call before accessing p.linecomment
	ts.Comment = p.lineComment

	return ts
}

View File

@@ -0,0 +1,72 @@
// Copyright (C) 2025 武汉凹语言科技有限公司
// SPDX-License-Identifier: AGPL-3.0-or-later
package parser
import (
"wa-lang.org/wa/internal/ast"
"wa-lang.org/wa/internal/token"
)
// parseValueSpec_zh parses one w2 value spec (constant, definition or
// global variable): an identifier list, an optional ':' and type, an
// optional "= values" initializer, and a terminating semicolon.
//
// keyword is the declaration keyword the spec belongs to. The w2 parser
// passes Zh_常量, Zh_定义 or Zh_全局; the English VAR, GLOBAL and CONST
// tokens are still recognized as well. Zh_全局 is treated like VAR/GLOBAL
// (a variable); Zh_常量 and Zh_定义 are treated like CONST. iota is the
// zero-based index of the spec within a parenthesized group and is recorded
// with the declared objects.
//
// The declared names are added to the current scope after the spec has been
// parsed.
func (p *parser) parseValueSpec_zh(doc *ast.CommentGroup, keyword token.Token, iota int) ast.Spec {
	if p.trace {
		defer un(trace(p, keyword.String()+"Spec"))
	}

	pos := p.pos
	idents := p.parseIdentList()

	// Outside wagoMode a ':' separates the names from the type. It may be
	// left out only when '=' or ';' follows directly; anything else is
	// reported by expecting the colon.
	var colonPos token.Pos
	if !p.wagoMode {
		if p.tok == token.COLON {
			colonPos = p.pos
			p.next()
		} else if p.tok != token.ASSIGN && p.tok != token.SEMICOLON {
			p.expect(token.COLON)
		}
	}

	typ := p.tryType()

	var values []ast.Expr
	// always permit optional initialization for more tolerant parsing
	if p.tok == token.ASSIGN {
		p.next()
		values = p.parseRhsList()
	}
	p.expectSemi() // call before accessing p.linecomment

	// Classify the keyword once. The Zh_* tokens are the ones parseDecl_zh
	// actually passes in; without them the checks below would never run for
	// w2 sources and Zh_全局 names would be declared as constants.
	isVar := keyword == token.VAR || keyword == token.GLOBAL || keyword == token.Zh_全局
	isConst := keyword == token.CONST || keyword == token.Zh_常量 || keyword == token.Zh_定义

	switch {
	case isVar:
		if typ == nil && values == nil {
			p.error(pos, "missing variable type or initialization")
		}
	case isConst:
		// Later specs of a group (iota > 0) may omit both type and value.
		if typ == nil && values == nil && iota == 0 {
			p.error(pos, "missing const type or initialization")
		}
	}

	// Wa spec: The scope of a constant or variable identifier declared inside
	// a function begins at the end of the ConstSpec or VarSpec and ends at
	// the end of the innermost containing block.
	// (Global identifiers are resolved in a separate phase after parsing.)
	spec := &ast.ValueSpec{
		Doc:      doc,
		Names:    idents,
		ColonPos: colonPos,
		Type:     typ,
		Values:   values,
		Comment:  p.lineComment,
	}

	kind := ast.Con
	if isVar {
		kind = ast.Var
	}
	p.declare(spec, iota, p.topScope, kind, idents...)

	return spec
}