mirror of
https://gitee.com/wa-lang/wa.git
synced 2025-12-06 17:19:15 +08:00
添加第三方的pdf包
This commit is contained in:
27
internal/3rdparty/pdf/LICENSE
vendored
Normal file
27
internal/3rdparty/pdf/LICENSE
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
3
internal/3rdparty/pdf/README.md
vendored
Normal file
3
internal/3rdparty/pdf/README.md
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
go get rsc.io/pdf
|
||||
|
||||
http://godoc.org/rsc.io/pdf
|
||||
529
internal/3rdparty/pdf/lex.go
vendored
Normal file
529
internal/3rdparty/pdf/lex.go
vendored
Normal file
@@ -0,0 +1,529 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Reading of PDF tokens and objects from a raw byte stream.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// A token is a PDF token in the input stream, one of the following Go types:
|
||||
//
|
||||
// bool, a PDF boolean
|
||||
// int64, a PDF integer
|
||||
// float64, a PDF real
|
||||
// string, a PDF string literal
|
||||
// keyword, a PDF keyword
|
||||
// name, a PDF name without the leading slash
|
||||
//
|
||||
type token interface{}
|
||||
|
||||
// A name is a PDF name, without the leading slash.
|
||||
type name string
|
||||
|
||||
// A keyword is a PDF keyword.
|
||||
// Delimiter tokens used in higher-level syntax,
|
||||
// such as "<<", ">>", "[", "]", "{", "}", are also treated as keywords.
|
||||
type keyword string
|
||||
|
||||
// A buffer holds buffered input bytes from the PDF file.
|
||||
type buffer struct {
|
||||
r io.Reader // source of data
|
||||
buf []byte // buffered data
|
||||
pos int // read index in buf
|
||||
offset int64 // offset at end of buf; aka offset of next read
|
||||
tmp []byte // scratch space for accumulating token
|
||||
unread []token // queue of read but then unread tokens
|
||||
allowEOF bool
|
||||
allowObjptr bool
|
||||
allowStream bool
|
||||
eof bool
|
||||
key []byte
|
||||
useAES bool
|
||||
objptr objptr
|
||||
}
|
||||
|
||||
// newBuffer returns a new buffer reading from r at the given offset.
|
||||
func newBuffer(r io.Reader, offset int64) *buffer {
|
||||
return &buffer{
|
||||
r: r,
|
||||
offset: offset,
|
||||
buf: make([]byte, 0, 4096),
|
||||
allowObjptr: true,
|
||||
allowStream: true,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) seek(offset int64) {
|
||||
b.offset = offset
|
||||
b.buf = b.buf[:0]
|
||||
b.pos = 0
|
||||
b.unread = b.unread[:0]
|
||||
}
|
||||
|
||||
func (b *buffer) readByte() byte {
|
||||
if b.pos >= len(b.buf) {
|
||||
b.reload()
|
||||
if b.pos >= len(b.buf) {
|
||||
return '\n'
|
||||
}
|
||||
}
|
||||
c := b.buf[b.pos]
|
||||
b.pos++
|
||||
return c
|
||||
}
|
||||
|
||||
func (b *buffer) errorf(format string, args ...interface{}) {
|
||||
panic(fmt.Errorf(format, args...))
|
||||
}
|
||||
|
||||
func (b *buffer) reload() bool {
|
||||
n := cap(b.buf) - int(b.offset%int64(cap(b.buf)))
|
||||
n, err := b.r.Read(b.buf[:n])
|
||||
if n == 0 && err != nil {
|
||||
b.buf = b.buf[:0]
|
||||
b.pos = 0
|
||||
if b.allowEOF && err == io.EOF {
|
||||
b.eof = true
|
||||
return false
|
||||
}
|
||||
b.errorf("malformed PDF: reading at offset %d: %v", b.offset, err)
|
||||
return false
|
||||
}
|
||||
b.offset += int64(n)
|
||||
b.buf = b.buf[:n]
|
||||
b.pos = 0
|
||||
return true
|
||||
}
|
||||
|
||||
func (b *buffer) seekForward(offset int64) {
|
||||
for b.offset < offset {
|
||||
if !b.reload() {
|
||||
return
|
||||
}
|
||||
}
|
||||
b.pos = len(b.buf) - int(b.offset-offset)
|
||||
}
|
||||
|
||||
func (b *buffer) readOffset() int64 {
|
||||
return b.offset - int64(len(b.buf)) + int64(b.pos)
|
||||
}
|
||||
|
||||
func (b *buffer) unreadByte() {
|
||||
if b.pos > 0 {
|
||||
b.pos--
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) unreadToken(t token) {
|
||||
b.unread = append(b.unread, t)
|
||||
}
|
||||
|
||||
func (b *buffer) readToken() token {
|
||||
if n := len(b.unread); n > 0 {
|
||||
t := b.unread[n-1]
|
||||
b.unread = b.unread[:n-1]
|
||||
return t
|
||||
}
|
||||
|
||||
// Find first non-space, non-comment byte.
|
||||
c := b.readByte()
|
||||
for {
|
||||
if isSpace(c) {
|
||||
if b.eof {
|
||||
return io.EOF
|
||||
}
|
||||
c = b.readByte()
|
||||
} else if c == '%' {
|
||||
for c != '\r' && c != '\n' {
|
||||
c = b.readByte()
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '<':
|
||||
if b.readByte() == '<' {
|
||||
return keyword("<<")
|
||||
}
|
||||
b.unreadByte()
|
||||
return b.readHexString()
|
||||
|
||||
case '(':
|
||||
return b.readLiteralString()
|
||||
|
||||
case '[', ']', '{', '}':
|
||||
return keyword(string(c))
|
||||
|
||||
case '/':
|
||||
return b.readName()
|
||||
|
||||
case '>':
|
||||
if b.readByte() == '>' {
|
||||
return keyword(">>")
|
||||
}
|
||||
b.unreadByte()
|
||||
fallthrough
|
||||
|
||||
default:
|
||||
if isDelim(c) {
|
||||
b.errorf("unexpected delimiter %#q", rune(c))
|
||||
return nil
|
||||
}
|
||||
b.unreadByte()
|
||||
return b.readKeyword()
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) readHexString() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
Loop:
|
||||
c := b.readByte()
|
||||
if c == '>' {
|
||||
break
|
||||
}
|
||||
if isSpace(c) {
|
||||
goto Loop
|
||||
}
|
||||
Loop2:
|
||||
c2 := b.readByte()
|
||||
if isSpace(c2) {
|
||||
goto Loop2
|
||||
}
|
||||
x := unhex(c)<<4 | unhex(c2)
|
||||
if x < 0 {
|
||||
b.errorf("malformed hex string %c %c %s", c, c2, b.buf[b.pos:])
|
||||
break
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
}
|
||||
b.tmp = tmp
|
||||
return string(tmp)
|
||||
}
|
||||
|
||||
func unhex(b byte) int {
|
||||
switch {
|
||||
case '0' <= b && b <= '9':
|
||||
return int(b) - '0'
|
||||
case 'a' <= b && b <= 'f':
|
||||
return int(b) - 'a' + 10
|
||||
case 'A' <= b && b <= 'F':
|
||||
return int(b) - 'A' + 10
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func (b *buffer) readLiteralString() token {
|
||||
tmp := b.tmp[:0]
|
||||
depth := 1
|
||||
Loop:
|
||||
for {
|
||||
c := b.readByte()
|
||||
switch c {
|
||||
default:
|
||||
tmp = append(tmp, c)
|
||||
case '(':
|
||||
depth++
|
||||
tmp = append(tmp, c)
|
||||
case ')':
|
||||
if depth--; depth == 0 {
|
||||
break Loop
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
case '\\':
|
||||
switch c = b.readByte(); c {
|
||||
default:
|
||||
b.errorf("invalid escape sequence \\%c", c)
|
||||
tmp = append(tmp, '\\', c)
|
||||
case 'n':
|
||||
tmp = append(tmp, '\n')
|
||||
case 'r':
|
||||
tmp = append(tmp, '\r')
|
||||
case 'b':
|
||||
tmp = append(tmp, '\b')
|
||||
case 't':
|
||||
tmp = append(tmp, '\t')
|
||||
case 'f':
|
||||
tmp = append(tmp, '\f')
|
||||
case '(', ')', '\\':
|
||||
tmp = append(tmp, c)
|
||||
case '\r':
|
||||
if b.readByte() != '\n' {
|
||||
b.unreadByte()
|
||||
}
|
||||
fallthrough
|
||||
case '\n':
|
||||
// no append
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
x := int(c - '0')
|
||||
for i := 0; i < 2; i++ {
|
||||
c = b.readByte()
|
||||
if c < '0' || c > '7' {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
x = x*8 + int(c-'0')
|
||||
}
|
||||
if x > 255 {
|
||||
b.errorf("invalid octal escape \\%03o", x)
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
}
|
||||
}
|
||||
}
|
||||
b.tmp = tmp
|
||||
return string(tmp)
|
||||
}
|
||||
|
||||
func (b *buffer) readName() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
c := b.readByte()
|
||||
if isDelim(c) || isSpace(c) {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
if c == '#' {
|
||||
x := unhex(b.readByte())<<4 | unhex(b.readByte())
|
||||
if x < 0 {
|
||||
b.errorf("malformed name")
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
continue
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
}
|
||||
b.tmp = tmp
|
||||
return name(string(tmp))
|
||||
}
|
||||
|
||||
func (b *buffer) readKeyword() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
c := b.readByte()
|
||||
if isDelim(c) || isSpace(c) {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
}
|
||||
b.tmp = tmp
|
||||
s := string(tmp)
|
||||
switch {
|
||||
case s == "true":
|
||||
return true
|
||||
case s == "false":
|
||||
return false
|
||||
case isInteger(s):
|
||||
x, err := strconv.ParseInt(s, 10, 64)
|
||||
if err != nil {
|
||||
b.errorf("invalid integer %s", s)
|
||||
}
|
||||
return x
|
||||
case isReal(s):
|
||||
x, err := strconv.ParseFloat(s, 64)
|
||||
if err != nil {
|
||||
b.errorf("invalid real %s", s)
|
||||
}
|
||||
return x
|
||||
}
|
||||
return keyword(string(tmp))
|
||||
}
|
||||
|
||||
func isInteger(s string) bool {
|
||||
if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
|
||||
s = s[1:]
|
||||
}
|
||||
if len(s) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, c := range s {
|
||||
if c < '0' || '9' < c {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isReal(s string) bool {
|
||||
if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
|
||||
s = s[1:]
|
||||
}
|
||||
if len(s) == 0 {
|
||||
return false
|
||||
}
|
||||
ndot := 0
|
||||
for _, c := range s {
|
||||
if c == '.' {
|
||||
ndot++
|
||||
continue
|
||||
}
|
||||
if c < '0' || '9' < c {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return ndot == 1
|
||||
}
|
||||
|
||||
// An object is a PDF syntax object, one of the following Go types:
|
||||
//
|
||||
// bool, a PDF boolean
|
||||
// int64, a PDF integer
|
||||
// float64, a PDF real
|
||||
// string, a PDF string literal
|
||||
// name, a PDF name without the leading slash
|
||||
// dict, a PDF dictionary
|
||||
// array, a PDF array
|
||||
// stream, a PDF stream
|
||||
// objptr, a PDF object reference
|
||||
// objdef, a PDF object definition
|
||||
//
|
||||
// An object may also be nil, to represent the PDF null.
|
||||
type object interface{}
|
||||
|
||||
type dict map[name]object
|
||||
|
||||
type array []object
|
||||
|
||||
type stream struct {
|
||||
hdr dict
|
||||
ptr objptr
|
||||
offset int64
|
||||
}
|
||||
|
||||
type objptr struct {
|
||||
id uint32
|
||||
gen uint16
|
||||
}
|
||||
|
||||
type objdef struct {
|
||||
ptr objptr
|
||||
obj object
|
||||
}
|
||||
|
||||
func (b *buffer) readObject() object {
|
||||
tok := b.readToken()
|
||||
if kw, ok := tok.(keyword); ok {
|
||||
switch kw {
|
||||
case "null":
|
||||
return nil
|
||||
case "<<":
|
||||
return b.readDict()
|
||||
case "[":
|
||||
return b.readArray()
|
||||
}
|
||||
b.errorf("unexpected keyword %q parsing object", kw)
|
||||
return nil
|
||||
}
|
||||
|
||||
if str, ok := tok.(string); ok && b.key != nil && b.objptr.id != 0 {
|
||||
tok = decryptString(b.key, b.useAES, b.objptr, str)
|
||||
}
|
||||
|
||||
if !b.allowObjptr {
|
||||
return tok
|
||||
}
|
||||
|
||||
if t1, ok := tok.(int64); ok && int64(uint32(t1)) == t1 {
|
||||
tok2 := b.readToken()
|
||||
if t2, ok := tok2.(int64); ok && int64(uint16(t2)) == t2 {
|
||||
tok3 := b.readToken()
|
||||
switch tok3 {
|
||||
case keyword("R"):
|
||||
return objptr{uint32(t1), uint16(t2)}
|
||||
case keyword("obj"):
|
||||
old := b.objptr
|
||||
b.objptr = objptr{uint32(t1), uint16(t2)}
|
||||
obj := b.readObject()
|
||||
if _, ok := obj.(stream); !ok {
|
||||
tok4 := b.readToken()
|
||||
if tok4 != keyword("endobj") {
|
||||
b.errorf("missing endobj after indirect object definition")
|
||||
b.unreadToken(tok4)
|
||||
}
|
||||
}
|
||||
b.objptr = old
|
||||
return objdef{objptr{uint32(t1), uint16(t2)}, obj}
|
||||
}
|
||||
b.unreadToken(tok3)
|
||||
}
|
||||
b.unreadToken(tok2)
|
||||
}
|
||||
return tok
|
||||
}
|
||||
|
||||
func (b *buffer) readArray() object {
|
||||
var x array
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == nil || tok == keyword("]") {
|
||||
break
|
||||
}
|
||||
b.unreadToken(tok)
|
||||
x = append(x, b.readObject())
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func (b *buffer) readDict() object {
|
||||
x := make(dict)
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == nil || tok == keyword(">>") {
|
||||
break
|
||||
}
|
||||
n, ok := tok.(name)
|
||||
if !ok {
|
||||
b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok)
|
||||
continue
|
||||
}
|
||||
x[n] = b.readObject()
|
||||
}
|
||||
|
||||
if !b.allowStream {
|
||||
return x
|
||||
}
|
||||
|
||||
tok := b.readToken()
|
||||
if tok != keyword("stream") {
|
||||
b.unreadToken(tok)
|
||||
return x
|
||||
}
|
||||
|
||||
switch b.readByte() {
|
||||
case '\r':
|
||||
if b.readByte() != '\n' {
|
||||
b.unreadByte()
|
||||
}
|
||||
case '\n':
|
||||
// ok
|
||||
default:
|
||||
b.errorf("stream keyword not followed by newline")
|
||||
}
|
||||
|
||||
return stream{x, b.objptr, b.readOffset()}
|
||||
}
|
||||
|
||||
func isSpace(b byte) bool {
|
||||
switch b {
|
||||
case '\x00', '\t', '\n', '\f', '\r', ' ':
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isDelim(b byte) bool {
|
||||
switch b {
|
||||
case '<', '>', '(', ')', '[', ']', '{', '}', '/', '%':
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
4286
internal/3rdparty/pdf/name.go
vendored
Normal file
4286
internal/3rdparty/pdf/name.go
vendored
Normal file
File diff suppressed because it is too large
Load Diff
667
internal/3rdparty/pdf/page.go
vendored
Normal file
667
internal/3rdparty/pdf/page.go
vendored
Normal file
@@ -0,0 +1,667 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Page represent a single page in a PDF file.
|
||||
// The methods interpret a Page dictionary stored in V.
|
||||
type Page struct {
|
||||
V Value
|
||||
}
|
||||
|
||||
// Page returns the page for the given page number.
|
||||
// Page numbers are indexed starting at 1, not 0.
|
||||
// If the page is not found, Page returns a Page with p.V.IsNull().
|
||||
func (r *Reader) Page(num int) Page {
|
||||
num-- // now 0-indexed
|
||||
page := r.Trailer().Key("Root").Key("Pages")
|
||||
Search:
|
||||
for page.Key("Type").Name() == "Pages" {
|
||||
count := int(page.Key("Count").Int64())
|
||||
if count < num {
|
||||
return Page{}
|
||||
}
|
||||
kids := page.Key("Kids")
|
||||
for i := 0; i < kids.Len(); i++ {
|
||||
kid := kids.Index(i)
|
||||
if kid.Key("Type").Name() == "Pages" {
|
||||
c := int(kid.Key("Count").Int64())
|
||||
if num < c {
|
||||
page = kid
|
||||
continue Search
|
||||
}
|
||||
num -= c
|
||||
continue
|
||||
}
|
||||
if kid.Key("Type").Name() == "Page" {
|
||||
if num == 0 {
|
||||
return Page{kid}
|
||||
}
|
||||
num--
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
return Page{}
|
||||
}
|
||||
|
||||
// NumPage returns the number of pages in the PDF file.
|
||||
func (r *Reader) NumPage() int {
|
||||
return int(r.Trailer().Key("Root").Key("Pages").Key("Count").Int64())
|
||||
}
|
||||
|
||||
func (p Page) findInherited(key string) Value {
|
||||
for v := p.V; !v.IsNull(); v = v.Key("Parent") {
|
||||
if r := v.Key(key); !r.IsNull() {
|
||||
return r
|
||||
}
|
||||
}
|
||||
return Value{}
|
||||
}
|
||||
|
||||
/*
|
||||
func (p Page) MediaBox() Value {
|
||||
return p.findInherited("MediaBox")
|
||||
}
|
||||
|
||||
func (p Page) CropBox() Value {
|
||||
return p.findInherited("CropBox")
|
||||
}
|
||||
*/
|
||||
|
||||
// Resources returns the resources dictionary associated with the page.
|
||||
func (p Page) Resources() Value {
|
||||
return p.findInherited("Resources")
|
||||
}
|
||||
|
||||
// Fonts returns a list of the fonts associated with the page.
|
||||
func (p Page) Fonts() []string {
|
||||
return p.Resources().Key("Font").Keys()
|
||||
}
|
||||
|
||||
// Font returns the font with the given name associated with the page.
|
||||
func (p Page) Font(name string) Font {
|
||||
return Font{p.Resources().Key("Font").Key(name)}
|
||||
}
|
||||
|
||||
// A Font represent a font in a PDF file.
|
||||
// The methods interpret a Font dictionary stored in V.
|
||||
type Font struct {
|
||||
V Value
|
||||
}
|
||||
|
||||
// BaseFont returns the font's name (BaseFont property).
|
||||
func (f Font) BaseFont() string {
|
||||
return f.V.Key("BaseFont").Name()
|
||||
}
|
||||
|
||||
// FirstChar returns the code point of the first character in the font.
|
||||
func (f Font) FirstChar() int {
|
||||
return int(f.V.Key("FirstChar").Int64())
|
||||
}
|
||||
|
||||
// LastChar returns the code point of the last character in the font.
|
||||
func (f Font) LastChar() int {
|
||||
return int(f.V.Key("LastChar").Int64())
|
||||
}
|
||||
|
||||
// Widths returns the widths of the glyphs in the font.
|
||||
// In a well-formed PDF, len(f.Widths()) == f.LastChar()+1 - f.FirstChar().
|
||||
func (f Font) Widths() []float64 {
|
||||
x := f.V.Key("Widths")
|
||||
var out []float64
|
||||
for i := 0; i < x.Len(); i++ {
|
||||
out = append(out, x.Index(i).Float64())
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Width returns the width of the given code point.
|
||||
func (f Font) Width(code int) float64 {
|
||||
first := f.FirstChar()
|
||||
last := f.LastChar()
|
||||
if code < first || last < code {
|
||||
return 0
|
||||
}
|
||||
return f.V.Key("Widths").Index(code - first).Float64()
|
||||
}
|
||||
|
||||
// Encoder returns the encoding between font code point sequences and UTF-8.
|
||||
func (f Font) Encoder() TextEncoding {
|
||||
enc := f.V.Key("Encoding")
|
||||
switch enc.Kind() {
|
||||
case Name:
|
||||
switch enc.Name() {
|
||||
case "WinAnsiEncoding":
|
||||
return &byteEncoder{&winAnsiEncoding}
|
||||
case "MacRomanEncoding":
|
||||
return &byteEncoder{&macRomanEncoding}
|
||||
case "Identity-H":
|
||||
// TODO: Should be big-endian UCS-2 decoder
|
||||
return &nopEncoder{}
|
||||
default:
|
||||
println("unknown encoding", enc.Name())
|
||||
return &nopEncoder{}
|
||||
}
|
||||
case Dict:
|
||||
return &dictEncoder{enc.Key("Differences")}
|
||||
case Null:
|
||||
// ok, try ToUnicode
|
||||
default:
|
||||
println("unexpected encoding", enc.String())
|
||||
return &nopEncoder{}
|
||||
}
|
||||
|
||||
toUnicode := f.V.Key("ToUnicode")
|
||||
if toUnicode.Kind() == Dict {
|
||||
m := readCmap(toUnicode)
|
||||
if m == nil {
|
||||
return &nopEncoder{}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
return &byteEncoder{&pdfDocEncoding}
|
||||
}
|
||||
|
||||
type dictEncoder struct {
|
||||
v Value
|
||||
}
|
||||
|
||||
func (e *dictEncoder) Decode(raw string) (text string) {
|
||||
r := make([]rune, 0, len(raw))
|
||||
for i := 0; i < len(raw); i++ {
|
||||
ch := rune(raw[i])
|
||||
n := -1
|
||||
for j := 0; j < e.v.Len(); j++ {
|
||||
x := e.v.Index(j)
|
||||
if x.Kind() == Integer {
|
||||
n = int(x.Int64())
|
||||
continue
|
||||
}
|
||||
if x.Kind() == Name {
|
||||
if int(raw[i]) == n {
|
||||
r := nameToRune[x.Name()]
|
||||
if r != 0 {
|
||||
ch = r
|
||||
break
|
||||
}
|
||||
}
|
||||
n++
|
||||
}
|
||||
}
|
||||
r = append(r, ch)
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
// A TextEncoding represents a mapping between
|
||||
// font code points and UTF-8 text.
|
||||
type TextEncoding interface {
|
||||
// Decode returns the UTF-8 text corresponding to
|
||||
// the sequence of code points in raw.
|
||||
Decode(raw string) (text string)
|
||||
}
|
||||
|
||||
type nopEncoder struct {
|
||||
}
|
||||
|
||||
func (e *nopEncoder) Decode(raw string) (text string) {
|
||||
return raw
|
||||
}
|
||||
|
||||
type byteEncoder struct {
|
||||
table *[256]rune
|
||||
}
|
||||
|
||||
func (e *byteEncoder) Decode(raw string) (text string) {
|
||||
r := make([]rune, 0, len(raw))
|
||||
for i := 0; i < len(raw); i++ {
|
||||
r = append(r, e.table[raw[i]])
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
type cmap struct {
|
||||
space [4][][2]string
|
||||
bfrange []bfrange
|
||||
}
|
||||
|
||||
func (m *cmap) Decode(raw string) (text string) {
|
||||
var r []rune
|
||||
Parse:
|
||||
for len(raw) > 0 {
|
||||
for n := 1; n <= 4 && n <= len(raw); n++ {
|
||||
for _, space := range m.space[n-1] {
|
||||
if space[0] <= raw[:n] && raw[:n] <= space[1] {
|
||||
text := raw[:n]
|
||||
raw = raw[n:]
|
||||
for _, bf := range m.bfrange {
|
||||
if len(bf.lo) == n && bf.lo <= text && text <= bf.hi {
|
||||
if bf.dst.Kind() == String {
|
||||
s := bf.dst.RawString()
|
||||
if bf.lo != text {
|
||||
b := []byte(s)
|
||||
b[len(b)-1] += text[len(text)-1] - bf.lo[len(bf.lo)-1]
|
||||
s = string(b)
|
||||
}
|
||||
r = append(r, []rune(utf16Decode(s))...)
|
||||
continue Parse
|
||||
}
|
||||
if bf.dst.Kind() == Array {
|
||||
fmt.Printf("array %v\n", bf.dst)
|
||||
} else {
|
||||
fmt.Printf("unknown dst %v\n", bf.dst)
|
||||
}
|
||||
r = append(r, noRune)
|
||||
continue Parse
|
||||
}
|
||||
}
|
||||
fmt.Printf("no text for %q", text)
|
||||
r = append(r, noRune)
|
||||
continue Parse
|
||||
}
|
||||
}
|
||||
}
|
||||
println("no code space found")
|
||||
r = append(r, noRune)
|
||||
raw = raw[1:]
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
type bfrange struct {
|
||||
lo string
|
||||
hi string
|
||||
dst Value
|
||||
}
|
||||
|
||||
func readCmap(toUnicode Value) *cmap {
|
||||
n := -1
|
||||
var m cmap
|
||||
ok := true
|
||||
Interpret(toUnicode, func(stk *Stack, op string) {
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
switch op {
|
||||
case "findresource":
|
||||
category := stk.Pop()
|
||||
key := stk.Pop()
|
||||
fmt.Println("findresource", key, category)
|
||||
stk.Push(newDict())
|
||||
case "begincmap":
|
||||
stk.Push(newDict())
|
||||
case "endcmap":
|
||||
stk.Pop()
|
||||
case "begincodespacerange":
|
||||
n = int(stk.Pop().Int64())
|
||||
case "endcodespacerange":
|
||||
if n < 0 {
|
||||
println("missing begincodespacerange")
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
hi, lo := stk.Pop().RawString(), stk.Pop().RawString()
|
||||
if len(lo) == 0 || len(lo) != len(hi) {
|
||||
println("bad codespace range")
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
m.space[len(lo)-1] = append(m.space[len(lo)-1], [2]string{lo, hi})
|
||||
}
|
||||
n = -1
|
||||
case "beginbfrange":
|
||||
n = int(stk.Pop().Int64())
|
||||
case "endbfrange":
|
||||
if n < 0 {
|
||||
panic("missing beginbfrange")
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
dst, srcHi, srcLo := stk.Pop(), stk.Pop().RawString(), stk.Pop().RawString()
|
||||
m.bfrange = append(m.bfrange, bfrange{srcLo, srcHi, dst})
|
||||
}
|
||||
case "defineresource":
|
||||
category := stk.Pop().Name()
|
||||
value := stk.Pop()
|
||||
key := stk.Pop().Name()
|
||||
fmt.Println("defineresource", key, value, category)
|
||||
stk.Push(value)
|
||||
default:
|
||||
println("interp\t", op)
|
||||
}
|
||||
})
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return &m
|
||||
}
|
||||
|
||||
type matrix [3][3]float64
|
||||
|
||||
var ident = matrix{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}
|
||||
|
||||
func (x matrix) mul(y matrix) matrix {
|
||||
var z matrix
|
||||
for i := 0; i < 3; i++ {
|
||||
for j := 0; j < 3; j++ {
|
||||
for k := 0; k < 3; k++ {
|
||||
z[i][j] += x[i][k] * y[k][j]
|
||||
}
|
||||
}
|
||||
}
|
||||
return z
|
||||
}
|
||||
|
||||
// A Text represents a single piece of text drawn on a page.
|
||||
type Text struct {
|
||||
Font string // the font used
|
||||
FontSize float64 // the font size, in points (1/72 of an inch)
|
||||
X float64 // the X coordinate, in points, increasing left to right
|
||||
Y float64 // the Y coordinate, in points, increasing bottom to top
|
||||
W float64 // the width of the text, in points
|
||||
S string // the actual UTF-8 text
|
||||
}
|
||||
|
||||
// A Rect represents a rectangle.
|
||||
type Rect struct {
|
||||
Min, Max Point
|
||||
}
|
||||
|
||||
// A Point represents an X, Y pair.
|
||||
type Point struct {
|
||||
X float64
|
||||
Y float64
|
||||
}
|
||||
|
||||
// Content describes the basic content on a page: the text and any drawn rectangles.
|
||||
type Content struct {
|
||||
Text []Text
|
||||
Rect []Rect
|
||||
}
|
||||
|
||||
type gstate struct {
|
||||
Tc float64
|
||||
Tw float64
|
||||
Th float64
|
||||
Tl float64
|
||||
Tf Font
|
||||
Tfs float64
|
||||
Tmode int
|
||||
Trise float64
|
||||
Tm matrix
|
||||
Tlm matrix
|
||||
Trm matrix
|
||||
CTM matrix
|
||||
}
|
||||
|
||||
// Content returns the page's content.
|
||||
func (p Page) Content() Content {
|
||||
strm := p.V.Key("Contents")
|
||||
var enc TextEncoding = &nopEncoder{}
|
||||
|
||||
var g = gstate{
|
||||
Th: 1,
|
||||
CTM: ident,
|
||||
}
|
||||
|
||||
var text []Text
|
||||
showText := func(s string) {
|
||||
n := 0
|
||||
for _, ch := range enc.Decode(s) {
|
||||
Trm := matrix{{g.Tfs * g.Th, 0, 0}, {0, g.Tfs, 0}, {0, g.Trise, 1}}.mul(g.Tm).mul(g.CTM)
|
||||
w0 := g.Tf.Width(int(s[n]))
|
||||
n++
|
||||
if ch != ' ' {
|
||||
f := g.Tf.BaseFont()
|
||||
if i := strings.Index(f, "+"); i >= 0 {
|
||||
f = f[i+1:]
|
||||
}
|
||||
text = append(text, Text{f, Trm[0][0], Trm[2][0], Trm[2][1], w0 / 1000 * Trm[0][0], string(ch)})
|
||||
}
|
||||
tx := w0/1000*g.Tfs + g.Tc
|
||||
if ch == ' ' {
|
||||
tx += g.Tw
|
||||
}
|
||||
tx *= g.Th
|
||||
g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm)
|
||||
}
|
||||
}
|
||||
|
||||
var rect []Rect
|
||||
var gstack []gstate
|
||||
Interpret(strm, func(stk *Stack, op string) {
|
||||
n := stk.Len()
|
||||
args := make([]Value, n)
|
||||
for i := n - 1; i >= 0; i-- {
|
||||
args[i] = stk.Pop()
|
||||
}
|
||||
switch op {
|
||||
default:
|
||||
//fmt.Println(op, args)
|
||||
return
|
||||
|
||||
case "cm": // update g.CTM
|
||||
if len(args) != 6 {
|
||||
panic("bad g.Tm")
|
||||
}
|
||||
var m matrix
|
||||
for i := 0; i < 6; i++ {
|
||||
m[i/2][i%2] = args[i].Float64()
|
||||
}
|
||||
m[2][2] = 1
|
||||
g.CTM = m.mul(g.CTM)
|
||||
|
||||
case "gs": // set parameters from graphics state resource
|
||||
gs := p.Resources().Key("ExtGState").Key(args[0].Name())
|
||||
font := gs.Key("Font")
|
||||
if font.Kind() == Array && font.Len() == 2 {
|
||||
//fmt.Println("FONT", font)
|
||||
}
|
||||
|
||||
case "f": // fill
|
||||
case "g": // setgray
|
||||
case "l": // lineto
|
||||
case "m": // moveto
|
||||
|
||||
case "cs": // set colorspace non-stroking
|
||||
case "scn": // set color non-stroking
|
||||
|
||||
case "re": // append rectangle to path
|
||||
if len(args) != 4 {
|
||||
panic("bad re")
|
||||
}
|
||||
x, y, w, h := args[0].Float64(), args[1].Float64(), args[2].Float64(), args[3].Float64()
|
||||
rect = append(rect, Rect{Point{x, y}, Point{x + w, y + h}})
|
||||
|
||||
case "q": // save graphics state
|
||||
gstack = append(gstack, g)
|
||||
|
||||
case "Q": // restore graphics state
|
||||
n := len(gstack) - 1
|
||||
g = gstack[n]
|
||||
gstack = gstack[:n]
|
||||
|
||||
case "BT": // begin text (reset text matrix and line matrix)
|
||||
g.Tm = ident
|
||||
g.Tlm = g.Tm
|
||||
|
||||
case "ET": // end text
|
||||
|
||||
case "T*": // move to start of next line
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
|
||||
case "Tc": // set character spacing
|
||||
if len(args) != 1 {
|
||||
panic("bad g.Tc")
|
||||
}
|
||||
g.Tc = args[0].Float64()
|
||||
|
||||
case "TD": // move text position and set leading
|
||||
if len(args) != 2 {
|
||||
panic("bad Td")
|
||||
}
|
||||
g.Tl = -args[1].Float64()
|
||||
fallthrough
|
||||
case "Td": // move text position
|
||||
if len(args) != 2 {
|
||||
panic("bad Td")
|
||||
}
|
||||
tx := args[0].Float64()
|
||||
ty := args[1].Float64()
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {tx, ty, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
|
||||
case "Tf": // set text font and size
|
||||
if len(args) != 2 {
|
||||
panic("bad TL")
|
||||
}
|
||||
f := args[0].Name()
|
||||
g.Tf = p.Font(f)
|
||||
enc = g.Tf.Encoder()
|
||||
if enc == nil {
|
||||
println("no cmap for", f)
|
||||
enc = &nopEncoder{}
|
||||
}
|
||||
g.Tfs = args[1].Float64()
|
||||
|
||||
case "\"": // set spacing, move to next line, and show text
|
||||
if len(args) != 3 {
|
||||
panic("bad \" operator")
|
||||
}
|
||||
g.Tw = args[0].Float64()
|
||||
g.Tc = args[1].Float64()
|
||||
args = args[2:]
|
||||
fallthrough
|
||||
case "'": // move to next line and show text
|
||||
if len(args) != 1 {
|
||||
panic("bad ' operator")
|
||||
}
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
fallthrough
|
||||
case "Tj": // show text
|
||||
if len(args) != 1 {
|
||||
panic("bad Tj operator")
|
||||
}
|
||||
showText(args[0].RawString())
|
||||
|
||||
case "TJ": // show text, allowing individual glyph positioning
|
||||
v := args[0]
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
x := v.Index(i)
|
||||
if x.Kind() == String {
|
||||
showText(x.RawString())
|
||||
} else {
|
||||
tx := -x.Float64() / 1000 * g.Tfs * g.Th
|
||||
g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm)
|
||||
}
|
||||
}
|
||||
|
||||
case "TL": // set text leading
|
||||
if len(args) != 1 {
|
||||
panic("bad TL")
|
||||
}
|
||||
g.Tl = args[0].Float64()
|
||||
|
||||
case "Tm": // set text matrix and line matrix
|
||||
if len(args) != 6 {
|
||||
panic("bad g.Tm")
|
||||
}
|
||||
var m matrix
|
||||
for i := 0; i < 6; i++ {
|
||||
m[i/2][i%2] = args[i].Float64()
|
||||
}
|
||||
m[2][2] = 1
|
||||
g.Tm = m
|
||||
g.Tlm = m
|
||||
|
||||
case "Tr": // set text rendering mode
|
||||
if len(args) != 1 {
|
||||
panic("bad Tr")
|
||||
}
|
||||
g.Tmode = int(args[0].Int64())
|
||||
|
||||
case "Ts": // set text rise
|
||||
if len(args) != 1 {
|
||||
panic("bad Ts")
|
||||
}
|
||||
g.Trise = args[0].Float64()
|
||||
|
||||
case "Tw": // set word spacing
|
||||
if len(args) != 1 {
|
||||
panic("bad g.Tw")
|
||||
}
|
||||
g.Tw = args[0].Float64()
|
||||
|
||||
case "Tz": // set horizontal text scaling
|
||||
if len(args) != 1 {
|
||||
panic("bad Tz")
|
||||
}
|
||||
g.Th = args[0].Float64() / 100
|
||||
}
|
||||
})
|
||||
return Content{text, rect}
|
||||
}
|
||||
|
||||
// TextVertical implements sort.Interface for sorting
|
||||
// a slice of Text values in vertical order, top to bottom,
|
||||
// and then left to right within a line.
|
||||
type TextVertical []Text
|
||||
|
||||
func (x TextVertical) Len() int { return len(x) }
|
||||
func (x TextVertical) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x TextVertical) Less(i, j int) bool {
|
||||
if x[i].Y != x[j].Y {
|
||||
return x[i].Y > x[j].Y
|
||||
}
|
||||
return x[i].X < x[j].X
|
||||
}
|
||||
|
||||
// TextHorizontal implements sort.Interface for sorting
|
||||
// a slice of Text values in horizontal order, left to right,
|
||||
// and then top to bottom within a column.
|
||||
type TextHorizontal []Text
|
||||
|
||||
func (x TextHorizontal) Len() int { return len(x) }
|
||||
func (x TextHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x TextHorizontal) Less(i, j int) bool {
|
||||
if x[i].X != x[j].X {
|
||||
return x[i].X < x[j].X
|
||||
}
|
||||
return x[i].Y > x[j].Y
|
||||
}
|
||||
|
||||
// An Outline is a tree describing the outline (also known as the table of contents)
|
||||
// of a document.
|
||||
type Outline struct {
|
||||
Title string // title for this element
|
||||
Child []Outline // child elements
|
||||
}
|
||||
|
||||
// Outline returns the document outline.
|
||||
// The Outline returned is the root of the outline tree and typically has no Title itself.
|
||||
// That is, the children of the returned root are the top-level entries in the outline.
|
||||
func (r *Reader) Outline() Outline {
|
||||
return buildOutline(r.Trailer().Key("Root").Key("Outlines"))
|
||||
}
|
||||
|
||||
func buildOutline(entry Value) Outline {
|
||||
var x Outline
|
||||
x.Title = entry.Key("Title").Text()
|
||||
for child := entry.Key("First"); child.Kind() == Dict; child = child.Key("Next") {
|
||||
x.Child = append(x.Child, buildOutline(child))
|
||||
}
|
||||
return x
|
||||
}
|
||||
110
internal/3rdparty/pdf/pdfpasswd/main.go
vendored
Normal file
110
internal/3rdparty/pdf/pdfpasswd/main.go
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Pdfpasswd searches for the password for an encrypted PDF
|
||||
// by trying all strings over a given alphabet up to a given length.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"wa-lang.org/wa/internal/3rdparty/pdf"
|
||||
)
|
||||
|
||||
var (
|
||||
alphabet = flag.String("a", "0123456789", "alphabet")
|
||||
maxLength = flag.Int("m", 4, "max length")
|
||||
)
|
||||
|
||||
func usage() {
|
||||
fmt.Fprintf(os.Stderr, "usage: pdfpasswd [-a alphabet] [-m maxlength] file\n")
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
func main() {
|
||||
log.SetFlags(0)
|
||||
log.SetPrefix("pdfpasswd: ")
|
||||
|
||||
flag.Usage = usage
|
||||
flag.Parse()
|
||||
if flag.NArg() != 1 {
|
||||
usage()
|
||||
}
|
||||
|
||||
f, err := os.Open(flag.Arg(0))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
last := ""
|
||||
alpha := *alphabet
|
||||
ctr := make([]int, *maxLength)
|
||||
pw := func() string {
|
||||
inc(ctr, len(alpha)+1)
|
||||
for !valid(ctr) {
|
||||
inc(ctr, len(alpha)+1)
|
||||
}
|
||||
if done(ctr) {
|
||||
return ""
|
||||
}
|
||||
buf := make([]byte, len(ctr))
|
||||
var i int
|
||||
for i = 0; i < len(buf); i++ {
|
||||
if ctr[i] == 0 {
|
||||
break
|
||||
}
|
||||
buf[i] = alpha[ctr[i]-1]
|
||||
}
|
||||
last = string(buf[:i])
|
||||
println(last)
|
||||
return last
|
||||
}
|
||||
st, err := f.Stat()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
_, err = pdf.NewReaderEncrypted(f, st.Size(), pw)
|
||||
if err != nil {
|
||||
if err == pdf.ErrInvalidPassword {
|
||||
log.Fatal("password not found")
|
||||
}
|
||||
log.Fatalf("reading pdf: %v", err)
|
||||
}
|
||||
fmt.Printf("password: %q\n", last)
|
||||
}
|
||||
|
||||
func inc(ctr []int, n int) {
|
||||
for i := 0; i < len(ctr); i++ {
|
||||
ctr[i]++
|
||||
if ctr[i] < n {
|
||||
break
|
||||
}
|
||||
ctr[i] = 0
|
||||
}
|
||||
}
|
||||
|
||||
func done(ctr []int) bool {
|
||||
for _, x := range ctr {
|
||||
if x != 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func valid(ctr []int) bool {
|
||||
i := len(ctr)
|
||||
for i > 0 && ctr[i-1] == 0 {
|
||||
i--
|
||||
}
|
||||
for i--; i >= 0; i-- {
|
||||
if ctr[i] == 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
138
internal/3rdparty/pdf/ps.go
vendored
Normal file
138
internal/3rdparty/pdf/ps.go
vendored
Normal file
@@ -0,0 +1,138 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// A Stack represents a stack of values.
|
||||
type Stack struct {
|
||||
stack []Value
|
||||
}
|
||||
|
||||
func (stk *Stack) Len() int {
|
||||
return len(stk.stack)
|
||||
}
|
||||
|
||||
func (stk *Stack) Push(v Value) {
|
||||
stk.stack = append(stk.stack, v)
|
||||
}
|
||||
|
||||
func (stk *Stack) Pop() Value {
|
||||
n := len(stk.stack)
|
||||
if n == 0 {
|
||||
return Value{}
|
||||
}
|
||||
v := stk.stack[n-1]
|
||||
stk.stack[n-1] = Value{}
|
||||
stk.stack = stk.stack[:n-1]
|
||||
return v
|
||||
}
|
||||
|
||||
func newDict() Value {
|
||||
return Value{nil, objptr{}, make(dict)}
|
||||
}
|
||||
|
||||
// Interpret interprets the content in a stream as a basic PostScript program,
|
||||
// pushing values onto a stack and then calling the do function to execute
|
||||
// operators. The do function may push or pop values from the stack as needed
|
||||
// to implement op.
|
||||
//
|
||||
// Interpret handles the operators "dict", "currentdict", "begin", "end", "def", and "pop" itself.
|
||||
//
|
||||
// Interpret is not a full-blown PostScript interpreter. Its job is to handle the
|
||||
// very limited PostScript found in certain supporting file formats embedded
|
||||
// in PDF files, such as cmap files that describe the mapping from font code
|
||||
// points to Unicode code points.
|
||||
//
|
||||
// There is no support for executable blocks, among other limitations.
|
||||
//
|
||||
func Interpret(strm Value, do func(stk *Stack, op string)) {
|
||||
rd := strm.Reader()
|
||||
b := newBuffer(rd, 0)
|
||||
b.allowEOF = true
|
||||
b.allowObjptr = false
|
||||
b.allowStream = false
|
||||
var stk Stack
|
||||
var dicts []dict
|
||||
Reading:
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == io.EOF {
|
||||
break
|
||||
}
|
||||
if kw, ok := tok.(keyword); ok {
|
||||
switch kw {
|
||||
case "null", "[", "]", "<<", ">>":
|
||||
break
|
||||
default:
|
||||
for i := len(dicts) - 1; i >= 0; i-- {
|
||||
if v, ok := dicts[i][name(kw)]; ok {
|
||||
stk.Push(Value{nil, objptr{}, v})
|
||||
continue Reading
|
||||
}
|
||||
}
|
||||
do(&stk, string(kw))
|
||||
continue
|
||||
case "dict":
|
||||
stk.Pop()
|
||||
stk.Push(Value{nil, objptr{}, make(dict)})
|
||||
continue
|
||||
case "currentdict":
|
||||
if len(dicts) == 0 {
|
||||
panic("no current dictionary")
|
||||
}
|
||||
stk.Push(Value{nil, objptr{}, dicts[len(dicts)-1]})
|
||||
continue
|
||||
case "begin":
|
||||
d := stk.Pop()
|
||||
if d.Kind() != Dict {
|
||||
panic("cannot begin non-dict")
|
||||
}
|
||||
dicts = append(dicts, d.data.(dict))
|
||||
continue
|
||||
case "end":
|
||||
if len(dicts) <= 0 {
|
||||
panic("mismatched begin/end")
|
||||
}
|
||||
dicts = dicts[:len(dicts)-1]
|
||||
continue
|
||||
case "def":
|
||||
if len(dicts) <= 0 {
|
||||
panic("def without open dict")
|
||||
}
|
||||
val := stk.Pop()
|
||||
key, ok := stk.Pop().data.(name)
|
||||
if !ok {
|
||||
panic("def of non-name")
|
||||
}
|
||||
dicts[len(dicts)-1][key] = val.data
|
||||
continue
|
||||
case "pop":
|
||||
stk.Pop()
|
||||
continue
|
||||
}
|
||||
}
|
||||
b.unreadToken(tok)
|
||||
obj := b.readObject()
|
||||
stk.Push(Value{nil, objptr{}, obj})
|
||||
}
|
||||
}
|
||||
|
||||
type seqReader struct {
|
||||
rd io.Reader
|
||||
offset int64
|
||||
}
|
||||
|
||||
func (r *seqReader) ReadAt(buf []byte, offset int64) (int, error) {
|
||||
if offset != r.offset {
|
||||
return 0, fmt.Errorf("non-sequential read of stream")
|
||||
}
|
||||
n, err := io.ReadFull(r.rd, buf)
|
||||
r.offset += int64(n)
|
||||
return n, err
|
||||
}
|
||||
1079
internal/3rdparty/pdf/read.go
vendored
Normal file
1079
internal/3rdparty/pdf/read.go
vendored
Normal file
File diff suppressed because it is too large
Load Diff
158
internal/3rdparty/pdf/text.go
vendored
Normal file
158
internal/3rdparty/pdf/text.go
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
)
|
||||
|
||||
const noRune = unicode.ReplacementChar
|
||||
|
||||
func isPDFDocEncoded(s string) bool {
|
||||
if isUTF16(s) {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(s); i++ {
|
||||
if pdfDocEncoding[s[i]] == noRune {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func pdfDocDecode(s string) string {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] >= 0x80 || pdfDocEncoding[s[i]] != rune(s[i]) {
|
||||
goto Decode
|
||||
}
|
||||
}
|
||||
return s
|
||||
|
||||
Decode:
|
||||
r := make([]rune, len(s))
|
||||
for i := 0; i < len(s); i++ {
|
||||
r[i] = pdfDocEncoding[s[i]]
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
func isUTF16(s string) bool {
|
||||
return len(s) >= 2 && s[0] == 0xfe && s[1] == 0xff && len(s)%2 == 0
|
||||
}
|
||||
|
||||
func utf16Decode(s string) string {
|
||||
var u []uint16
|
||||
for i := 0; i < len(s); i += 2 {
|
||||
u = append(u, uint16(s[i])<<8|uint16(s[i+1]))
|
||||
}
|
||||
return string(utf16.Decode(u))
|
||||
}
|
||||
|
||||
// See PDF 32000-1:2008, Table D.2
|
||||
var pdfDocEncoding = [256]rune{
|
||||
noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune,
|
||||
noRune, 0x0009, 0x000a, noRune, noRune, 0x000d, noRune, noRune,
|
||||
noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune,
|
||||
0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, noRune,
|
||||
0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
|
||||
0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
|
||||
0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,
|
||||
0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, noRune,
|
||||
0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
|
||||
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, noRune, 0x00ae, 0x00af,
|
||||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
|
||||
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
|
||||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
|
||||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
|
||||
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
|
||||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
|
||||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
|
||||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
|
||||
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
|
||||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
||||
}
|
||||
|
||||
var winAnsiEncoding = [256]rune{
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x20ac, noRune, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
|
||||
0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, noRune, 0x017d, noRune,
|
||||
noRune, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
|
||||
0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, noRune, 0x017e, 0x0178,
|
||||
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
|
||||
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
|
||||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
|
||||
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
|
||||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
|
||||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
|
||||
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
|
||||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
|
||||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
|
||||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
|
||||
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
|
||||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
||||
}
|
||||
|
||||
var macRomanEncoding = [256]rune{
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1,
|
||||
0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8,
|
||||
0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3,
|
||||
0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc,
|
||||
0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
|
||||
0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
|
||||
0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
|
||||
0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
|
||||
0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
|
||||
0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153,
|
||||
0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
|
||||
0x00ff, 0x0178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
|
||||
0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1,
|
||||
0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4,
|
||||
0xf8ff, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc,
|
||||
0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7,
|
||||
}
|
||||
2
internal/native/loong64spec/.gitignore
vendored
Normal file
2
internal/native/loong64spec/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
LoongArch-Vol1-EN.pdf
|
||||
tables.go
|
||||
5
internal/native/loong64spec/readme.md
Normal file
5
internal/native/loong64spec/readme.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# 读取龙芯指令规范
|
||||
|
||||
1. 下载英文版到当前目录 https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf
|
||||
2. 执行 `go run spec.go`, 生成 `tables.go` 表格
|
||||
|
||||
566
internal/native/loong64spec/spec.go
Normal file
566
internal/native/loong64spec/spec.go
Normal file
@@ -0,0 +1,566 @@
|
||||
// Copyright (C) 2025 武汉凹语言科技有限公司
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
// 龙芯英文版指令集
|
||||
// https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"wa-lang.org/wa/internal/3rdparty/pdf"
|
||||
)
|
||||
|
||||
var (
|
||||
flagPdf = flag.String("pdf", "LoongArch-Vol1-EN.pdf", "set loong64 spec pdf")
|
||||
flagPackage = flag.String("pkg", "loong64", "set package name")
|
||||
flagOutput = flag.String("output", "tables.go", "set output file")
|
||||
)
|
||||
|
||||
func init() {
|
||||
log.SetFlags(0)
|
||||
log.SetPrefix("loong64spec: ")
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if *flagPdf == "" {
|
||||
fmt.Fprintf(os.Stderr, "usage: pdf missing\n")
|
||||
flag.Usage()
|
||||
os.Exit(2)
|
||||
}
|
||||
if *flagPackage == "" {
|
||||
fmt.Fprintf(os.Stderr, "usage: package name missing\n")
|
||||
flag.Usage()
|
||||
os.Exit(2)
|
||||
}
|
||||
if *flagOutput == "" {
|
||||
fmt.Fprintf(os.Stderr, "usage: output file missing\n")
|
||||
flag.Usage()
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
f, err := pdf.Open(*flagPdf)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
var prologue bytes.Buffer
|
||||
prologue.Write([]byte(`// 此代码是程序生成, 不要手动修改!!!
|
||||
|
||||
// Copyright (C) 2025 武汉凹语言科技有限公司
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
//go:build ignore
|
||||
|
||||
package loong64
|
||||
|
||||
`))
|
||||
|
||||
var op_f bytes.Buffer
|
||||
op_f.Write([]byte("const (\n\t_ Op = iota\n"))
|
||||
|
||||
var opstr_f bytes.Buffer
|
||||
opstr_f.Write([]byte("var opstr = [...]string{\n"))
|
||||
|
||||
var instFormats_f bytes.Buffer
|
||||
instFormats_f.Write([]byte("var instFormats = [...]instFormat{\n"))
|
||||
|
||||
// Scan document looking for instructions.
|
||||
n := f.NumPage()
|
||||
var ops []string
|
||||
opstrs := map[string]string{}
|
||||
instFormatComments := map[string]string{}
|
||||
instFormats := map[string]string{}
|
||||
var fp int
|
||||
|
||||
mergeMap := func(m1 map[string]string, m2 map[string]string) {
|
||||
for k := range m2 {
|
||||
m1[k] = m2[k]
|
||||
}
|
||||
}
|
||||
|
||||
for pageNum := 1; pageNum <= n; pageNum++ {
|
||||
p := f.Page(pageNum)
|
||||
if fp == 0 {
|
||||
if !isFirstPage(p) {
|
||||
continue
|
||||
}
|
||||
fp = pageNum
|
||||
}
|
||||
cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum)
|
||||
ops = append(ops, cPageOps...)
|
||||
mergeMap(opstrs, cPageOpstrs)
|
||||
mergeMap(instFormatComments, cPageInstFormatComments)
|
||||
mergeMap(instFormats, cPageInstFormats)
|
||||
}
|
||||
|
||||
sort.Strings(ops)
|
||||
|
||||
for _, op := range ops {
|
||||
// 1. write op
|
||||
op_f.Write([]byte(fmt.Sprintf("\t%s\n", op)))
|
||||
// 2. write opstr
|
||||
opstr_f.Write([]byte(fmt.Sprintf("\t%s\n", opstrs[op])))
|
||||
// 3. write instFormat
|
||||
instFormats_f.Write([]byte(fmt.Sprintf("\t%s\n\t%s\n", instFormatComments[op], instFormats[op])))
|
||||
}
|
||||
|
||||
op_f.Write([]byte(")\n\n"))
|
||||
opstr_f.Write([]byte("}\n\n"))
|
||||
instFormats_f.Write([]byte("}\n"))
|
||||
|
||||
fileTables, err := os.Create(*flagOutput)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer fileTables.Close()
|
||||
|
||||
fileTables.Write(prologue.Bytes())
|
||||
fileTables.Write(op_f.Bytes())
|
||||
fileTables.Write(opstr_f.Bytes())
|
||||
fileTables.Write(instFormats_f.Bytes())
|
||||
|
||||
fileTables.Close()
|
||||
}
|
||||
|
||||
func isFirstPage(page pdf.Page) bool {
|
||||
content := page.Content()
|
||||
appendixb := "AppendixB"
|
||||
ct := ""
|
||||
for _, t := range content.Text {
|
||||
ct += t.S
|
||||
if ct == "AppendixB" {
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(appendixb, ct) {
|
||||
continue
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func getArg(name string) (length int, argName string) {
|
||||
switch {
|
||||
case strings.Contains("arg_fd", name):
|
||||
return 5, "arg_fd"
|
||||
case strings.Contains("arg_fj", name):
|
||||
return 5, "arg_fj"
|
||||
case strings.Contains("arg_fk", name):
|
||||
return 5, "arg_fk"
|
||||
case strings.Contains("arg_fa", name):
|
||||
return 5, "arg_fa"
|
||||
case strings.Contains("arg_rd", name):
|
||||
return 5, "arg_rd"
|
||||
case strings.Contains("arg_rj", name) || name == "rj!=0,1":
|
||||
return 5, "arg_rj"
|
||||
case strings.Contains("arg_rk", name):
|
||||
return 5, "arg_rk"
|
||||
case name == "csr":
|
||||
return 14, "arg_csr_23_10"
|
||||
case strings.Contains("arg_cd", name):
|
||||
return 5, "arg_cd"
|
||||
case strings.Contains("arg_cj", name):
|
||||
return 5, "arg_cj"
|
||||
case strings.Contains("arg_ca", name):
|
||||
return 5, "arg_ca"
|
||||
case strings.Contains(name, "sa"):
|
||||
length, _ := strconv.Atoi(strings.Split(name, "sa")[1])
|
||||
if length == 2 {
|
||||
argName = "arg_sa2_16_15"
|
||||
} else {
|
||||
argName = "arg_sa3_17_15"
|
||||
}
|
||||
return length, argName
|
||||
case strings.Contains("arg_seq_17_10", name):
|
||||
return 8, "arg_seq_17_10"
|
||||
case strings.Contains("arg_op_4_0", name):
|
||||
return 5, "arg_op_4_0"
|
||||
case strings.Contains(name, "ui"):
|
||||
length, _ := strconv.Atoi(strings.Split(name, "ui")[1])
|
||||
if length == 5 {
|
||||
argName = "arg_ui5_14_10"
|
||||
} else if length == 6 {
|
||||
argName = "arg_ui6_15_10"
|
||||
} else {
|
||||
argName = "arg_ui12_21_10"
|
||||
}
|
||||
return length, argName
|
||||
case strings.Contains("arg_lsbw", name):
|
||||
return 5, "arg_lsbw"
|
||||
case strings.Contains("arg_msbw", name):
|
||||
return 5, "arg_msbw"
|
||||
case strings.Contains("arg_lsbd", name):
|
||||
return 6, "arg_lsbd"
|
||||
case strings.Contains("arg_msbd", name):
|
||||
return 6, "arg_msbd"
|
||||
case strings.Contains(name, "si"):
|
||||
length, _ := strconv.Atoi(strings.Split(name, "si")[1])
|
||||
if length == 12 {
|
||||
argName = "arg_si12_21_10"
|
||||
} else if length == 14 {
|
||||
argName = "arg_si14_23_10"
|
||||
} else if length == 16 {
|
||||
argName = "arg_si16_25_10"
|
||||
} else {
|
||||
argName = "arg_si20_24_5"
|
||||
}
|
||||
return length, argName
|
||||
case strings.Contains(name, "offs"):
|
||||
splitName := strings.Split(name, ":")
|
||||
left, _ := strconv.Atoi(strings.Split(splitName[0], "[")[1])
|
||||
right, _ := strconv.Atoi(strings.Split(splitName[1], "]")[0])
|
||||
return left - right + 1, "offs"
|
||||
default:
|
||||
return 0, ""
|
||||
}
|
||||
}
|
||||
|
||||
func binstrToHex(str string) string {
    rst := 0
    hex := "0x"
    charArray := []byte(str)
    for i := 0; i < 32; {
        rst = 1*(int(charArray[i+3])-48) + 2*(int(charArray[i+2])-48) + 4*(int(charArray[i+1])-48) + 8*(int(charArray[i])-48)
        switch rst {
        case 10:
            hex = hex + "a"
        case 11:
            hex = hex + "b"
        case 12:
            hex = hex + "c"
        case 13:
            hex = hex + "d"
        case 14:
            hex = hex + "e"
        case 15:
            hex = hex + "f"
        default:
            hex += strconv.Itoa(rst)
        }

        i += 4
    }
    return hex
}

/*
Here we deal with the instruction FCMP.cond.S/D, which has the following format:

| 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 |
|---------|---------|---------|-------|---|---|-------|
| op      | cond    | fk      | fj    | 0 | 0 | cd    |

The `cond` field has these possible values:

	"CAF": "00",
	"CUN": "08",
	"CEQ": "04",
	"CUEQ": "0c",
	"CLT": "02",
	"CULT": "0a",
	"CLE": "06",
	"CULE": "0e",
	"CNE": "10",
	"COR": "14",
	"CUNE": "18",
	"SAF": "01",
	"SUN": "09",
	"SEQ": "05",
	"SUEQ": "0d",
	"SLT": "03",
	"SULT": "0b",
	"SLE": "07",
	"SULE": "0f",
	"SNE": "11",
	"SOR": "15",
	"SUNE": "19",

These values are the hexadecimal values of the cond field itself (bits 19..15),
exactly as listed in the instruction set manual.

The map defined in the code below instead stores the hexadecimal encoding of
the cond field as it sits inside the full 32-bit instruction word. Because cond
straddles a hex-digit boundary, its upper 4 bits and its lowest bit land in
different hex digits, so these values differ from the raw values listed above.
*/
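// For example, dealWithFcmp("D")["FCMP_CEQ_D"]["instFormat"] expands to
//
//	{mask: 0xffff8018, value: 0x0c220000, op: FCMP_CEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}},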
func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) {
    conds := map[string]string{
        "CAF":  "00",
        "CUN":  "40",
        "CEQ":  "20",
        "CUEQ": "60",
        "CLT":  "10",
        "CULT": "50",
        "CLE":  "30",
        "CULE": "70",
        "CNE":  "80",
        "COR":  "a0",
        "CUNE": "c0",
        "SAF":  "08",
        "SUN":  "48",
        "SEQ":  "28",
        "SUEQ": "68",
        "SLT":  "18",
        "SULT": "58",
        "SLE":  "38",
        "SULE": "78",
        "SNE":  "88",
        "SOR":  "a8",
        "SUNE": "c8",
    }
    fcmpConditions = make(map[string]map[string]string)
    for k, v := range conds {
        op := fmt.Sprintf("FCMP_%s_%s", k, ds)
        opstr := fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", k, ds, k, ds)
        instFormatComment := fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", k, ds)
        var instFormat string
        if ds == "D" {
            instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds)
        } else {
            instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds)
        }

        fcmpConditions[op] = make(map[string]string)
        fcmpConditions[op]["op"] = op
        fcmpConditions[op]["opstr"] = opstr
        fcmpConditions[op]["instFormatComment"] = instFormatComment
        fcmpConditions[op]["instFormat"] = instFormat
    }
    return
}

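// findWords groups the characters extracted from a PDF page into words
// (really, phrases): characters on the same line (same Y) are merged when the
// horizontal gap between them is small relative to the font size, and wrapped
// continuation lines whose X falls inside the current cell's X range are
// pulled into the same group.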
func findWords(chars []pdf.Text) (words []pdf.Text) {
    for i := 0; i < len(chars); {
        xRange := []float64{chars[i].X, chars[i].X}
        j := i + 1

        // Find all chars on one line.
        for j < len(chars) && chars[j].Y == chars[i].Y {
            xRange[1] = chars[j].X
            j++
        }

        // A word may wrap onto another line (different Y) while still belonging to
        // the same table cell. After collecting all consecutive chars with the same Y,
        // keep consuming chars whose X falls inside xRange: they continue the current
        // word, because the next word's X must lie beyond the current range.
        for j < len(chars) && chars[j].X >= xRange[0] && chars[j].X <= xRange[1] {
            j++
        }

        var end float64
        // Split line into words (really, phrases).
        for k := i; k < j; {
            ck := &chars[k]
            s := ck.S
            end = ck.X + ck.W
            charSpace := ck.FontSize / 6
            wordSpace := ck.FontSize * 2 / 3
            l := k + 1
            for l < j {
                // Grow word.
                cl := &chars[l]

                if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace {
                    s += cl.S
                    end = cl.X + cl.W
                    l++
                    continue
                }
                // Add space to phrase before next word.
                if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
                    s += " " + cl.S
                    end = cl.X + cl.W
                    l++
                    continue
                }
                break
            }
            f := ck.Font
            words = append(words, pdf.Text{
                Font:     f,
                FontSize: ck.FontSize,
                X:        ck.X,
                Y:        ck.Y,
                W:        end - ck.X,
                S:        s,
            })
            k = l
        }
        i = j
    }

    return words
}

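// parsePage extracts the instruction-encoding table from one manual page and
// returns, for every opcode found, its op name, its opstr table entry, a
// syntax comment, and an instFormat table line (mask/value/args). FCMP rows
// are expanded via dealWithFcmp; isFP selects the leading-item offset used
// for the floating-point instruction pages.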
func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) {
    opstrs = make(map[string]string)
    instFormatComments = make(map[string]string)
    instFormats = make(map[string]string)

    content := p.Content()

    var text []pdf.Text
    for _, t := range content.Text {
        text = append(text, t)
    }

    // Skip the leading table name (70 items) and table header (64 items),
    // and drop the trailing page number (3 items).
    if isFP {
        text = text[134 : len(text)-3]
    } else {
        text = text[64 : len(text)-3]
    }

    text = findWords(text)

    for i := 0; i < len(text); {
        var fcmpConditions map[string]map[string]string
        if strings.HasPrefix(text[i].S, "FCMP") {
            fcmpConditions = dealWithFcmp(strings.Split(text[i].S, ".")[2])

            for fc, inst := range fcmpConditions {
                ops = append(ops, inst["op"])
                opstrs[fc] = inst["opstr"]
                instFormatComments[fc] = inst["instFormatComment"]
                instFormats[fc] = inst["instFormat"]
            }
            t := i + 1
            for ; t < len(text) && text[t].Y == text[i].Y; t++ {
            }
            i = t
            continue
        }

        op := strings.Replace(text[i].S, ".", "_", -1)
        opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S)
        instFormatComment := ""
        binValue := ""
        binMask := ""
        instArgs := ""
        offs := false
        var offArgs []string

        j := i + 1
        for ; j < len(text) && text[j].Y == text[i].Y; j++ {

            // Some instructions take no arguments, so the next word (text[j].S) may be
            // a 0/1 bit rather than the argument list; in that case it must not be skipped.
            if res, _ := regexp.MatchString(`^\d+$`, text[j].S); j == i+1 && !res {
                instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1))
                continue
            }
            if text[j].S == "0" || text[j].S == "1" {
                binValue += text[j].S
                binMask += "1"
            } else {
                argLen, argName := getArg(text[j].S)

                // Looking up the argument's length failed; derive it from the
                // remaining fields on the same line.
                if argLen == 0 {
                    left := 31 - len(binValue)
                    right := 0
                    l := j + 1
                    if l < len(text) && text[l].Y == text[j].Y {
                        for ; l < len(text) && text[l].Y == text[j].Y; l++ {
                            if text[l].S == "0" || text[l].S == "1" {
                                right += 1
                            } else {
                                tArgLen, _ := getArg(text[l].S)
                                if tArgLen == 0 {
                                    fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n")
                                }
                                right += tArgLen
                            }
                        }
                    }
                    argLen = left - right + 1
                    argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10)
                }

                for k := 0; k < argLen; k++ {
                    binValue += "0"
                    binMask += "0"
                }

                if argName != "offs" {
                    if instArgs != "" {
                        instArgs = ", " + instArgs
                    }
                    instArgs = argName + instArgs
                } else {
                    offs = true
                    offArgs = append(offArgs, text[j].S)
                }
            }
        }

        // The real offset is split across two fields of the instruction encoding, for example: BEQZ.
        if offs && offArgs != nil {
            var left int
            var right int
            if len(offArgs) == 1 {
                left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1])
                right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
            } else if len(offArgs) == 2 {
                left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1])
                right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
            }

            if instArgs == "" {
                instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right)
            } else {
                instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right)
            }
        }

        ops = append(ops, op)
        opstrs[op] = opstr
        if instFormatComment == "" {
            instFormatComment = "// " + text[i].S
        } else if strings.HasPrefix(op, "AM") {
            instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S)
        }
        instFormatComments[op] = instFormatComment
        // For some instructions (e.g. BSTRINS.*) the operand order in the encoding
        // differs from the assembly syntax, so reorder instArgs to match the syntax.
        if instArgs != "" {
            args := strings.Split(instFormatComment, " ")[2:]
            tInstArgs := strings.Split(instArgs, ", ")
            newOrderedInstArgs := []string{}
            for _, a := range args {
                a = strings.Split(a, ",")[0]
                for _, aa := range tInstArgs {
                    if strings.Contains(aa, a) {
                        newOrderedInstArgs = append(newOrderedInstArgs, aa)
                        break
                    } else if a == "rd" && aa == "arg_fd" {
                        newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd")
                        break
                    }
                }
            }
            instArgs = strings.Join(newOrderedInstArgs, ", ")
        }
        if strings.HasPrefix(op, "AM") {
            instArgs = "arg_rd, arg_rk, arg_rj"
        }
        instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs)
        instFormats[op] = instFormat

        i = j // next instruction
    }

    return
}
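
/*
A minimal driver sketch (illustrative only, not part of the original file; the
manual file name is an assumption) showing how parsePage could be fed with
pages via the pdf package used above (pdf.Open, NumPage, Page):

	r, err := pdf.Open("loongarch-manual.pdf") // hypothetical input file
	if err != nil {
		log.Fatal(err)
	}
	for num := 1; num <= r.NumPage(); num++ {
		ops, opstrs, comments, formats := parsePage(num, r.Page(num), false)
		_, _, _, _ = ops, opstrs, comments, formats
	}
*/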