mirror of
https://gitee.com/wa-lang/wa.git
synced 2025-12-06 17:19:15 +08:00
添加第三方的pdf包
This commit is contained in:
27
internal/3rdparty/pdf/LICENSE
vendored
Normal file
27
internal/3rdparty/pdf/LICENSE
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
3
internal/3rdparty/pdf/README.md
vendored
Normal file
3
internal/3rdparty/pdf/README.md
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
go get rsc.io/pdf
|
||||
|
||||
http://godoc.org/rsc.io/pdf
|
||||
529
internal/3rdparty/pdf/lex.go
vendored
Normal file
529
internal/3rdparty/pdf/lex.go
vendored
Normal file
@@ -0,0 +1,529 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Reading of PDF tokens and objects from a raw byte stream.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// A token is a PDF token in the input stream, one of the following Go types:
|
||||
//
|
||||
// bool, a PDF boolean
|
||||
// int64, a PDF integer
|
||||
// float64, a PDF real
|
||||
// string, a PDF string literal
|
||||
// keyword, a PDF keyword
|
||||
// name, a PDF name without the leading slash
|
||||
//
|
||||
type token interface{}
|
||||
|
||||
// A name is a PDF name, without the leading slash.
|
||||
type name string
|
||||
|
||||
// A keyword is a PDF keyword.
|
||||
// Delimiter tokens used in higher-level syntax,
|
||||
// such as "<<", ">>", "[", "]", "{", "}", are also treated as keywords.
|
||||
type keyword string
|
||||
|
||||
// A buffer holds buffered input bytes from the PDF file.
|
||||
type buffer struct {
|
||||
r io.Reader // source of data
|
||||
buf []byte // buffered data
|
||||
pos int // read index in buf
|
||||
offset int64 // offset at end of buf; aka offset of next read
|
||||
tmp []byte // scratch space for accumulating token
|
||||
unread []token // queue of read but then unread tokens
|
||||
allowEOF bool
|
||||
allowObjptr bool
|
||||
allowStream bool
|
||||
eof bool
|
||||
key []byte
|
||||
useAES bool
|
||||
objptr objptr
|
||||
}
|
||||
|
||||
// newBuffer returns a new buffer reading from r at the given offset.
|
||||
func newBuffer(r io.Reader, offset int64) *buffer {
|
||||
return &buffer{
|
||||
r: r,
|
||||
offset: offset,
|
||||
buf: make([]byte, 0, 4096),
|
||||
allowObjptr: true,
|
||||
allowStream: true,
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) seek(offset int64) {
|
||||
b.offset = offset
|
||||
b.buf = b.buf[:0]
|
||||
b.pos = 0
|
||||
b.unread = b.unread[:0]
|
||||
}
|
||||
|
||||
func (b *buffer) readByte() byte {
|
||||
if b.pos >= len(b.buf) {
|
||||
b.reload()
|
||||
if b.pos >= len(b.buf) {
|
||||
return '\n'
|
||||
}
|
||||
}
|
||||
c := b.buf[b.pos]
|
||||
b.pos++
|
||||
return c
|
||||
}
|
||||
|
||||
func (b *buffer) errorf(format string, args ...interface{}) {
|
||||
panic(fmt.Errorf(format, args...))
|
||||
}
|
||||
|
||||
func (b *buffer) reload() bool {
|
||||
n := cap(b.buf) - int(b.offset%int64(cap(b.buf)))
|
||||
n, err := b.r.Read(b.buf[:n])
|
||||
if n == 0 && err != nil {
|
||||
b.buf = b.buf[:0]
|
||||
b.pos = 0
|
||||
if b.allowEOF && err == io.EOF {
|
||||
b.eof = true
|
||||
return false
|
||||
}
|
||||
b.errorf("malformed PDF: reading at offset %d: %v", b.offset, err)
|
||||
return false
|
||||
}
|
||||
b.offset += int64(n)
|
||||
b.buf = b.buf[:n]
|
||||
b.pos = 0
|
||||
return true
|
||||
}
|
||||
|
||||
func (b *buffer) seekForward(offset int64) {
|
||||
for b.offset < offset {
|
||||
if !b.reload() {
|
||||
return
|
||||
}
|
||||
}
|
||||
b.pos = len(b.buf) - int(b.offset-offset)
|
||||
}
|
||||
|
||||
func (b *buffer) readOffset() int64 {
|
||||
return b.offset - int64(len(b.buf)) + int64(b.pos)
|
||||
}
|
||||
|
||||
func (b *buffer) unreadByte() {
|
||||
if b.pos > 0 {
|
||||
b.pos--
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) unreadToken(t token) {
|
||||
b.unread = append(b.unread, t)
|
||||
}
|
||||
|
||||
func (b *buffer) readToken() token {
|
||||
if n := len(b.unread); n > 0 {
|
||||
t := b.unread[n-1]
|
||||
b.unread = b.unread[:n-1]
|
||||
return t
|
||||
}
|
||||
|
||||
// Find first non-space, non-comment byte.
|
||||
c := b.readByte()
|
||||
for {
|
||||
if isSpace(c) {
|
||||
if b.eof {
|
||||
return io.EOF
|
||||
}
|
||||
c = b.readByte()
|
||||
} else if c == '%' {
|
||||
for c != '\r' && c != '\n' {
|
||||
c = b.readByte()
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '<':
|
||||
if b.readByte() == '<' {
|
||||
return keyword("<<")
|
||||
}
|
||||
b.unreadByte()
|
||||
return b.readHexString()
|
||||
|
||||
case '(':
|
||||
return b.readLiteralString()
|
||||
|
||||
case '[', ']', '{', '}':
|
||||
return keyword(string(c))
|
||||
|
||||
case '/':
|
||||
return b.readName()
|
||||
|
||||
case '>':
|
||||
if b.readByte() == '>' {
|
||||
return keyword(">>")
|
||||
}
|
||||
b.unreadByte()
|
||||
fallthrough
|
||||
|
||||
default:
|
||||
if isDelim(c) {
|
||||
b.errorf("unexpected delimiter %#q", rune(c))
|
||||
return nil
|
||||
}
|
||||
b.unreadByte()
|
||||
return b.readKeyword()
|
||||
}
|
||||
}
|
||||
|
||||
func (b *buffer) readHexString() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
Loop:
|
||||
c := b.readByte()
|
||||
if c == '>' {
|
||||
break
|
||||
}
|
||||
if isSpace(c) {
|
||||
goto Loop
|
||||
}
|
||||
Loop2:
|
||||
c2 := b.readByte()
|
||||
if isSpace(c2) {
|
||||
goto Loop2
|
||||
}
|
||||
x := unhex(c)<<4 | unhex(c2)
|
||||
if x < 0 {
|
||||
b.errorf("malformed hex string %c %c %s", c, c2, b.buf[b.pos:])
|
||||
break
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
}
|
||||
b.tmp = tmp
|
||||
return string(tmp)
|
||||
}
|
||||
|
||||
func unhex(b byte) int {
|
||||
switch {
|
||||
case '0' <= b && b <= '9':
|
||||
return int(b) - '0'
|
||||
case 'a' <= b && b <= 'f':
|
||||
return int(b) - 'a' + 10
|
||||
case 'A' <= b && b <= 'F':
|
||||
return int(b) - 'A' + 10
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func (b *buffer) readLiteralString() token {
|
||||
tmp := b.tmp[:0]
|
||||
depth := 1
|
||||
Loop:
|
||||
for {
|
||||
c := b.readByte()
|
||||
switch c {
|
||||
default:
|
||||
tmp = append(tmp, c)
|
||||
case '(':
|
||||
depth++
|
||||
tmp = append(tmp, c)
|
||||
case ')':
|
||||
if depth--; depth == 0 {
|
||||
break Loop
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
case '\\':
|
||||
switch c = b.readByte(); c {
|
||||
default:
|
||||
b.errorf("invalid escape sequence \\%c", c)
|
||||
tmp = append(tmp, '\\', c)
|
||||
case 'n':
|
||||
tmp = append(tmp, '\n')
|
||||
case 'r':
|
||||
tmp = append(tmp, '\r')
|
||||
case 'b':
|
||||
tmp = append(tmp, '\b')
|
||||
case 't':
|
||||
tmp = append(tmp, '\t')
|
||||
case 'f':
|
||||
tmp = append(tmp, '\f')
|
||||
case '(', ')', '\\':
|
||||
tmp = append(tmp, c)
|
||||
case '\r':
|
||||
if b.readByte() != '\n' {
|
||||
b.unreadByte()
|
||||
}
|
||||
fallthrough
|
||||
case '\n':
|
||||
// no append
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
x := int(c - '0')
|
||||
for i := 0; i < 2; i++ {
|
||||
c = b.readByte()
|
||||
if c < '0' || c > '7' {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
x = x*8 + int(c-'0')
|
||||
}
|
||||
if x > 255 {
|
||||
b.errorf("invalid octal escape \\%03o", x)
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
}
|
||||
}
|
||||
}
|
||||
b.tmp = tmp
|
||||
return string(tmp)
|
||||
}
|
||||
|
||||
func (b *buffer) readName() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
c := b.readByte()
|
||||
if isDelim(c) || isSpace(c) {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
if c == '#' {
|
||||
x := unhex(b.readByte())<<4 | unhex(b.readByte())
|
||||
if x < 0 {
|
||||
b.errorf("malformed name")
|
||||
}
|
||||
tmp = append(tmp, byte(x))
|
||||
continue
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
}
|
||||
b.tmp = tmp
|
||||
return name(string(tmp))
|
||||
}
|
||||
|
||||
func (b *buffer) readKeyword() token {
|
||||
tmp := b.tmp[:0]
|
||||
for {
|
||||
c := b.readByte()
|
||||
if isDelim(c) || isSpace(c) {
|
||||
b.unreadByte()
|
||||
break
|
||||
}
|
||||
tmp = append(tmp, c)
|
||||
}
|
||||
b.tmp = tmp
|
||||
s := string(tmp)
|
||||
switch {
|
||||
case s == "true":
|
||||
return true
|
||||
case s == "false":
|
||||
return false
|
||||
case isInteger(s):
|
||||
x, err := strconv.ParseInt(s, 10, 64)
|
||||
if err != nil {
|
||||
b.errorf("invalid integer %s", s)
|
||||
}
|
||||
return x
|
||||
case isReal(s):
|
||||
x, err := strconv.ParseFloat(s, 64)
|
||||
if err != nil {
|
||||
b.errorf("invalid real %s", s)
|
||||
}
|
||||
return x
|
||||
}
|
||||
return keyword(string(tmp))
|
||||
}
|
||||
|
||||
func isInteger(s string) bool {
|
||||
if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
|
||||
s = s[1:]
|
||||
}
|
||||
if len(s) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, c := range s {
|
||||
if c < '0' || '9' < c {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isReal(s string) bool {
|
||||
if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
|
||||
s = s[1:]
|
||||
}
|
||||
if len(s) == 0 {
|
||||
return false
|
||||
}
|
||||
ndot := 0
|
||||
for _, c := range s {
|
||||
if c == '.' {
|
||||
ndot++
|
||||
continue
|
||||
}
|
||||
if c < '0' || '9' < c {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return ndot == 1
|
||||
}
|
||||
|
||||
// An object is a PDF syntax object, one of the following Go types:
|
||||
//
|
||||
// bool, a PDF boolean
|
||||
// int64, a PDF integer
|
||||
// float64, a PDF real
|
||||
// string, a PDF string literal
|
||||
// name, a PDF name without the leading slash
|
||||
// dict, a PDF dictionary
|
||||
// array, a PDF array
|
||||
// stream, a PDF stream
|
||||
// objptr, a PDF object reference
|
||||
// objdef, a PDF object definition
|
||||
//
|
||||
// An object may also be nil, to represent the PDF null.
|
||||
type object interface{}
|
||||
|
||||
type dict map[name]object
|
||||
|
||||
type array []object
|
||||
|
||||
type stream struct {
|
||||
hdr dict
|
||||
ptr objptr
|
||||
offset int64
|
||||
}
|
||||
|
||||
type objptr struct {
|
||||
id uint32
|
||||
gen uint16
|
||||
}
|
||||
|
||||
type objdef struct {
|
||||
ptr objptr
|
||||
obj object
|
||||
}
|
||||
|
||||
func (b *buffer) readObject() object {
|
||||
tok := b.readToken()
|
||||
if kw, ok := tok.(keyword); ok {
|
||||
switch kw {
|
||||
case "null":
|
||||
return nil
|
||||
case "<<":
|
||||
return b.readDict()
|
||||
case "[":
|
||||
return b.readArray()
|
||||
}
|
||||
b.errorf("unexpected keyword %q parsing object", kw)
|
||||
return nil
|
||||
}
|
||||
|
||||
if str, ok := tok.(string); ok && b.key != nil && b.objptr.id != 0 {
|
||||
tok = decryptString(b.key, b.useAES, b.objptr, str)
|
||||
}
|
||||
|
||||
if !b.allowObjptr {
|
||||
return tok
|
||||
}
|
||||
|
||||
if t1, ok := tok.(int64); ok && int64(uint32(t1)) == t1 {
|
||||
tok2 := b.readToken()
|
||||
if t2, ok := tok2.(int64); ok && int64(uint16(t2)) == t2 {
|
||||
tok3 := b.readToken()
|
||||
switch tok3 {
|
||||
case keyword("R"):
|
||||
return objptr{uint32(t1), uint16(t2)}
|
||||
case keyword("obj"):
|
||||
old := b.objptr
|
||||
b.objptr = objptr{uint32(t1), uint16(t2)}
|
||||
obj := b.readObject()
|
||||
if _, ok := obj.(stream); !ok {
|
||||
tok4 := b.readToken()
|
||||
if tok4 != keyword("endobj") {
|
||||
b.errorf("missing endobj after indirect object definition")
|
||||
b.unreadToken(tok4)
|
||||
}
|
||||
}
|
||||
b.objptr = old
|
||||
return objdef{objptr{uint32(t1), uint16(t2)}, obj}
|
||||
}
|
||||
b.unreadToken(tok3)
|
||||
}
|
||||
b.unreadToken(tok2)
|
||||
}
|
||||
return tok
|
||||
}
|
||||
|
||||
func (b *buffer) readArray() object {
|
||||
var x array
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == nil || tok == keyword("]") {
|
||||
break
|
||||
}
|
||||
b.unreadToken(tok)
|
||||
x = append(x, b.readObject())
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func (b *buffer) readDict() object {
|
||||
x := make(dict)
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == nil || tok == keyword(">>") {
|
||||
break
|
||||
}
|
||||
n, ok := tok.(name)
|
||||
if !ok {
|
||||
b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok)
|
||||
continue
|
||||
}
|
||||
x[n] = b.readObject()
|
||||
}
|
||||
|
||||
if !b.allowStream {
|
||||
return x
|
||||
}
|
||||
|
||||
tok := b.readToken()
|
||||
if tok != keyword("stream") {
|
||||
b.unreadToken(tok)
|
||||
return x
|
||||
}
|
||||
|
||||
switch b.readByte() {
|
||||
case '\r':
|
||||
if b.readByte() != '\n' {
|
||||
b.unreadByte()
|
||||
}
|
||||
case '\n':
|
||||
// ok
|
||||
default:
|
||||
b.errorf("stream keyword not followed by newline")
|
||||
}
|
||||
|
||||
return stream{x, b.objptr, b.readOffset()}
|
||||
}
|
||||
|
||||
func isSpace(b byte) bool {
|
||||
switch b {
|
||||
case '\x00', '\t', '\n', '\f', '\r', ' ':
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isDelim(b byte) bool {
|
||||
switch b {
|
||||
case '<', '>', '(', ')', '[', ']', '{', '}', '/', '%':
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
4286
internal/3rdparty/pdf/name.go
vendored
Normal file
4286
internal/3rdparty/pdf/name.go
vendored
Normal file
File diff suppressed because it is too large
Load Diff
667
internal/3rdparty/pdf/page.go
vendored
Normal file
667
internal/3rdparty/pdf/page.go
vendored
Normal file
@@ -0,0 +1,667 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Page represent a single page in a PDF file.
|
||||
// The methods interpret a Page dictionary stored in V.
|
||||
type Page struct {
|
||||
V Value
|
||||
}
|
||||
|
||||
// Page returns the page for the given page number.
|
||||
// Page numbers are indexed starting at 1, not 0.
|
||||
// If the page is not found, Page returns a Page with p.V.IsNull().
|
||||
func (r *Reader) Page(num int) Page {
|
||||
num-- // now 0-indexed
|
||||
page := r.Trailer().Key("Root").Key("Pages")
|
||||
Search:
|
||||
for page.Key("Type").Name() == "Pages" {
|
||||
count := int(page.Key("Count").Int64())
|
||||
if count < num {
|
||||
return Page{}
|
||||
}
|
||||
kids := page.Key("Kids")
|
||||
for i := 0; i < kids.Len(); i++ {
|
||||
kid := kids.Index(i)
|
||||
if kid.Key("Type").Name() == "Pages" {
|
||||
c := int(kid.Key("Count").Int64())
|
||||
if num < c {
|
||||
page = kid
|
||||
continue Search
|
||||
}
|
||||
num -= c
|
||||
continue
|
||||
}
|
||||
if kid.Key("Type").Name() == "Page" {
|
||||
if num == 0 {
|
||||
return Page{kid}
|
||||
}
|
||||
num--
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
return Page{}
|
||||
}
|
||||
|
||||
// NumPage returns the number of pages in the PDF file.
|
||||
func (r *Reader) NumPage() int {
|
||||
return int(r.Trailer().Key("Root").Key("Pages").Key("Count").Int64())
|
||||
}
|
||||
|
||||
func (p Page) findInherited(key string) Value {
|
||||
for v := p.V; !v.IsNull(); v = v.Key("Parent") {
|
||||
if r := v.Key(key); !r.IsNull() {
|
||||
return r
|
||||
}
|
||||
}
|
||||
return Value{}
|
||||
}
|
||||
|
||||
/*
|
||||
func (p Page) MediaBox() Value {
|
||||
return p.findInherited("MediaBox")
|
||||
}
|
||||
|
||||
func (p Page) CropBox() Value {
|
||||
return p.findInherited("CropBox")
|
||||
}
|
||||
*/
|
||||
|
||||
// Resources returns the resources dictionary associated with the page.
|
||||
func (p Page) Resources() Value {
|
||||
return p.findInherited("Resources")
|
||||
}
|
||||
|
||||
// Fonts returns a list of the fonts associated with the page.
|
||||
func (p Page) Fonts() []string {
|
||||
return p.Resources().Key("Font").Keys()
|
||||
}
|
||||
|
||||
// Font returns the font with the given name associated with the page.
|
||||
func (p Page) Font(name string) Font {
|
||||
return Font{p.Resources().Key("Font").Key(name)}
|
||||
}
|
||||
|
||||
// A Font represent a font in a PDF file.
|
||||
// The methods interpret a Font dictionary stored in V.
|
||||
type Font struct {
|
||||
V Value
|
||||
}
|
||||
|
||||
// BaseFont returns the font's name (BaseFont property).
|
||||
func (f Font) BaseFont() string {
|
||||
return f.V.Key("BaseFont").Name()
|
||||
}
|
||||
|
||||
// FirstChar returns the code point of the first character in the font.
|
||||
func (f Font) FirstChar() int {
|
||||
return int(f.V.Key("FirstChar").Int64())
|
||||
}
|
||||
|
||||
// LastChar returns the code point of the last character in the font.
|
||||
func (f Font) LastChar() int {
|
||||
return int(f.V.Key("LastChar").Int64())
|
||||
}
|
||||
|
||||
// Widths returns the widths of the glyphs in the font.
|
||||
// In a well-formed PDF, len(f.Widths()) == f.LastChar()+1 - f.FirstChar().
|
||||
func (f Font) Widths() []float64 {
|
||||
x := f.V.Key("Widths")
|
||||
var out []float64
|
||||
for i := 0; i < x.Len(); i++ {
|
||||
out = append(out, x.Index(i).Float64())
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Width returns the width of the given code point.
|
||||
func (f Font) Width(code int) float64 {
|
||||
first := f.FirstChar()
|
||||
last := f.LastChar()
|
||||
if code < first || last < code {
|
||||
return 0
|
||||
}
|
||||
return f.V.Key("Widths").Index(code - first).Float64()
|
||||
}
|
||||
|
||||
// Encoder returns the encoding between font code point sequences and UTF-8.
|
||||
func (f Font) Encoder() TextEncoding {
|
||||
enc := f.V.Key("Encoding")
|
||||
switch enc.Kind() {
|
||||
case Name:
|
||||
switch enc.Name() {
|
||||
case "WinAnsiEncoding":
|
||||
return &byteEncoder{&winAnsiEncoding}
|
||||
case "MacRomanEncoding":
|
||||
return &byteEncoder{&macRomanEncoding}
|
||||
case "Identity-H":
|
||||
// TODO: Should be big-endian UCS-2 decoder
|
||||
return &nopEncoder{}
|
||||
default:
|
||||
println("unknown encoding", enc.Name())
|
||||
return &nopEncoder{}
|
||||
}
|
||||
case Dict:
|
||||
return &dictEncoder{enc.Key("Differences")}
|
||||
case Null:
|
||||
// ok, try ToUnicode
|
||||
default:
|
||||
println("unexpected encoding", enc.String())
|
||||
return &nopEncoder{}
|
||||
}
|
||||
|
||||
toUnicode := f.V.Key("ToUnicode")
|
||||
if toUnicode.Kind() == Dict {
|
||||
m := readCmap(toUnicode)
|
||||
if m == nil {
|
||||
return &nopEncoder{}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
return &byteEncoder{&pdfDocEncoding}
|
||||
}
|
||||
|
||||
type dictEncoder struct {
|
||||
v Value
|
||||
}
|
||||
|
||||
func (e *dictEncoder) Decode(raw string) (text string) {
|
||||
r := make([]rune, 0, len(raw))
|
||||
for i := 0; i < len(raw); i++ {
|
||||
ch := rune(raw[i])
|
||||
n := -1
|
||||
for j := 0; j < e.v.Len(); j++ {
|
||||
x := e.v.Index(j)
|
||||
if x.Kind() == Integer {
|
||||
n = int(x.Int64())
|
||||
continue
|
||||
}
|
||||
if x.Kind() == Name {
|
||||
if int(raw[i]) == n {
|
||||
r := nameToRune[x.Name()]
|
||||
if r != 0 {
|
||||
ch = r
|
||||
break
|
||||
}
|
||||
}
|
||||
n++
|
||||
}
|
||||
}
|
||||
r = append(r, ch)
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
// A TextEncoding represents a mapping between
|
||||
// font code points and UTF-8 text.
|
||||
type TextEncoding interface {
|
||||
// Decode returns the UTF-8 text corresponding to
|
||||
// the sequence of code points in raw.
|
||||
Decode(raw string) (text string)
|
||||
}
|
||||
|
||||
type nopEncoder struct {
|
||||
}
|
||||
|
||||
func (e *nopEncoder) Decode(raw string) (text string) {
|
||||
return raw
|
||||
}
|
||||
|
||||
type byteEncoder struct {
|
||||
table *[256]rune
|
||||
}
|
||||
|
||||
func (e *byteEncoder) Decode(raw string) (text string) {
|
||||
r := make([]rune, 0, len(raw))
|
||||
for i := 0; i < len(raw); i++ {
|
||||
r = append(r, e.table[raw[i]])
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
type cmap struct {
|
||||
space [4][][2]string
|
||||
bfrange []bfrange
|
||||
}
|
||||
|
||||
func (m *cmap) Decode(raw string) (text string) {
|
||||
var r []rune
|
||||
Parse:
|
||||
for len(raw) > 0 {
|
||||
for n := 1; n <= 4 && n <= len(raw); n++ {
|
||||
for _, space := range m.space[n-1] {
|
||||
if space[0] <= raw[:n] && raw[:n] <= space[1] {
|
||||
text := raw[:n]
|
||||
raw = raw[n:]
|
||||
for _, bf := range m.bfrange {
|
||||
if len(bf.lo) == n && bf.lo <= text && text <= bf.hi {
|
||||
if bf.dst.Kind() == String {
|
||||
s := bf.dst.RawString()
|
||||
if bf.lo != text {
|
||||
b := []byte(s)
|
||||
b[len(b)-1] += text[len(text)-1] - bf.lo[len(bf.lo)-1]
|
||||
s = string(b)
|
||||
}
|
||||
r = append(r, []rune(utf16Decode(s))...)
|
||||
continue Parse
|
||||
}
|
||||
if bf.dst.Kind() == Array {
|
||||
fmt.Printf("array %v\n", bf.dst)
|
||||
} else {
|
||||
fmt.Printf("unknown dst %v\n", bf.dst)
|
||||
}
|
||||
r = append(r, noRune)
|
||||
continue Parse
|
||||
}
|
||||
}
|
||||
fmt.Printf("no text for %q", text)
|
||||
r = append(r, noRune)
|
||||
continue Parse
|
||||
}
|
||||
}
|
||||
}
|
||||
println("no code space found")
|
||||
r = append(r, noRune)
|
||||
raw = raw[1:]
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
type bfrange struct {
|
||||
lo string
|
||||
hi string
|
||||
dst Value
|
||||
}
|
||||
|
||||
func readCmap(toUnicode Value) *cmap {
|
||||
n := -1
|
||||
var m cmap
|
||||
ok := true
|
||||
Interpret(toUnicode, func(stk *Stack, op string) {
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
switch op {
|
||||
case "findresource":
|
||||
category := stk.Pop()
|
||||
key := stk.Pop()
|
||||
fmt.Println("findresource", key, category)
|
||||
stk.Push(newDict())
|
||||
case "begincmap":
|
||||
stk.Push(newDict())
|
||||
case "endcmap":
|
||||
stk.Pop()
|
||||
case "begincodespacerange":
|
||||
n = int(stk.Pop().Int64())
|
||||
case "endcodespacerange":
|
||||
if n < 0 {
|
||||
println("missing begincodespacerange")
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
hi, lo := stk.Pop().RawString(), stk.Pop().RawString()
|
||||
if len(lo) == 0 || len(lo) != len(hi) {
|
||||
println("bad codespace range")
|
||||
ok = false
|
||||
return
|
||||
}
|
||||
m.space[len(lo)-1] = append(m.space[len(lo)-1], [2]string{lo, hi})
|
||||
}
|
||||
n = -1
|
||||
case "beginbfrange":
|
||||
n = int(stk.Pop().Int64())
|
||||
case "endbfrange":
|
||||
if n < 0 {
|
||||
panic("missing beginbfrange")
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
dst, srcHi, srcLo := stk.Pop(), stk.Pop().RawString(), stk.Pop().RawString()
|
||||
m.bfrange = append(m.bfrange, bfrange{srcLo, srcHi, dst})
|
||||
}
|
||||
case "defineresource":
|
||||
category := stk.Pop().Name()
|
||||
value := stk.Pop()
|
||||
key := stk.Pop().Name()
|
||||
fmt.Println("defineresource", key, value, category)
|
||||
stk.Push(value)
|
||||
default:
|
||||
println("interp\t", op)
|
||||
}
|
||||
})
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return &m
|
||||
}
|
||||
|
||||
type matrix [3][3]float64
|
||||
|
||||
var ident = matrix{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}
|
||||
|
||||
func (x matrix) mul(y matrix) matrix {
|
||||
var z matrix
|
||||
for i := 0; i < 3; i++ {
|
||||
for j := 0; j < 3; j++ {
|
||||
for k := 0; k < 3; k++ {
|
||||
z[i][j] += x[i][k] * y[k][j]
|
||||
}
|
||||
}
|
||||
}
|
||||
return z
|
||||
}
|
||||
|
||||
// A Text represents a single piece of text drawn on a page.
|
||||
type Text struct {
|
||||
Font string // the font used
|
||||
FontSize float64 // the font size, in points (1/72 of an inch)
|
||||
X float64 // the X coordinate, in points, increasing left to right
|
||||
Y float64 // the Y coordinate, in points, increasing bottom to top
|
||||
W float64 // the width of the text, in points
|
||||
S string // the actual UTF-8 text
|
||||
}
|
||||
|
||||
// A Rect represents a rectangle.
|
||||
type Rect struct {
|
||||
Min, Max Point
|
||||
}
|
||||
|
||||
// A Point represents an X, Y pair.
|
||||
type Point struct {
|
||||
X float64
|
||||
Y float64
|
||||
}
|
||||
|
||||
// Content describes the basic content on a page: the text and any drawn rectangles.
|
||||
type Content struct {
|
||||
Text []Text
|
||||
Rect []Rect
|
||||
}
|
||||
|
||||
type gstate struct {
|
||||
Tc float64
|
||||
Tw float64
|
||||
Th float64
|
||||
Tl float64
|
||||
Tf Font
|
||||
Tfs float64
|
||||
Tmode int
|
||||
Trise float64
|
||||
Tm matrix
|
||||
Tlm matrix
|
||||
Trm matrix
|
||||
CTM matrix
|
||||
}
|
||||
|
||||
// Content returns the page's content.
|
||||
func (p Page) Content() Content {
|
||||
strm := p.V.Key("Contents")
|
||||
var enc TextEncoding = &nopEncoder{}
|
||||
|
||||
var g = gstate{
|
||||
Th: 1,
|
||||
CTM: ident,
|
||||
}
|
||||
|
||||
var text []Text
|
||||
showText := func(s string) {
|
||||
n := 0
|
||||
for _, ch := range enc.Decode(s) {
|
||||
Trm := matrix{{g.Tfs * g.Th, 0, 0}, {0, g.Tfs, 0}, {0, g.Trise, 1}}.mul(g.Tm).mul(g.CTM)
|
||||
w0 := g.Tf.Width(int(s[n]))
|
||||
n++
|
||||
if ch != ' ' {
|
||||
f := g.Tf.BaseFont()
|
||||
if i := strings.Index(f, "+"); i >= 0 {
|
||||
f = f[i+1:]
|
||||
}
|
||||
text = append(text, Text{f, Trm[0][0], Trm[2][0], Trm[2][1], w0 / 1000 * Trm[0][0], string(ch)})
|
||||
}
|
||||
tx := w0/1000*g.Tfs + g.Tc
|
||||
if ch == ' ' {
|
||||
tx += g.Tw
|
||||
}
|
||||
tx *= g.Th
|
||||
g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm)
|
||||
}
|
||||
}
|
||||
|
||||
var rect []Rect
|
||||
var gstack []gstate
|
||||
Interpret(strm, func(stk *Stack, op string) {
|
||||
n := stk.Len()
|
||||
args := make([]Value, n)
|
||||
for i := n - 1; i >= 0; i-- {
|
||||
args[i] = stk.Pop()
|
||||
}
|
||||
switch op {
|
||||
default:
|
||||
//fmt.Println(op, args)
|
||||
return
|
||||
|
||||
case "cm": // update g.CTM
|
||||
if len(args) != 6 {
|
||||
panic("bad g.Tm")
|
||||
}
|
||||
var m matrix
|
||||
for i := 0; i < 6; i++ {
|
||||
m[i/2][i%2] = args[i].Float64()
|
||||
}
|
||||
m[2][2] = 1
|
||||
g.CTM = m.mul(g.CTM)
|
||||
|
||||
case "gs": // set parameters from graphics state resource
|
||||
gs := p.Resources().Key("ExtGState").Key(args[0].Name())
|
||||
font := gs.Key("Font")
|
||||
if font.Kind() == Array && font.Len() == 2 {
|
||||
//fmt.Println("FONT", font)
|
||||
}
|
||||
|
||||
case "f": // fill
|
||||
case "g": // setgray
|
||||
case "l": // lineto
|
||||
case "m": // moveto
|
||||
|
||||
case "cs": // set colorspace non-stroking
|
||||
case "scn": // set color non-stroking
|
||||
|
||||
case "re": // append rectangle to path
|
||||
if len(args) != 4 {
|
||||
panic("bad re")
|
||||
}
|
||||
x, y, w, h := args[0].Float64(), args[1].Float64(), args[2].Float64(), args[3].Float64()
|
||||
rect = append(rect, Rect{Point{x, y}, Point{x + w, y + h}})
|
||||
|
||||
case "q": // save graphics state
|
||||
gstack = append(gstack, g)
|
||||
|
||||
case "Q": // restore graphics state
|
||||
n := len(gstack) - 1
|
||||
g = gstack[n]
|
||||
gstack = gstack[:n]
|
||||
|
||||
case "BT": // begin text (reset text matrix and line matrix)
|
||||
g.Tm = ident
|
||||
g.Tlm = g.Tm
|
||||
|
||||
case "ET": // end text
|
||||
|
||||
case "T*": // move to start of next line
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
|
||||
case "Tc": // set character spacing
|
||||
if len(args) != 1 {
|
||||
panic("bad g.Tc")
|
||||
}
|
||||
g.Tc = args[0].Float64()
|
||||
|
||||
case "TD": // move text position and set leading
|
||||
if len(args) != 2 {
|
||||
panic("bad Td")
|
||||
}
|
||||
g.Tl = -args[1].Float64()
|
||||
fallthrough
|
||||
case "Td": // move text position
|
||||
if len(args) != 2 {
|
||||
panic("bad Td")
|
||||
}
|
||||
tx := args[0].Float64()
|
||||
ty := args[1].Float64()
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {tx, ty, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
|
||||
case "Tf": // set text font and size
|
||||
if len(args) != 2 {
|
||||
panic("bad TL")
|
||||
}
|
||||
f := args[0].Name()
|
||||
g.Tf = p.Font(f)
|
||||
enc = g.Tf.Encoder()
|
||||
if enc == nil {
|
||||
println("no cmap for", f)
|
||||
enc = &nopEncoder{}
|
||||
}
|
||||
g.Tfs = args[1].Float64()
|
||||
|
||||
case "\"": // set spacing, move to next line, and show text
|
||||
if len(args) != 3 {
|
||||
panic("bad \" operator")
|
||||
}
|
||||
g.Tw = args[0].Float64()
|
||||
g.Tc = args[1].Float64()
|
||||
args = args[2:]
|
||||
fallthrough
|
||||
case "'": // move to next line and show text
|
||||
if len(args) != 1 {
|
||||
panic("bad ' operator")
|
||||
}
|
||||
x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}}
|
||||
g.Tlm = x.mul(g.Tlm)
|
||||
g.Tm = g.Tlm
|
||||
fallthrough
|
||||
case "Tj": // show text
|
||||
if len(args) != 1 {
|
||||
panic("bad Tj operator")
|
||||
}
|
||||
showText(args[0].RawString())
|
||||
|
||||
case "TJ": // show text, allowing individual glyph positioning
|
||||
v := args[0]
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
x := v.Index(i)
|
||||
if x.Kind() == String {
|
||||
showText(x.RawString())
|
||||
} else {
|
||||
tx := -x.Float64() / 1000 * g.Tfs * g.Th
|
||||
g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm)
|
||||
}
|
||||
}
|
||||
|
||||
case "TL": // set text leading
|
||||
if len(args) != 1 {
|
||||
panic("bad TL")
|
||||
}
|
||||
g.Tl = args[0].Float64()
|
||||
|
||||
case "Tm": // set text matrix and line matrix
|
||||
if len(args) != 6 {
|
||||
panic("bad g.Tm")
|
||||
}
|
||||
var m matrix
|
||||
for i := 0; i < 6; i++ {
|
||||
m[i/2][i%2] = args[i].Float64()
|
||||
}
|
||||
m[2][2] = 1
|
||||
g.Tm = m
|
||||
g.Tlm = m
|
||||
|
||||
case "Tr": // set text rendering mode
|
||||
if len(args) != 1 {
|
||||
panic("bad Tr")
|
||||
}
|
||||
g.Tmode = int(args[0].Int64())
|
||||
|
||||
case "Ts": // set text rise
|
||||
if len(args) != 1 {
|
||||
panic("bad Ts")
|
||||
}
|
||||
g.Trise = args[0].Float64()
|
||||
|
||||
case "Tw": // set word spacing
|
||||
if len(args) != 1 {
|
||||
panic("bad g.Tw")
|
||||
}
|
||||
g.Tw = args[0].Float64()
|
||||
|
||||
case "Tz": // set horizontal text scaling
|
||||
if len(args) != 1 {
|
||||
panic("bad Tz")
|
||||
}
|
||||
g.Th = args[0].Float64() / 100
|
||||
}
|
||||
})
|
||||
return Content{text, rect}
|
||||
}
|
||||
|
||||
// TextVertical implements sort.Interface for sorting
|
||||
// a slice of Text values in vertical order, top to bottom,
|
||||
// and then left to right within a line.
|
||||
type TextVertical []Text
|
||||
|
||||
func (x TextVertical) Len() int { return len(x) }
|
||||
func (x TextVertical) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x TextVertical) Less(i, j int) bool {
|
||||
if x[i].Y != x[j].Y {
|
||||
return x[i].Y > x[j].Y
|
||||
}
|
||||
return x[i].X < x[j].X
|
||||
}
|
||||
|
||||
// TextHorizontal implements sort.Interface for sorting
|
||||
// a slice of Text values in horizontal order, left to right,
|
||||
// and then top to bottom within a column.
|
||||
type TextHorizontal []Text
|
||||
|
||||
func (x TextHorizontal) Len() int { return len(x) }
|
||||
func (x TextHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x TextHorizontal) Less(i, j int) bool {
|
||||
if x[i].X != x[j].X {
|
||||
return x[i].X < x[j].X
|
||||
}
|
||||
return x[i].Y > x[j].Y
|
||||
}
|
||||
|
||||
// An Outline is a tree describing the outline (also known as the table of contents)
|
||||
// of a document.
|
||||
type Outline struct {
|
||||
Title string // title for this element
|
||||
Child []Outline // child elements
|
||||
}
|
||||
|
||||
// Outline returns the document outline.
|
||||
// The Outline returned is the root of the outline tree and typically has no Title itself.
|
||||
// That is, the children of the returned root are the top-level entries in the outline.
|
||||
func (r *Reader) Outline() Outline {
|
||||
return buildOutline(r.Trailer().Key("Root").Key("Outlines"))
|
||||
}
|
||||
|
||||
func buildOutline(entry Value) Outline {
|
||||
var x Outline
|
||||
x.Title = entry.Key("Title").Text()
|
||||
for child := entry.Key("First"); child.Kind() == Dict; child = child.Key("Next") {
|
||||
x.Child = append(x.Child, buildOutline(child))
|
||||
}
|
||||
return x
|
||||
}
|
||||
110
internal/3rdparty/pdf/pdfpasswd/main.go
vendored
Normal file
110
internal/3rdparty/pdf/pdfpasswd/main.go
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Pdfpasswd searches for the password for an encrypted PDF
|
||||
// by trying all strings over a given alphabet up to a given length.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
"wa-lang.org/wa/internal/3rdparty/pdf"
|
||||
)
|
||||
|
||||
var (
|
||||
alphabet = flag.String("a", "0123456789", "alphabet")
|
||||
maxLength = flag.Int("m", 4, "max length")
|
||||
)
|
||||
|
||||
func usage() {
|
||||
fmt.Fprintf(os.Stderr, "usage: pdfpasswd [-a alphabet] [-m maxlength] file\n")
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
func main() {
|
||||
log.SetFlags(0)
|
||||
log.SetPrefix("pdfpasswd: ")
|
||||
|
||||
flag.Usage = usage
|
||||
flag.Parse()
|
||||
if flag.NArg() != 1 {
|
||||
usage()
|
||||
}
|
||||
|
||||
f, err := os.Open(flag.Arg(0))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
last := ""
|
||||
alpha := *alphabet
|
||||
ctr := make([]int, *maxLength)
|
||||
pw := func() string {
|
||||
inc(ctr, len(alpha)+1)
|
||||
for !valid(ctr) {
|
||||
inc(ctr, len(alpha)+1)
|
||||
}
|
||||
if done(ctr) {
|
||||
return ""
|
||||
}
|
||||
buf := make([]byte, len(ctr))
|
||||
var i int
|
||||
for i = 0; i < len(buf); i++ {
|
||||
if ctr[i] == 0 {
|
||||
break
|
||||
}
|
||||
buf[i] = alpha[ctr[i]-1]
|
||||
}
|
||||
last = string(buf[:i])
|
||||
println(last)
|
||||
return last
|
||||
}
|
||||
st, err := f.Stat()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
_, err = pdf.NewReaderEncrypted(f, st.Size(), pw)
|
||||
if err != nil {
|
||||
if err == pdf.ErrInvalidPassword {
|
||||
log.Fatal("password not found")
|
||||
}
|
||||
log.Fatalf("reading pdf: %v", err)
|
||||
}
|
||||
fmt.Printf("password: %q\n", last)
|
||||
}
|
||||
|
||||
func inc(ctr []int, n int) {
|
||||
for i := 0; i < len(ctr); i++ {
|
||||
ctr[i]++
|
||||
if ctr[i] < n {
|
||||
break
|
||||
}
|
||||
ctr[i] = 0
|
||||
}
|
||||
}
|
||||
|
||||
func done(ctr []int) bool {
|
||||
for _, x := range ctr {
|
||||
if x != 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func valid(ctr []int) bool {
|
||||
i := len(ctr)
|
||||
for i > 0 && ctr[i-1] == 0 {
|
||||
i--
|
||||
}
|
||||
for i--; i >= 0; i-- {
|
||||
if ctr[i] == 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
138
internal/3rdparty/pdf/ps.go
vendored
Normal file
138
internal/3rdparty/pdf/ps.go
vendored
Normal file
@@ -0,0 +1,138 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// A Stack represents a stack of values.
|
||||
type Stack struct {
|
||||
stack []Value
|
||||
}
|
||||
|
||||
func (stk *Stack) Len() int {
|
||||
return len(stk.stack)
|
||||
}
|
||||
|
||||
func (stk *Stack) Push(v Value) {
|
||||
stk.stack = append(stk.stack, v)
|
||||
}
|
||||
|
||||
func (stk *Stack) Pop() Value {
|
||||
n := len(stk.stack)
|
||||
if n == 0 {
|
||||
return Value{}
|
||||
}
|
||||
v := stk.stack[n-1]
|
||||
stk.stack[n-1] = Value{}
|
||||
stk.stack = stk.stack[:n-1]
|
||||
return v
|
||||
}
|
||||
|
||||
func newDict() Value {
|
||||
return Value{nil, objptr{}, make(dict)}
|
||||
}
|
||||
|
||||
// Interpret interprets the content in a stream as a basic PostScript program,
|
||||
// pushing values onto a stack and then calling the do function to execute
|
||||
// operators. The do function may push or pop values from the stack as needed
|
||||
// to implement op.
|
||||
//
|
||||
// Interpret handles the operators "dict", "currentdict", "begin", "end", "def", and "pop" itself.
|
||||
//
|
||||
// Interpret is not a full-blown PostScript interpreter. Its job is to handle the
|
||||
// very limited PostScript found in certain supporting file formats embedded
|
||||
// in PDF files, such as cmap files that describe the mapping from font code
|
||||
// points to Unicode code points.
|
||||
//
|
||||
// There is no support for executable blocks, among other limitations.
|
||||
//
|
||||
func Interpret(strm Value, do func(stk *Stack, op string)) {
|
||||
rd := strm.Reader()
|
||||
b := newBuffer(rd, 0)
|
||||
b.allowEOF = true
|
||||
b.allowObjptr = false
|
||||
b.allowStream = false
|
||||
var stk Stack
|
||||
var dicts []dict
|
||||
Reading:
|
||||
for {
|
||||
tok := b.readToken()
|
||||
if tok == io.EOF {
|
||||
break
|
||||
}
|
||||
if kw, ok := tok.(keyword); ok {
|
||||
switch kw {
|
||||
case "null", "[", "]", "<<", ">>":
|
||||
break
|
||||
default:
|
||||
for i := len(dicts) - 1; i >= 0; i-- {
|
||||
if v, ok := dicts[i][name(kw)]; ok {
|
||||
stk.Push(Value{nil, objptr{}, v})
|
||||
continue Reading
|
||||
}
|
||||
}
|
||||
do(&stk, string(kw))
|
||||
continue
|
||||
case "dict":
|
||||
stk.Pop()
|
||||
stk.Push(Value{nil, objptr{}, make(dict)})
|
||||
continue
|
||||
case "currentdict":
|
||||
if len(dicts) == 0 {
|
||||
panic("no current dictionary")
|
||||
}
|
||||
stk.Push(Value{nil, objptr{}, dicts[len(dicts)-1]})
|
||||
continue
|
||||
case "begin":
|
||||
d := stk.Pop()
|
||||
if d.Kind() != Dict {
|
||||
panic("cannot begin non-dict")
|
||||
}
|
||||
dicts = append(dicts, d.data.(dict))
|
||||
continue
|
||||
case "end":
|
||||
if len(dicts) <= 0 {
|
||||
panic("mismatched begin/end")
|
||||
}
|
||||
dicts = dicts[:len(dicts)-1]
|
||||
continue
|
||||
case "def":
|
||||
if len(dicts) <= 0 {
|
||||
panic("def without open dict")
|
||||
}
|
||||
val := stk.Pop()
|
||||
key, ok := stk.Pop().data.(name)
|
||||
if !ok {
|
||||
panic("def of non-name")
|
||||
}
|
||||
dicts[len(dicts)-1][key] = val.data
|
||||
continue
|
||||
case "pop":
|
||||
stk.Pop()
|
||||
continue
|
||||
}
|
||||
}
|
||||
b.unreadToken(tok)
|
||||
obj := b.readObject()
|
||||
stk.Push(Value{nil, objptr{}, obj})
|
||||
}
|
||||
}
|
||||
|
||||
type seqReader struct {
|
||||
rd io.Reader
|
||||
offset int64
|
||||
}
|
||||
|
||||
func (r *seqReader) ReadAt(buf []byte, offset int64) (int, error) {
|
||||
if offset != r.offset {
|
||||
return 0, fmt.Errorf("non-sequential read of stream")
|
||||
}
|
||||
n, err := io.ReadFull(r.rd, buf)
|
||||
r.offset += int64(n)
|
||||
return n, err
|
||||
}
|
||||
1079
internal/3rdparty/pdf/read.go
vendored
Normal file
1079
internal/3rdparty/pdf/read.go
vendored
Normal file
File diff suppressed because it is too large
Load Diff
158
internal/3rdparty/pdf/text.go
vendored
Normal file
158
internal/3rdparty/pdf/text.go
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package pdf
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
)
|
||||
|
||||
const noRune = unicode.ReplacementChar
|
||||
|
||||
func isPDFDocEncoded(s string) bool {
|
||||
if isUTF16(s) {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(s); i++ {
|
||||
if pdfDocEncoding[s[i]] == noRune {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func pdfDocDecode(s string) string {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] >= 0x80 || pdfDocEncoding[s[i]] != rune(s[i]) {
|
||||
goto Decode
|
||||
}
|
||||
}
|
||||
return s
|
||||
|
||||
Decode:
|
||||
r := make([]rune, len(s))
|
||||
for i := 0; i < len(s); i++ {
|
||||
r[i] = pdfDocEncoding[s[i]]
|
||||
}
|
||||
return string(r)
|
||||
}
|
||||
|
||||
func isUTF16(s string) bool {
|
||||
return len(s) >= 2 && s[0] == 0xfe && s[1] == 0xff && len(s)%2 == 0
|
||||
}
|
||||
|
||||
func utf16Decode(s string) string {
|
||||
var u []uint16
|
||||
for i := 0; i < len(s); i += 2 {
|
||||
u = append(u, uint16(s[i])<<8|uint16(s[i+1]))
|
||||
}
|
||||
return string(utf16.Decode(u))
|
||||
}
|
||||
|
||||
// See PDF 32000-1:2008, Table D.2
|
||||
var pdfDocEncoding = [256]rune{
|
||||
noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune,
|
||||
noRune, 0x0009, 0x000a, noRune, noRune, 0x000d, noRune, noRune,
|
||||
noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune,
|
||||
0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, noRune,
|
||||
0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
|
||||
0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
|
||||
0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,
|
||||
0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, noRune,
|
||||
0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
|
||||
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, noRune, 0x00ae, 0x00af,
|
||||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
|
||||
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
|
||||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
|
||||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
|
||||
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
|
||||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
|
||||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
|
||||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
|
||||
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
|
||||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
||||
}
|
||||
|
||||
var winAnsiEncoding = [256]rune{
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x20ac, noRune, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
|
||||
0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, noRune, 0x017d, noRune,
|
||||
noRune, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
|
||||
0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, noRune, 0x017e, 0x0178,
|
||||
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
|
||||
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
|
||||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
|
||||
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
|
||||
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
|
||||
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
|
||||
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
|
||||
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
|
||||
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
|
||||
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
|
||||
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
|
||||
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
|
||||
}
|
||||
|
||||
var macRomanEncoding = [256]rune{
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1,
|
||||
0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8,
|
||||
0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3,
|
||||
0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc,
|
||||
0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
|
||||
0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
|
||||
0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
|
||||
0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
|
||||
0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
|
||||
0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153,
|
||||
0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
|
||||
0x00ff, 0x0178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
|
||||
0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1,
|
||||
0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4,
|
||||
0xf8ff, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc,
|
||||
0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7,
|
||||
}
|
||||
2
internal/native/loong64spec/.gitignore
vendored
Normal file
2
internal/native/loong64spec/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
LoongArch-Vol1-EN.pdf
|
||||
tables.go
|
||||
5
internal/native/loong64spec/readme.md
Normal file
5
internal/native/loong64spec/readme.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# 读取龙芯指令规范
|
||||
|
||||
1. 下载英文版到当前目录 https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf
|
||||
2. 执行 `go run spec.go`, 生成 `tables.go` 表格
|
||||
|
||||
566
internal/native/loong64spec/spec.go
Normal file
566
internal/native/loong64spec/spec.go
Normal file
@@ -0,0 +1,566 @@
|
||||
// Copyright (C) 2025 武汉凹语言科技有限公司
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
// 龙芯英文版指令集
|
||||
// https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.pdf
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"wa-lang.org/wa/internal/3rdparty/pdf"
|
||||
)
|
||||
|
||||
var (
|
||||
flagPdf = flag.String("pdf", "LoongArch-Vol1-EN.pdf", "set loong64 spec pdf")
|
||||
flagPackage = flag.String("pkg", "loong64", "set package name")
|
||||
flagOutput = flag.String("output", "tables.go", "set output file")
|
||||
)
|
||||
|
||||
func init() {
|
||||
log.SetFlags(0)
|
||||
log.SetPrefix("loong64spec: ")
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if *flagPdf == "" {
|
||||
fmt.Fprintf(os.Stderr, "usage: pdf missing\n")
|
||||
flag.Usage()
|
||||
os.Exit(2)
|
||||
}
|
||||
if *flagPackage == "" {
|
||||
fmt.Fprintf(os.Stderr, "usage: package name missing\n")
|
||||
flag.Usage()
|
||||
os.Exit(2)
|
||||
}
|
||||
if *flagOutput == "" {
|
||||
fmt.Fprintf(os.Stderr, "usage: output file missing\n")
|
||||
flag.Usage()
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
f, err := pdf.Open(*flagPdf)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
var prologue bytes.Buffer
|
||||
prologue.Write([]byte(`// 此代码是程序生成, 不要手动修改!!!
|
||||
|
||||
// Copyright (C) 2025 武汉凹语言科技有限公司
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
//go:build ignore
|
||||
|
||||
package loong64
|
||||
|
||||
`))
|
||||
|
||||
var op_f bytes.Buffer
|
||||
op_f.Write([]byte("const (\n\t_ Op = iota\n"))
|
||||
|
||||
var opstr_f bytes.Buffer
|
||||
opstr_f.Write([]byte("var opstr = [...]string{\n"))
|
||||
|
||||
var instFormats_f bytes.Buffer
|
||||
instFormats_f.Write([]byte("var instFormats = [...]instFormat{\n"))
|
||||
|
||||
// Scan document looking for instructions.
|
||||
n := f.NumPage()
|
||||
var ops []string
|
||||
opstrs := map[string]string{}
|
||||
instFormatComments := map[string]string{}
|
||||
instFormats := map[string]string{}
|
||||
var fp int
|
||||
|
||||
mergeMap := func(m1 map[string]string, m2 map[string]string) {
|
||||
for k := range m2 {
|
||||
m1[k] = m2[k]
|
||||
}
|
||||
}
|
||||
|
||||
for pageNum := 1; pageNum <= n; pageNum++ {
|
||||
p := f.Page(pageNum)
|
||||
if fp == 0 {
|
||||
if !isFirstPage(p) {
|
||||
continue
|
||||
}
|
||||
fp = pageNum
|
||||
}
|
||||
cPageOps, cPageOpstrs, cPageInstFormatComments, cPageInstFormats := parsePage(pageNum, p, fp == pageNum)
|
||||
ops = append(ops, cPageOps...)
|
||||
mergeMap(opstrs, cPageOpstrs)
|
||||
mergeMap(instFormatComments, cPageInstFormatComments)
|
||||
mergeMap(instFormats, cPageInstFormats)
|
||||
}
|
||||
|
||||
sort.Strings(ops)
|
||||
|
||||
for _, op := range ops {
|
||||
// 1. write op
|
||||
op_f.Write([]byte(fmt.Sprintf("\t%s\n", op)))
|
||||
// 2. write opstr
|
||||
opstr_f.Write([]byte(fmt.Sprintf("\t%s\n", opstrs[op])))
|
||||
// 3. write instFormat
|
||||
instFormats_f.Write([]byte(fmt.Sprintf("\t%s\n\t%s\n", instFormatComments[op], instFormats[op])))
|
||||
}
|
||||
|
||||
op_f.Write([]byte(")\n\n"))
|
||||
opstr_f.Write([]byte("}\n\n"))
|
||||
instFormats_f.Write([]byte("}\n"))
|
||||
|
||||
fileTables, err := os.Create(*flagOutput)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer fileTables.Close()
|
||||
|
||||
fileTables.Write(prologue.Bytes())
|
||||
fileTables.Write(op_f.Bytes())
|
||||
fileTables.Write(opstr_f.Bytes())
|
||||
fileTables.Write(instFormats_f.Bytes())
|
||||
|
||||
fileTables.Close()
|
||||
}
|
||||
|
||||
func isFirstPage(page pdf.Page) bool {
|
||||
content := page.Content()
|
||||
appendixb := "AppendixB"
|
||||
ct := ""
|
||||
for _, t := range content.Text {
|
||||
ct += t.S
|
||||
if ct == "AppendixB" {
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(appendixb, ct) {
|
||||
continue
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func getArg(name string) (length int, argName string) {
|
||||
switch {
|
||||
case strings.Contains("arg_fd", name):
|
||||
return 5, "arg_fd"
|
||||
case strings.Contains("arg_fj", name):
|
||||
return 5, "arg_fj"
|
||||
case strings.Contains("arg_fk", name):
|
||||
return 5, "arg_fk"
|
||||
case strings.Contains("arg_fa", name):
|
||||
return 5, "arg_fa"
|
||||
case strings.Contains("arg_rd", name):
|
||||
return 5, "arg_rd"
|
||||
case strings.Contains("arg_rj", name) || name == "rj!=0,1":
|
||||
return 5, "arg_rj"
|
||||
case strings.Contains("arg_rk", name):
|
||||
return 5, "arg_rk"
|
||||
case name == "csr":
|
||||
return 14, "arg_csr_23_10"
|
||||
case strings.Contains("arg_cd", name):
|
||||
return 5, "arg_cd"
|
||||
case strings.Contains("arg_cj", name):
|
||||
return 5, "arg_cj"
|
||||
case strings.Contains("arg_ca", name):
|
||||
return 5, "arg_ca"
|
||||
case strings.Contains(name, "sa"):
|
||||
length, _ := strconv.Atoi(strings.Split(name, "sa")[1])
|
||||
if length == 2 {
|
||||
argName = "arg_sa2_16_15"
|
||||
} else {
|
||||
argName = "arg_sa3_17_15"
|
||||
}
|
||||
return length, argName
|
||||
case strings.Contains("arg_seq_17_10", name):
|
||||
return 8, "arg_seq_17_10"
|
||||
case strings.Contains("arg_op_4_0", name):
|
||||
return 5, "arg_op_4_0"
|
||||
case strings.Contains(name, "ui"):
|
||||
length, _ := strconv.Atoi(strings.Split(name, "ui")[1])
|
||||
if length == 5 {
|
||||
argName = "arg_ui5_14_10"
|
||||
} else if length == 6 {
|
||||
argName = "arg_ui6_15_10"
|
||||
} else {
|
||||
argName = "arg_ui12_21_10"
|
||||
}
|
||||
return length, argName
|
||||
case strings.Contains("arg_lsbw", name):
|
||||
return 5, "arg_lsbw"
|
||||
case strings.Contains("arg_msbw", name):
|
||||
return 5, "arg_msbw"
|
||||
case strings.Contains("arg_lsbd", name):
|
||||
return 6, "arg_lsbd"
|
||||
case strings.Contains("arg_msbd", name):
|
||||
return 6, "arg_msbd"
|
||||
case strings.Contains(name, "si"):
|
||||
length, _ := strconv.Atoi(strings.Split(name, "si")[1])
|
||||
if length == 12 {
|
||||
argName = "arg_si12_21_10"
|
||||
} else if length == 14 {
|
||||
argName = "arg_si14_23_10"
|
||||
} else if length == 16 {
|
||||
argName = "arg_si16_25_10"
|
||||
} else {
|
||||
argName = "arg_si20_24_5"
|
||||
}
|
||||
return length, argName
|
||||
case strings.Contains(name, "offs"):
|
||||
splitName := strings.Split(name, ":")
|
||||
left, _ := strconv.Atoi(strings.Split(splitName[0], "[")[1])
|
||||
right, _ := strconv.Atoi(strings.Split(splitName[1], "]")[0])
|
||||
return left - right + 1, "offs"
|
||||
default:
|
||||
return 0, ""
|
||||
}
|
||||
}
|
||||
|
||||
func binstrToHex(str string) string {
    rst := 0
    hex := "0x"
    charArray := []byte(str)
    for i := 0; i < 32; {
        rst = 1*(int(charArray[i+3])-48) + 2*(int(charArray[i+2])-48) + 4*(int(charArray[i+1])-48) + 8*(int(charArray[i])-48)
        switch rst {
        case 10:
            hex = hex + "a"
        case 11:
            hex = hex + "b"
        case 12:
            hex = hex + "c"
        case 13:
            hex = hex + "d"
        case 14:
            hex = hex + "e"
        case 15:
            hex = hex + "f"
        default:
            hex += strconv.Itoa(rst)
        }

        i += 4
    }
    return hex
}

/*
Here we deal with the instruction FCMP.cond.S/D, which has the following format:

| 31 - 20 | 19 - 15 | 14 - 10 | 9 - 5 | 4 | 3 | 2 - 0 |
|---------|---------|---------|-------|---|---|-------|
| op      | cond    | fk      | fj    | 0 | 0 | cd    |

The `cond` field has these possible values:

	"CAF": "00",
	"CUN": "08",
	"CEQ": "04",
	"CUEQ": "0c",
	"CLT": "02",
	"CULT": "0a",
	"CLE": "06",
	"CULE": "0e",
	"CNE": "10",
	"COR": "14",
	"CUNE": "18",
	"SAF": "01",
	"SUN": "09",
	"SEQ": "05",
	"SUEQ": "0d",
	"SLT": "03",
	"SULT": "0b",
	"SLE": "07",
	"SULE": "0f",
	"SNE": "11",
	"SOR": "15",
	"SUNE": "19",

These values are the hexadecimal values of the cond field itself (bits 19..15),
exactly as listed in the instruction set manual.

The map defined in the code below instead stores the hexadecimal encoding of
the cond field as it sits inside the full 32-bit instruction word. Because cond
straddles a hex-digit boundary, its upper 4 bits and its lowest bit land in
different hex digits, so these values differ from the raw values listed above.
*/
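// For example, dealWithFcmp("D")["FCMP_CEQ_D"]["instFormat"] expands to
//
//	{mask: 0xffff8018, value: 0x0c220000, op: FCMP_CEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}},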
func dealWithFcmp(ds string) (fcmpConditions map[string]map[string]string) {
    conds := map[string]string{
        "CAF":  "00",
        "CUN":  "40",
        "CEQ":  "20",
        "CUEQ": "60",
        "CLT":  "10",
        "CULT": "50",
        "CLE":  "30",
        "CULE": "70",
        "CNE":  "80",
        "COR":  "a0",
        "CUNE": "c0",
        "SAF":  "08",
        "SUN":  "48",
        "SEQ":  "28",
        "SUEQ": "68",
        "SLT":  "18",
        "SULT": "58",
        "SLE":  "38",
        "SULE": "78",
        "SNE":  "88",
        "SOR":  "a8",
        "SUNE": "c8",
    }
    fcmpConditions = make(map[string]map[string]string)
    for k, v := range conds {
        op := fmt.Sprintf("FCMP_%s_%s", k, ds)
        opstr := fmt.Sprintf("FCMP_%s_%s:\t\"FCMP.%s.%s\",", k, ds, k, ds)
        instFormatComment := fmt.Sprintf("// FCMP.%s.%s cd, fj, fk", k, ds)
        var instFormat string
        if ds == "D" {
            instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c2%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds)
        } else {
            instFormat = fmt.Sprintf("{mask: 0xffff8018, value: 0x0c1%s000, op: FCMP_%s_%s, args: instArgs{arg_cd, arg_fj, arg_fk}},", v, k, ds)
        }

        fcmpConditions[op] = make(map[string]string)
        fcmpConditions[op]["op"] = op
        fcmpConditions[op]["opstr"] = opstr
        fcmpConditions[op]["instFormatComment"] = instFormatComment
        fcmpConditions[op]["instFormat"] = instFormat
    }
    return
}

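// findWords groups the characters extracted from a PDF page into words
// (really, phrases): characters on the same line (same Y) are merged when the
// horizontal gap between them is small relative to the font size, and wrapped
// continuation lines whose X falls inside the current cell's X range are
// pulled into the same group.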
func findWords(chars []pdf.Text) (words []pdf.Text) {
    for i := 0; i < len(chars); {
        xRange := []float64{chars[i].X, chars[i].X}
        j := i + 1

        // Find all chars on one line.
        for j < len(chars) && chars[j].Y == chars[i].Y {
            xRange[1] = chars[j].X
            j++
        }

        // A word may wrap onto another line (different Y) while still belonging to
        // the same table cell. After collecting all consecutive chars with the same Y,
        // keep consuming chars whose X falls inside xRange: they continue the current
        // word, because the next word's X must lie beyond the current range.
        for j < len(chars) && chars[j].X >= xRange[0] && chars[j].X <= xRange[1] {
            j++
        }

        var end float64
        // Split line into words (really, phrases).
        for k := i; k < j; {
            ck := &chars[k]
            s := ck.S
            end = ck.X + ck.W
            charSpace := ck.FontSize / 6
            wordSpace := ck.FontSize * 2 / 3
            l := k + 1
            for l < j {
                // Grow word.
                cl := &chars[l]

                if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+charSpace {
                    s += cl.S
                    end = cl.X + cl.W
                    l++
                    continue
                }
                // Add space to phrase before next word.
                if math.Abs(cl.FontSize-ck.FontSize) < 0.1 && cl.X <= end+wordSpace {
                    s += " " + cl.S
                    end = cl.X + cl.W
                    l++
                    continue
                }
                break
            }
            f := ck.Font
            words = append(words, pdf.Text{
                Font:     f,
                FontSize: ck.FontSize,
                X:        ck.X,
                Y:        ck.Y,
                W:        end - ck.X,
                S:        s,
            })
            k = l
        }
        i = j
    }

    return words
}

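// parsePage extracts the instruction-encoding table from one manual page and
// returns, for every opcode found, its op name, its opstr table entry, a
// syntax comment, and an instFormat table line (mask/value/args). FCMP rows
// are expanded via dealWithFcmp; isFP selects the leading-item offset used
// for the floating-point instruction pages.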
func parsePage(num int, p pdf.Page, isFP bool) (ops []string, opstrs map[string]string, instFormatComments map[string]string, instFormats map[string]string) {
    opstrs = make(map[string]string)
    instFormatComments = make(map[string]string)
    instFormats = make(map[string]string)

    content := p.Content()

    var text []pdf.Text
    for _, t := range content.Text {
        text = append(text, t)
    }

    // Skip the leading table name (70 items) and table header (64 items),
    // and drop the trailing page number (3 items).
    if isFP {
        text = text[134 : len(text)-3]
    } else {
        text = text[64 : len(text)-3]
    }

    text = findWords(text)

    for i := 0; i < len(text); {
        var fcmpConditions map[string]map[string]string
        if strings.HasPrefix(text[i].S, "FCMP") {
            fcmpConditions = dealWithFcmp(strings.Split(text[i].S, ".")[2])

            for fc, inst := range fcmpConditions {
                ops = append(ops, inst["op"])
                opstrs[fc] = inst["opstr"]
                instFormatComments[fc] = inst["instFormatComment"]
                instFormats[fc] = inst["instFormat"]
            }
            t := i + 1
            for ; t < len(text) && text[t].Y == text[i].Y; t++ {
            }
            i = t
            continue
        }

        op := strings.Replace(text[i].S, ".", "_", -1)
        opstr := fmt.Sprintf("%s:\t\"%s\",", op, text[i].S)
        instFormatComment := ""
        binValue := ""
        binMask := ""
        instArgs := ""
        offs := false
        var offArgs []string

        j := i + 1
        for ; j < len(text) && text[j].Y == text[i].Y; j++ {

            // Some instructions take no arguments, so the next word (text[j].S) may be
            // a 0/1 bit rather than the argument list; in that case it must not be skipped.
            if res, _ := regexp.MatchString(`^\d+$`, text[j].S); j == i+1 && !res {
                instFormatComment = fmt.Sprintf("// %s %s", text[i].S, strings.Replace(text[j].S, ",", ", ", -1))
                continue
            }
            if text[j].S == "0" || text[j].S == "1" {
                binValue += text[j].S
                binMask += "1"
            } else {
                argLen, argName := getArg(text[j].S)

                // Looking up the argument's length failed; derive it from the
                // remaining fields on the same line.
                if argLen == 0 {
                    left := 31 - len(binValue)
                    right := 0
                    l := j + 1
                    if l < len(text) && text[l].Y == text[j].Y {
                        for ; l < len(text) && text[l].Y == text[j].Y; l++ {
                            if text[l].S == "0" || text[l].S == "1" {
                                right += 1
                            } else {
                                tArgLen, _ := getArg(text[l].S)
                                if tArgLen == 0 {
                                    fmt.Fprintf(os.Stderr, "there are more than two args whose length is unknown.\n")
                                }
                                right += tArgLen
                            }
                        }
                    }
                    argLen = left - right + 1
                    argName = "arg_" + text[j].S + "_" + strconv.FormatInt(int64(left), 10) + "_" + strconv.FormatInt(int64(right), 10)
                }

                for k := 0; k < argLen; k++ {
                    binValue += "0"
                    binMask += "0"
                }

                if argName != "offs" {
                    if instArgs != "" {
                        instArgs = ", " + instArgs
                    }
                    instArgs = argName + instArgs
                } else {
                    offs = true
                    offArgs = append(offArgs, text[j].S)
                }
            }
        }

        // The real offset is split across two fields of the instruction encoding, for example: BEQZ.
        if offs && offArgs != nil {
            var left int
            var right int
            if len(offArgs) == 1 {
                left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[0], "[")[1])
                right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
            } else if len(offArgs) == 2 {
                left, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[1], ":")[0], "[")[1])
                right, _ = strconv.Atoi(strings.Split(strings.Split(offArgs[0], ":")[1], "]")[0])
            }

            if instArgs == "" {
                instArgs = fmt.Sprintf("arg_offset_%d_%d", left, right)
            } else {
                instArgs += fmt.Sprintf(", arg_offset_%d_%d", left, right)
            }
        }

        ops = append(ops, op)
        opstrs[op] = opstr
        if instFormatComment == "" {
            instFormatComment = "// " + text[i].S
        } else if strings.HasPrefix(op, "AM") {
            instFormatComment = fmt.Sprintf("// %s rd, rk, rj", text[i].S)
        }
        instFormatComments[op] = instFormatComment
        // For some instructions (e.g. BSTRINS.*) the operand order in the encoding
        // differs from the assembly syntax, so reorder instArgs to match the syntax.
        if instArgs != "" {
            args := strings.Split(instFormatComment, " ")[2:]
            tInstArgs := strings.Split(instArgs, ", ")
            newOrderedInstArgs := []string{}
            for _, a := range args {
                a = strings.Split(a, ",")[0]
                for _, aa := range tInstArgs {
                    if strings.Contains(aa, a) {
                        newOrderedInstArgs = append(newOrderedInstArgs, aa)
                        break
                    } else if a == "rd" && aa == "arg_fd" {
                        newOrderedInstArgs = append(newOrderedInstArgs, "arg_rd")
                        break
                    }
                }
            }
            instArgs = strings.Join(newOrderedInstArgs, ", ")
        }
        if strings.HasPrefix(op, "AM") {
            instArgs = "arg_rd, arg_rk, arg_rj"
        }
        instFormat := fmt.Sprintf("{mask: %s, value: %s, op: %s, args: instArgs{%s}},", binstrToHex(binMask), binstrToHex(binValue), op, instArgs)
        instFormats[op] = instFormat

        i = j // next instruction
    }

    return
}
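
/*
A minimal driver sketch (illustrative only, not part of the original file; the
manual file name is an assumption) showing how parsePage could be fed with
pages via the pdf package used above (pdf.Open, NumPage, Page):

	r, err := pdf.Open("loongarch-manual.pdf") // hypothetical input file
	if err != nil {
		log.Fatal(err)
	}
	for num := 1; num <= r.NumPage(); num++ {
		ops, opstrs, comments, formats := parsePage(num, r.Page(num), false)
		_, _, _, _ = ops, opstrs, comments, formats
	}
*/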