Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,18 @@ import (
"github.com/speedata/pdfdisassembler/internal/lex"
)

// maxParseDepth caps array/dict nesting so a hostile PDF can't stack-overflow
// the recursive parser. Arrays and dictionaries draw from the same budget:
// parseArray and parseDict both bump parser.depth and fail with a
// "nesting too deep" error once it exceeds this value, so exactly
// maxParseDepth levels are still accepted.
const maxParseDepth = 1000

// parser is a recursive descent parser over a lex.Lexer that emits direct
// PDF Objects. It does not chase indirect references — every Reference
// token becomes a Reference value.
type parser struct {
// lx is the lexer the parser is built over.
lx *lex.Lexer
// r is the Reader handed to newDict whenever parseDict builds a dictionary.
r *Reader
// queue holds lexer tokens.
// NOTE(review): presumably the lookahead buffer behind peek — confirm.
queue []lex.Token
// depth is the current array/dict nesting level: parseArray and parseDict
// increment it on entry, decrement it on return, and reject input once it
// exceeds maxParseDepth.
depth int
}

func newParser(lx *lex.Lexer, r *Reader) *parser {
Expand Down Expand Up @@ -135,6 +140,11 @@ func (p *parser) parseObjectFrom(tok lex.Token) (Object, error) {
}

func (p *parser) parseArray() (Array, error) {
p.depth++
defer func() { p.depth-- }()
if p.depth > maxParseDepth {
return nil, fmt.Errorf("pdfdisassembler/parse: nesting too deep (> %d)", maxParseDepth)
}
var out Array
for {
t, err := p.peek()
Expand All @@ -157,6 +167,11 @@ func (p *parser) parseArray() (Array, error) {
}

func (p *parser) parseDict() (*Dict, error) {
p.depth++
defer func() { p.depth-- }()
if p.depth > maxParseDepth {
return nil, fmt.Errorf("pdfdisassembler/parse: nesting too deep (> %d)", maxParseDepth)
}
d := newDict(p.r)
for {
t, err := p.peek()
Expand Down
70 changes: 70 additions & 0 deletions parse_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package pdfdisassembler

import (
"bytes"
"fmt"
"strings"
"testing"
)

// buildPDFWithObjectBody returns the bytes of a minimal PDF that uses a
// classical cross-reference table. Object 1 is the catalog, object 2 an empty
// page tree, and object 3 wraps body verbatim between "3 0 obj" and "endobj".
// The xref entries and the startxref value are computed from the actual byte
// positions, so body may be of any length.
func buildPDFWithObjectBody(t *testing.T, body string) []byte {
	t.Helper()

	// Indirect objects in file order; entry i is object number i+1.
	objects := []string{
		"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
		"2 0 obj\n<< /Type /Pages /Count 0 /Kids [] >>\nendobj\n",
		fmt.Sprintf("3 0 obj\n%s\nendobj\n", body),
	}

	var pdf bytes.Buffer
	pdf.WriteString("%PDF-1.7\n%\xE2\xE3\xCF\xD3\n")

	// Remember where each object begins so the xref table can point at it.
	starts := make([]int, 0, len(objects))
	for _, obj := range objects {
		starts = append(starts, pdf.Len())
		pdf.WriteString(obj)
	}

	xrefStart := pdf.Len()
	pdf.WriteString("xref\n0 4\n")
	// Entry 0 is the head of the free list; the rest are in-use objects.
	fmt.Fprintf(&pdf, "%010d %05d f \n", 0, 65535)
	for _, start := range starts {
		fmt.Fprintf(&pdf, "%010d %05d n \n", start, 0)
	}
	pdf.WriteString("trailer\n<< /Size 4 /Root 1 0 R >>\n")
	fmt.Fprintf(&pdf, "startxref\n%d\n%%%%EOF\n", xrefStart)

	return pdf.Bytes()
}

// TestDeeplyNestedRejected checks that resolving an object whose arrays or
// dictionaries nest deeper than the parser's depth cap fails, and that the
// failure is the parser's nesting error rather than some unrelated problem.
func TestDeeplyNestedRejected(t *testing.T) {
	// Far above the parser's depth cap, but well below a real stack overflow.
	const depth = 2000
	tests := []struct{ name, body string }{
		{"array", strings.Repeat("[", depth) + strings.Repeat("]", depth)},
		{"dict", strings.Repeat("<< /K ", depth) + "0" + strings.Repeat(" >>", depth)},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data := buildPDFWithObjectBody(t, tt.body)
			r, err := Open(bytes.NewReader(data))
			if err != nil {
				t.Fatalf("Open: %v", err)
			}
			defer r.Close()

			_, err = r.Resolve(Reference{Number: 3, Generation: 0})
			if err == nil {
				t.Fatal("expected error for over-deep nesting")
			}
			// A bare nil check would also accept an unrelated failure, so
			// require the depth guard's own message.
			// NOTE(review): assumes Resolve returns or wraps the parser
			// error with its text intact — confirm.
			if !strings.Contains(err.Error(), "nesting too deep") {
				t.Fatalf("Resolve error = %v, want a \"nesting too deep\" error", err)
			}
		})
	}
}

func TestModeratelyNestedArrayResolves(t *testing.T) {
data := buildPDFWithObjectBody(t, strings.Repeat("[", 100)+strings.Repeat("]", 100))
r, err := Open(bytes.NewReader(data))
if err != nil {
t.Fatalf("Open: %v", err)
}
defer r.Close()
obj, err := r.Resolve(Reference{Number: 3, Generation: 0})
if err != nil {
t.Fatalf("Resolve: %v", err)
}
if _, ok := obj.(Array); !ok {
t.Fatalf("got %T, want Array", obj)
}
}