From 216a09c3c167c5961d14e52c3415354fb2bfa998 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 31 Jul 2022 18:55:17 -0600 Subject: better syntax errors for reencoder --- lib/lowmemjson/adapter_test.go | 7 ------- lib/lowmemjson/borrowed_misc.go | 14 ++++++++++++++ lib/lowmemjson/reencode.go | 32 +++++++++++++++++--------------- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/lib/lowmemjson/adapter_test.go b/lib/lowmemjson/adapter_test.go index 0d82f78..630c7ca 100644 --- a/lib/lowmemjson/adapter_test.go +++ b/lib/lowmemjson/adapter_test.go @@ -112,13 +112,6 @@ type ( UnmarshalTypeError = json.UnmarshalTypeError ) -type SyntaxError struct { - msg string - Offset int64 -} - -func (e *SyntaxError) Error() string { return e.msg } - const ( startDetectingCyclesAfter = 1000 ) diff --git a/lib/lowmemjson/borrowed_misc.go b/lib/lowmemjson/borrowed_misc.go index 343c924..5c6bbb6 100644 --- a/lib/lowmemjson/borrowed_misc.go +++ b/lib/lowmemjson/borrowed_misc.go @@ -5,9 +5,23 @@ package lowmemjson import ( + "fmt" "reflect" ) +// A SyntaxError is a description of a JSON syntax error. +// +// from scanner.go +type SyntaxError struct { + msg string // description of error + Offset int64 // error occurred after reading Offset bytes +} + +func (e *SyntaxError) Error() string { + return fmt.Sprintf("JSON syntax error at input byte %v: %v", + e.Offset, e.msg) +} + // from encode.go func isEmptyValue(v reflect.Value) bool { switch v.Kind() { diff --git a/lib/lowmemjson/reencode.go b/lib/lowmemjson/reencode.go index 76aedc9..e0783b0 100644 --- a/lib/lowmemjson/reencode.go +++ b/lib/lowmemjson/reencode.go @@ -39,6 +39,7 @@ type ReEncoder struct { // state: .WriteRune err error + inputPos int64 written int stack []reencodeState stack0IsNumber bool @@ -77,7 +78,7 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { func (enc *ReEncoder) Flush() error { if enc.bufLen > 0 { - return fmt.Errorf("unflushed unicode garbage: %q", enc.buf[:enc.bufLen]) + return &SyntaxError{fmt.Sprintf("EOF: unflushed unicode garbage: %q", enc.buf[:enc.bufLen]), enc.inputPos} } switch len(enc.stack) { case 0: @@ -89,7 +90,7 @@ func (enc *ReEncoder) Flush() error { } fallthrough default: - return fmt.Errorf("in the middle of a value") + return &SyntaxError{fmt.Sprintf("EOF: in the middle of a value"), enc.inputPos} } } @@ -103,6 +104,7 @@ func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { } enc.written = 0 enc.err = enc.state(c) + enc.inputPos += int64(utf8.RuneLen(c)) return enc.written, enc.err } @@ -203,7 +205,7 @@ func (enc *ReEncoder) stateAny(c rune) error { enc.replaceState(enc.stateInNull, false) enc.stateBuf = append(enc.stateBuf[:0], 'n') default: - return fmt.Errorf("decode value: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("any: unexpected character: %c", c), enc.inputPos} } return enc.emitByte(byte(c)) } @@ -233,7 +235,7 @@ func (enc *ReEncoder) _stateInObject(c rune, nonempty bool) error { } } default: - return fmt.Errorf("decode object: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("object: unexpected character: %c", c), enc.inputPos} } return enc.emitByte(byte(c)) } @@ -255,7 +257,7 @@ func (enc *ReEncoder) stateInKV(c rune) error { } return nil default: - return fmt.Errorf("decode object member: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("object member: unexpected character: %c", c), enc.inputPos} } } func (enc *ReEncoder) stateAfterV(c rune) error { @@ -273,7 +275,7 @@ func (enc *ReEncoder) stateAfterV(c rune) error { return err } default: - return fmt.Errorf("decode object member: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("object member: unexpected character: %c", c), enc.inputPos} } return enc.emitByte(byte(c)) } @@ -321,7 +323,7 @@ func (enc *ReEncoder) stateAfterItem(c rune) error { return err } default: - return fmt.Errorf("decode array: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("array: unexpected character: %c", c), enc.inputPos} } return enc.emitByte(byte(c)) } @@ -339,7 +341,7 @@ func (enc *ReEncoder) stateInString(c rune) error { case 0x0020 <= c && c <= 0x10FFFF: return enc.emit(writeStringChar(enc.Out, c, false, enc.UnicodeEscape)) default: - return fmt.Errorf("decode string: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("string: unexpected character: %c", c), enc.inputPos} } } func (enc *ReEncoder) stateInBackslash(c rune) error { @@ -369,7 +371,7 @@ func (enc *ReEncoder) stateInBackslash(c rune) error { enc.replaceState(enc.stateInUnicode, false) return nil default: - return fmt.Errorf("decode backslash sequence: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("string backslash sequence: unexpected character: %c", c), enc.inputPos} } } func (enc *ReEncoder) stateInUnicode(c rune) error { @@ -381,7 +383,7 @@ func (enc *ReEncoder) stateInUnicode(c rune) error { case 'A' <= c && c <= 'F': enc.stateBuf = append(enc.stateBuf, byte(c)-'A'+10) default: - return fmt.Errorf("decode backslash sequence: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("string unicode sequence: unexpected character: %c", c), enc.inputPos} } if len(enc.stateBuf) == 4 { enc.replaceState(enc.stateInString, false) @@ -447,7 +449,7 @@ func (enc *ReEncoder) stateNumberA(c rune) error { // start case '1', '2', '3', '4', '5', '6', '7', '8', '9': enc.replaceState(enc.stateNumberD, true) default: - return fmt.Errorf("decode number: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos} } return enc.emitByte(byte(c)) } @@ -458,7 +460,7 @@ func (enc *ReEncoder) stateNumberB(c rune) error { // got a leading "-" case '1', '2', '3', '4', '5', '6', '7', '8', '9': enc.replaceState(enc.stateNumberD, true) default: - return fmt.Errorf("decode number: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos} } return enc.emitByte(byte(c)) } @@ -500,7 +502,7 @@ func (enc *ReEncoder) stateNumberE(c rune) error { // got a ".", ready to read a enc.replaceState(enc.stateNumberF, true) return enc.emitByte(byte(c)) default: - return fmt.Errorf("decode number: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos} } } func (enc *ReEncoder) stateNumberF(c rune) error { // in the fraction part @@ -550,7 +552,7 @@ func (enc *ReEncoder) stateNumberG(c rune) error { // got a leading "e" return enc.emitByte(byte(c)) default: enc.stateBuf = enc.stateBuf[:0] - return fmt.Errorf("decode number: unexpected character: %c", c) + return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos} } } func (enc *ReEncoder) stateNumberH(c rune) error { // in the exponent's number part @@ -582,7 +584,7 @@ func (enc *ReEncoder) stateInFalse(c rune) error { return enc._stateInLiteral(c, func (enc *ReEncoder) stateInNull(c rune) error { return enc._stateInLiteral(c, "null") } func (enc *ReEncoder) _stateInLiteral(c rune, full string) error { if c != rune(full[len(enc.stateBuf)]) { - return fmt.Errorf("decode %s: unexpected character: %c", full, c) + return &SyntaxError{fmt.Sprintf("%s: unexpected character: %c", full, c), enc.inputPos} } enc.stateBuf = append(enc.stateBuf, byte(c)) if len(enc.stateBuf) == len(full) { -- cgit v1.2.3-2-g168b