diff options
-rw-r--r-- | compat/json/compat.go | 61 | ||||
-rw-r--r-- | decode_scan.go | 9 | ||||
-rw-r--r-- | decode_scan_test.go | 5 | ||||
-rw-r--r-- | encode.go | 61 | ||||
-rw-r--r-- | errors.go | 10 | ||||
-rw-r--r-- | misc.go | 21 | ||||
-rw-r--r-- | parse.go | 6 | ||||
-rw-r--r-- | reencode.go | 17 | ||||
-rw-r--r-- | test_export.go | 6 |
9 files changed, 126 insertions, 70 deletions
diff --git a/compat/json/compat.go b/compat/json/compat.go index 37a0d5b..8dcb266 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -71,40 +71,46 @@ func Marshal(v any) ([]byte, error) { } type Encoder struct { - inner lowmemjson.ReEncoder + encoder lowmemjson.Encoder + formatter lowmemjson.ReEncoder } func NewEncoder(w io.Writer) *Encoder { - return &Encoder{ - inner: lowmemjson.ReEncoder{ - Out: w, - Compact: true, + ret := &Encoder{ + formatter: lowmemjson.ReEncoder{ + Out: w, + + AllowMultipleValues: true, + + Compact: true, + ForceTrailingNewlines: true, }, } + ret.encoder.W = &ret.formatter + return ret } func (enc *Encoder) Encode(v any) error { - if err := convertEncodeError(lowmemjson.Encode(&enc.inner, v)); err != nil { - return err - } - if err := convertEncodeError(enc.inner.Close()); err != nil { - return err + err := enc.encoder.Encode(v) + if err == nil { + err = enc.formatter.Close() } - return nil + return convertEncodeError(err) } func (enc *Encoder) SetEscapeHTML(on bool) { - if on { - enc.inner.BackslashEscape = nil - } else { - enc.inner.BackslashEscape = lowmemjson.EscapeJSSafe + var escaper lowmemjson.BackslashEscaper + if !on { + escaper = lowmemjson.EscapeDefaultNonHTMLSafe } + enc.encoder.BackslashEscape = escaper + enc.formatter.BackslashEscape = escaper } func (enc *Encoder) SetIndent(prefix, indent string) { - enc.inner.Compact = prefix == "" && indent == "" - enc.inner.Prefix = prefix - enc.inner.Indent = indent + enc.formatter.Compact = prefix == "" && indent == "" + enc.formatter.Prefix = prefix + enc.formatter.Indent = indent } // ReEncode wrappers ///////////////////////////////////////////////// @@ -169,12 +175,19 @@ func convertDecodeError(err error) error { if derr, ok := err.(*lowmemjson.DecodeError); ok { switch terr := derr.Err.(type) { case *lowmemjson.DecodeSyntaxError: - err = &SyntaxError{ - msg: terr.Err.Error(), - Offset: terr.Offset, - } - if errors.Is(terr.Err, io.ErrUnexpectedEOF) { - err.(*SyntaxError).msg = "unexpected end of JSON input" + switch { + case errors.Is(terr.Err, io.EOF): + err = io.EOF + case errors.Is(terr.Err, io.ErrUnexpectedEOF): + err = &SyntaxError{ + msg: "unexpected end of JSON input", + Offset: terr.Offset, + } + default: + err = &SyntaxError{ + msg: terr.Err.Error(), + Offset: terr.Offset, + } } case *lowmemjson.DecodeTypeError: if typeErr, ok := terr.Err.(*json.UnmarshalTypeError); ok { diff --git a/decode_scan.go b/decode_scan.go index 9fa6181..2830d0b 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -13,9 +13,10 @@ type runeTypeScanner interface { // The returned error is a *ReadError, a *SyntaxError, or nil. // An EOF condition is represented as one of: // - // end of value but not file: (_, >0, RuneTypeEOF, nil) - // end of both value and file: (_, 0, RuneTypeEOF, nil) - // end of file but not value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) + // end of value but not file: (_, >0, RuneTypeEOF, nil) + // end of both value and file: (_, 0, RuneTypeEOF, nil) + // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) + // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) ReadRuneType() (rune, int, RuneType, error) // The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil. ReadRune() (rune, int, error) @@ -127,8 +128,6 @@ func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { } } -var ErrInvalidUnreadRune = errors.New("lowmemjson: invalid use of UnreadRune") - // UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). // // If the last call to .ReadRune() or .ReadRuneType() has already been diff --git a/decode_scan_test.go b/decode_scan_test.go index 6fd9369..27b60c0 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -152,6 +152,11 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, }}, + "empty": {``, ``, []ReadRuneTypeResult{ + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, + }}, } testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner { return &runeTypeScannerImpl{ @@ -39,7 +39,12 @@ func encodeWriteString(w io.Writer, str string) { } } -func Encode(w io.Writer, obj any) (err error) { +type Encoder struct { + W io.Writer + BackslashEscape BackslashEscaper +} + +func (enc *Encoder) Encode(obj any) (err error) { defer func() { if r := recover(); r != nil { if e, ok := r.(encodeError); ok { @@ -49,13 +54,18 @@ func Encode(w io.Writer, obj any) (err error) { } } }() - encode(w, reflect.ValueOf(obj), false, 0, map[any]struct{}{}) - if f, ok := w.(interface{ Flush() error }); ok { + encode(enc.W, reflect.ValueOf(obj), enc.BackslashEscape, false, 0, map[any]struct{}{}) + if f, ok := enc.W.(interface{ Flush() error }); ok { return f.Flush() } return nil } +func Encode(w io.Writer, obj any) (err error) { + enc := &Encoder{W: w} + return enc.Encode(obj) +} + var ( encodableType = reflect.TypeOf((*Encodable)(nil)).Elem() jsonMarshalerType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() @@ -64,7 +74,7 @@ var ( const startDetectingCyclesAfter = 1000 -func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) { +func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) { if !val.IsValid() { encodeWriteString(w, "null") return @@ -84,7 +94,7 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe encodeWriteString(w, "null") return } - validator := &ReEncoder{Out: w} + validator := &ReEncoder{Out: w, BackslashEscape: escaper} if err := obj.EncodeJSON(validator); err != nil { panic(encodeError{&EncodeMethodError{ Type: val.Type(), @@ -117,7 +127,7 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe SourceFunc: "MarshalJSON", }}) } - validator := &ReEncoder{Out: w} + validator := &ReEncoder{Out: w, BackslashEscape: escaper} if _, err := validator.Write(dat); err != nil { panic(encodeError{err}) } @@ -146,7 +156,7 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe SourceFunc: "MarshalText", }}) } - encodeStringFromBytes(w, text) + encodeStringFromBytes(w, escaper, text) default: switch val.Kind() { @@ -202,17 +212,17 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe } else { if quote { var buf bytes.Buffer - encodeStringFromString(&buf, val.String()) - encodeStringFromBytes(w, buf.Bytes()) + encodeStringFromString(&buf, escaper, val.String()) + encodeStringFromBytes(w, escaper, buf.Bytes()) } else { - encodeStringFromString(w, val.String()) + encodeStringFromString(w, escaper, val.String()) } } case reflect.Interface: if val.IsNil() { encodeWriteString(w, "null") } else { - encode(w, val.Elem(), quote, cycleDepth, cycleSeen) + encode(w, val.Elem(), escaper, quote, cycleDepth, cycleSeen) } case reflect.Struct: encodeWriteByte(w, '{') @@ -229,9 +239,9 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe encodeWriteByte(w, ',') } empty = false - encodeStringFromString(w, field.Name) + encodeStringFromString(w, escaper, field.Name) encodeWriteByte(w, ':') - encode(w, fVal, field.Quote, cycleDepth, cycleSeen) + encode(w, fVal, escaper, field.Quote, cycleDepth, cycleSeen) } encodeWriteByte(w, '}') case reflect.Map: @@ -263,15 +273,16 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe kvs := make([]kv, val.Len()) iter := val.MapRange() for i := 0; iter.Next(); i++ { + // TODO: Avoid buffering the map key var k strings.Builder - encode(&k, iter.Key(), false, cycleDepth, cycleSeen) + encode(&k, iter.Key(), escaper, false, cycleDepth, cycleSeen) kStr := k.String() if kStr == "null" { kStr = `""` } if !strings.HasPrefix(kStr, `"`) { k.Reset() - encodeStringFromString(&k, kStr) + encodeStringFromString(&k, escaper, kStr) kStr = k.String() } kvs[i].K = kStr @@ -287,7 +298,7 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe } encodeWriteString(w, kv.K) encodeWriteByte(w, ':') - encode(w, kv.V, false, cycleDepth, cycleSeen) + encode(w, kv.V, escaper, false, cycleDepth, cycleSeen) } encodeWriteByte(w, '}') case reflect.Slice: @@ -341,10 +352,10 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe cycleSeen[ptr] = struct{}{} defer delete(cycleSeen, ptr) } - encodeArray(w, val, cycleDepth, cycleSeen) + encodeArray(w, val, escaper, cycleDepth, cycleSeen) } case reflect.Array: - encodeArray(w, val, cycleDepth, cycleSeen) + encodeArray(w, val, escaper, cycleDepth, cycleSeen) case reflect.Pointer: if val.IsNil() { encodeWriteString(w, "null") @@ -360,7 +371,7 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe cycleSeen[ptr] = struct{}{} defer delete(cycleSeen, ptr) } - encode(w, val.Elem(), quote, cycleDepth, cycleSeen) + encode(w, val.Elem(), escaper, quote, cycleDepth, cycleSeen) } default: panic(encodeError{&EncodeTypeError{ @@ -370,21 +381,21 @@ func encode(w io.Writer, val reflect.Value, quote bool, cycleDepth uint, cycleSe } } -func encodeStringFromString(w io.Writer, str string) { +func encodeStringFromString(w io.Writer, escaper BackslashEscaper, str string) { encodeWriteByte(w, '"') for _, c := range str { - if _, err := writeStringChar(w, c, BackslashEscapeNone, nil); err != nil { + if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { panic(encodeError{err}) } } encodeWriteByte(w, '"') } -func encodeStringFromBytes(w io.Writer, str []byte) { +func encodeStringFromBytes(w io.Writer, escaper BackslashEscaper, str []byte) { encodeWriteByte(w, '"') for i := 0; i < len(str); { c, size := utf8.DecodeRune(str[i:]) - if _, err := writeStringChar(w, c, BackslashEscapeNone, nil); err != nil { + if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { panic(encodeError{err}) } i += size @@ -392,14 +403,14 @@ func encodeStringFromBytes(w io.Writer, str []byte) { encodeWriteByte(w, '"') } -func encodeArray(w io.Writer, val reflect.Value, cycleDepth uint, cycleSeen map[any]struct{}) { +func encodeArray(w io.Writer, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) { encodeWriteByte(w, '[') n := val.Len() for i := 0; i < n; i++ { if i > 0 { encodeWriteByte(w, ',') } - encode(w, val.Index(i), false, cycleDepth, cycleSeen) + encode(w, val.Index(i), escaper, false, cycleDepth, cycleSeen) } encodeWriteByte(w, ']') } @@ -12,12 +12,11 @@ import ( "strings" ) -// parser errors /////////////////////////////////////////////////////////////////////////////////// +var ( + ErrInvalidUnreadRune = errors.New("lowmemjson: invalid use of UnreadRune") +) -type ParseError struct { - Err error - Offset int64 -} +// parser errors /////////////////////////////////////////////////////////////////////////////////// var ( ErrParserExceededMaxDepth = errors.New("exceeded max depth") @@ -155,3 +154,4 @@ type ReEncodeSyntaxError struct { func (e *ReEncodeSyntaxError) Error() string { return fmt.Sprintf("json: syntax error at input byte %v: %v", e.Offset, e.Err) } +func (e *ReEncodeSyntaxError) Unwrap() error { return e.Err } @@ -67,7 +67,13 @@ const ( BackslashEscapeUnicode ) -func EscapeJSSafe(c rune, _ BackslashEscapeMode) BackslashEscapeMode { +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + return wasEscaped +} + +func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { // JSON is notionally a JS subset, but that's not actually // true. // @@ -76,7 +82,7 @@ func EscapeJSSafe(c rune, _ BackslashEscapeMode) BackslashEscapeMode { case '\u2028', '\u2029': return BackslashEscapeUnicode default: - return BackslashEscapeNone + return wasEscaped } } @@ -98,8 +104,13 @@ func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { } } -func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - return wasEscaped +func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '\b', '\f', utf8.RuneError: + return BackslashEscapeUnicode + default: + return EscapeJSSafe(c, wasEscaped) + } } func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { @@ -134,7 +145,7 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) { buf := [2]byte{'\\', b} return w.Write(buf[:]) } -func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper func(rune, BackslashEscapeMode) BackslashEscapeMode) (int, error) { +func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { if escaper == nil { escaper = EscapeDefault } @@ -327,10 +327,14 @@ func (par *Parser) HandleEOF() (RuneType, error) { case 0: return RuneTypeEOF, nil case 1: - if par.stack[0].IsNumber() { + switch { + case par.stack[0].IsNumber(): if _, err := par.HandleRune('\n'); err == nil { return RuneTypeEOF, nil } + case par.stack[0] == RuneTypeError: + par.err = io.EOF + return RuneTypeError, par.err } fallthrough default: diff --git a/reencode.go b/reencode.go index 7c5ce52..bcb3932 100644 --- a/reencode.go +++ b/reencode.go @@ -23,13 +23,16 @@ type ReEncoder struct { Indent string // String to put before indents. Prefix string + // Whether to emit a newline after each top-level value, even + // if it could unambiguously be omitted. + ForceTrailingNewlines bool // Returns whether a given character in a string should be // backslash-escaped. The bool argument is whether it was // \u-escaped in the input. This does not affect characters // that must or must-not be escaped to be valid JSON. // // If not set, then EscapeDefault is used. - BackslashEscape func(rune, BackslashEscapeMode) BackslashEscapeMode + BackslashEscape BackslashEscaper // state: .Write's utf8-decoding buffer buf [utf8.UTFMax]byte @@ -267,8 +270,16 @@ func (enc *ReEncoder) handleRune(c rune, t RuneType) error { rune(enc.uhex[3])<<0 return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeUnicode, enc.BackslashEscape)) - case RuneTypeError: // EOF - return nil + case RuneTypeEOF: // start of next top-level value + if !enc.ForceTrailingNewlines && (enc.Compact || enc.Indent == "") && !enc.lastNonSpace.IsNumber() { + return nil + } + return enc.emitByte('\n') + case RuneTypeError: // .Close() + if !enc.ForceTrailingNewlines { + return nil + } + return enc.emitByte('\n') default: return enc.emitByte(byte(c)) } diff --git a/test_export.go b/test_export.go index ccac2b2..76d29d2 100644 --- a/test_export.go +++ b/test_export.go @@ -5,12 +5,14 @@ package lowmemjson import ( + "io" + "git.lukeshu.com/go/lowmemjson/internal" ) func init() { - internal.EncodeStringFromString = encodeStringFromString - internal.EncodeStringFromBytes = encodeStringFromBytes + internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(w, nil, s) } + internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(w, nil, s) } } var parseTag = internal.ParseTag |