// Copyright (C) 2022 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package lowmemjson import ( "errors" "fmt" "io" "unicode/utf8" ) type reencodeState func(rune) error type ReEncoder struct { Out io.Writer // Whether to minify the JSON. Compact bool // String to use to indent; ignored if Compact is true. Indent string // String to put before indents, for testing-compat with // encoding/json only. prefix string // Returns whether a given character in a string should be // backslash-escaped. The bool argument is whether it was // \u-escaped in the input. This does not affect characters // that must or must-not be escaped to be valid JSON. // // If not set, then EscapeDefault is used. BackslashEscape func(rune, BackslashEscapeMode) BackslashEscapeMode bailAfterCurrent bool // state: .Write's utf8-decoding buffer buf [utf8.UTFMax]byte bufLen int // state: .WriteRune err error par Parser written int inputPos int64 // state: .handleRune lastNonSpace RuneType curIndent int uhex [4]byte // "\uABCD"-encoded characters in strings fracZeros int64 expZero bool } // public API ////////////////////////////////////////////////////////////////// func (enc *ReEncoder) Write(p []byte) (int, error) { if len(p) == 0 { return 0, nil } var n int if enc.bufLen > 0 { copy(enc.buf[enc.bufLen:], p) c, size := utf8.DecodeRune(enc.buf[:]) n += size - enc.bufLen enc.bufLen = 0 if _, err := enc.WriteRune(c); err != nil { return 0, err } } for utf8.FullRune(p[n:]) { c, size := utf8.DecodeRune(p[n:]) if _, err := enc.WriteRune(c); err != nil { return n, err } n += size } enc.bufLen = copy(enc.buf[:], p[n:]) return len(p), nil } func (enc *ReEncoder) Close() error { if enc.bufLen > 0 { return &SyntaxError{ Offset: enc.inputPos, msg: fmt.Sprintf("%v: unflushed unicode garbage: %q", io.ErrUnexpectedEOF, enc.buf[:enc.bufLen]), } } if _, err := enc.par.HandleEOF(); err != nil { enc.err = err return enc.err } if err := enc.handleRune(0, 0); err != nil { enc.err = err return enc.err } return nil } var errBailedAfterCurrent = errors.New("bailed after current") func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { if enc.err != nil { return 0, enc.err } if enc.bufLen != 0 { enc.err = errors.New("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()n") return 0, enc.err } if enc.bailAfterCurrent && len(enc.par.stack) == 0 { return 0, errBailedAfterCurrent } t, err := enc.par.HandleRune(c) if err != nil { enc.err = err return 0, enc.err } enc.written = 0 enc.err = enc.handleRune(c, t) enc.inputPos += int64(utf8.RuneLen(c)) return enc.written, enc.err } // internal //////////////////////////////////////////////////////////////////// func (enc *ReEncoder) handleRune(c rune, t RuneType) error { // whitespace switch t { case RuneTypeSpace: if enc.Compact || enc.Indent != "" { return nil } } defer func() { enc.lastNonSpace = t }() // shorten numbers switch t { // trim trailing '0's from the fraction-part, but don't remove all digits case RuneTypeNumberFracDot: enc.fracZeros = 0 case RuneTypeNumberFracDig: if c == '0' && enc.lastNonSpace == RuneTypeNumberFracDig { enc.fracZeros++ return nil } fallthrough default: for enc.fracZeros > 0 { if err := enc.emitByte('0'); err != nil { return err } enc.fracZeros-- } } switch t { // trim leading '0's from the exponent-part, but don't remove all digits case RuneTypeNumberExpE, RuneTypeNumberExpSign: enc.expZero = true case RuneTypeNumberExpDig: if c == '0' && enc.expZero { return nil } enc.expZero = false default: if enc.expZero { if err := enc.emitByte('0'); err != nil { return err } enc.expZero = false } } // indent switch t { case RuneTypeObjectBeg, RuneTypeArrayBeg: enc.curIndent++ case RuneTypeObjectEnd, RuneTypeArrayEnd: enc.curIndent-- switch enc.lastNonSpace { case RuneTypeObjectBeg, RuneTypeArrayEnd: // collapse default: if err := enc.emitNlIndent(); err != nil { return err } } case RuneTypeObjectColon: if !enc.Compact && enc.Indent != "" { if err := enc.emitByte(':'); err != nil { return err } return enc.emitByte(' ') } default: switch enc.lastNonSpace { case RuneTypeObjectBeg, RuneTypeObjectComma, RuneTypeArrayBeg, RuneTypeArrayComma: if err := enc.emitNlIndent(); err != nil { return err } } } // main switch t { case RuneTypeStringChar: return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeNone, enc.BackslashEscape)) case RuneTypeStringEsc, RuneTypeStringEscU: return nil case RuneTypeStringEsc1: switch c { case '"': return enc.emit(writeStringChar(enc.Out, '"', BackslashEscapeShort, enc.BackslashEscape)) case '\\': return enc.emit(writeStringChar(enc.Out, '\\', BackslashEscapeShort, enc.BackslashEscape)) case '/': return enc.emit(writeStringChar(enc.Out, '/', BackslashEscapeShort, enc.BackslashEscape)) case 'b': return enc.emit(writeStringChar(enc.Out, '\b', BackslashEscapeShort, enc.BackslashEscape)) case 'f': return enc.emit(writeStringChar(enc.Out, '\f', BackslashEscapeShort, enc.BackslashEscape)) case 'n': return enc.emit(writeStringChar(enc.Out, '\n', BackslashEscapeShort, enc.BackslashEscape)) case 'r': return enc.emit(writeStringChar(enc.Out, '\r', BackslashEscapeShort, enc.BackslashEscape)) case 't': return enc.emit(writeStringChar(enc.Out, '\t', BackslashEscapeShort, enc.BackslashEscape)) default: panic("should not happen") } case RuneTypeStringEscUA: enc.uhex[0], _ = hex2int(c) return nil case RuneTypeStringEscUB: enc.uhex[1], _ = hex2int(c) return nil case RuneTypeStringEscUC: enc.uhex[2], _ = hex2int(c) return nil case RuneTypeStringEscUD: enc.uhex[3], _ = hex2int(c) c := 0 | rune(enc.uhex[0])<<12 | rune(enc.uhex[1])<<8 | rune(enc.uhex[2])<<4 | rune(enc.uhex[3])<<0 return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeUnicode, enc.BackslashEscape)) case RuneTypeError: // EOF return nil default: return enc.emitByte(byte(c)) } } func (enc *ReEncoder) emitByte(c byte) error { err := writeByte(enc.Out, c) if err == nil { enc.written++ } return err } func (enc *ReEncoder) emit(n int, err error) error { enc.written += n return err } func (enc *ReEncoder) emitNlIndent() error { if enc.Compact || enc.Indent == "" { return nil } if err := enc.emitByte('\n'); err != nil { return err } if enc.prefix != "" { if err := enc.emit(io.WriteString(enc.Out, enc.prefix)); err != nil { return err } } for i := 0; i < enc.curIndent; i++ { if err := enc.emit(io.WriteString(enc.Out, enc.Indent)); err != nil { return err } } return nil }