diff options
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | decode.go | 23 | ||||
-rw-r--r-- | decode_scan.go | 7 | ||||
-rw-r--r-- | encode.go | 5 | ||||
-rw-r--r-- | internal/parse.go | 13 | ||||
-rw-r--r-- | misc.go | 3 | ||||
-rw-r--r-- | reencode.go | 22 | ||||
-rw-r--r-- | struct.go | 12 |
8 files changed, 62 insertions, 27 deletions
@@ -128,6 +128,10 @@ lowmemjson.NewEncoder(&lowmemjson.ReEncoder{ }).Encode(val) ``` +`*Encoder` and `*ReEncoder` both tend to make many small writes; if +writes are syscalls, you may want to wrap their output in a +`bufio.Writer`. + ### Callee API For defining Go types with custom JSON representations, `lowmemjson` @@ -30,22 +30,29 @@ import ( // JSON representation of themselves. Decodable is a // low-memory-overhead replacement for the json.Unmarshaler interface. // -// The io.RuneScanner passed to DecodeJSON... +// On the io.RuneScanner passed to DecodeJSON: // -// - ...will return ErrInvalidUnreadRune .UnreadRune if the last +// - .UnreadRune() will return ErrInvalidUnreadRune if the last // operation was not a successful .ReadRune() call. // -// - ...will return EOF at the end of the JSON value; it is not -// possible for DecodeJSON to read past the end of the value in to -// another value. +// - .ReadRune() will return io.EOF at the end of the JSON value; it +// is not possible for .ReadRune() to read past the end of the +// value into another value. // -// - ...if invalid JSON is encountered, will return the invalid rune -// with err!=nil. Implementations are encouraged to simply -// `return err` if .ReadRune returns an error. +// - .ReadRune() will never return invalid JSON; if invalid JSON is +// encountered, it will use a panic-based mechanism to transfer +// control back to the Decoder. +// +// - .ReadRune() will never return an error other than io.EOF; if an I/O +// error is encountered, it will use a panic-based mechanism to +// transfer control back to the Decoder. // // DecodeJSON is expected to consume the entire scanner until io.EOF // or another error is encountered; if it does not, then the parent Decode // call will return a *DecodeTypeError. +// +// Implementor's note: "limitingScanner" is the thing to search for in +// decode.go if you want to read up on that io.RuneScanner. 
type Decodable interface { DecodeJSON(io.RuneScanner) error } diff --git a/decode_scan.go b/decode_scan.go index 5e33760..249975d 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -30,11 +30,14 @@ type runeTypeScanner interface { // runeTypeScannerImpl ///////////////////////////////////////////////////////////////////////////// type runeTypeScannerImpl struct { - inner io.RuneScanner + // everything that is not "initialized by constructor" starts + // out as the zero value. + + inner io.RuneScanner // initialized by constructor initialized bool - parser internal.Parser + parser internal.Parser // initialized by constructor offset int64 repeat bool @@ -61,11 +61,14 @@ type Encoder struct { closeAfterEncode bool } -// NewEncoder returns a new encoder. +// NewEncoder returns a new Encoder that writes to w. // // If w is an *ReEncoder, then the inner backslash-escaping of // double-encoded ",string" tagged string values obeys the // *ReEncoder's BackslashEscape policy. +// +// An Encoder tends to make many small writes; if w.Write calls are +// syscalls, then you may want to wrap w in a bufio.Writer. func NewEncoder(w io.Writer) *Encoder { re, ok := w.(*ReEncoder) if !ok { diff --git a/internal/parse.go b/internal/parse.go index cefcca0..73470ae 100644 --- a/internal/parse.go +++ b/internal/parse.go @@ -226,13 +226,13 @@ type Parser struct { err error closed bool - // We reuse RuneTypes to store the stack. The base idea is - // that, stack items are "the most recently read - // stack-relevant RuneType". + // We reuse RuneTypes to store the stack. The base idea is: + // stack items are "the most recently read stack-relevant + // RuneType". // // We treat RuneTypeError as a wildcard. 
// - // The "normal"stack-relevant RuneTypes are: + // The "normal" stack-relevant RuneTypes are: // // "\uABC for strings // -01.2e+3 for numbers @@ -245,7 +245,6 @@ type Parser struct { // // { object: waiting for key to start or '}' // » object: reading key / waiting for colon - // : object: waiting for value to start // o object: reading value / waiting for ',' or '}' // // [ array: waiting for item to start or ']' @@ -266,7 +265,7 @@ type Parser struct { // »" {" // »" {"x // » {"x" - // : {"x": + // ox {"x": // o" {"x":" // o" {"x":"y // o {"x":"y" @@ -274,7 +273,7 @@ type Parser struct { // »" {"x":"y"," // »" {"x":"y","a // » {"x":"y","a" - // : {"x":"y","a": + // ox {"x":"y","a": // o" {"x":"y","a":" // o" {"x":"y","a":"b // o {"x":"y","a":"b" @@ -65,6 +65,9 @@ const ( // JSON string. The `rune` argument is the character being // considered, and the `BackslashEscapeMode` argument is how it was // originally encoded in the input. +// +// The ReEncoder will panic if a BackslashEscaper returns an unknown +// BackslashEscapeMode. type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode // EscapePreserve is a BackslashEscaper that preserves the original diff --git a/reencode.go b/reencode.go index b20a503..1bcfc74 100644 --- a/reencode.go +++ b/reencode.go @@ -30,6 +30,10 @@ type speculation struct { // The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). type ReEncoder struct { // The output stream to write the re-encoded JSON to. + // + // A ReEncoder tends to make many small writes; if Out.Write + // calls are syscalls, then you may want to wrap Out in a + // bufio.Writer. Out io.Writer // A JSON document is specified to be a single JSON element; @@ -99,14 +103,14 @@ type ReEncoder struct { // public API ////////////////////////////////////////////////////////////////// -// Write implements io.Writer; it does what you'd expect, mostly. +// Write implements io.Writer; it does what you'd expect. 
// -// Rather than returning the number of bytes written to the output -// stream, it returns the nubmer of bytes from p that it successfully -// handled. This distinction is because *ReEncoder transforms the -// data written to it, and the number of bytes written may be wildly -// different than the number of bytes handled; and that would break -// virtually all users of io.Writer. +// It is worth noting that Write returns the number of bytes consumed +// from p, not the number of bytes written to the output stream. This is a +// distinction that most io.Writer implementations don't need to make, +// but *ReEncoder does because it transforms the data written to it, +// and the number of bytes written may be wildly different than the +// number of bytes handled. func (enc *ReEncoder) Write(p []byte) (int, error) { if len(p) == 0 { return 0, nil @@ -163,7 +167,7 @@ func (enc *ReEncoder) Close() error { return nil } -// WriteRune write a single Unicode code point, returning the number +// WriteRune writes a single Unicode code point, returning the number // of bytes written to the output stream and any error. // // Even when there is no error, the number of bytes written may be @@ -245,7 +249,7 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { } } else { // speculating - // conCompress is whether we're 1-up from the leaf; + // canCompress is whether we're 1-up from the leaf; // set this *before* the calls to .handleRune. canCompress := enc.handleRuneState.specu.indentFmt.handleRuneState.specu == nil @@ -16,11 +16,15 @@ type structField struct { Quote bool } +// A structIndex is used by Decoder.Decode() and Encoder.Encode() when +// decoding-to or encoding-from a struct. type structIndex struct { byPos []structField byName map[string]int } +// indexStruct takes a struct Type, and indexes its fields for use by +// Decoder.Decode() and Encoder.Encode(). 
func indexStruct(typ reflect.Type) structIndex { var byPos []structField byName := make(map[string][]int) @@ -104,6 +108,12 @@ func indexStruct(typ reflect.Type) structIndex { return ret } +// indexStructInner crawls the struct `typ`, storing information on +// all struct fields found into `byPos` and `byName`. If `typ` +// contains other structs as fields, indexStructInner will recurse and +// call itself, keeping track of stack information with `stackPath` +// (which identifies where we are in the parent struct) and +// `stackSeen` (which is used for detecting loops). func indexStructInner(typ reflect.Type, byPos *[]structField, byName map[string][]int, stackPath []int, stackSeen map[reflect.Type]struct{}) { if _, ok := stackSeen[typ]; ok { return @@ -161,6 +171,8 @@ func indexStructInner(typ reflect.Type, byPos *[]structField, byName map[string] } } +// isQuotable returns whether a type is eligible for `json:,string` +// quoting. func isQuotable(typ reflect.Type) bool { for typ.Kind() == reflect.Pointer { typ = typ.Elem() |