// Copyright (C) 2022 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package lowmemjson import ( "bufio" "bytes" "encoding" "encoding/json" "fmt" "io" "reflect" "strconv" "strings" "unicode/utf16" "unicode/utf8" ) type Decodable interface { DecodeJSON(io.RuneScanner) error } type runeBuffer interface { io.Writer WriteRune(rune) (int, error) Reset() } type decodeStackItem struct { par reflect.Type idx any } type Decoder struct { io runeTypeScanner // config disallowUnknownFields bool useNumber bool // state err error stack []decodeStackItem } const maxNestingDepth = 10000 func NewDecoder(r io.Reader) *Decoder { rr, ok := r.(io.RuneReader) if !ok { rr = bufio.NewReader(r) } return &Decoder{ io: &noWSRuneTypeScanner{ inner: &runeTypeScannerImpl{ inner: rr, parser: Parser{ MaxDepth: maxNestingDepth, }, }, }, } } func (dec *Decoder) DisallowUnknownFields() { dec.disallowUnknownFields = true } func (dec *Decoder) UseNumber() { dec.useNumber = true } func (dec *Decoder) InputOffset() int64 { return dec.io.InputOffset() } func (dec *Decoder) More() bool { dec.io.Reset() _, _, t, e := dec.io.ReadRuneType() dec.io.UnreadRune() return e == nil && t != RuneTypeEOF } func (dec *Decoder) stackPush(par reflect.Type, idx any) { dec.stack = append(dec.stack, decodeStackItem{par, idx}) } func (dec *Decoder) stackPop() { dec.stack = dec.stack[:len(dec.stack)-1] } func (dec *Decoder) stackStr() string { var buf strings.Builder buf.WriteString("v") for _, item := range dec.stack { fmt.Fprintf(&buf, "[%#v]", item.idx) } return buf.String() } func (dec *Decoder) stackParent() string { last := len(dec.stack) - 1 if last > 0 && dec.stack[last].par.Kind() != reflect.Struct && dec.stack[last-1].par.Kind() == reflect.Struct { last-- } if last >= 0 && dec.stack[last].par.Kind() == reflect.Struct { return dec.stack[last].par.Name() } return "" } func (dec *Decoder) stackName() string { if dec.stackParent() == "" { return "" } var fields []string for _, elem := range dec.stack { if elem.par.Kind() == reflect.Struct { fields = append(fields, elem.idx.(string)) } } return strings.Join(fields, ".") } func Decode(r io.Reader, ptr any) error { return NewDecoder(r).Decode(ptr) } func (dec *Decoder) Decode(ptr any) (err error) { ptrVal := reflect.ValueOf(ptr) if ptrVal.Kind() != reflect.Pointer || ptrVal.IsNil() || !ptrVal.Elem().CanSet() { return &DecodeArgumentError{ // don't use ptrVal.Type() because ptrVal might be invalid if ptr==nil Type: reflect.TypeOf(ptr), } } if dec.err != nil { return dec.err } dec.io.Reset() defer func() { if r := recover(); r != nil { if de, ok := r.(decodeError); ok { pub := DecodeError(de) dec.err = &pub err = dec.err } else { panic(r) } } }() dec.decode(ptrVal.Elem(), false) return nil } // io helpers ////////////////////////////////////////////////////////////////////////////////////// type decodeError DecodeError func (dec *Decoder) panicType(jTyp string, gTyp reflect.Type, err error) { panic(decodeError{ Field: dec.stackStr(), FieldParent: dec.stackParent(), FieldName: dec.stackName(), Err: &DecodeTypeError{ GoType: gTyp, JSONType: jTyp, Err: err, Offset: dec.InputOffset(), }, }) } func (dec *Decoder) readRune() (rune, RuneType) { c, _, t, e := dec.io.ReadRuneType() if e != nil { panic(decodeError{ Field: dec.stackStr(), FieldParent: dec.stackParent(), FieldName: dec.stackName(), Err: e, }) } return c, t } func (dec *Decoder) unreadRune() { if err := dec.io.UnreadRune(); err != nil { // .UnreadRune() must succeed if the previous call was // .ReadRune(), which it always is for this code. panic("should not happen") } } func (dec *Decoder) peekRuneType() RuneType { _, t := dec.readRune() dec.unreadRune() return t } func (dec *Decoder) expectRune(ec rune, et RuneType) { ac, at := dec.readRune() if ac != ec || at != et { panic("should not happen") } } func (dec *Decoder) expectRuneType(ec rune, et RuneType, gt reflect.Type) { ac, at := dec.readRune() if ac != ec || at != et { dec.panicType(at.jsonType(), gt, nil) } } type decRuneTypeScanner struct { dec *Decoder } func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { c, s, t, e := sc.dec.io.ReadRuneType() if e != nil { panic(decodeError{ Field: sc.dec.stackStr(), FieldParent: sc.dec.stackParent(), FieldName: sc.dec.stackName(), Err: e, }) } return c, s, t, nil } func (sc *decRuneTypeScanner) ReadRune() (rune, int, error) { r, s, t, _ := sc.ReadRuneType() switch t { case RuneTypeEOF: return 0, 0, io.EOF default: return r, s, nil } } func (sc *decRuneTypeScanner) UnreadRune() error { return sc.dec.io.UnreadRune() } func (sc *decRuneTypeScanner) InputOffset() int64 { return sc.dec.InputOffset() } func (sc *decRuneTypeScanner) Reset() { sc.dec.io.Reset() } func (dec *Decoder) limitingScanner() runeTypeScanner { return &elemRuneTypeScanner{ inner: &decRuneTypeScanner{ dec: dec, }, } } // decoder main //////////////////////////////////////////////////////////////////////////////////// var ( rawMessagePtrType = reflect.TypeOf((*json.RawMessage)(nil)) decodableType = reflect.TypeOf((*Decodable)(nil)).Elem() jsonUnmarshalerType = reflect.TypeOf((*json.Unmarshaler)(nil)).Elem() textUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem() boolType = reflect.TypeOf(true) ) var kind2bits = map[reflect.Kind]int{ reflect.Int: int(32 << (^uint(0) >> 63)), reflect.Int8: 8, reflect.Int16: 16, reflect.Int32: 32, reflect.Int64: 64, reflect.Uint: int(32 << (^uint(0) >> 63)), reflect.Uint8: 8, reflect.Uint16: 16, reflect.Uint32: 32, reflect.Uint64: 64, reflect.Uintptr: int(32 << (^uintptr(0) >> 63)), reflect.Float32: 32, reflect.Float64: 64, } func (dec *Decoder) decode(val reflect.Value, nullOK bool) { typ := val.Type() switch { case val.CanAddr() && reflect.PointerTo(typ) == rawMessagePtrType: t := dec.peekRuneType() var buf bytes.Buffer dec.scan(&buf) if err := val.Addr().Interface().(*json.RawMessage).UnmarshalJSON(buf.Bytes()); err != nil { dec.panicType(t.jsonType(), typ, err) } case val.CanAddr() && reflect.PointerTo(typ).Implements(decodableType): t := dec.peekRuneType() obj := val.Addr().Interface().(Decodable) if err := obj.DecodeJSON(dec.limitingScanner()); err != nil { dec.panicType(t.jsonType(), typ, err) } case val.CanAddr() && reflect.PointerTo(typ).Implements(jsonUnmarshalerType): t := dec.peekRuneType() var buf bytes.Buffer dec.scan(&buf) obj := val.Addr().Interface().(json.Unmarshaler) if err := obj.UnmarshalJSON(buf.Bytes()); err != nil { dec.panicType(t.jsonType(), typ, err) } case val.CanAddr() && reflect.PointerTo(typ).Implements(textUnmarshalerType): if nullOK && dec.peekRuneType() == RuneTypeNullN { dec.decodeNull() return } var buf bytes.Buffer dec.decodeString(reflect.PointerTo(typ), &buf) obj := val.Addr().Interface().(encoding.TextUnmarshaler) if err := obj.UnmarshalText(buf.Bytes()); err != nil { dec.panicType("string", reflect.PointerTo(typ), err) } default: switch kind := typ.Kind(); kind { case reflect.Bool: if nullOK && dec.peekRuneType() == RuneTypeNullN { dec.decodeNull() return } val.SetBool(dec.decodeBool(typ)) case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: if nullOK && dec.peekRuneType() == RuneTypeNullN { dec.decodeNull() return } var buf strings.Builder dec.scanNumber(typ, &buf) n, err := strconv.ParseInt(buf.String(), 10, kind2bits[kind]) if err != nil { dec.panicType("number "+buf.String(), typ, err) } val.SetInt(n) case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: if nullOK && dec.peekRuneType() == RuneTypeNullN { dec.decodeNull() return } var buf strings.Builder dec.scanNumber(typ, &buf) n, err := strconv.ParseUint(buf.String(), 10, kind2bits[kind]) if err != nil { dec.panicType("number "+buf.String(), typ, err) } val.SetUint(n) case reflect.Float32, reflect.Float64: if nullOK && dec.peekRuneType() == RuneTypeNullN { dec.decodeNull() return } var buf strings.Builder dec.scanNumber(typ, &buf) n, err := strconv.ParseFloat(buf.String(), kind2bits[kind]) if err != nil { dec.panicType("number "+buf.String(), typ, err) } val.SetFloat(n) case reflect.String: if nullOK && dec.peekRuneType() == RuneTypeNullN { dec.decodeNull() return } var buf strings.Builder if typ == numberType { t := dec.peekRuneType() dec.scan(&buf) if !t.IsNumber() { dec.panicType(t.jsonType(), typ, fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", buf.String())) } val.SetString(buf.String()) } else { dec.decodeString(typ, &buf) val.SetString(buf.String()) } case reflect.Interface: if typ.NumMethod() > 0 { dec.panicType(dec.peekRuneType().jsonType(), typ, ErrDecodeNonEmptyInterface) } // If the interface stores a pointer, try to use the type information of the pointer. if !val.IsNil() && val.Elem().Kind() == reflect.Pointer { // Follow a chain of pointers until we find the first settable // pointer (if any). ptr := val.Elem() for { if ptr.CanSet() || ptr.IsNil() || ptr.Elem().Kind() != reflect.Pointer { // We've reached the end of the line, good or bad. break } ptr = ptr.Elem() } // ptr.Elem() != val // // Avoid the loop of an interface storing a pointer to its own // address. We only need to worry about this at the leaf (and not // in the loop) because the only way it's possible is if there's // an interface in there, which'd break from the loop on its own. // // ptr.CanSet() || dec.peekRuneType() != RuneTypeNullN // // We only need the pointer itself to be settable if we're // decoding null. if ptr.Elem() != val && (ptr.CanSet() || dec.peekRuneType() != RuneTypeNullN) { dec.decode(ptr, false) break } } // Couldn't get type information from a pointer; fall back to untyped mode. switch dec.peekRuneType() { case RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) default: val.Set(reflect.ValueOf(dec.decodeAny())) } case reflect.Struct: if nullOK && dec.peekRuneType() == RuneTypeNullN { dec.decodeNull() return } index := indexStruct(typ) var nameBuf strings.Builder dec.decodeObject(typ, &nameBuf, func() { name := nameBuf.String() dec.stackPush(typ, name) defer dec.stackPop() idx, ok := index.byName[name] if !ok { for oidx := range index.byPos { if strings.EqualFold(name, index.byPos[oidx].Name) { idx = oidx ok = true break } } } if !ok { if dec.disallowUnknownFields { dec.panicType("", typ, fmt.Errorf("json: unknown field %q", name)) } dec.scan(io.Discard) return } field := index.byPos[idx] fVal := val for _, idx := range field.Path { if fVal.Kind() == reflect.Pointer { if fVal.IsNil() && !fVal.CanSet() { // https://golang.org/issue/21357 dec.panicType("", fVal.Type().Elem(), fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", fVal.Type().Elem())) } if dec.peekRuneType() != RuneTypeNullN { if fVal.IsNil() { fVal.Set(reflect.New(fVal.Type().Elem())) } fVal = fVal.Elem() } } fVal = fVal.Field(idx) } if field.Quote { switch t := dec.peekRuneType(); t { case RuneTypeNullN: dec.decodeNull() switch fVal.Kind() { // XXX: I can't justify this list, other than "it's what encoding/json // does, but I don't understand their rationale". case reflect.Interface, reflect.Pointer, reflect.Map, reflect.Slice: fVal.Set(reflect.Zero(fVal.Type())) default: // do nothing??? } case RuneTypeStringBeg: // TODO: Figure out how to do this without buffering, have correct offsets. var buf bytes.Buffer dec.decodeString(nil, &buf) if err := Decode(bytes.NewReader(buf.Bytes()), fVal.Addr().Interface()); err != nil { if str := buf.String(); str != "null" { dec.panicType("", fVal.Type(), fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", str, fVal.Type())) } } default: dec.panicType(t.jsonType(), fVal.Type(), fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", fVal.Type())) } } else { dec.decode(fVal, true) } }) case reflect.Map: switch t := dec.peekRuneType(); t { case RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) case RuneTypeObjectBeg: if val.IsNil() { val.Set(reflect.MakeMap(typ)) } var nameBuf bytes.Buffer dec.decodeObject(typ, &nameBuf, func() { nameValTyp := typ.Key() nameValPtr := reflect.New(nameValTyp) switch { case reflect.PointerTo(nameValTyp).Implements(textUnmarshalerType): obj := nameValPtr.Interface().(encoding.TextUnmarshaler) if err := obj.UnmarshalText(nameBuf.Bytes()); err != nil { dec.panicType("string", nameValTyp, err) } default: switch nameValTyp.Kind() { case reflect.String: nameValPtr.Elem().SetString(nameBuf.String()) case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: n, err := strconv.ParseInt(nameBuf.String(), 10, kind2bits[nameValTyp.Kind()]) if err != nil { dec.panicType("number "+nameBuf.String(), nameValTyp, err) } nameValPtr.Elem().SetInt(n) case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: n, err := strconv.ParseUint(nameBuf.String(), 10, kind2bits[nameValTyp.Kind()]) if err != nil { dec.panicType("number "+nameBuf.String(), nameValTyp, err) } nameValPtr.Elem().SetUint(n) default: dec.panicType("object", typ, &DecodeArgumentError{nameValTyp}) } } dec.stackPush(typ, nameValPtr.Elem()) defer dec.stackPop() fValPtr := reflect.New(typ.Elem()) dec.decode(fValPtr.Elem(), false) val.SetMapIndex(nameValPtr.Elem(), fValPtr.Elem()) }) default: dec.panicType(t.jsonType(), typ, nil) } case reflect.Slice: switch { case typ.Elem().Kind() == reflect.Uint8 && !(dec.peekRuneType() == RuneTypeArrayBeg && (false || reflect.PointerTo(typ.Elem()).Implements(decodableType) || reflect.PointerTo(typ.Elem()).Implements(jsonUnmarshalerType) || reflect.PointerTo(typ.Elem()).Implements(textUnmarshalerType))): switch t := dec.peekRuneType(); t { case RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) case RuneTypeStringBeg: if typ.Elem() == byteType { var buf bytes.Buffer dec.decodeString(typ, newBase64Decoder(&buf)) val.Set(reflect.ValueOf(buf.Bytes())) } else { // TODO: Surely there's a better way. At the very least, we should // avoid buffering. var buf bytes.Buffer dec.decodeString(typ, newBase64Decoder(&buf)) bs := buf.Bytes() val.Set(reflect.MakeSlice(typ, len(bs), len(bs))) for i := 0; i < len(bs); i++ { val.Index(i).Set(reflect.ValueOf(bs[i]).Convert(typ.Elem())) } } default: dec.panicType(t.jsonType(), typ, nil) } default: switch t := dec.peekRuneType(); t { case RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) case RuneTypeArrayBeg: if val.IsNil() { val.Set(reflect.MakeSlice(typ, 0, 0)) } if val.Len() > 0 { val.Set(val.Slice(0, 0)) } i := 0 dec.decodeArray(typ, func() { dec.stackPush(typ, i) defer dec.stackPop() mValPtr := reflect.New(typ.Elem()) dec.decode(mValPtr.Elem(), false) val.Set(reflect.Append(val, mValPtr.Elem())) i++ }) default: dec.panicType(t.jsonType(), typ, nil) } } case reflect.Array: if nullOK && dec.peekRuneType() == RuneTypeNullN { dec.decodeNull() return } i := 0 n := val.Len() dec.decodeArray(typ, func() { dec.stackPush(typ, i) defer dec.stackPop() if i < n { mValPtr := reflect.New(typ.Elem()) dec.decode(mValPtr.Elem(), false) val.Index(i).Set(mValPtr.Elem()) } else { dec.scan(io.Discard) } i++ }) for ; i < n; i++ { val.Index(i).Set(reflect.Zero(typ.Elem())) } case reflect.Pointer: switch dec.peekRuneType() { case RuneTypeNullN: dec.decodeNull() val.Set(reflect.Zero(typ)) default: if val.IsNil() { val.Set(reflect.New(typ.Elem())) } dec.decode(val.Elem(), false) } default: dec.panicType("", typ, fmt.Errorf("unsupported type (kind=%v)", typ.Kind())) } } } func (dec *Decoder) scan(out io.Writer) { limiter := dec.limitingScanner() for { c, _, err := limiter.ReadRune() if err == io.EOF { return } _, _ = writeRune(out, c) } } func (dec *Decoder) scanNumber(gTyp reflect.Type, out io.Writer) { if t := dec.peekRuneType(); !t.IsNumber() { dec.panicType(t.jsonType(), gTyp, nil) } dec.scan(out) } func (dec *Decoder) decodeAny() any { c, _ := dec.readRune() dec.unreadRune() switch c { case '{': ret := make(map[string]any) typ := reflect.TypeOf(ret) var nameBuf strings.Builder dec.decodeObject(typ, &nameBuf, func() { name := nameBuf.String() dec.stackPush(typ, name) defer dec.stackPop() ret[name] = dec.decodeAny() }) return ret case '[': ret := []any{} typ := reflect.TypeOf(ret) dec.decodeArray(typ, func() { dec.stackPush(typ, len(ret)) defer dec.stackPop() ret = append(ret, dec.decodeAny()) }) return ret case '"': var buf strings.Builder dec.decodeString(nil, &buf) return buf.String() case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': var buf strings.Builder dec.scan(&buf) num := json.Number(buf.String()) if dec.useNumber { return num } f64, err := num.Float64() if err != nil { panic("should not happen") } return f64 case 't', 'f': return dec.decodeBool(nil) case 'n': dec.decodeNull() return nil default: panic("should not happen") } } func (dec *Decoder) decodeObject(gTyp reflect.Type, nameBuf runeBuffer, decodeKVal func()) { dec.expectRuneType('{', RuneTypeObjectBeg, gTyp) _, t := dec.readRune() switch t { case RuneTypeObjectEnd: return case RuneTypeStringBeg: decodeMember: dec.unreadRune() nameBuf.Reset() dec.decodeString(nil, nameBuf) dec.expectRune(':', RuneTypeObjectColon) decodeKVal() _, t := dec.readRune() switch t { case RuneTypeObjectComma: dec.expectRune('"', RuneTypeStringBeg) goto decodeMember case RuneTypeObjectEnd: return default: panic("should not happen") } default: panic("should not happen") } } func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { dec.expectRuneType('[', RuneTypeArrayBeg, gTyp) _, t := dec.readRune() switch t { case RuneTypeArrayEnd: return default: dec.unreadRune() decodeNextMember: decodeMember() _, t := dec.readRune() switch t { case RuneTypeArrayComma: goto decodeNextMember case RuneTypeArrayEnd: return default: panic("should not happen") } } } func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { dec.expectRuneType('"', RuneTypeStringBeg, gTyp) var uhex [4]byte for { c, t := dec.readRune() switch t { case RuneTypeStringChar: _, _ = writeRune(out, c) case RuneTypeStringEsc, RuneTypeStringEscU: // do nothing case RuneTypeStringEsc1: switch c { case '"': _, _ = writeRune(out, '"') case '\\': _, _ = writeRune(out, '\\') case '/': _, _ = writeRune(out, '/') case 'b': _, _ = writeRune(out, '\b') case 'f': _, _ = writeRune(out, '\f') case 'n': _, _ = writeRune(out, '\n') case 'r': _, _ = writeRune(out, '\r') case 't': _, _ = writeRune(out, '\t') default: panic("should not happen") } case RuneTypeStringEscUA: uhex[0], _ = hex2int(c) case RuneTypeStringEscUB: uhex[1], _ = hex2int(c) case RuneTypeStringEscUC: uhex[2], _ = hex2int(c) case RuneTypeStringEscUD: uhex[3], _ = hex2int(c) c = 0 | rune(uhex[0])<<12 | rune(uhex[1])<<8 | rune(uhex[2])<<4 | rune(uhex[3])<<0 handleUnicode: if utf16.IsSurrogate(c) { if dec.peekRuneType() != RuneTypeStringEsc { _, _ = writeRune(out, utf8.RuneError) break } dec.expectRune('\\', RuneTypeStringEsc) if dec.peekRuneType() != RuneTypeStringEscU { _, _ = writeRune(out, utf8.RuneError) break } dec.expectRune('u', RuneTypeStringEscU) b, _ := dec.readRune() uhex[0], _ = hex2int(b) b, _ = dec.readRune() uhex[1], _ = hex2int(b) b, _ = dec.readRune() uhex[2], _ = hex2int(b) b, _ = dec.readRune() uhex[3], _ = hex2int(b) c2 := 0 | rune(uhex[0])<<12 | rune(uhex[1])<<8 | rune(uhex[2])<<4 | rune(uhex[3])<<0 d := utf16.DecodeRune(c, c2) if d == utf8.RuneError { _, _ = writeRune(out, utf8.RuneError) c = c2 goto handleUnicode } _, _ = writeRune(out, d) } else { _, _ = writeRune(out, c) } case RuneTypeStringEnd: return default: panic("should not happen") } } } func (dec *Decoder) decodeBool(gTyp reflect.Type) bool { c, t := dec.readRune() switch c { case 't': dec.expectRune('r', RuneTypeTrueR) dec.expectRune('u', RuneTypeTrueU) dec.expectRune('e', RuneTypeTrueE) return true case 'f': dec.expectRune('a', RuneTypeFalseA) dec.expectRune('l', RuneTypeFalseL) dec.expectRune('s', RuneTypeFalseS) dec.expectRune('e', RuneTypeFalseE) return false default: dec.panicType(t.jsonType(), gTyp, nil) panic("not reached") } } func (dec *Decoder) decodeNull() { dec.expectRune('n', RuneTypeNullN) dec.expectRune('u', RuneTypeNullU) dec.expectRune('l', RuneTypeNullL1) dec.expectRune('l', RuneTypeNullL2) }