From ff6dc0bc519886905e758a84e572f5e34d6c03d1 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 26 Jan 2023 22:31:32 -0700 Subject: Move things between files --- common.go | 16 ++++ encode.go | 23 ------ encode_escape.go | 103 +++++++++++++++++++++++ encode_string.go | 111 +++++++++++++++++++++++++ internal/encode.go | 14 ++++ internal/export_tags.go | 16 ---- internal/tags.go | 7 ++ ioutil.go | 31 +++++++ misc.go | 211 ------------------------------------------------ struct.go | 4 +- test_export.go | 18 ----- 11 files changed, 285 insertions(+), 269 deletions(-) create mode 100644 common.go create mode 100644 encode_escape.go create mode 100644 encode_string.go create mode 100644 internal/encode.go delete mode 100644 internal/export_tags.go create mode 100644 internal/tags.go create mode 100644 ioutil.go delete mode 100644 misc.go delete mode 100644 test_export.go diff --git a/common.go b/common.go new file mode 100644 index 0000000..90156b9 --- /dev/null +++ b/common.go @@ -0,0 +1,16 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "encoding/json" + "reflect" +) + +var ( + numberType = reflect.TypeOf(json.Number("")) + byteType = reflect.TypeOf(byte(0)) + byteSliceType = reflect.TypeOf(([]byte)(nil)) +) diff --git a/encode.go b/encode.go index 41032e5..fa337ad 100644 --- a/encode.go +++ b/encode.go @@ -17,7 +17,6 @@ import ( "sort" "strconv" "strings" - "unicode/utf8" "unsafe" ) @@ -426,28 +425,6 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool } } -func encodeStringFromString(w io.Writer, escaper BackslashEscaper, str string) { - encodeWriteByte(w, '"') - for _, c := range str { - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - panic(encodeError{err}) - } - } - encodeWriteByte(w, '"') -} - -func encodeStringFromBytes(w io.Writer, escaper BackslashEscaper, str []byte) { - encodeWriteByte(w, '"') - for i := 0; 
i < len(str); { - c, size := utf8.DecodeRune(str[i:]) - if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - panic(encodeError{err}) - } - i += size - } - encodeWriteByte(w, '"') -} - func encodeArray(w io.Writer, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) { encodeWriteByte(w, '[') n := val.Len() diff --git a/encode_escape.go b/encode_escape.go new file mode 100644 index 0000000..ab0d9c1 --- /dev/null +++ b/encode_escape.go @@ -0,0 +1,103 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "unicode/utf8" +) + +// BackslashEscapeMode identifies one of the three ways that a +// character may be represented in a JSON string: +// +// - literally (no backslash escaping) +// +// - as a short "well-known" `\X` backslash sequence (where `X` is a +// single-character) +// +// - as a long Unicode `\uXXXX` backslash sequence +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone BackslashEscapeMode = iota + BackslashEscapeShort + BackslashEscapeUnicode +) + +// A BackslashEscaper controls how a ReEncoder emits a character in a +// JSON string. The `rune` argument is the character being +// considered, and the `BackslashEscapeMode` argument is how it was +// originally encoded in the input. +// +// The ReEncoder will panic if a BackslashEscaper returns an unknown +// BackslashEscapeMode. +type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode + +// EscapePreserve is a BackslashEscaper that preserves the original +// input escaping. +func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + return wasEscaped +} + +// EscapeJSSafe is a BackslashEscaper that escapes strings such that +// the JSON is safe to embed in JS; it otherwise preserves the original +// input escaping. 
+// +// JSON is notionally a JS subset, but that's not actually true; so +// more conservative backslash-escaping is necessary to safely embed +// it in JS. http://timelessrepo.com/json-isnt-a-javascript-subset +func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '\u2028', '\u2029': + return BackslashEscapeUnicode + default: + return wasEscaped + } +} + +// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that +// the JSON is safe to embed in HTML; it otherwise preserves the +// original input escaping. +func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '&', '<', '>': + return BackslashEscapeUnicode + default: + return EscapeJSSafe(c, wasEscaped) + } +} + +// EscapeDefault is a BackslashEscaper that mimics the default +// behavior of encoding/json. +// +// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX` +// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement +// character. +// +// A ReEncoder uses EscapeDefault if a BackslashEscaper is not +// specified. +func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '\b', '\f', utf8.RuneError: + return BackslashEscapeUnicode + default: + return EscapeHTMLSafe(c, wasEscaped) + } +} + +// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the +// default behavior of an encoding/json.Encoder that has had +// SetEscapeHTML(false) called on it. +// +// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX` +// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement +// character. 
+func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { + switch c { + case '\b', '\f', utf8.RuneError: + return BackslashEscapeUnicode + default: + return EscapeJSSafe(c, wasEscaped) + } +} diff --git a/encode_string.go b/encode_string.go new file mode 100644 index 0000000..c5cb442 --- /dev/null +++ b/encode_string.go @@ -0,0 +1,111 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "io" + "unicode/utf8" + + "git.lukeshu.com/go/lowmemjson/internal" +) + +func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { + buf := [6]byte{ + '\\', + 'u', + internal.Hex[(c>>12)&0xf], + internal.Hex[(c>>8)&0xf], + internal.Hex[(c>>4)&0xf], + internal.Hex[(c>>0)&0xf], + } + return w.Write(buf[:]) +} + +func writeStringShortEscape(w io.Writer, c rune) (int, error) { + var b byte + switch c { + case '"', '\\', '/': + b = byte(c) + case '\b': + b = 'b' + case '\f': + b = 'f' + case '\n': + b = 'n' + case '\r': + b = 'r' + case '\t': + b = 't' + default: + panic("should not happen") + } + buf := [2]byte{'\\', b} + return w.Write(buf[:]) +} + +func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { + if escaper == nil { + escaper = EscapeDefault + } + switch escaper(c, wasEscaped) { + case BackslashEscapeNone: + switch { + case c < 0x0020: // override, gotta escape these + switch c { + case '\b', '\f', '\n', '\r', '\t': // short-escape if possible + return writeStringShortEscape(w, c) + default: + return writeStringUnicodeEscape(w, c) + } + case c == '"' || c == '\\': // override, gotta escape these + return writeStringShortEscape(w, c) + default: // obey + return writeRune(w, c) + } + case BackslashEscapeShort: + switch c { + case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey + return writeStringShortEscape(w, c) + default: // override, can't short-escape these + return writeRune(w, c) + } + 
case BackslashEscapeUnicode: + switch { + case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) + return writeRune(w, c) + default: // obey + return writeStringUnicodeEscape(w, c) + } + default: + panic("escaper returned an invalid escape mode") + } +} + +func encodeStringFromString(w io.Writer, escaper BackslashEscaper, str string) { + encodeWriteByte(w, '"') + for _, c := range str { + if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { + panic(encodeError{err}) + } + } + encodeWriteByte(w, '"') +} + +func encodeStringFromBytes(w io.Writer, escaper BackslashEscaper, str []byte) { + encodeWriteByte(w, '"') + for i := 0; i < len(str); { + c, size := utf8.DecodeRune(str[i:]) + if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { + panic(encodeError{err}) + } + i += size + } + encodeWriteByte(w, '"') +} + +func init() { + internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(w, nil, s) } + internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(w, nil, s) } +} diff --git a/internal/encode.go b/internal/encode.go new file mode 100644 index 0000000..8aae673 --- /dev/null +++ b/internal/encode.go @@ -0,0 +1,14 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +import ( + "io" +) + +var ( + EncodeStringFromBytes func(io.Writer, []byte) + EncodeStringFromString func(io.Writer, string) +) diff --git a/internal/export_tags.go b/internal/export_tags.go deleted file mode 100644 index d8cf622..0000000 --- a/internal/export_tags.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package internal - -import ( - "io" -) - -var ParseTag = parseTag - -var ( - EncodeStringFromBytes func(io.Writer, []byte) - EncodeStringFromString func(io.Writer, string) -) diff --git a/internal/tags.go 
b/internal/tags.go new file mode 100644 index 0000000..bdf1f72 --- /dev/null +++ b/internal/tags.go @@ -0,0 +1,7 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +var ParseTag = parseTag diff --git a/ioutil.go b/ioutil.go new file mode 100644 index 0000000..a53eac3 --- /dev/null +++ b/ioutil.go @@ -0,0 +1,31 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "io" + "unicode/utf8" +) + +func writeByte(w io.Writer, c byte) error { + if br, ok := w.(interface{ WriteByte(byte) error }); ok { + return br.WriteByte(c) + } + var buf [1]byte + buf[0] = c + if _, err := w.Write(buf[:]); err != nil { + return err + } + return nil +} + +func writeRune(w io.Writer, c rune) (int, error) { + if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { + return rw.WriteRune(c) + } + var buf [utf8.UTFMax]byte + n := utf8.EncodeRune(buf[:], c) + return w.Write(buf[:n]) +} diff --git a/misc.go b/misc.go deleted file mode 100644 index fb96b4e..0000000 --- a/misc.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package lowmemjson - -import ( - "encoding/json" - "io" - "reflect" - "unicode/utf8" - - "git.lukeshu.com/go/lowmemjson/internal" -) - -var ( - numberType = reflect.TypeOf(json.Number("")) - byteType = reflect.TypeOf(byte(0)) - byteSliceType = reflect.TypeOf(([]byte)(nil)) -) - -// generic I/O ///////////////////////////////////////////////////////////////// - -func writeByte(w io.Writer, c byte) error { - if br, ok := w.(interface{ WriteByte(byte) error }); ok { - return br.WriteByte(c) - } - var buf [1]byte - buf[0] = c - if _, err := w.Write(buf[:]); err != nil { - return err - } - return nil -} - -func writeRune(w io.Writer, c rune) (int, error) { - if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { - return rw.WriteRune(c) - } - 
var buf [utf8.UTFMax]byte - n := utf8.EncodeRune(buf[:], c) - return w.Write(buf[:n]) -} - -// JSON string encoding //////////////////////////////////////////////////////// - -// BackslashEscapeMode identifies one of the three ways that a -// character may be represented in a JSON string: -// -// - literally (no backslash escaping) -// -// - as a short "well-known" `\X` backslash sequence (where `X` is a -// single-character) -// -// - as a long Unicode `\uXXXX` backslash sequence -type BackslashEscapeMode uint8 - -const ( - BackslashEscapeNone BackslashEscapeMode = iota - BackslashEscapeShort - BackslashEscapeUnicode -) - -// A BackslashEscaper controls how a ReEncoder emits a character in a -// JSON string. The `rune` argument is the character being -// considered, and the `BackslashEscapeMode` argument is how it was -// originally encoded in the input. -// -// The ReEncoder will panic if a BackslashEscaper returns an unknown -// BackslashEscapeMode. -type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode - -// EscapePreserve is a BackslashEscaper that preserves the original -// input escaping. -func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - return wasEscaped -} - -// EscapeJSSafe is a BackslashEscaper that escapes strings such that -// the JSON safe to embed in JS; it otherwise preserves the original -// input escaping. -// -// JSON is notionally a JS subset, but that's not actually true; so -// more conservative backslash-escaping is necessary to safely embed -// it in JS. http://timelessrepo.com/json-isnt-a-javascript-subset -func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - switch c { - case '\u2028', '\u2029': - return BackslashEscapeUnicode - default: - return wasEscaped - } -} - -// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that -// the JSON is safe to embed in HTML; it otherwise preserves the -// original input escaping. 
-func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - switch c { - case '&', '<', '>': - return BackslashEscapeUnicode - default: - return EscapeJSSafe(c, wasEscaped) - } -} - -// EscapeDefault is a BackslashEscaper that mimics the default -// behavior of encoding/json. -// -// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX` -// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement -// character. -// -// A ReEncoder uses EscapeDefault if a BackslashEscaper is not -// specified. -func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - switch c { - case '\b', '\f', utf8.RuneError: - return BackslashEscapeUnicode - default: - return EscapeHTMLSafe(c, wasEscaped) - } -} - -// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the -// default behavior of an encoding/json.Encoder that has had -// SetEscapeHTML(false) called on it. -// -// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX` -// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement -// character. 
-func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { - switch c { - case '\b', '\f', utf8.RuneError: - return BackslashEscapeUnicode - default: - return EscapeJSSafe(c, wasEscaped) - } -} - -func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { - buf := [6]byte{ - '\\', - 'u', - internal.Hex[(c>>12)&0xf], - internal.Hex[(c>>8)&0xf], - internal.Hex[(c>>4)&0xf], - internal.Hex[(c>>0)&0xf], - } - return w.Write(buf[:]) -} - -func writeStringShortEscape(w io.Writer, c rune) (int, error) { - var b byte - switch c { - case '"', '\\', '/': - b = byte(c) - case '\b': - b = 'b' - case '\f': - b = 'f' - case '\n': - b = 'n' - case '\r': - b = 'r' - case '\t': - b = 't' - default: - panic("should not happen") - } - buf := [2]byte{'\\', b} - return w.Write(buf[:]) -} - -func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { - if escaper == nil { - escaper = EscapeDefault - } - switch escaper(c, wasEscaped) { - case BackslashEscapeNone: - switch { - case c < 0x0020: // override, gotta escape these - switch c { - case '\b', '\f', '\n', '\r', '\t': // short-escape if possible - return writeStringShortEscape(w, c) - default: - return writeStringUnicodeEscape(w, c) - } - case c == '"' || c == '\\': // override, gotta escape these - return writeStringShortEscape(w, c) - default: // obey - return writeRune(w, c) - } - case BackslashEscapeShort: - switch c { - case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey - return writeStringShortEscape(w, c) - default: // override, can't short-escape these - return writeRune(w, c) - } - case BackslashEscapeUnicode: - switch { - case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) 
- return writeRune(w, c) - default: // obey - return writeStringUnicodeEscape(w, c) - } - default: - panic("escaper returned an invalid escape mode") - } -} diff --git a/struct.go b/struct.go index 24b2ac0..b7fc287 100644 --- a/struct.go +++ b/struct.go @@ -6,6 +6,8 @@ package lowmemjson import ( "reflect" + + "git.lukeshu.com/go/lowmemjson/internal" ) type structField struct { @@ -143,7 +145,7 @@ func indexStructInner(typ reflect.Type, byPos *[]structField, byName map[string] if tag == "-" { continue } - tagName, opts := parseTag(tag) + tagName, opts := internal.ParseTag(tag) name := tagName if !isValidTag(name) { name = "" diff --git a/test_export.go b/test_export.go deleted file mode 100644 index 76d29d2..0000000 --- a/test_export.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (C) 2022 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package lowmemjson - -import ( - "io" - - "git.lukeshu.com/go/lowmemjson/internal" -) - -func init() { - internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(w, nil, s) } - internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(w, nil, s) } -} - -var parseTag = internal.ParseTag -- cgit v1.1-4-g5e80