From 2b7fff828e29b63ae08a871b4b1e74784fab29e5 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 16 Feb 2023 19:06:46 -0700 Subject: Clean up the hex handling --- decode.go | 28 +++++++++------------------- encode_escape.go | 22 ++++++++++++++++++++++ internal/jsonparse/hex.go | 20 -------------------- internal/jsonparse/parse.go | 28 +++++++++++++++------------- internal/jsonstring/encode_string.go | 10 +++++----- reencode.go | 15 +++++---------- 6 files changed, 56 insertions(+), 67 deletions(-) delete mode 100644 internal/jsonparse/hex.go diff --git a/decode.go b/decode.go index 1ff8938..8514ec4 100644 --- a/decode.go +++ b/decode.go @@ -1145,7 +1145,7 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco if err := dec.expectRuneType('"', jsonparse.RuneTypeStringBeg, gTyp); err != nil { return err } - var uhex [4]byte + var uhex [3]byte for { c, t, err := dec.readRune() if err != nil { @@ -1178,18 +1178,13 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco panic(fmt.Errorf("should not happen: unexpected rune after backslash: %q", c)) } case jsonparse.RuneTypeStringEscUA: - uhex[0], _ = jsonparse.HexToInt(c) + uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: - uhex[1], _ = jsonparse.HexToInt(c) + uhex[1] = byte(c) case jsonparse.RuneTypeStringEscUC: - uhex[2], _ = jsonparse.HexToInt(c) + uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: - uhex[3], _ = jsonparse.HexToInt(c) - c = 0 | - rune(uhex[0])<<12 | - rune(uhex[1])<<8 | - rune(uhex[2])<<4 | - rune(uhex[3])<<0 + c = hexToRune(uhex[0], uhex[1], uhex[2], byte(c)) handleUnicode: if utf16.IsSurrogate(c) { t, err := dec.peekRuneType() @@ -1219,27 +1214,22 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco if err != nil { return err } - uhex[0], _ = jsonparse.HexToInt(b) + uhex[0] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[1], _ = jsonparse.HexToInt(b) + uhex[1] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[2], _ = jsonparse.HexToInt(b) + uhex[2] = byte(b) b, _, err = dec.readRune() if err != nil { return err } - uhex[3], _ = jsonparse.HexToInt(b) - c2 := 0 | - rune(uhex[0])<<12 | - rune(uhex[1])<<8 | - rune(uhex[2])<<4 | - rune(uhex[3])<<0 + c2 := hexToRune(uhex[0], uhex[1], uhex[2], byte(b)) d := utf16.DecodeRune(c, c2) if d == utf8.RuneError { _, _ = out.WriteRune(utf8.RuneError) diff --git a/encode_escape.go b/encode_escape.go index 0054e72..97da6e9 100644 --- a/encode_escape.go +++ b/encode_escape.go @@ -5,6 +5,7 @@ package lowmemjson import ( + "fmt" "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/jsonstring" @@ -27,6 +28,27 @@ const ( BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode ) +func hexToInt(c byte) rune { + switch { + case '0' <= c && c <= '9': + return rune(c) - '0' + case 'a' <= c && c <= 'f': + return rune(c) - 'a' + 10 + case 'A' <= c && c <= 'F': + return rune(c) - 'A' + 10 + default: + panic(fmt.Errorf("should not happen: invalid hex char: %q", c)) + } +} + +func hexToRune(a, b, c, d byte) rune { + return 0 | + hexToInt(a)<<12 | + hexToInt(b)<<8 | + hexToInt(c)<<4 | + hexToInt(d)<<0 +} + // A BackslashEscaper controls how a ReEncoder emits a character in a // JSON string. The `rune` argument is the character being // considered, and the `BackslashEscapeMode` argument is how it was diff --git a/internal/jsonparse/hex.go b/internal/jsonparse/hex.go deleted file mode 100644 index 3ed5f01..0000000 --- a/internal/jsonparse/hex.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package jsonparse - -const Hex = "0123456789abcdef" - -func HexToInt(c rune) (byte, bool) { - switch { - case '0' <= c && c <= '9': - return byte(c) - '0', true - case 'a' <= c && c <= 'f': - return byte(c) - 'a' + 10, true - case 'A' <= c && c <= 'F': - return byte(c) - 'A' + 10, true - default: - return 0, false - } -} diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 73584d9..2f5c1ab 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -14,6 +14,12 @@ import ( var ErrParserExceededMaxDepth = errors.New("exceeded max depth") +func isHex(c rune) bool { + return ('0' <= c && c <= '9') || + ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F') +} + // RuneType is the classification of a rune when parsing JSON input. // A Parser, rather than grouping runes into tokens and classifying // tokens, classifies runes directly. @@ -667,30 +673,26 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) } case RuneTypeStringEscU: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUA), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUA), nil case RuneTypeStringEscUA: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUB), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUB), nil case RuneTypeStringEscUB: - if _, ok := HexToInt(c); ok { - return par.replaceState(RuneTypeStringEscUC), nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + return par.replaceState(RuneTypeStringEscUC), nil case RuneTypeStringEscUC: - if _, ok := HexToInt(c); ok { - par.replaceState(RuneTypeStringBeg) - return RuneTypeStringEscUD, nil - } else { + if !isHex(c) { return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } + par.replaceState(RuneTypeStringBeg) + return RuneTypeStringEscUD, nil // number ////////////////////////////////////////////////////////////////////////////////// // // Here's a flattened drawing of the syntax diagram from www.json.org : diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go index f29dc3f..a7670c6 100644 --- a/internal/jsonstring/encode_string.go +++ b/internal/jsonstring/encode_string.go @@ -10,7 +10,6 @@ import ( "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/fastio" - "git.lukeshu.com/go/lowmemjson/internal/jsonparse" ) // BackslashEscapeMode is describe in the main lowmemjson package @@ -27,13 +26,14 @@ const ( type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { + const alphabet = "0123456789abcdef" buf := [6]byte{ '\\', 'u', - jsonparse.Hex[(c>>12)&0xf], - jsonparse.Hex[(c>>8)&0xf], - jsonparse.Hex[(c>>4)&0xf], - jsonparse.Hex[(c>>0)&0xf], + alphabet[(c>>12)&0xf], + alphabet[(c>>8)&0xf], + alphabet[(c>>4)&0xf], + alphabet[(c>>0)&0xf], } return w.Write(buf[:]) } diff --git a/reencode.go b/reencode.go index f23c85a..4974cb7 100644 --- a/reencode.go +++ b/reencode.go @@ -105,7 +105,7 @@ type ReEncoder struct { lastNonSpaceNonEOF jsonparse.RuneType wasNumber bool curIndent int - uhex [4]byte // "\uABCD"-encoded characters in strings + uhex [3]byte // "\uABCD"-encoded characters in strings fracZeros int64 expZero bool specu *speculation @@ -530,18 +530,13 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { } err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))) case jsonparse.RuneTypeStringEscUA: - enc.uhex[0], _ = jsonparse.HexToInt(c) + enc.uhex[0] = byte(c) case jsonparse.RuneTypeStringEscUB: - enc.uhex[1], _ = jsonparse.HexToInt(c) + enc.uhex[1] = byte(c) case jsonparse.RuneTypeStringEscUC: - enc.uhex[2], _ = jsonparse.HexToInt(c) + enc.uhex[2] = byte(c) case jsonparse.RuneTypeStringEscUD: - enc.uhex[3], _ = jsonparse.HexToInt(c) - c := 0 | - rune(enc.uhex[0])<<12 | - rune(enc.uhex[1])<<8 | - rune(enc.uhex[2])<<4 | - rune(enc.uhex[3])<<0 + c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c)) err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))) case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() -- cgit v1.2.3-2-g168b