summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- decode.go 28
-rw-r--r-- encode_escape.go 22
-rw-r--r-- internal/jsonparse/hex.go 20
-rw-r--r-- internal/jsonparse/parse.go 28
-rw-r--r-- internal/jsonstring/encode_string.go 10
-rw-r--r-- reencode.go 15
6 files changed, 56 insertions, 67 deletions
diff --git a/decode.go b/decode.go
index 1ff8938..8514ec4 100644
--- a/decode.go
+++ b/decode.go
@@ -1145,7 +1145,7 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco
if err := dec.expectRuneType('"', jsonparse.RuneTypeStringBeg, gTyp); err != nil {
return err
}
- var uhex [4]byte
+ var uhex [3]byte
for {
c, t, err := dec.readRune()
if err != nil {
@@ -1178,18 +1178,13 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco
panic(fmt.Errorf("should not happen: unexpected rune after backslash: %q", c))
}
case jsonparse.RuneTypeStringEscUA:
- uhex[0], _ = jsonparse.HexToInt(c)
+ uhex[0] = byte(c)
case jsonparse.RuneTypeStringEscUB:
- uhex[1], _ = jsonparse.HexToInt(c)
+ uhex[1] = byte(c)
case jsonparse.RuneTypeStringEscUC:
- uhex[2], _ = jsonparse.HexToInt(c)
+ uhex[2] = byte(c)
case jsonparse.RuneTypeStringEscUD:
- uhex[3], _ = jsonparse.HexToInt(c)
- c = 0 |
- rune(uhex[0])<<12 |
- rune(uhex[1])<<8 |
- rune(uhex[2])<<4 |
- rune(uhex[3])<<0
+ c = hexToRune(uhex[0], uhex[1], uhex[2], byte(c))
handleUnicode:
if utf16.IsSurrogate(c) {
t, err := dec.peekRuneType()
@@ -1219,27 +1214,22 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out fastio.RuneWriter) *Deco
if err != nil {
return err
}
- uhex[0], _ = jsonparse.HexToInt(b)
+ uhex[0] = byte(b)
b, _, err = dec.readRune()
if err != nil {
return err
}
- uhex[1], _ = jsonparse.HexToInt(b)
+ uhex[1] = byte(b)
b, _, err = dec.readRune()
if err != nil {
return err
}
- uhex[2], _ = jsonparse.HexToInt(b)
+ uhex[2] = byte(b)
b, _, err = dec.readRune()
if err != nil {
return err
}
- uhex[3], _ = jsonparse.HexToInt(b)
- c2 := 0 |
- rune(uhex[0])<<12 |
- rune(uhex[1])<<8 |
- rune(uhex[2])<<4 |
- rune(uhex[3])<<0
+ c2 := hexToRune(uhex[0], uhex[1], uhex[2], byte(b))
d := utf16.DecodeRune(c, c2)
if d == utf8.RuneError {
_, _ = out.WriteRune(utf8.RuneError)
diff --git a/encode_escape.go b/encode_escape.go
index 0054e72..97da6e9 100644
--- a/encode_escape.go
+++ b/encode_escape.go
@@ -5,6 +5,7 @@
package lowmemjson
import (
+ "fmt"
"unicode/utf8"
"git.lukeshu.com/go/lowmemjson/internal/jsonstring"
@@ -27,6 +28,27 @@ const (
BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode
)
+func hexToInt(c byte) rune { // hexToInt maps one ASCII hex digit (0-9, a-f, A-F) to its numeric value 0-15.
+ switch {
+ case '0' <= c && c <= '9':
+ return rune(c) - '0'
+ case 'a' <= c && c <= 'f': // lowercase a-f map to 10-15
+ return rune(c) - 'a' + 10
+ case 'A' <= c && c <= 'F': // uppercase A-F map to 10-15 as well
+ return rune(c) - 'A' + 10
+ default:
+ panic(fmt.Errorf("should not happen: invalid hex char: %q", c)) // a non-hex byte is treated as an internal bug, so this panics instead of returning an error
+ }
+}
+
+func hexToRune(a, b, c, d byte) rune { // hexToRune combines four ASCII hex digits, most significant first, into one 16-bit value; it panics (via hexToInt) if any byte is not a hex digit.
+ return 0 | // the leading `0 |` is a no-op
+ hexToInt(a)<<12 | // a supplies bits 12-15
+ hexToInt(b)<<8 |
+ hexToInt(c)<<4 |
+ hexToInt(d)<<0 // d supplies bits 0-3; the `<<0` is a no-op
+}
+
// A BackslashEscaper controls how a ReEncoder emits a character in a
// JSON string. The `rune` argument is the character being
// considered, and the `BackslashEscapeMode` argument is how it was
diff --git a/internal/jsonparse/hex.go b/internal/jsonparse/hex.go
deleted file mode 100644
index 3ed5f01..0000000
--- a/internal/jsonparse/hex.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
-//
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-package jsonparse
-
-const Hex = "0123456789abcdef"
-
-func HexToInt(c rune) (byte, bool) {
- switch {
- case '0' <= c && c <= '9':
- return byte(c) - '0', true
- case 'a' <= c && c <= 'f':
- return byte(c) - 'a' + 10, true
- case 'A' <= c && c <= 'F':
- return byte(c) - 'A' + 10, true
- default:
- return 0, false
- }
-}
diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go
index 73584d9..2f5c1ab 100644
--- a/internal/jsonparse/parse.go
+++ b/internal/jsonparse/parse.go
@@ -14,6 +14,12 @@ import (
var ErrParserExceededMaxDepth = errors.New("exceeded max depth")
+func isHex(c rune) bool { // isHex reports whether c is an ASCII hexadecimal digit: 0-9, a-f, or A-F.
+ return ('0' <= c && c <= '9') ||
+ ('a' <= c && c <= 'f') ||
+ ('A' <= c && c <= 'F')
+}
+
// RuneType is the classification of a rune when parsing JSON input.
// A Parser, rather than grouping runes into tokens and classifying
// tokens, classifies runes directly.
@@ -667,30 +673,26 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c)
}
case RuneTypeStringEscU:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUA), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUA), nil
case RuneTypeStringEscUA:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUB), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUB), nil
case RuneTypeStringEscUB:
- if _, ok := HexToInt(c); ok {
- return par.replaceState(RuneTypeStringEscUC), nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ return par.replaceState(RuneTypeStringEscUC), nil
case RuneTypeStringEscUC:
- if _, ok := HexToInt(c); ok {
- par.replaceState(RuneTypeStringBeg)
- return RuneTypeStringEscUD, nil
- } else {
+ if !isHex(c) {
return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
}
+ par.replaceState(RuneTypeStringBeg)
+ return RuneTypeStringEscUD, nil
// number //////////////////////////////////////////////////////////////////////////////////
//
// Here's a flattened drawing of the syntax diagram from www.json.org :
diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go
index f29dc3f..a7670c6 100644
--- a/internal/jsonstring/encode_string.go
+++ b/internal/jsonstring/encode_string.go
@@ -10,7 +10,6 @@ import (
"unicode/utf8"
"git.lukeshu.com/go/lowmemjson/internal/fastio"
- "git.lukeshu.com/go/lowmemjson/internal/jsonparse"
)
// BackslashEscapeMode is described in the main lowmemjson package
@@ -27,13 +26,14 @@ const (
type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) { // writeStringUnicodeEscape writes c to w as a six-byte `\uXXXX` escape using lowercase hex digits, and returns the result of w.Write.
+ const alphabet = "0123456789abcdef" // digit lookup table, indexed by nibble value
buf := [6]byte{
'\\',
'u',
- jsonparse.Hex[(c>>12)&0xf],
- jsonparse.Hex[(c>>8)&0xf],
- jsonparse.Hex[(c>>4)&0xf],
- jsonparse.Hex[(c>>0)&0xf],
+ alphabet[(c>>12)&0xf], // only the low 16 bits of c are encoded; higher bits are discarded
+ alphabet[(c>>8)&0xf],
+ alphabet[(c>>4)&0xf],
+ alphabet[(c>>0)&0xf],
}
return w.Write(buf[:])
}
diff --git a/reencode.go b/reencode.go
index f23c85a..4974cb7 100644
--- a/reencode.go
+++ b/reencode.go
@@ -105,7 +105,7 @@ type ReEncoder struct {
lastNonSpaceNonEOF jsonparse.RuneType
wasNumber bool
curIndent int
- uhex [4]byte // "\uABCD"-encoded characters in strings
+ uhex [3]byte // "\uABCD"-encoded characters in strings
fracZeros int64
expZero bool
specu *speculation
@@ -530,18 +530,13 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
}
err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort)))
case jsonparse.RuneTypeStringEscUA:
- enc.uhex[0], _ = jsonparse.HexToInt(c)
+ enc.uhex[0] = byte(c)
case jsonparse.RuneTypeStringEscUB:
- enc.uhex[1], _ = jsonparse.HexToInt(c)
+ enc.uhex[1] = byte(c)
case jsonparse.RuneTypeStringEscUC:
- enc.uhex[2], _ = jsonparse.HexToInt(c)
+ enc.uhex[2] = byte(c)
case jsonparse.RuneTypeStringEscUD:
- enc.uhex[3], _ = jsonparse.HexToInt(c)
- c := 0 |
- rune(enc.uhex[0])<<12 |
- rune(enc.uhex[1])<<8 |
- rune(enc.uhex[2])<<4 |
- rune(enc.uhex[3])<<0
+ c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c))
err = enc.emit(jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode)))
case jsonparse.RuneTypeError: // EOF explicitly stated by .Close()