From f68498a6fdb421483d9aebb45527452f6255bb68 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 25 Feb 2023 16:17:01 -0700 Subject: jsonparse: Don't show raw bytes as Unicode --- internal/jsonparse/parse.go | 73 ++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 34 deletions(-) (limited to 'internal/jsonparse/parse.go') diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 214e3ba..5547df4 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -15,12 +15,17 @@ import ( var ErrParserExceededMaxDepth = errors.New("exceeded max depth") type InvalidCharacterError struct { - Char rune - Where string + Char rune + IsRune bool + Where string } func (e *InvalidCharacterError) Error() string { - return fmt.Sprintf("invalid character %q %s", e.Char, e.Where) + if e.IsRune { + return fmt.Sprintf("invalid character %q %s", e.Char, e.Where) + } else { + return fmt.Sprintf("invalid character '\\x%02x' %s", e.Char, e.Where) + } } func isHex(c rune) bool { @@ -520,7 +525,7 @@ func (par *Parser) HandleEOF() (RuneType, error) { case 1: switch { case par.stack[0].IsNumber(): - if _, err := par.HandleRune('\n'); err == nil { + if _, err := par.HandleRune('\n', true); err == nil { return RuneTypeEOF, nil } case par.stack[0] == runeTypeAny: @@ -562,7 +567,7 @@ func (par *Parser) IsAtBarrier() bool { // RuneTypeEOF indicates that the rune cannot be appended to the JSON // document; a new JSON document must be started in order to process // that rune. -func (par *Parser) HandleRune(c rune) (RuneType, error) { +func (par *Parser) HandleRune(c rune, isRune bool) (RuneType, error) { if par.closed { return RuneTypeError, iofs.ErrClosed } @@ -580,7 +585,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { if len(par.barriers) > 0 { return RuneTypeEOF, nil } else { - return RuneTypeError, &InvalidCharacterError{c, "after top-level value"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "after top-level value"} } } switch par.stack[len(par.stack)-1] { @@ -614,7 +619,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case 'n': return par.replaceState(RuneTypeNullN), nil default: - return RuneTypeError, &InvalidCharacterError{c, "looking for beginning of value"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of value"} } // object ////////////////////////////////////////////////////////////////////////////////// case RuneTypeObjectBeg: // waiting for key to start or '}' @@ -628,7 +633,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { par.popState() return RuneTypeObjectEnd, nil default: - return RuneTypeError, &InvalidCharacterError{c, "looking for beginning of object key string"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of object key string"} } case RuneTypeStringEnd: // waiting for ':' switch c { @@ -639,7 +644,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { par.pushState(runeTypeAny) return RuneTypeObjectColon, nil default: - return RuneTypeError, &InvalidCharacterError{c, "after object key"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key"} } case RuneTypeObjectComma: // waiting for ',' or '}' switch c { @@ -652,7 +657,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { par.popState() return RuneTypeObjectEnd, nil default: - return RuneTypeError, &InvalidCharacterError{c, "after object key:value pair"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key:value pair"} } // array /////////////////////////////////////////////////////////////////////////////////// case RuneTypeArrayBeg: // waiting for item to start or ']' @@ -665,7 +670,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { default: par.replaceState(RuneTypeArrayComma) par.pushState(runeTypeAny) - return par.HandleRune(c) + return par.HandleRune(c, isRune) } case RuneTypeArrayComma: // waiting for ',' or ']' switch c { @@ -678,7 +683,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { par.popState() return RuneTypeArrayEnd, nil default: - return RuneTypeError, &InvalidCharacterError{c, "after array element"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "after array element"} } // string ////////////////////////////////////////////////////////////////////////////////// case RuneTypeStringBeg: // waiting for char or '"' @@ -691,7 +696,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case 0x0020 <= c && c <= 0x10FFFF: return RuneTypeStringChar, nil default: - return RuneTypeError, &InvalidCharacterError{c, "in string literal"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "in string literal"} } case RuneTypeStringEsc: // waiting for escape char switch c { @@ -701,7 +706,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case 'u': return par.replaceState(RuneTypeStringEscU), nil default: - return RuneTypeError, &InvalidCharacterError{c, "in string escape code"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "in string escape code"} } case RuneTypeStringEscU: if !isHex(c) { @@ -771,7 +776,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberIntDig), nil default: - return RuneTypeError, &InvalidCharacterError{c, "in numeric literal"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "in numeric literal"} } case RuneTypeNumberIntZero: // C switch c { @@ -781,7 +786,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() - return par.HandleRune(c) + return par.HandleRune(c, isRune) } case RuneTypeNumberIntDig: // D switch c { @@ -793,14 +798,14 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() - return par.HandleRune(c) + return par.HandleRune(c, isRune) } case RuneTypeNumberFracDot: // E switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberFracDig), nil default: - return RuneTypeError, &InvalidCharacterError{c, "after decimal point in numeric literal"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "after decimal point in numeric literal"} } case RuneTypeNumberFracDig: // F switch c { @@ -810,7 +815,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() - return par.HandleRune(c) + return par.HandleRune(c, isRune) } case RuneTypeNumberExpE: // G switch c { @@ -819,14 +824,14 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberExpDig), nil default: - return RuneTypeError, &InvalidCharacterError{c, "in exponent of numeric literal"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"} } case RuneTypeNumberExpSign: // H switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberExpDig), nil default: - return RuneTypeError, &InvalidCharacterError{c, "in exponent of numeric literal"} + return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"} } case RuneTypeNumberExpDig: // I switch c { @@ -834,40 +839,40 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { return par.replaceState(RuneTypeNumberExpDig), nil default: par.popState() - return par.HandleRune(c) + return par.HandleRune(c, isRune) } // literals //////////////////////////////////////////////////////////////////////////////// // true case RuneTypeTrueT: - return par.expectRune(c, 'r', RuneTypeTrueR, "true", false) + return par.expectRune(c, isRune, 'r', RuneTypeTrueR, "true", false) case RuneTypeTrueR: - return par.expectRune(c, 'u', RuneTypeTrueU, "true", false) + return par.expectRune(c, isRune, 'u', RuneTypeTrueU, "true", false) case RuneTypeTrueU: - return par.expectRune(c, 'e', RuneTypeTrueE, "true", true) + return par.expectRune(c, isRune, 'e', RuneTypeTrueE, "true", true) // false case RuneTypeFalseF: - return par.expectRune(c, 'a', RuneTypeFalseA, "false", false) + return par.expectRune(c, isRune, 'a', RuneTypeFalseA, "false", false) case RuneTypeFalseA: - return par.expectRune(c, 'l', RuneTypeFalseL, "false", false) + return par.expectRune(c, isRune, 'l', RuneTypeFalseL, "false", false) case RuneTypeFalseL: - return par.expectRune(c, 's', RuneTypeFalseS, "false", false) + return par.expectRune(c, isRune, 's', RuneTypeFalseS, "false", false) case RuneTypeFalseS: - return par.expectRune(c, 'e', RuneTypeFalseE, "false", true) + return par.expectRune(c, isRune, 'e', RuneTypeFalseE, "false", true) // null case RuneTypeNullN: - return par.expectRune(c, 'u', RuneTypeNullU, "null", false) + return par.expectRune(c, isRune, 'u', RuneTypeNullU, "null", false) case RuneTypeNullU: - return par.expectRune(c, 'l', RuneTypeNullL1, "null", false) + return par.expectRune(c, isRune, 'l', RuneTypeNullL1, "null", false) case RuneTypeNullL1: - return par.expectRune(c, 'l', RuneTypeNullL2, "null", true) + return par.expectRune(c, isRune, 'l', RuneTypeNullL2, "null", true) default: panic(fmt.Errorf(`should not happen: invalid stack: "%s"`, par.stackString())) } } -func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { +func (par *Parser) expectRune(c rune, isRune bool, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { if c != exp { - return RuneTypeError, &InvalidCharacterError{c, fmt.Sprintf("in literal %s (expecting %q)", context, exp)} + return RuneTypeError, &InvalidCharacterError{c, isRune, fmt.Sprintf("in literal %s (expecting %q)", context, exp)} } if pop { par.popState() -- cgit v1.2.3-2-g168b