// Copyright (C) 2022-2023  Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package lowmemjson

import (
	"fmt"

	"git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)

// InvalidUTF8Mode identifies one of the 3 ways that an Encoder or
// ReEncoder can behave when encountering invalid UTF-8 in a string
// value:
//
//   - InvalidUTF8Replace: replace the byte with the Unicode
//     replacement character U+FFFD.
//
//   - InvalidUTF8Preserve: allow the byte through to the
//     string-encoder, with an escape-mode of BackslashEscapeRawByte.
//
//   - InvalidUTF8Error: emit a syntax error.
//
// It is an alias for the type of the same name in the internal
// jsonstring package.
type InvalidUTF8Mode = jsonstring.InvalidUTF8Mode

// The InvalidUTF8Mode values, re-exported from the internal
// jsonstring package.  See the documentation on InvalidUTF8Mode for
// the behavior that each one selects.
const (
	InvalidUTF8Replace  = jsonstring.InvalidUTF8Replace
	InvalidUTF8Preserve = jsonstring.InvalidUTF8Preserve
	InvalidUTF8Error    = jsonstring.InvalidUTF8Error
)

// BackslashEscapeMode identifies one of the four ways that a
// character may be represented in a JSON string:
//
//   - literally, with no backslash escaping (BackslashEscapeNone)
//
//   - as a short "well-known" `\X` backslash sequence, where `X` is a
//     single character (BackslashEscapeShort)
//
//   - as a long Unicode `\uXXXX` backslash sequence, with 16
//     permutations of capitalization (the BackslashEscapeUnicode*
//     values)
//
//   - as a raw byte (BackslashEscapeRawByte); this allows you to emit
//     invalid JSON; JSON must be valid UTF-8, but this allows you to
//     emit arbitrary binary data.  If the character does not satisfy
//     `utf8.RuneSelf <= char <= 0xFF`, then the encoder will panic.
//
// It is an alias for the type of the same name in the internal
// jsonstring package.
type BackslashEscapeMode = jsonstring.BackslashEscapeMode

// The BackslashEscapeMode values, re-exported from the internal
// jsonstring package.  See the documentation on BackslashEscapeMode
// for an overview of the four kinds of representation.
const (
	// The modes that are not a `\uXXXX` sequence.
	BackslashEscapeNone    = jsonstring.BackslashEscapeNone
	BackslashEscapeShort   = jsonstring.BackslashEscapeShort
	BackslashEscapeRawByte = jsonstring.BackslashEscapeRawByte

	// The 16 capitalization permutations of a `\uXXXX` sequence, one
	// name per combination of `X`/`x` in each of the 4 hex-digit
	// positions (presumably `X` means an upper-case hex digit in
	// that position and `x` a lower-case one; confirm against the
	// jsonstring package).
	BackslashEscapeUnicodeXXXX = jsonstring.BackslashEscapeUnicodeXXXX
	BackslashEscapeUnicodeXXXx = jsonstring.BackslashEscapeUnicodeXXXx
	BackslashEscapeUnicodeXXxX = jsonstring.BackslashEscapeUnicodeXXxX
	BackslashEscapeUnicodeXXxx = jsonstring.BackslashEscapeUnicodeXXxx
	BackslashEscapeUnicodeXxXX = jsonstring.BackslashEscapeUnicodeXxXX
	BackslashEscapeUnicodeXxXx = jsonstring.BackslashEscapeUnicodeXxXx
	BackslashEscapeUnicodeXxxX = jsonstring.BackslashEscapeUnicodeXxxX
	BackslashEscapeUnicodeXxxx = jsonstring.BackslashEscapeUnicodeXxxx
	BackslashEscapeUnicodexXXX = jsonstring.BackslashEscapeUnicodexXXX
	BackslashEscapeUnicodexXXx = jsonstring.BackslashEscapeUnicodexXXx
	BackslashEscapeUnicodexXxX = jsonstring.BackslashEscapeUnicodexXxX
	BackslashEscapeUnicodexXxx = jsonstring.BackslashEscapeUnicodexXxx
	BackslashEscapeUnicodexxXX = jsonstring.BackslashEscapeUnicodexxXX
	BackslashEscapeUnicodexxXx = jsonstring.BackslashEscapeUnicodexxXx
	BackslashEscapeUnicodexxxX = jsonstring.BackslashEscapeUnicodexxxX
	BackslashEscapeUnicodexxxx = jsonstring.BackslashEscapeUnicodexxxx

	// Bounds of the `\uXXXX` modes.  hexToMode computes a mode as
	// BackslashEscapeUnicodeMin plus a 4-bit offset;
	// BackslashEscapeUnicodeMax is presumably the matching upper
	// bound (confirm against the jsonstring package).
	BackslashEscapeUnicodeMin = jsonstring.BackslashEscapeUnicodeMin
	BackslashEscapeUnicodeMax = jsonstring.BackslashEscapeUnicodeMax

	// BackslashEscapeUnicode is kept for backward compatibility; its
	// value is defined in the jsonstring package.  It is what the
	// predefined BackslashEscapers in this file return when they
	// force a `\uXXXX` sequence.
	BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode // back-compat
)

// hexToInt converts a single ASCII hexadecimal digit (`0`-`9`,
// `a`-`f`, or `A`-`F`) to its numeric value in the range 0-15.
//
// It panics if c is not a hexadecimal digit; callers are expected to
// have validated the input already.
func hexToInt(c byte) rune {
	if c >= '0' && c <= '9' {
		return rune(c - '0')
	}
	if c >= 'a' && c <= 'f' {
		return rune(c-'a') + 10
	}
	if c >= 'A' && c <= 'F' {
		return rune(c-'A') + 10
	}
	panic(fmt.Errorf("should not happen: invalid hex char: %q", c))
}

// hexToRune combines four ASCII hexadecimal digits, most-significant
// digit first, into the 16-bit value that they spell out.
//
// The digits are converted with hexToInt, so it panics if any of them
// is not a hexadecimal digit.
func hexToRune(a, b, c, d byte) rune {
	var val rune
	for _, digit := range [...]byte{a, b, c, d} {
		// Make room for the next nibble, then append it.
		val = val<<4 | hexToInt(digit)
	}
	return val
}

// hexToMode inspects the four hexadecimal digits of a `\uXXXX`
// sequence and returns the BackslashEscapeMode that records their
// capitalization.
//
// The result is BackslashEscapeUnicodeMin plus a 4-bit offset, with a
// contributing the most-significant bit and d the least-significant
// bit.  A digit's bit is set if the digit has the ASCII "lowercase
// bit" set; note that this is true not only of `a`-`f` but also of
// the decimal digits `0`-`9`.
func hexToMode(a, b, c, d byte) BackslashEscapeMode {
	// In ASCII, this bit distinguishes a lower-case letter from its
	// upper-case counterpart.
	const lowercaseBit = 0b0010_0000

	// Move each digit's flag from bit 5 down to its slot in the
	// 4-bit offset.
	bitA := (a & lowercaseBit) >> 2
	bitB := (b & lowercaseBit) >> 3
	bitC := (c & lowercaseBit) >> 4
	bitD := (d & lowercaseBit) >> 5

	return BackslashEscapeUnicodeMin + BackslashEscapeMode(bitA|bitB|bitC|bitD)
}

// A BackslashEscaper controls how a ReEncoder emits a character in a
// JSON string.  The `rune` argument is the character being
// considered, and the `BackslashEscapeMode` argument is how it was
// originally encoded in the input.  The return value is the
// BackslashEscapeMode to use when emitting the character.
//
// The ReEncoder will panic if a BackslashEscaper returns an unknown
// BackslashEscapeMode.  However, a BackslashEscaper should be
// permissive of BackslashEscapeModes it doesn't recognize; it is safe
// to just return them unmodified.
//
// The predefined BackslashEscapers are EscapePreserve, EscapeJSSafe,
// EscapeHTMLSafe, EscapeDefault, and EscapeDefaultNonHTMLSafe.
type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode

// EscapePreserve is a BackslashEscaper that preserves the original
// input escaping: it ignores the character itself and returns
// wasEscaped unmodified.
func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	return wasEscaped
}

// EscapeJSSafe is a BackslashEscaper that escapes strings such that
// the JSON is safe to embed in JS; it otherwise preserves the
// original input escaping.
//
// JSON is notionally a JS subset, but that's not actually true; so
// more conservative backslash-escaping is necessary to safely embed
// it in JS.  http://timelessrepo.com/json-isnt-a-javascript-subset
//
// Concretely, U+2028 and U+2029 are always emitted as
// BackslashEscapeUnicode; every other character keeps wasEscaped.
func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '\u2028' || c == '\u2029' {
		return BackslashEscapeUnicode
	}
	return wasEscaped
}

// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that
// the JSON is safe to embed in HTML; it otherwise preserves the
// original input escaping.
//
// Concretely, `&`, `<`, and `>` are always emitted as
// BackslashEscapeUnicode; every other character is handled the same
// as by EscapeJSSafe.
func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '&' || c == '<' || c == '>' {
		return BackslashEscapeUnicode
	}
	return EscapeJSSafe(c, wasEscaped)
}

// EscapeDefault is a BackslashEscaper that mimics the default
// behavior of encoding/json.
//
// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX`
// sequences for `\b` and `\f`.
//
// A ReEncoder uses EscapeDefault if a BackslashEscaper is not
// specified.
func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '\b' || c == '\f' {
		return BackslashEscapeUnicode
	}
	return EscapeHTMLSafe(c, wasEscaped)
}

// EscapeDefaultNonHTMLSafe is a BackslashEscaper that mimics the
// default behavior of an encoding/json.Encoder that has had
// SetEscapeHTML(false) called on it.
//
// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX`
// sequences for `\b` and `\f`.
func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
	if c == '\b' || c == '\f' {
		return BackslashEscapeUnicode
	}
	return EscapeJSSafe(c, wasEscaped)
}