summaryrefslogtreecommitdiff
path: root/internal/jsonstring/encode_string.go
blob: f29dc3fd5640def97435e8fc0c7514dd1a9a7e76 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// Copyright (C) 2022-2023  Luke Shumaker <lukeshu@lukeshu.com>
//
// SPDX-License-Identifier: GPL-2.0-or-later

package jsonstring

import (
	"fmt"
	"io"
	"unicode/utf8"

	"git.lukeshu.com/go/lowmemjson/internal/fastio"
	"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
)

// BackslashEscapeMode is described in the main lowmemjson package
// docs.
//
// Within this package it is the value that tells WriteStringChar how
// a single rune should be written.
type BackslashEscapeMode uint8

// The escape modes understood by WriteStringChar; it panics when
// given any other value.
const (
	// BackslashEscapeNone requests that the rune be written
	// verbatim.  WriteStringChar still escapes control characters
	// (below U+0020), '"', and '\\' in this mode.
	BackslashEscapeNone BackslashEscapeMode = iota
	// BackslashEscapeShort requests a two-character escape such
	// as `\n`; it can only be honored for runes that have one.
	BackslashEscapeShort
	// BackslashEscapeUnicode requests a `\uXXXX` escape;
	// WriteStringChar honors it only for runes up to U+FFFF.
	BackslashEscapeUnicode
)

// BackslashEscaper is described in the main lowmemjson package docs.
//
// EncodeStringFromString and EncodeStringFromBytes call it once per
// rune, always passing BackslashEscapeNone as the mode argument, and
// hand the mode it returns to WriteStringChar.
type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode

// writeStringUnicodeEscape writes c to w as the six bytes `\uXXXX`,
// where XXXX are the four hex digits (looked up in jsonparse.Hex) of
// the low 16 bits of c, most significant digit first.  Bits above
// the low 16 are ignored.  It returns the result of the single
// w.Write call.
func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) {
	var esc [6]byte
	esc[0], esc[1] = '\\', 'u'
	// Fill the four digit slots from the highest nibble down.
	for digit := 0; digit < 4; digit++ {
		shift := uint(12 - 4*digit)
		esc[2+digit] = jsonparse.Hex[(c>>shift)&0xf]
	}
	return w.Write(esc[:])
}

// writeStringShortEscape writes c to w as a two-byte backslash
// escape: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, or `\t`.  It
// returns the result of the single w.Write call.
//
// It panics if c is not one of those eight runes; callers are
// expected to check before calling.
func writeStringShortEscape(w io.Writer, c rune) (int, error) {
	// raw[i] is the rune being escaped; letter[i] is the byte that
	// follows the backslash in its escape sequence.
	const (
		raw    = "\"\\/\b\f\n\r\t"
		letter = "\"\\/bfnrt"
	)
	for i := 0; i < len(raw); i++ {
		if rune(raw[i]) != c {
			continue
		}
		esc := [2]byte{'\\', letter[i]}
		return w.Write(esc[:])
	}
	panic(fmt.Errorf("should not happen: writeStringShortEscape called with invalid rune: %q", c))
}

// WriteStringChar writes the single rune c to w as part of the body
// of a JSON string.  The requested escape mode is obeyed where the
// result is valid and overridden where it is not:
//
//   - BackslashEscapeNone: c is written verbatim, except that
//     control characters (below U+0020), '"', and '\\' are always
//     escaped, using the short form when one exists and `\uXXXX`
//     otherwise.
//   - BackslashEscapeShort: c is written as a two-character escape
//     if it has one.  A control character without a short form is
//     written as `\uXXXX`, because it may not appear unescaped in a
//     JSON string.  Any other rune is written verbatim.
//   - BackslashEscapeUnicode: c is written as `\uXXXX`, unless it is
//     above U+FFFF, in which case it is written verbatim.
//
// It returns the count and error reported by the underlying write on
// w.  It panics if escape is not one of the three modes above.
func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) (int, error) {
	switch escape {
	case BackslashEscapeNone:
		switch {
		case c < 0x0020: // override, gotta escape these
			switch c {
			case '\b', '\f', '\n', '\r', '\t': // short-escape if possible
				return writeStringShortEscape(w, c)
			default:
				return writeStringUnicodeEscape(w, c)
			}
		case c == '"' || c == '\\': // override, gotta escape these
			return writeStringShortEscape(w, c)
		default: // obey
			return w.WriteRune(c)
		}
	case BackslashEscapeShort:
		switch c {
		case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey
			return writeStringShortEscape(w, c)
		default:
			if c < 0x0020 {
				// override: there is no short escape, but a raw
				// control character would make the output invalid
				// JSON, so fall back to the \uXXXX form.
				return writeStringUnicodeEscape(w, c)
			}
			// override, can't short-escape these
			return w.WriteRune(c)
		}
	case BackslashEscapeUnicode:
		switch {
		case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?)
			return w.WriteRune(c)
		default: // obey
			return writeStringUnicodeEscape(w, c)
		}
	default:
		panic("escaper returned an invalid escape mode")
	}
}

// EncodeStringFromString writes str to w as a double-quoted JSON
// string: an opening '"', then each rune of str via WriteStringChar,
// then a closing '"'.
//
// For every rune, escaper is called with BackslashEscapeNone as the
// mode argument, and the mode it returns is passed to
// WriteStringChar.  The first write error stops the encoding and is
// returned.
func EncodeStringFromString(w fastio.AllWriter, escaper BackslashEscaper, str string) error {
	err := w.WriteByte('"')
	if err != nil {
		return err
	}
	for _, r := range str {
		mode := escaper(r, BackslashEscapeNone)
		if _, err = WriteStringChar(w, r, mode); err != nil {
			return err
		}
	}
	return w.WriteByte('"')
}

// EncodeStringFromBytes writes str to w as a double-quoted JSON
// string: an opening '"', then each rune decoded from str via
// WriteStringChar, then a closing '"'.
//
// Runes are decoded with utf8.DecodeRune.  For every rune, escaper
// is called with BackslashEscapeNone as the mode argument, and the
// mode it returns is passed to WriteStringChar.  The first write
// error stops the encoding and is returned.
func EncodeStringFromBytes(w fastio.AllWriter, escaper BackslashEscaper, str []byte) error {
	err := w.WriteByte('"')
	if err != nil {
		return err
	}
	// Consume the input one rune at a time from the front.
	for rest := str; len(rest) > 0; {
		r, width := utf8.DecodeRune(rest)
		rest = rest[width:]
		mode := escaper(r, BackslashEscapeNone)
		if _, err = WriteStringChar(w, r, mode); err != nil {
			return err
		}
	}
	return w.WriteByte('"')
}