aboutsummaryrefslogtreecommitdiffstats
path: root/client/go
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2023-05-16 14:22:56 +0200
committerMartin Polden <mpolden@mpolden.no>2023-05-16 15:53:54 +0200
commit05ef566f46abff4c27e2d2a603d73cd75586aef1 (patch)
treee21422c1f72ac780a8fc855b1849faa323d91f34 /client/go
parent194f2afcd1805f3a0f1bd95672130d96b3d4c314 (diff)
Parse JSON token-by-token
Diffstat (limited to 'client/go')
-rw-r--r--client/go/go.mod3
-rw-r--r--client/go/go.sum4
-rw-r--r--client/go/internal/vespa/document/document.go223
-rw-r--r--client/go/internal/vespa/document/document_test.go30
-rw-r--r--client/go/internal/vespa/document/http.go7
5 files changed, 184 insertions, 83 deletions
diff --git a/client/go/go.mod b/client/go/go.mod
index c70ee5b75c8..f5f923cc063 100644
--- a/client/go/go.mod
+++ b/client/go/go.mod
@@ -6,7 +6,8 @@ require (
github.com/alessio/shellescape v1.4.1
github.com/briandowns/spinner v1.23.0
github.com/fatih/color v1.15.0
- github.com/goccy/go-json v0.10.2
+ // This is the most recent version compatible with Go 1.18. Upgrade when we upgrade our Go version
+ github.com/go-json-experiment/json v0.0.0-20220727223814-4987ed27d447
github.com/klauspost/compress v1.16.5
github.com/mattn/go-colorable v0.1.13
github.com/mattn/go-isatty v0.0.18
diff --git a/client/go/go.sum b/client/go/go.sum
index 9b79c215864..861c8725ed0 100644
--- a/client/go/go.sum
+++ b/client/go/go.sum
@@ -11,8 +11,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
-github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
-github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
+github.com/go-json-experiment/json v0.0.0-20220727223814-4987ed27d447 h1:hDdASyrtiSuQvaafDrVTX34wy4ibhxrJO9/vyFbBt0k=
+github.com/go-json-experiment/json v0.0.0-20220727223814-4987ed27d447/go.mod h1:jbpkervfdK2HCcB2YEFmwYeaq057KFiaaKTNTHV4OOQ=
github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
diff --git a/client/go/internal/vespa/document/document.go b/client/go/internal/vespa/document/document.go
index ce8b22b24f0..a33c4a3c5af 100644
--- a/client/go/internal/vespa/document/document.go
+++ b/client/go/internal/vespa/document/document.go
@@ -11,7 +11,13 @@ import (
"time"
- "github.com/goccy/go-json"
+ // Why do we use an experimental parser? This appears to be the only JSON library that satisfies the following
+ // requirements:
+ // - Faster than the std parser
+ // - Supports parsing from an io.Reader
+ // - Supports parsing token-by-token
+ // - Few allocations during parsing (especially for large objects)
+ "github.com/go-json-experiment/json"
)
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
@@ -22,6 +28,12 @@ const (
OperationPut Operation = iota
OperationUpdate
OperationRemove
+
+ jsonArrayStart json.Kind = '['
+ jsonArrayEnd json.Kind = ']'
+ jsonObjectStart json.Kind = '{'
+ jsonObjectEnd json.Kind = '}'
+ jsonString json.Kind = '"'
)
// Id represents a Vespa document ID.
@@ -103,22 +115,16 @@ type Document struct {
Create bool
}
-type jsonDocument struct {
- IdString string `json:"id"`
- PutId string `json:"put"`
- UpdateId string `json:"update"`
- RemoveId string `json:"remove"`
- Condition string `json:"condition"`
- Fields json.RawMessage `json:"fields"`
- Create bool `json:"create"`
-}
-
// Decoder decodes documents from a JSON structure which is either an array of objects, or objects separated by newline.
type Decoder struct {
- buf *bufio.Reader
- dec *json.Decoder
+ // r is the buffered input. guessMode reads and unreads single bytes from it directly, and NewDecoder feeds it to
+ // dec through a TeeReader.
+ r *bufio.Reader
+ // dec is the token-level JSON decoder reading from r.
+ dec *json.Decoder
+ // buf receives a copy of every byte dec reads from r. readField uses it to extract the raw bytes of a "fields"
+ // object.
+ buf bytes.Buffer
+
+ // array and jsonl record the input mode detected by guessMode: a JSON array of objects, or standalone objects.
array bool
jsonl bool
+
+ // fieldsEnd is the decoder input offset just past the most recently extracted "fields" object. It is also the
+ // offset at which the unread contents of buf begin.
+ fieldsEnd int64
}
func (d Document) String() string {
@@ -139,12 +145,16 @@ func (d Document) String() string {
if d.Create {
sb.WriteString(", create=true")
}
+ if d.Fields != nil {
+ sb.WriteString(", fields=")
+ sb.WriteString(string(d.Fields))
+ }
return sb.String()
}
func (d *Decoder) guessMode() error {
for !d.array && !d.jsonl {
- b, err := d.buf.ReadByte()
+ b, err := d.r.ReadByte()
if err != nil {
return err
}
@@ -152,36 +162,65 @@ func (d *Decoder) guessMode() error {
if b < 0x80 && asciiSpace[b] != 0 {
continue
}
- switch rune(b) {
- case '{':
+ switch json.Kind(b) {
+ case jsonObjectStart:
d.jsonl = true
- case '[':
+ case jsonArrayStart:
d.array = true
default:
return fmt.Errorf("unexpected token: %q", string(b))
}
- if err := d.buf.UnreadByte(); err != nil {
+ if err := d.r.UnreadByte(); err != nil {
return err
}
- if err := d.readArrayToken(true); err != nil {
+ if err := d.readArrayDelim(true); err != nil {
return err
}
}
return nil
}
-func (d *Decoder) readArrayToken(open bool) error {
+// readNext reads the next token from the decoder and returns it when its kind equals kind. An error from the
+// decoder is returned unchanged. A token of any other kind is reported as an "unexpected json kind" error and the
+// zero Token is returned.
+func (d *Decoder) readNext(kind json.Kind) (json.Token, error) {
+ t, err := d.dec.ReadToken()
+ if err != nil {
+ return json.Token{}, err
+ }
+ if t.Kind() != kind {
+ return json.Token{}, fmt.Errorf("unexpected json kind: %q: want %q", t, kind)
+ }
+ return t, nil
+}
+
+// readArrayDelim consumes the delimiter of the array holding the document operations: '[' when open is true,
+// otherwise ']'. It is a no-op returning nil unless the decoder is in array mode.
+func (d *Decoder) readArrayDelim(open bool) error {
if !d.array {
return nil
}
- t, err := d.dec.Token()
+ kind := jsonArrayEnd
+ if open {
+ kind = jsonArrayStart
+ }
+ _, err := d.readNext(kind)
+ return err
+}
+
+// readString reads the next token, which must be a JSON string, and returns the token's String() value. Any other
+// token kind, or a decoder error, results in an empty string and a non-nil error.
+func (d *Decoder) readString() (string, error) {
+ t, err := d.readNext(jsonString)
+ if err != nil {
+ return "", err
+ }
+ return t.String(), nil
+}
+
+// readBool reads the next token, which must be a JSON boolean, and returns its value. readNext cannot be used here
+// because a boolean has two possible kinds: 't' for true and 'f' for false. Any other kind is reported as an
+// "unexpected json kind" error.
+func (d *Decoder) readBool() (bool, error) {
+ t, err := d.dec.ReadToken()
if err != nil {
- return err
+ return false, err
}
- if (open && t == json.Delim('[')) || (!open && t == json.Delim(']')) {
- return nil
+ kind := t.Kind()
+ if kind != 't' && kind != 'f' {
+ return false, fmt.Errorf("unexpected json kind: %q: want %q or %q", t, 't', 'f')
}
- return fmt.Errorf("invalid array token: %q", t)
+ return t.Bool(), nil
}
func (d *Decoder) Decode() (Document, error) {
@@ -192,60 +231,112 @@ func (d *Decoder) Decode() (Document, error) {
return doc, err
}
+// readField decodes the value of the object member called name and stores the result in doc. The caller must have
+// read the member name already, so the decoder is positioned at the member's value.
+//
+// Recognized members:
+//   - "id", "put", "update", "remove": a string document ID; also sets doc.Operation.
+//   - "condition": a string, stored in doc.Condition.
+//   - "create": a boolean, stored in doc.Create.
+//   - "fields": an object, whose raw bytes are copied verbatim into doc.Fields.
+//
+// Any other member is skipped: exactly one value is consumed and discarded, so the decoder stays aligned on the
+// next member name. A decoder error, a value of the wrong kind or an unparsable document ID is returned as error.
+func (d *Decoder) readField(name string, doc *Document) error {
+	readId := false
+	switch name {
+	case "id", "put":
+		readId = true
+		doc.Operation = OperationPut
+	case "update":
+		readId = true
+		doc.Operation = OperationUpdate
+	case "remove":
+		readId = true
+		doc.Operation = OperationRemove
+	case "condition":
+		condition, err := d.readString()
+		if err != nil {
+			return err
+		}
+		doc.Condition = condition
+	case "create":
+		create, err := d.readBool()
+		if err != nil {
+			return err
+		}
+		doc.Create = create
+	case "fields":
+		if _, err := d.readNext(jsonObjectStart); err != nil {
+			return err
+		}
+		// The '{' token was just consumed, so it sits one byte before the current input offset
+		start := d.dec.InputOffset() - 1
+		// Skip data between the most recent ending position of fields and current offset
+		d.buf.Next(int(start - d.fieldsEnd))
+		// Consume tokens until the object opened above is closed. Only object delimiters need counting: arrays
+		// cannot close an object, and the decoder rejects unbalanced input.
+		depth := 1
+		for depth > 0 {
+			t, err := d.dec.ReadToken()
+			if err != nil {
+				return err
+			}
+			switch t.Kind() {
+			case jsonObjectStart:
+				depth++
+			case jsonObjectEnd:
+				depth--
+			}
+		}
+		// buf now starts at the opening '{'. Copy everything up to and including the closing '}'
+		d.fieldsEnd = d.dec.InputOffset()
+		doc.Fields = make([]byte, int(d.fieldsEnd-start))
+		if _, err := d.buf.Read(doc.Fields); err != nil {
+			return err
+		}
+	default:
+		// Unknown member: consume exactly one value. Without this, a string value would be mistaken for the next
+		// member name and any other value would abort decoding of the document.
+		depth := 0
+		for {
+			t, err := d.dec.ReadToken()
+			if err != nil {
+				return err
+			}
+			switch t.Kind() {
+			case jsonObjectStart, jsonArrayStart:
+				depth++
+			case jsonObjectEnd, jsonArrayEnd:
+				depth--
+			}
+			// depth is zero after a scalar, or after the delimiter closing the outermost object/array
+			if depth == 0 {
+				break
+			}
+		}
+	}
+	if readId {
+		s, err := d.readString()
+		if err != nil {
+			return err
+		}
+		id, err := ParseId(s)
+		if err != nil {
+			return err
+		}
+		doc.Id = id
+	}
+	return nil
+}
+
+// decode reads the next document operation from the input. It first ensures the input mode is known (guessMode).
+// If the next token closes the array holding the operations, that token is consumed and io.EOF is returned.
+// Otherwise it expects an object and passes each member name to readField until the object is closed.
+//
+// NOTE(review): nothing visible here verifies that an id/put/update/remove member was present. The removed
+// parseDocument rejected such input with "invalid document: missing operation" - confirm whether that check is
+// still wanted.
func (d *Decoder) decode() (Document, error) {
if err := d.guessMode(); err != nil {
return Document{}, err
}
- if !d.dec.More() {
- if err := d.readArrayToken(false); err != nil {
+ if d.dec.PeekKind() == jsonArrayEnd {
+ // Reached end of the array holding document operations
+ if err := d.readArrayDelim(false); err != nil {
return Document{}, err
}
return Document{}, io.EOF
}
- doc := jsonDocument{}
- if err := d.dec.Decode(&doc); err != nil {
+ // Start of document operation
+ if _, err := d.readNext(jsonObjectStart); err != nil {
return Document{}, err
}
- return parseDocument(&doc)
-}
-
-func NewDecoder(r io.Reader) *Decoder {
- buf := bufio.NewReaderSize(r, 1<<26)
- return &Decoder{
- buf: buf,
- dec: json.NewDecoder(buf),
+ var doc Document
+loop:
+ for {
+ switch d.dec.PeekKind() {
+ case jsonString:
+ // A string at this position is a member name; readField consumes the corresponding value
+ t, err := d.dec.ReadToken()
+ if err != nil {
+ return Document{}, err
+ }
+ if err := d.readField(t.String(), &doc); err != nil {
+ return Document{}, err
+ }
+ default:
+ // Anything but a member name must be the end of the document operation
+ if _, err := d.readNext(jsonObjectEnd); err != nil {
+ return Document{}, err
+ }
+ break loop
+ }
}
+ return doc, nil
}
-func parseDocument(d *jsonDocument) (Document, error) {
- id := ""
- var op Operation
- if d.IdString != "" {
- op = OperationPut
- id = d.IdString
- } else if d.PutId != "" {
- op = OperationPut
- id = d.PutId
- } else if d.UpdateId != "" {
- op = OperationUpdate
- id = d.UpdateId
- } else if d.RemoveId != "" {
- op = OperationRemove
- id = d.RemoveId
- } else {
- return Document{}, fmt.Errorf("invalid document: missing operation: %v", d)
- }
- docId, err := ParseId(id)
- if err != nil {
- return Document{}, err
- }
- return Document{
- Id: docId,
- Operation: op,
- Condition: d.Condition,
- Create: d.Create,
- Fields: d.Fields,
- }, nil
+// NewDecoder returns a Decoder that reads document operations from r. Input is buffered using a buffer of 1<<26
+// bytes (64 MiB). The JSON decoder reads the buffered input through a TeeReader, so every byte it consumes is also
+// written to the Decoder's internal buffer.
+func NewDecoder(r io.Reader) *Decoder {
+ sz := 1 << 26
+ d := &Decoder{r: bufio.NewReaderSize(r, sz)}
+ d.dec = json.NewDecoder(io.TeeReader(d.r, &d.buf))
+ return d
}
func parseError(value string) error {
diff --git a/client/go/internal/vespa/document/document_test.go b/client/go/internal/vespa/document/document_test.go
index 397136173bc..71400314634 100644
--- a/client/go/internal/vespa/document/document_test.go
+++ b/client/go/internal/vespa/document/document_test.go
@@ -113,18 +113,26 @@ func feedInput(jsonl bool) string {
`
{
"put": "id:ns:type::doc1",
- "fields": {"foo": "123"}
+ "fields": { "foo" : "123", "bar": {"a": [1, 2, 3]}}
}`,
`
{
"put": "id:ns:type::doc2",
+ "create": false,
+ "condition": "foo",
"fields": {"bar": "456"}
}`,
`
{
- "remove": "id:ns:type::doc1"
+ "remove": "id:ns:type::doc3"
}
-`}
+`,
+ `
+{
+ "put": "id:ns:type::doc4",
+ "create": true,
+ "fields": {"qux": "789"}
+}`}
if jsonl {
return strings.Join(operations, "\n")
}
@@ -135,9 +143,10 @@ func testDocumentDecoder(t *testing.T, jsonLike string) {
t.Helper()
r := NewDecoder(strings.NewReader(jsonLike))
want := []Document{
- {Id: mustParseId("id:ns:type::doc1"), Operation: OperationPut, Fields: []byte(`{"foo": "123"}`)},
- {Id: mustParseId("id:ns:type::doc2"), Operation: OperationPut, Fields: []byte(`{"bar": "456"}`)},
- {Id: mustParseId("id:ns:type::doc1"), Operation: OperationRemove},
+ {Id: mustParseId("id:ns:type::doc1"), Operation: OperationPut, Fields: []byte(`{ "foo" : "123", "bar": {"a": [1, 2, 3]}}`)},
+ {Id: mustParseId("id:ns:type::doc2"), Operation: OperationPut, Condition: "foo", Fields: []byte(`{"bar": "456"}`)},
+ {Id: mustParseId("id:ns:type::doc3"), Operation: OperationRemove},
+ {Id: mustParseId("id:ns:type::doc4"), Operation: OperationPut, Create: true, Fields: []byte(`{"qux": "789"}`)},
}
got := []Document{}
for {
@@ -155,10 +164,11 @@ func testDocumentDecoder(t *testing.T, jsonLike string) {
}
}
-func TestDocumentDecoder(t *testing.T) {
- testDocumentDecoder(t, feedInput(false))
- testDocumentDecoder(t, feedInput(true))
+// TestDocumentDecoderArray runs the shared decoder test against feedInput(false), i.e. the non-JSONL input.
+func TestDocumentDecoderArray(t *testing.T) { testDocumentDecoder(t, feedInput(false)) }
+
+// TestDocumentDecoderJSONL runs the shared decoder test against feedInput(true), i.e. newline-separated operations.
+func TestDocumentDecoderJSONL(t *testing.T) { testDocumentDecoder(t, feedInput(true)) }
+func TestDocumentDecoderInvalid(t *testing.T) {
jsonLike := `
{
"put": "id:ns:type::doc1",
@@ -175,7 +185,7 @@ func TestDocumentDecoder(t *testing.T) {
t.Errorf("unexpected error: %s", err)
}
_, err = r.Decode()
- wantErr := "invalid json at byte offset 122: json: string of object unexpected end of JSON input"
+ wantErr := "invalid json at byte offset 109: json: invalid character '\\n' within string (expecting non-control character)"
if err.Error() != wantErr {
t.Errorf("want error %q, got %q", wantErr, err.Error())
}
diff --git a/client/go/internal/vespa/document/http.go b/client/go/internal/vespa/document/http.go
index a389a82cee1..8f7ac5bfe63 100644
--- a/client/go/internal/vespa/document/http.go
+++ b/client/go/internal/vespa/document/http.go
@@ -14,8 +14,7 @@ import (
"sync/atomic"
"time"
- "github.com/goccy/go-json"
-
+ "github.com/go-json-experiment/json"
"github.com/klauspost/compress/gzip"
"github.com/vespa-engine/vespa/client/go/internal/build"
@@ -328,8 +327,8 @@ func resultWithResponse(resp *http.Response, sentBytes int, result Result, elaps
result.Status = StatusTransportFailure
}
var body struct {
- Message string `json:"message"`
- Trace json.RawMessage `json:"trace"`
+ Message string `json:"message"`
+ Trace json.RawValue `json:"trace"`
}
buf.Reset()
written, err := io.Copy(buf, resp.Body)