diff options
-rw-r--r-- | client/go/go.mod | 3 | ||||
-rw-r--r-- | client/go/go.sum | 4 | ||||
-rw-r--r-- | client/go/internal/vespa/document/document.go | 223 | ||||
-rw-r--r-- | client/go/internal/vespa/document/document_test.go | 30 | ||||
-rw-r--r-- | client/go/internal/vespa/document/http.go | 7 |
5 files changed, 184 insertions, 83 deletions
diff --git a/client/go/go.mod b/client/go/go.mod index c70ee5b75c8..f5f923cc063 100644 --- a/client/go/go.mod +++ b/client/go/go.mod @@ -6,7 +6,8 @@ require ( github.com/alessio/shellescape v1.4.1 github.com/briandowns/spinner v1.23.0 github.com/fatih/color v1.15.0 - github.com/goccy/go-json v0.10.2 + // This is the most recent version compatible with Go 1.18. Upgrade when we upgrade our Go version + github.com/go-json-experiment/json v0.0.0-20220727223814-4987ed27d447 github.com/klauspost/compress v1.16.5 github.com/mattn/go-colorable v0.1.13 github.com/mattn/go-isatty v0.0.18 diff --git a/client/go/go.sum b/client/go/go.sum index 9b79c215864..861c8725ed0 100644 --- a/client/go/go.sum +++ b/client/go/go.sum @@ -11,8 +11,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/go-json-experiment/json v0.0.0-20220727223814-4987ed27d447 h1:hDdASyrtiSuQvaafDrVTX34wy4ibhxrJO9/vyFbBt0k= +github.com/go-json-experiment/json v0.0.0-20220727223814-4987ed27d447/go.mod h1:jbpkervfdK2HCcB2YEFmwYeaq057KFiaaKTNTHV4OOQ= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= diff --git a/client/go/internal/vespa/document/document.go b/client/go/internal/vespa/document/document.go index ce8b22b24f0..a33c4a3c5af 100644 --- a/client/go/internal/vespa/document/document.go +++ b/client/go/internal/vespa/document/document.go @@ -11,7 +11,13 @@ import ( "time" - "github.com/goccy/go-json" + // Why do we use an experimental parser? This appears to be the only JSON library that satisfies the following + // requirements: + // - Faster than the std parser + // - Supports parsing from a io.Reader + // - Supports parsing token-by-token + // - Few allocations during parsing (especially for large objects) + "github.com/go-json-experiment/json" ) var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} @@ -22,6 +28,12 @@ const ( OperationPut Operation = iota OperationUpdate OperationRemove + + jsonArrayStart json.Kind = '[' + jsonArrayEnd json.Kind = ']' + jsonObjectStart json.Kind = '{' + jsonObjectEnd json.Kind = '}' + jsonString json.Kind = '"' ) // Id represents a Vespa document ID. @@ -103,22 +115,16 @@ type Document struct { Create bool } -type jsonDocument struct { - IdString string `json:"id"` - PutId string `json:"put"` - UpdateId string `json:"update"` - RemoveId string `json:"remove"` - Condition string `json:"condition"` - Fields json.RawMessage `json:"fields"` - Create bool `json:"create"` -} - // Decoder decodes documents from a JSON structure which is either an array of objects, or objects separated by newline. type Decoder struct { - buf *bufio.Reader - dec *json.Decoder + r *bufio.Reader + dec *json.Decoder + buf bytes.Buffer + array bool jsonl bool + + fieldsEnd int64 } func (d Document) String() string { @@ -139,12 +145,16 @@ func (d Document) String() string { if d.Create { sb.WriteString(", create=true") } + if d.Fields != nil { + sb.WriteString(", fields=") + sb.WriteString(string(d.Fields)) + } return sb.String() } func (d *Decoder) guessMode() error { for !d.array && !d.jsonl { - b, err := d.buf.ReadByte() + b, err := d.r.ReadByte() if err != nil { return err } @@ -152,36 +162,65 @@ func (d *Decoder) guessMode() error { if b < 0x80 && asciiSpace[b] != 0 { continue } - switch rune(b) { - case '{': + switch json.Kind(b) { + case jsonObjectStart: d.jsonl = true - case '[': + case jsonArrayStart: d.array = true default: return fmt.Errorf("unexpected token: %q", string(b)) } - if err := d.buf.UnreadByte(); err != nil { + if err := d.r.UnreadByte(); err != nil { return err } - if err := d.readArrayToken(true); err != nil { + if err := d.readArrayDelim(true); err != nil { return err } } return nil } -func (d *Decoder) readArrayToken(open bool) error { +func (d *Decoder) readNext(kind json.Kind) (json.Token, error) { + t, err := d.dec.ReadToken() + if err != nil { + return json.Token{}, err + } + if t.Kind() != kind { + return json.Token{}, fmt.Errorf("unexpected json kind: %q: want %q", t, kind) + } + return t, nil +} + +func (d *Decoder) readArrayDelim(open bool) error { if !d.array { return nil } - t, err := d.dec.Token() + kind := jsonArrayEnd + if open { + kind = jsonArrayStart + } + _, err := d.readNext(kind) + return err +} + +func (d *Decoder) readString() (string, error) { + t, err := d.readNext(jsonString) + if err != nil { + return "", err + } + return t.String(), nil +} + +func (d *Decoder) readBool() (bool, error) { + t, err := d.dec.ReadToken() if err != nil { - return err + return false, err } - if (open && t == json.Delim('[')) || (!open && t == json.Delim(']')) { - return nil + kind := t.Kind() + if kind != 't' && kind != 'f' { + return false, fmt.Errorf("unexpected json kind: %q: want %q or %q", t, 't', 'f') } - return fmt.Errorf("invalid array token: %q", t) + return t.Bool(), nil } func (d *Decoder) Decode() (Document, error) { @@ -192,60 +231,112 @@ func (d *Decoder) Decode() (Document, error) { return doc, err } +func (d *Decoder) readField(name string, doc *Document) error { + readId := false + switch name { + case "id", "put": + readId = true + doc.Operation = OperationPut + case "update": + readId = true + doc.Operation = OperationUpdate + case "remove": + readId = true + doc.Operation = OperationRemove + case "condition": + condition, err := d.readString() + if err != nil { + return err + } + doc.Condition = condition + case "create": + create, err := d.readBool() + if err != nil { + return err + } + doc.Create = create + case "fields": + if _, err := d.readNext(jsonObjectStart); err != nil { + return err + } + start := d.dec.InputOffset() - 1 + // Skip data between the most recent ending position of fields and current offset + d.buf.Next(int(start - d.fieldsEnd)) + depth := 1 + for depth > 0 { + t, err := d.dec.ReadToken() + if err != nil { + return err + } + switch t.Kind() { + case jsonObjectStart: + depth++ + case jsonObjectEnd: + depth-- + } + } + d.fieldsEnd = d.dec.InputOffset() + doc.Fields = make([]byte, int(d.fieldsEnd-start)) + if _, err := d.buf.Read(doc.Fields); err != nil { + return err + } + } + if readId { + s, err := d.readString() + if err != nil { + return err + } + id, err := ParseId(s) + if err != nil { + return err + } + doc.Id = id + } + return nil +} + func (d *Decoder) decode() (Document, error) { if err := d.guessMode(); err != nil { return Document{}, err } - if !d.dec.More() { - if err := d.readArrayToken(false); err != nil { + if d.dec.PeekKind() == jsonArrayEnd { + // Reached end of the array holding document operations + if err := d.readArrayDelim(false); err != nil { return Document{}, err } return Document{}, io.EOF } - doc := jsonDocument{} - if err := d.dec.Decode(&doc); err != nil { + // Start of document operation + if _, err := d.readNext(jsonObjectStart); err != nil { return Document{}, err } - return parseDocument(&doc) -} - -func NewDecoder(r io.Reader) *Decoder { - buf := bufio.NewReaderSize(r, 1<<26) - return &Decoder{ - buf: buf, - dec: json.NewDecoder(buf), + var doc Document +loop: + for { + switch d.dec.PeekKind() { + case jsonString: + t, err := d.dec.ReadToken() + if err != nil { + return Document{}, err + } + if err := d.readField(t.String(), &doc); err != nil { + return Document{}, err + } + default: + if _, err := d.readNext(jsonObjectEnd); err != nil { + return Document{}, err + } + break loop + } } + return doc, nil } -func parseDocument(d *jsonDocument) (Document, error) { - id := "" - var op Operation - if d.IdString != "" { - op = OperationPut - id = d.IdString - } else if d.PutId != "" { - op = OperationPut - id = d.PutId - } else if d.UpdateId != "" { - op = OperationUpdate - id = d.UpdateId - } else if d.RemoveId != "" { - op = OperationRemove - id = d.RemoveId - } else { - return Document{}, fmt.Errorf("invalid document: missing operation: %v", d) - } - docId, err := ParseId(id) - if err != nil { - return Document{}, err - } - return Document{ - Id: docId, - Operation: op, - Condition: d.Condition, - Create: d.Create, - Fields: d.Fields, - }, nil +func NewDecoder(r io.Reader) *Decoder { + sz := 1 << 26 + d := &Decoder{r: bufio.NewReaderSize(r, sz)} + d.dec = json.NewDecoder(io.TeeReader(d.r, &d.buf)) + return d } func parseError(value string) error { diff --git a/client/go/internal/vespa/document/document_test.go b/client/go/internal/vespa/document/document_test.go index 397136173bc..71400314634 100644 --- a/client/go/internal/vespa/document/document_test.go +++ b/client/go/internal/vespa/document/document_test.go @@ -113,18 +113,26 @@ func feedInput(jsonl bool) string { ` { "put": "id:ns:type::doc1", - "fields": {"foo": "123"} + "fields": { "foo" : "123", "bar": {"a": [1, 2, 3]}} }`, ` { "put": "id:ns:type::doc2", + "create": false, + "condition": "foo", "fields": {"bar": "456"} }`, ` { - "remove": "id:ns:type::doc1" + "remove": "id:ns:type::doc3" } -`} +`, + ` +{ + "put": "id:ns:type::doc4", + "create": true, + "fields": {"qux": "789"} +}`} if jsonl { return strings.Join(operations, "\n") } @@ -135,9 +143,10 @@ func testDocumentDecoder(t *testing.T, jsonLike string) { t.Helper() r := NewDecoder(strings.NewReader(jsonLike)) want := []Document{ - {Id: mustParseId("id:ns:type::doc1"), Operation: OperationPut, Fields: []byte(`{"foo": "123"}`)}, - {Id: mustParseId("id:ns:type::doc2"), Operation: OperationPut, Fields: []byte(`{"bar": "456"}`)}, - {Id: mustParseId("id:ns:type::doc1"), Operation: OperationRemove}, + {Id: mustParseId("id:ns:type::doc1"), Operation: OperationPut, Fields: []byte(`{ "foo" : "123", "bar": {"a": [1, 2, 3]}}`)}, + {Id: mustParseId("id:ns:type::doc2"), Operation: OperationPut, Condition: "foo", Fields: []byte(`{"bar": "456"}`)}, + {Id: mustParseId("id:ns:type::doc3"), Operation: OperationRemove}, + {Id: mustParseId("id:ns:type::doc4"), Operation: OperationPut, Create: true, Fields: []byte(`{"qux": "789"}`)}, } got := []Document{} for { @@ -155,10 +164,11 @@ func testDocumentDecoder(t *testing.T, jsonLike string) { } } -func TestDocumentDecoder(t *testing.T) { - testDocumentDecoder(t, feedInput(false)) - testDocumentDecoder(t, feedInput(true)) +func TestDocumentDecoderArray(t *testing.T) { testDocumentDecoder(t, feedInput(false)) } + +func TestDocumentDecoderJSONL(t *testing.T) { testDocumentDecoder(t, feedInput(true)) } +func TestDocumentDecoderInvalid(t *testing.T) { jsonLike := ` { "put": "id:ns:type::doc1", @@ -175,7 +185,7 @@ func TestDocumentDecoder(t *testing.T) { t.Errorf("unexpected error: %s", err) } _, err = r.Decode() - wantErr := "invalid json at byte offset 122: json: string of object unexpected end of JSON input" + wantErr := "invalid json at byte offset 109: json: invalid character '\\n' within string (expecting non-control character)" if err.Error() != wantErr { t.Errorf("want error %q, got %q", wantErr, err.Error()) } diff --git a/client/go/internal/vespa/document/http.go b/client/go/internal/vespa/document/http.go index a389a82cee1..8f7ac5bfe63 100644 --- a/client/go/internal/vespa/document/http.go +++ b/client/go/internal/vespa/document/http.go @@ -14,8 +14,7 @@ import ( "sync/atomic" "time" - "github.com/goccy/go-json" - + "github.com/go-json-experiment/json" "github.com/klauspost/compress/gzip" "github.com/vespa-engine/vespa/client/go/internal/build" @@ -328,8 +327,8 @@ func resultWithResponse(resp *http.Response, sentBytes int, result Result, elaps result.Status = StatusTransportFailure } var body struct { - Message string `json:"message"` - Trace json.RawMessage `json:"trace"` + Message string `json:"message"` + Trace json.RawValue `json:"trace"` } buf.Reset() written, err := io.Copy(buf, resp.Body) |