From 238c3dc8f956d03e488e7b8b1278c46630b1767b Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 16 Feb 2018 11:00:51 +0000 Subject: Add --json option to vespa-gen-testdocs. --- .../apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp | 287 +++++++++++++++------ 1 file changed, 203 insertions(+), 84 deletions(-) (limited to 'searchcore') diff --git a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp index 0e1bad796f5..f40502e14d1 100644 --- a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp +++ b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp @@ -40,6 +40,22 @@ prependBaseDir(const string &baseDir, return baseDir + "/" + file; } +std::vector +splitArg(const string &arg) +{ + std::vector argv; + string::size_type pos = 0; + for (;;) { + auto found = arg.find(',', pos); + if (found == string::npos) { + break; + } + argv.emplace_back(arg.substr(pos, found - pos)); + pos = found + 1; + } + argv.emplace_back(arg.substr(pos, string::npos)); + return argv; +} void shafile(const string &baseDir, @@ -148,15 +164,12 @@ protected: public: FieldGenerator(const string &name); - - virtual - ~FieldGenerator(); - - virtual void - setup(); - - virtual void - generate(vespalib::asciistream &doc, uint32_t id) = 0; + virtual ~FieldGenerator(); + virtual void setup(); + virtual void generateXML(vespalib::asciistream &doc, uint32_t id); + virtual void generateJSON(vespalib::asciistream &doc, uint32_t id); + virtual void generateValue(vespalib::asciistream &doc, uint32_t id); + virtual bool isString() const { return true; } }; @@ -170,12 +183,103 @@ FieldGenerator::~FieldGenerator() { } - void FieldGenerator::setup() { } +void +FieldGenerator::generateXML(vespalib::asciistream &doc, uint32_t id) +{ + doc << " <" << _name << ">"; + generateValue(doc, id); + doc << "\n"; +} + +void +FieldGenerator::generateJSON(vespalib::asciistream &doc, uint32_t id) +{ + doc << "\"" << _name << "\": "; + bool needQuote = isString(); + if (needQuote) { + doc << "\""; + } + generateValue(doc, id); + if (needQuote) { + doc << "\""; + } +} + +void +FieldGenerator::generateValue(vespalib::asciistream &, uint32_t) +{ +} + +class ConstTextFieldGenerator : public FieldGenerator +{ + string _value; +public: + ConstTextFieldGenerator(std::vector argv); + virtual ~ConstTextFieldGenerator() override; + virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override; +}; + +ConstTextFieldGenerator::ConstTextFieldGenerator(std::vector argv) + : FieldGenerator(argv[0]), + _value() +{ + if (argv.size() > 1) { + _value = argv[1]; + } +} + +ConstTextFieldGenerator::~ConstTextFieldGenerator() = default; + +void +ConstTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t) +{ + doc << _value; +} + +class PrefixTextFieldGenerator : public FieldGenerator +{ + string _prefix; + uint32_t _mod; + uint32_t _div; +public: + PrefixTextFieldGenerator(std::vector argv); + virtual ~PrefixTextFieldGenerator() override; + virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override; +}; + +PrefixTextFieldGenerator::PrefixTextFieldGenerator(std::vector argv) + : FieldGenerator(argv[0]), + _prefix(), + _mod(std::numeric_limits::max()), + _div(1u) +{ + if (argv.size() > 1) { + _prefix = argv[1]; + if (argv.size() > 2) { + if (!argv[2].empty()) { + _mod = atol(argv[2].c_str()); + } + if (argv.size() > 3) { + if (!argv[3].empty()) { + _div = atol(argv[3].c_str()); + } + } + } + } +} + +PrefixTextFieldGenerator::~PrefixTextFieldGenerator() = default; + +void +PrefixTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t id) +{ + doc << _prefix << ((id / _div) % _mod); +} class RandTextFieldGenerator : public FieldGenerator { @@ -191,15 +295,9 @@ public: uint32_t numWords, uint32_t minFill, uint32_t maxFill); - - virtual - ~RandTextFieldGenerator(); - - virtual void - setup() override; - - virtual void - generate(vespalib::asciistream &doc, uint32_t id) override; + virtual ~RandTextFieldGenerator(); + virtual void setup() override; + virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override; }; @@ -235,10 +333,8 @@ RandTextFieldGenerator::setup() void -RandTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id) +RandTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t) { - (void) id; - doc << " <" << _name << ">"; uint32_t gLen = _minFill + _rnd.lrand48() % (_randFill + 1); bool first = true; for (uint32_t i = 0; i < gLen; ++i) { @@ -250,10 +346,8 @@ RandTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id) assert(s.size() > 0); doc << s; } - doc << "\n"; } - class ModTextFieldGenerator : public FieldGenerator { search::Rand48 &_rnd; @@ -263,12 +357,8 @@ public: ModTextFieldGenerator(const string &name, search::Rand48 &rnd, const std::vector &mods); - - virtual - ~ModTextFieldGenerator(); - - virtual void - generate(vespalib::asciistream &doc, uint32_t id) override; + virtual ~ModTextFieldGenerator(); + virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override; }; @@ -288,10 +378,9 @@ ModTextFieldGenerator::~ModTextFieldGenerator() void -ModTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id) +ModTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t id) { typedef std::vector::const_iterator MI; - doc << " <" << _name << ">"; bool first = true; for (MI mi(_mods.begin()), me(_mods.end()); mi != me; ++mi) { uint32_t m = *mi; @@ -300,7 +389,6 @@ ModTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id) first = false; doc << "w" << m << "w" << (id % m); } - doc << "\n"; } @@ -308,12 +396,8 @@ class IdTextFieldGenerator : public FieldGenerator { public: IdTextFieldGenerator(const string &name); - - virtual - ~IdTextFieldGenerator(); - - virtual void - generate(vespalib::asciistream &doc, uint32_t id) override; + virtual ~IdTextFieldGenerator(); + virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override; }; @@ -329,11 +413,9 @@ IdTextFieldGenerator::~IdTextFieldGenerator() void -IdTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id) +IdTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t id) { - doc << " <" << _name << ">"; doc << id; - doc << "\n"; } @@ -348,12 +430,9 @@ public: search::Rand48 &rnd, uint32_t low, uint32_t count); - - virtual - ~RandIntFieldGenerator(); - - virtual void - generate(vespalib::asciistream &doc, uint32_t id) override; + virtual ~RandIntFieldGenerator(); + virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override; + virtual bool isString() const override { return false; } }; @@ -376,13 +455,10 @@ RandIntFieldGenerator::~RandIntFieldGenerator() void -RandIntFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id) +RandIntFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t) { - (void) id; - doc << " <" << _name << ">"; uint32_t r = _low + _rnd.lrand48() % _count; doc << r; - doc << "\n"; } class DocumentGenerator @@ -399,17 +475,13 @@ public: DocumentGenerator(const string &docType, const string &idPrefix, const FieldVec &fields); - ~DocumentGenerator(); - - void - generate(uint32_t id); - - void - generate(uint32_t docMin, uint32_t docCount, - const string &baseDir, - const string &feedFileName, - bool headers); + void generateXML(uint32_t id); + void generateJSON(uint32_t id); + void generate(uint32_t docMin, uint32_t docIdLimit, + const string &baseDir, + const string &feedFileName, + bool headers, bool json); }; @@ -440,40 +512,68 @@ DocumentGenerator::setup() void -DocumentGenerator::generate(uint32_t id) +DocumentGenerator::generateXML(uint32_t id) { _doc.clear(); - _doc << "\n"; - typedef FieldVec::const_iterator FI; - for (FI i(_fields.begin()), ie(_fields.end()); i != ie; ++i) { - (*i)->generate(_doc, id); + for (const auto &field : _fields) { + field->generateXML(_doc, id); } _doc << "\n"; } +void +DocumentGenerator::generateJSON(uint32_t id) +{ + _doc.clear(); + _doc << " { \"put\": \"" << _idPrefix << id << "\",\n \"fields\": {"; + bool first = true; + for (const auto &field : _fields) { + if (!first) { + _doc << ","; + } + first = false; + _doc << "\n "; + field->generateJSON(_doc, id); + } + _doc << "\n }\n }"; +} void -DocumentGenerator::generate(uint32_t docMin, uint32_t docCount, +DocumentGenerator::generate(uint32_t docMin, uint32_t docIdLimit, const string &baseDir, const string &feedFileName, - bool headers) + bool headers, bool json) { string fullName(prependBaseDir(baseDir, feedFileName)); FastOS_File::Delete(fullName.c_str()); Fast_BufferedFile f(new FastOS_File); f.WriteOpen(fullName.c_str()); - if (headers) { - f.WriteString("\n"); - f.WriteString("\n"); - } - uint32_t docLim = docMin + docCount; - for (uint32_t id = docMin; id < docLim; ++id) { - generate(id); - f.WriteBuf(_doc.c_str(), _doc.size()); - } - if (headers) { - f.WriteString("\n"); + if (json) { + bool first = true; + f.WriteString("[\n"); + for (uint32_t id = docMin; id < docIdLimit; ++id) { + if (!first) { + f.WriteString(",\n"); + } + first = false; + generateJSON(id); + f.WriteBuf(_doc.c_str(), _doc.size()); + } + f.WriteString("\n]\n"); + } else { + if (headers) { + f.WriteString("\n"); + f.WriteString("\n"); + } + for (uint32_t id = docMin; id < docIdLimit; ++id) { + generateXML(id); + f.WriteBuf(_doc.c_str(), _doc.size()); + } + if (headers) { + f.WriteString("\n"); + } } f.Flush(); f.Close(); @@ -522,6 +622,7 @@ class GenTestDocsApp : public SubApp search::Rand48 _rnd; string _outFile; bool _headers; + bool _json; public: GenTestDocsApp(FastOS_Application &app) @@ -537,7 +638,8 @@ public: _mods(), _rnd(), _outFile(), - _headers(false) + _headers(false), + _json(false) { _mods.push_back(2); _mods.push_back(3); @@ -572,6 +674,8 @@ GenTestDocsApp::usage(bool showHeader) cerr << "vespa-gen-testdocs gentestdocs\n" " [--basedir basedir]\n" + " [--consttextfield name]\n" + " [--prefixtextfield name]\n" " [--randtextfield name]\n" " [--modtextfield name]\n" " [--idtextfield name]\n" @@ -581,6 +685,7 @@ GenTestDocsApp::usage(bool showHeader) " [--numwords numWords]\n" " [--doctype docType]\n" " [--headers]\n" + " [--json]\n" " outFile\n"; } @@ -592,6 +697,8 @@ GenTestDocsApp::getOptions() int longopt_index = 0; static struct option longopts[] = { { "basedir", 1, NULL, 0 }, + { "consttextfield", 1, NULL, 0 }, + { "prefixtextfield", 1, NULL, 0 }, { "randtextfield", 1, NULL, 0 }, { "modtextfield", 1, NULL, 0 }, { "idtextfield", 1, NULL, 0 }, @@ -601,10 +708,13 @@ GenTestDocsApp::getOptions() { "numwords", 1, NULL, 0 }, { "doctype", 1, NULL, 0 }, { "headers", 0, NULL, 0 }, + { "json", 0, NULL, 0 }, { NULL, 0, NULL, 0 } }; enum longopts_enum { LONGOPT_BASEDIR, + LONGOPT_CONSTTEXTFIELD, + LONGOPT_PREFIXTEXTFIELD, LONGOPT_RANDTEXTFIELD, LONGOPT_MODTEXTFIELD, LONGOPT_IDTEXTFIELD, @@ -613,7 +723,8 @@ GenTestDocsApp::getOptions() LONGOPT_MINDOCID, LONGOPT_NUMWORDS, LONGOPT_DOCTYPE, - LONGOPT_HEADERS + LONGOPT_HEADERS, + LONGOPT_JSON }; int optIndex = 2; while ((c = _app.GetOptLong("v", @@ -628,6 +739,12 @@ GenTestDocsApp::getOptions() case LONGOPT_BASEDIR: _baseDir = optArgument; break; + case LONGOPT_CONSTTEXTFIELD: + _fields.emplace_back(std::make_shared(splitArg(optArgument))); + break; + case LONGOPT_PREFIXTEXTFIELD: + _fields.emplace_back(std::make_shared(splitArg(optArgument))); + break; case LONGOPT_RANDTEXTFIELD: g.reset(new RandTextFieldGenerator(optArgument, _rnd, @@ -653,7 +770,6 @@ GenTestDocsApp::getOptions() 100000)); _fields.push_back(g); break; - break; case LONGOPT_DOCIDLIMIT: _docIdLimit = atoi(optArgument); break; @@ -669,6 +785,9 @@ GenTestDocsApp::getOptions() case LONGOPT_HEADERS: _headers = true; break; + case LONGOPT_JSON: + _json = true; + break; default: if (optArgument != NULL) { LOG(error, @@ -708,7 +827,7 @@ GenTestDocsApp::run() idPrefix, _fields); LOG(info, "generating %s", _outFile.c_str()); - dg.generate(_minDocId, _docIdLimit, _baseDir, _outFile, _headers); + dg.generate(_minDocId, _docIdLimit, _baseDir, _outFile, _headers, _json); LOG(info, "done"); return 0; } -- cgit v1.2.3