summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@oath.com>2018-02-16 11:00:51 +0000
committerTor Egge <Tor.Egge@oath.com>2018-02-16 11:00:51 +0000
commit238c3dc8f956d03e488e7b8b1278c46630b1767b (patch)
tree17dee23beb11a05cb52a5af87e3fba95e4439bb1 /searchcore
parent99d36b58fd993d3eb0c7e98d3fad9ab430593174 (diff)
Add --json option to vespa-gen-testdocs.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp287
1 files changed, 203 insertions, 84 deletions
diff --git a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp
index 0e1bad796f5..f40502e14d1 100644
--- a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp
+++ b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp
@@ -40,6 +40,22 @@ prependBaseDir(const string &baseDir,
return baseDir + "/" + file;
}
+std::vector<string>
+splitArg(const string &arg)
+{
+ std::vector<string> argv;
+ string::size_type pos = 0;
+ for (;;) {
+ auto found = arg.find(',', pos);
+ if (found == string::npos) {
+ break;
+ }
+ argv.emplace_back(arg.substr(pos, found - pos));
+ pos = found + 1;
+ }
+ argv.emplace_back(arg.substr(pos, string::npos));
+ return argv;
+}
void
shafile(const string &baseDir,
@@ -148,15 +164,12 @@ protected:
public:
FieldGenerator(const string &name);
-
- virtual
- ~FieldGenerator();
-
- virtual void
- setup();
-
- virtual void
- generate(vespalib::asciistream &doc, uint32_t id) = 0;
+ virtual ~FieldGenerator();
+ virtual void setup();
+ virtual void generateXML(vespalib::asciistream &doc, uint32_t id);
+ virtual void generateJSON(vespalib::asciistream &doc, uint32_t id);
+ virtual void generateValue(vespalib::asciistream &doc, uint32_t id);
+ virtual bool isString() const { return true; }
};
@@ -170,12 +183,103 @@ FieldGenerator::~FieldGenerator()
{
}
-
void
FieldGenerator::setup()
{
}
+void
+FieldGenerator::generateXML(vespalib::asciistream &doc, uint32_t id)
+{
+ doc << " <" << _name << ">";
+ generateValue(doc, id);
+ doc << "</" << _name << ">\n";
+}
+
+void
+FieldGenerator::generateJSON(vespalib::asciistream &doc, uint32_t id)
+{
+ doc << "\"" << _name << "\": ";
+ bool needQuote = isString();
+ if (needQuote) {
+ doc << "\"";
+ }
+ generateValue(doc, id);
+ if (needQuote) {
+ doc << "\"";
+ }
+}
+
+void
+FieldGenerator::generateValue(vespalib::asciistream &, uint32_t)
+{
+}
+
+class ConstTextFieldGenerator : public FieldGenerator
+{
+ string _value;
+public:
+ ConstTextFieldGenerator(std::vector<string> argv);
+ virtual ~ConstTextFieldGenerator() override;
+ virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override;
+};
+
+ConstTextFieldGenerator::ConstTextFieldGenerator(std::vector<string> argv)
+ : FieldGenerator(argv[0]),
+ _value()
+{
+ if (argv.size() > 1) {
+ _value = argv[1];
+ }
+}
+
+ConstTextFieldGenerator::~ConstTextFieldGenerator() = default;
+
+void
+ConstTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t)
+{
+ doc << _value;
+}
+
+class PrefixTextFieldGenerator : public FieldGenerator
+{
+ string _prefix;
+ uint32_t _mod;
+ uint32_t _div;
+public:
+ PrefixTextFieldGenerator(std::vector<string> argv);
+ virtual ~PrefixTextFieldGenerator() override;
+ virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override;
+};
+
+PrefixTextFieldGenerator::PrefixTextFieldGenerator(std::vector<string> argv)
+ : FieldGenerator(argv[0]),
+ _prefix(),
+ _mod(std::numeric_limits<uint32_t>::max()),
+ _div(1u)
+{
+ if (argv.size() > 1) {
+ _prefix = argv[1];
+ if (argv.size() > 2) {
+ if (!argv[2].empty()) {
+ _mod = atol(argv[2].c_str());
+ }
+ if (argv.size() > 3) {
+ if (!argv[3].empty()) {
+ _div = atol(argv[3].c_str());
+ }
+ }
+ }
+ }
+}
+
+PrefixTextFieldGenerator::~PrefixTextFieldGenerator() = default;
+
+void
+PrefixTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t id)
+{
+ doc << _prefix << ((id / _div) % _mod);
+}
class RandTextFieldGenerator : public FieldGenerator
{
@@ -191,15 +295,9 @@ public:
uint32_t numWords,
uint32_t minFill,
uint32_t maxFill);
-
- virtual
- ~RandTextFieldGenerator();
-
- virtual void
- setup() override;
-
- virtual void
- generate(vespalib::asciistream &doc, uint32_t id) override;
+ virtual ~RandTextFieldGenerator();
+ virtual void setup() override;
+ virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override;
};
@@ -235,10 +333,8 @@ RandTextFieldGenerator::setup()
void
-RandTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id)
+RandTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t)
{
- (void) id;
- doc << " <" << _name << ">";
uint32_t gLen = _minFill + _rnd.lrand48() % (_randFill + 1);
bool first = true;
for (uint32_t i = 0; i < gLen; ++i) {
@@ -250,10 +346,8 @@ RandTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id)
assert(s.size() > 0);
doc << s;
}
- doc << "</" << _name << ">\n";
}
-
class ModTextFieldGenerator : public FieldGenerator
{
search::Rand48 &_rnd;
@@ -263,12 +357,8 @@ public:
ModTextFieldGenerator(const string &name,
search::Rand48 &rnd,
const std::vector<uint32_t> &mods);
-
- virtual
- ~ModTextFieldGenerator();
-
- virtual void
- generate(vespalib::asciistream &doc, uint32_t id) override;
+ virtual ~ModTextFieldGenerator();
+ virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override;
};
@@ -288,10 +378,9 @@ ModTextFieldGenerator::~ModTextFieldGenerator()
void
-ModTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id)
+ModTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t id)
{
typedef std::vector<uint32_t>::const_iterator MI;
- doc << " <" << _name << ">";
bool first = true;
for (MI mi(_mods.begin()), me(_mods.end()); mi != me; ++mi) {
uint32_t m = *mi;
@@ -300,7 +389,6 @@ ModTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id)
first = false;
doc << "w" << m << "w" << (id % m);
}
- doc << "</" << _name << ">\n";
}
@@ -308,12 +396,8 @@ class IdTextFieldGenerator : public FieldGenerator
{
public:
IdTextFieldGenerator(const string &name);
-
- virtual
- ~IdTextFieldGenerator();
-
- virtual void
- generate(vespalib::asciistream &doc, uint32_t id) override;
+ virtual ~IdTextFieldGenerator();
+ virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override;
};
@@ -329,11 +413,9 @@ IdTextFieldGenerator::~IdTextFieldGenerator()
void
-IdTextFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id)
+IdTextFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t id)
{
- doc << " <" << _name << ">";
doc << id;
- doc << "</" << _name << ">\n";
}
@@ -348,12 +430,9 @@ public:
search::Rand48 &rnd,
uint32_t low,
uint32_t count);
-
- virtual
- ~RandIntFieldGenerator();
-
- virtual void
- generate(vespalib::asciistream &doc, uint32_t id) override;
+ virtual ~RandIntFieldGenerator();
+ virtual void generateValue(vespalib::asciistream &doc, uint32_t id) override;
+ virtual bool isString() const override { return false; }
};
@@ -376,13 +455,10 @@ RandIntFieldGenerator::~RandIntFieldGenerator()
void
-RandIntFieldGenerator::generate(vespalib::asciistream &doc, uint32_t id)
+RandIntFieldGenerator::generateValue(vespalib::asciistream &doc, uint32_t)
{
- (void) id;
- doc << " <" << _name << ">";
uint32_t r = _low + _rnd.lrand48() % _count;
doc << r;
- doc << "</" << _name << ">\n";
}
class DocumentGenerator
@@ -399,17 +475,13 @@ public:
DocumentGenerator(const string &docType,
const string &idPrefix,
const FieldVec &fields);
-
~DocumentGenerator();
-
- void
- generate(uint32_t id);
-
- void
- generate(uint32_t docMin, uint32_t docCount,
- const string &baseDir,
- const string &feedFileName,
- bool headers);
+ void generateXML(uint32_t id);
+ void generateJSON(uint32_t id);
+ void generate(uint32_t docMin, uint32_t docIdLimit,
+ const string &baseDir,
+ const string &feedFileName,
+ bool headers, bool json);
};
@@ -440,40 +512,68 @@ DocumentGenerator::setup()
void
-DocumentGenerator::generate(uint32_t id)
+DocumentGenerator::generateXML(uint32_t id)
{
_doc.clear();
- _doc << "<document documenttype=\"" << _docType << "\" documentid= \"" <<
+ _doc << "<document documenttype=\"" << _docType << "\" documentid=\"" <<
_idPrefix << id << "\">\n";
- typedef FieldVec::const_iterator FI;
- for (FI i(_fields.begin()), ie(_fields.end()); i != ie; ++i) {
- (*i)->generate(_doc, id);
+ for (const auto &field : _fields) {
+ field->generateXML(_doc, id);
}
_doc << "</document>\n";
}
+void
+DocumentGenerator::generateJSON(uint32_t id)
+{
+ _doc.clear();
+ _doc << " { \"put\": \"" << _idPrefix << id << "\",\n \"fields\": {";
+ bool first = true;
+ for (const auto &field : _fields) {
+ if (!first) {
+ _doc << ",";
+ }
+ first = false;
+ _doc << "\n ";
+ field->generateJSON(_doc, id);
+ }
+ _doc << "\n }\n }";
+}
void
-DocumentGenerator::generate(uint32_t docMin, uint32_t docCount,
+DocumentGenerator::generate(uint32_t docMin, uint32_t docIdLimit,
const string &baseDir,
const string &feedFileName,
- bool headers)
+ bool headers, bool json)
{
string fullName(prependBaseDir(baseDir, feedFileName));
FastOS_File::Delete(fullName.c_str());
Fast_BufferedFile f(new FastOS_File);
f.WriteOpen(fullName.c_str());
- if (headers) {
- f.WriteString("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n");
- f.WriteString("<vespafeed>\n");
- }
- uint32_t docLim = docMin + docCount;
- for (uint32_t id = docMin; id < docLim; ++id) {
- generate(id);
- f.WriteBuf(_doc.c_str(), _doc.size());
- }
- if (headers) {
- f.WriteString("</vespafeed>\n");
+ if (json) {
+ bool first = true;
+ f.WriteString("[\n");
+ for (uint32_t id = docMin; id < docIdLimit; ++id) {
+ if (!first) {
+ f.WriteString(",\n");
+ }
+ first = false;
+ generateJSON(id);
+ f.WriteBuf(_doc.c_str(), _doc.size());
+ }
+ f.WriteString("\n]\n");
+ } else {
+ if (headers) {
+ f.WriteString("<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n");
+ f.WriteString("<vespafeed>\n");
+ }
+ for (uint32_t id = docMin; id < docIdLimit; ++id) {
+ generateXML(id);
+ f.WriteBuf(_doc.c_str(), _doc.size());
+ }
+ if (headers) {
+ f.WriteString("</vespafeed>\n");
+ }
}
f.Flush();
f.Close();
@@ -522,6 +622,7 @@ class GenTestDocsApp : public SubApp
search::Rand48 _rnd;
string _outFile;
bool _headers;
+ bool _json;
public:
GenTestDocsApp(FastOS_Application &app)
@@ -537,7 +638,8 @@ public:
_mods(),
_rnd(),
_outFile(),
- _headers(false)
+ _headers(false),
+ _json(false)
{
_mods.push_back(2);
_mods.push_back(3);
@@ -572,6 +674,8 @@ GenTestDocsApp::usage(bool showHeader)
cerr <<
"vespa-gen-testdocs gentestdocs\n"
" [--basedir basedir]\n"
+ " [--consttextfield name]\n"
+ " [--prefixtextfield name]\n"
" [--randtextfield name]\n"
" [--modtextfield name]\n"
" [--idtextfield name]\n"
@@ -581,6 +685,7 @@ GenTestDocsApp::usage(bool showHeader)
" [--numwords numWords]\n"
" [--doctype docType]\n"
" [--headers]\n"
+ " [--json]\n"
" outFile\n";
}
@@ -592,6 +697,8 @@ GenTestDocsApp::getOptions()
int longopt_index = 0;
static struct option longopts[] = {
{ "basedir", 1, NULL, 0 },
+ { "consttextfield", 1, NULL, 0 },
+ { "prefixtextfield", 1, NULL, 0 },
{ "randtextfield", 1, NULL, 0 },
{ "modtextfield", 1, NULL, 0 },
{ "idtextfield", 1, NULL, 0 },
@@ -601,10 +708,13 @@ GenTestDocsApp::getOptions()
{ "numwords", 1, NULL, 0 },
{ "doctype", 1, NULL, 0 },
{ "headers", 0, NULL, 0 },
+ { "json", 0, NULL, 0 },
{ NULL, 0, NULL, 0 }
};
enum longopts_enum {
LONGOPT_BASEDIR,
+ LONGOPT_CONSTTEXTFIELD,
+ LONGOPT_PREFIXTEXTFIELD,
LONGOPT_RANDTEXTFIELD,
LONGOPT_MODTEXTFIELD,
LONGOPT_IDTEXTFIELD,
@@ -613,7 +723,8 @@ GenTestDocsApp::getOptions()
LONGOPT_MINDOCID,
LONGOPT_NUMWORDS,
LONGOPT_DOCTYPE,
- LONGOPT_HEADERS
+ LONGOPT_HEADERS,
+ LONGOPT_JSON
};
int optIndex = 2;
while ((c = _app.GetOptLong("v",
@@ -628,6 +739,12 @@ GenTestDocsApp::getOptions()
case LONGOPT_BASEDIR:
_baseDir = optArgument;
break;
+ case LONGOPT_CONSTTEXTFIELD:
+ _fields.emplace_back(std::make_shared<ConstTextFieldGenerator>(splitArg(optArgument)));
+ break;
+ case LONGOPT_PREFIXTEXTFIELD:
+ _fields.emplace_back(std::make_shared<PrefixTextFieldGenerator>(splitArg(optArgument)));
+ break;
case LONGOPT_RANDTEXTFIELD:
g.reset(new RandTextFieldGenerator(optArgument,
_rnd,
@@ -653,7 +770,6 @@ GenTestDocsApp::getOptions()
100000));
_fields.push_back(g);
break;
- break;
case LONGOPT_DOCIDLIMIT:
_docIdLimit = atoi(optArgument);
break;
@@ -669,6 +785,9 @@ GenTestDocsApp::getOptions()
case LONGOPT_HEADERS:
_headers = true;
break;
+ case LONGOPT_JSON:
+ _json = true;
+ break;
default:
if (optArgument != NULL) {
LOG(error,
@@ -708,7 +827,7 @@ GenTestDocsApp::run()
idPrefix,
_fields);
LOG(info, "generating %s", _outFile.c_str());
- dg.generate(_minDocId, _docIdLimit, _baseDir, _outFile, _headers);
+ dg.generate(_minDocId, _docIdLimit, _baseDir, _outFile, _headers, _json);
LOG(info, "done");
return 0;
}