Diffstat (limited to 'vespa-hadoop/src/test')
21 files changed, 0 insertions, 1597 deletions
diff --git a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/MapReduceTest.java b/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/MapReduceTest.java deleted file mode 100644 index d56cd818de2..00000000000 --- a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/MapReduceTest.java +++ /dev/null @@ -1,200 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hadoop.pig; - -import com.fasterxml.jackson.core.JsonEncoding; -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonGenerator; -import com.yahoo.vespa.hadoop.mapreduce.VespaOutputFormat; -import com.yahoo.vespa.hadoop.mapreduce.util.VespaConfiguration; -import com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.test.PathUtils; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.io.BufferedInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.StringTokenizer; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class MapReduceTest { - - protected static File hdfsBaseDir; - protected static FileSystem hdfs; - protected static Configuration conf; - protected static MiniDFSCluster cluster; - - protected static Path metricsJsonPath; - protected static Path metricsCsvPath; - - @BeforeAll - public static void setUp() throws IOException { - hdfsBaseDir = new File(PathUtils.getTestDir(MapReduceTest.class).getCanonicalPath()); - - conf = new HdfsConfiguration(); - conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsBaseDir.getAbsolutePath()); - conf.set(VespaConfiguration.DRYRUN, "true"); - conf.set(VespaConfiguration.ENDPOINT, "endpoint-does-not-matter-in-dryrun"); - - cluster = new MiniDFSCluster.Builder(conf).build(); - hdfs = FileSystem.get(conf); - - metricsJsonPath = new Path("metrics_json"); - metricsCsvPath = new Path("metrics_csv"); - copyToHdfs("src/test/resources/operations_data.json", metricsJsonPath, "data"); - copyToHdfs("src/test/resources/tabular_data.csv", metricsCsvPath, "data"); - } - - @AfterAll - public static void tearDown() throws IOException { - Path testDir = new Path(hdfsBaseDir.getParent()); - hdfs.delete(testDir, true); - cluster.shutdown(); - LocalFileSystem localFileSystem = FileSystem.getLocal(conf); - localFileSystem.delete(testDir, true); - } - - @Test - public void requireThatMapOnlyJobSucceeds() throws Exception { - Job job = Job.getInstance(conf); - job.setJarByClass(MapReduceTest.class); - job.setMapperClass(FeedMapper.class); - job.setOutputFormatClass(VespaOutputFormat.class); - job.setMapOutputValueClass(Text.class); - - FileInputFormat.setInputPaths(job, metricsJsonPath); - - boolean success = 
job.waitForCompletion(true); - assertTrue(success, "Job Failed"); - - VespaCounters counters = VespaCounters.get(job); - assertEquals(10, counters.getDocumentsSent()); - assertEquals(0, counters.getDocumentsFailed()); - assertEquals(10, counters.getDocumentsOk()); - } - - @Test - public void requireThatMapReduceJobSucceeds() throws Exception { - Job job = Job.getInstance(conf); - job.setJarByClass(MapReduceTest.class); - job.setMapperClass(FeedMapper.class); - job.setOutputFormatClass(VespaOutputFormat.class); - job.setMapOutputValueClass(Text.class); - job.setReducerClass(FeedReducer.class); - job.setNumReduceTasks(1); - - FileInputFormat.setInputPaths(job, metricsJsonPath); - - boolean success = job.waitForCompletion(true); - assertTrue(success, "Job Failed"); - - VespaCounters counters = VespaCounters.get(job); - assertEquals(10, counters.getDocumentsSent()); - assertEquals(0, counters.getDocumentsFailed()); - assertEquals(10, counters.getDocumentsOk()); - } - - - @Test - public void requireThatTransformMapJobSucceeds() throws Exception { - Job job = Job.getInstance(conf); - job.setJarByClass(MapReduceTest.class); - job.setMapperClass(ParsingMapper.class); - job.setOutputFormatClass(VespaOutputFormat.class); - job.setMapOutputValueClass(Text.class); - job.setReducerClass(FeedReducer.class); - job.setNumReduceTasks(1); - - FileInputFormat.setInputPaths(job, metricsCsvPath); - - boolean success = job.waitForCompletion(true); - assertTrue(success, "Job Failed"); - - VespaCounters counters = VespaCounters.get(job); - assertEquals(10, counters.getDocumentsSent()); - assertEquals(0, counters.getDocumentsFailed()); - assertEquals(10, counters.getDocumentsOk()); - assertEquals(0, counters.getDocumentsSkipped()); - } - - - private static void copyToHdfs(String localFile, Path hdfsDir, String hdfsName) throws IOException { - Path hdfsPath = new Path(hdfsDir, hdfsName); - FSDataOutputStream out = hdfs.create(hdfsPath); - - try (InputStream in = new BufferedInputStream(new FileInputStream(localFile))) { - int len; - byte[] buffer = new byte[1024]; - while ((len = in.read(buffer)) > 0) { - out.write(buffer, 0, len); - } - } finally { - out.close(); - } - } - - public static class FeedMapper extends Mapper<LongWritable, Text, LongWritable, Text> { - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - context.write(key, value); - } - } - - public static class FeedReducer extends Reducer<Object, Text, LongWritable, Text> { - public void reduce(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - context.write(key, value); - } - } - - public static class ParsingMapper extends Mapper<LongWritable, Text, LongWritable, Text> { - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String line = value.toString(); - if (line == null || line.length() == 0) - return; - - StringTokenizer tokenizer = new StringTokenizer(line); - long date = Long.parseLong(tokenizer.nextToken()); - String metricName = tokenizer.nextToken(); - long metricValue = Long.parseLong(tokenizer.nextToken()); - String application = tokenizer.nextToken(); - - String docid = "id:"+application+":metric::"+metricName+"-"+date; - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - JsonGenerator g = new JsonFactory().createGenerator(out, JsonEncoding.UTF8); - - g.writeStartObject(); - g.writeObjectFieldStart("fields"); - g.writeNumberField("date", date); - g.writeStringField("name", 
metricName); - g.writeNumberField("value", metricValue); - g.writeStringField("application", application); - g.writeEndObject(); - g.writeStringField("put", docid); - g.writeEndObject(); - g.close(); - - context.write(key, new Text(out.toString())); - } - } - - -} diff --git a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaDocumentOperationTest.java b/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaDocumentOperationTest.java deleted file mode 100644 index ec20e82763c..00000000000 --- a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaDocumentOperationTest.java +++ /dev/null @@ -1,633 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hadoop.pig; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.pig.data.BagFactory; -import org.apache.pig.data.DataBag; -import org.apache.pig.data.DataByteArray; -import org.apache.pig.data.DataType; -import org.apache.pig.data.SortedDataBag; -import org.apache.pig.data.Tuple; -import org.apache.pig.data.TupleFactory; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class VespaDocumentOperationTest { - private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); - private final PrintStream originalOut = System.out; - - @BeforeEach - public void setUpStreams() { - System.setOut(new PrintStream(outContent)); - } - - @AfterEach - public void restoreStreams() { - System.setOut(originalOut); - } - @Test - public void requireThatUDFReturnsCorrectJson() throws Exception { - String json = getDocumentOperationJson("docid=id:<application>:metrics::<name>-<date>"); - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.path("fields"); - - // operation put is default - assertEquals("id:testapp:metrics::clicks-20160112", root.get("put").asText()); - assertEquals("testapp", fields.get("application").asText()); - assertEquals("clicks", fields.get("name").asText()); - assertEquals(3, fields.get("value").asInt()); - } - - - @Test - public void requireThatUDFSupportsUpdateAssign() throws IOException { - String json = getDocumentOperationJson("docid=id:<application>:metrics::<name>-<date>", "operation=update"); - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.path("fields"); - - assertEquals("id:testapp:metrics::clicks-20160112", root.get("update").asText()); - assertEquals("testapp", fields.get("application").get("assign").asText()); - assertEquals("clicks", fields.get("name").get("assign").asText()); - assertEquals(3, fields.get("value").get("assign").asInt()); - } - - @Test - public void requireThatUDFSupportsConditionalUpdateAssign() throws IOException { - String json = getDocumentOperationJson("docid=id:<application>:metrics::<name>-<date>", "operation=update", 
"condition=clicks < <value>"); - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.path("fields"); - - assertEquals("id:testapp:metrics::clicks-20160112", root.get("update").asText()); - assertEquals("clicks < 3", root.get("condition").asText()); - assertEquals("testapp", fields.get("application").get("assign").asText()); - assertEquals("clicks", fields.get("name").get("assign").asText()); - assertEquals(3, fields.get("value").get("assign").asInt()); - } - - @Test - public void requireThatUDFSupportsCreateIfNonExistent() throws IOException { - String json = getDocumentOperationJson("docid=id:<application>:metrics::<name>-<date>", "operation=update", - "create-if-non-existent=true"); - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.path("fields"); - - assertEquals("id:testapp:metrics::clicks-20160112", root.get("update").asText()); - assertTrue(root.get("create").asBoolean()); - assertEquals("testapp", fields.get("application").get("assign").asText()); - assertEquals("clicks", fields.get("name").get("assign").asText()); - assertEquals(3, fields.get("value").get("assign").asInt()); - } - - - @Test - public void requireThatUDFReturnsNullForMissingConfig() throws Exception { - String json = getDocumentOperationJson(); - assertNull(json); - } - - - @Test - public void requireThatUDFCorrectlyGeneratesRemoveBagAsMapOperation() throws Exception { - DataBag bag = BagFactory.getInstance().newDefaultBag(); - - Schema innerObjectSchema = new Schema(); - Tuple innerObjectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("year", DataType.CHARARRAY, "2020", innerObjectSchema, innerObjectTuple); - addToTuple("month", DataType.INTEGER, 3, innerObjectSchema, innerObjectTuple); - - Schema objectSchema = new Schema(); - Tuple objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "234566", objectSchema, objectTuple); - addToTuple("value", DataType.TUPLE, innerObjectTuple,innerObjectSchema,objectSchema, objectTuple); - - Schema bagSchema = new Schema(); - addToBagWithSchema("firstLayerTuple",DataType.TUPLE,objectTuple,objectSchema,bagSchema,bag); - - innerObjectSchema = new Schema(); - innerObjectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("year", DataType.CHARARRAY, "2020", innerObjectSchema, innerObjectTuple); - addToTuple("month", DataType.INTEGER, 3, innerObjectSchema, innerObjectTuple); - - objectSchema = new Schema(); - objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "123456", objectSchema, objectTuple); - addToTuple("value", DataType.TUPLE, innerObjectTuple,innerObjectSchema,objectSchema, objectTuple); - - addToBagWithSchema("firstLayerTuple",DataType.TUPLE,objectTuple,objectSchema,bagSchema,bag); - - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - addToTuple("bag", DataType.BAG, bag, bagSchema, schema, tuple); - addToTuple("id", DataType.CHARARRAY, "123", schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=id", "remove-map-fields=bag","operation=update"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - assertEquals("{\"remove\":0}", fields.get("bag{123456}").toString()); - assertEquals("{\"remove\":0}", fields.get("bag{234566}").toString()); - - } - - @Test - public void 
requireThatUDFCorrectlyGeneratesAddBagAsMapOperation() throws Exception { - - DataBag bag = BagFactory.getInstance().newDefaultBag(); - - Schema innerObjectSchema = new Schema(); - Tuple innerObjectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("year", DataType.CHARARRAY, "2020", innerObjectSchema, innerObjectTuple); - addToTuple("month", DataType.INTEGER, 3, innerObjectSchema, innerObjectTuple); - - Schema objectSchema = new Schema(); - Tuple objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "123456", objectSchema, objectTuple); - addToTuple("value", DataType.TUPLE, innerObjectTuple,innerObjectSchema,objectSchema, objectTuple); - - Schema bagSchema = new Schema(); - addToBagWithSchema("firstLayerTuple",DataType.TUPLE,objectTuple,objectSchema,bagSchema,bag); - - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - addToTuple("bag", DataType.BAG, bag, bagSchema, schema, tuple); - addToTuple("id", DataType.CHARARRAY, "123", schema, tuple); - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=id", "update-map-fields=bag","operation=update"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - - JsonNode fields = root.get("fields"); - JsonNode value = fields.get("bag{123456}"); - JsonNode assign = value.get("assign"); - assertEquals("2020", assign.get("year").asText()); - assertEquals(3, assign.get("month").asInt()); - } - - @Test - public void requireThatUDFCorrectlyGeneratesAddTensorOperation() throws Exception { - - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - // Please refer to the tensor format documentation - - Map<String, Double> tensor = new HashMap<String, Double>() {{ - put("x:label1,y:label2,z:label4", 2.0); - put("x:label3", 3.0); - }}; - - addToTuple("id", DataType.CHARARRAY, "123", schema, tuple); - addToTuple("tensor", DataType.MAP, tensor, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=empty", "update-tensor-fields=tensor","operation=update"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - JsonNode tensorValue = fields.get("tensor"); - JsonNode add = tensorValue.get("add"); - JsonNode cells = add.get("cells"); - Iterator<JsonNode> cellsIterator = cells.iterator(); - - JsonNode element = cellsIterator.next(); - assertEquals("label1", element.get("address").get("x").asText()); - assertEquals("label2", element.get("address").get("y").asText()); - assertEquals("label4", element.get("address").get("z").asText()); - assertEquals("2.0", element.get("value").toString()); - - element = cellsIterator.next(); - assertEquals("label3", element.get("address").get("x").asText()); - assertEquals("3.0", element.get("value").toString()); - } - - @Test - public void requireThatUDFCorrectlyGeneratesRemoveTensorOperation() throws Exception { - - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - // Please refer to the tensor format documentation - - Map<String, Double> tensor = new HashMap<String, Double>() {{ - put("x:label1,y:label2,z:label4", 2.0); - put("x:label3", 3.0); - }}; - - addToTuple("id", DataType.CHARARRAY, "123", schema, tuple); - addToTuple("tensor", DataType.MAP, tensor, schema, tuple); - - VespaDocumentOperation docOp = new 
VespaDocumentOperation("docid=empty", "remove-tensor-fields=tensor","operation=update"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - JsonNode tensorValue = fields.get("tensor"); - JsonNode remove = tensorValue.get("remove"); - JsonNode address = remove.get("addresses"); - - Iterator<JsonNode> addressIterator = address.iterator(); - - JsonNode element = addressIterator.next(); - assertEquals("label1", element.get("x").asText()); - - element = addressIterator.next(); - assertEquals("label2", element.get("y").asText()); - - element = addressIterator.next(); - assertEquals("label4", element.get("z").asText()); - - element = addressIterator.next(); - assertEquals("label3", element.get("x").asText()); - } - - @Test - public void requireThatUDFReturnsNullWhenExceptionHappens() throws IOException { - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - // broken DELTA format that would throw internally - Map<String, Double> tensor = new HashMap<String, Double>() {{ - put("xlabel1", 2.0); // missing : between 'x' and 'label1' - }}; - - addToTuple("id", DataType.CHARARRAY, "123", schema, tuple); - addToTuple("tensor", DataType.MAP, tensor, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=empty", "create-tensor-fields=tensor"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - assertNull(json); - } - - @Test - public void requireThatUDFCorrectlyGeneratesRemoveOperation() throws Exception { - String json = getDocumentOperationJson("operation=remove", "docid=id:<application>:metrics::<name>-<date>"); - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - - assertEquals("id:testapp:metrics::clicks-20160112", root.get("remove").asText()); - assertNull(fields); - } - - - @Test - public void requireThatUDFGeneratesComplexDataTypes() throws Exception { - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - Tuple intTuple = TupleFactory.getInstance().newTuple(); - int[] intArray = {1, 2, 3}; - for (int i : intArray) { intTuple.append(i); } - - Tuple stringTuple = TupleFactory.getInstance().newTuple(); - String[] stringArray = {"a", "b", "c"}; - for (String s : stringArray) { stringTuple.append(s); } - - DataBag bag = new SortedDataBag(null); - bag.add(intTuple); - bag.add(stringTuple); - - Map<String, Object> innerMap = new HashMap<String, Object>() {{ - put("a", "string"); - put("tuple", intTuple); - }}; - - DataByteArray bytes = new DataByteArray("testdata".getBytes()); - - Map<String, Object> outerMap = new HashMap<String, Object>() {{ - put("string", "value"); - put("int", 3); - put("float", 3.145); - put("bool", true); - put("byte", bytes); - put("map", innerMap); - put("bag", bag); - }}; - - addToTuple("map", DataType.MAP, outerMap, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=empty"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - JsonNode map = fields.get("map"); - - assertEquals("value", map.get("string").asText()); - assertEquals(3, map.get("int").asInt()); - assertEquals(3.145, map.get("float").asDouble(), 1e-6); - assertTrue(map.get("bool").asBoolean()); - assertEquals("dGVzdGRhdGE=", 
map.get("byte").asText()); - - assertEquals("string", map.get("map").get("a").asText()); - for (int i = 0; i < intArray.length; ++i) { - assertEquals(intArray[i], map.get("map").get("tuple").get(i).asInt()); - } - - JsonNode bagField = map.get("bag"); - for (int i = 0; i < intArray.length; ++i) { - assertEquals(intArray[i], bagField.get(0).get(i).asInt()); - } - for (int i = 0; i < stringArray.length; ++i) { - assertEquals(stringArray[i], bagField.get(1).get(i).asText()); - } - } - - - @Test - public void requireThatSimpleArraysMustBeConfigured() throws Exception { - String[] stringArray = {"a", "b", "c"}; - JsonNode array = setupSimpleArrayOperation("array", stringArray, "docid=empty"); // simple arrays not configured - // json: [["a"], ["b"], ["c"]] - assertEquals("a", array.get(0).get(0).asText()); - assertEquals("b", array.get(1).get(0).asText()); - assertEquals("c", array.get(2).get(0).asText()); - } - - - @Test - public void requireThatSimpleArraysAreSupported() throws Exception { - String[] stringArray = {"a", "b", "c"}; - JsonNode array = setupSimpleArrayOperation("array", stringArray, "docid=empty", "simple-array-fields=array"); - // json: ["a", "b", "c"] - assertEquals("a", array.get(0).asText()); - assertEquals("b", array.get(1).asText()); - assertEquals("c", array.get(2).asText()); - } - - - @Test - public void requireThatSimpleArraysCanBeConfiguredWithWildcard() throws Exception { - String[] stringArray = {"a", "b", "c"}; - JsonNode array = setupSimpleArrayOperation("array", stringArray, "docid=empty", "simple-array-fields=*"); - // json: ["a", "b", "c"] - assertEquals("a", array.get(0).asText()); - assertEquals("b", array.get(1).asText()); - assertEquals("c", array.get(2).asText()); - } - - - @Test - public void requireThatMultipleSimpleArraysAreSupported() throws Exception { - String[] stringArray = {"a", "b", "c"}; - JsonNode array = setupSimpleArrayOperation("array", stringArray, "docid=empty", "simple-array-fields=empty,array"); - // json: ["a", "b", "c"] - assertEquals("a", array.get(0).asText()); - assertEquals("b", array.get(1).asText()); - assertEquals("c", array.get(2).asText()); - } - - - private JsonNode setupSimpleArrayOperation(String name, String[] array, String... 
params) throws IOException { - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - DataBag bag = new SortedDataBag(null); - for (String s : array) { - Tuple stringTuple = TupleFactory.getInstance().newTuple(); - stringTuple.append(s); - bag.add(stringTuple); - } - addToTuple(name, DataType.BAG, bag, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation(params); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - return fields.get(name); - } - - - @Test - public void requireThatUDFSupportsTensors() throws IOException { - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - // Please refer to the tensor format documentation - - Map<String, Double> tensor = new HashMap<String, Double>() {{ - put("x:label1,y:label2,z:label4", 2.0); - put("x:label3", 3.0); - }}; - - addToTuple("id", DataType.CHARARRAY, "123", schema, tuple); - addToTuple("tensor", DataType.MAP, tensor, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=empty", "create-tensor-fields=tensor"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - JsonNode tensorNode = fields.get("tensor"); - JsonNode cells = tensorNode.get("cells"); - - assertEquals("label1", cells.get(0).get("address").get("x").asText()); - assertEquals("label2", cells.get(0).get("address").get("y").asText()); - assertEquals("label4", cells.get(0).get("address").get("z").asText()); - assertEquals("label3", cells.get(1).get("address").get("x").asText()); - - assertEquals(2.0, cells.get(0).get("value").asDouble(), 1e-6); - assertEquals(3.0, cells.get(1).get("value").asDouble(), 1e-6); - } - - - @Test - public void requireThatUDFCanExcludeFields() throws IOException { - String json = getDocumentOperationJson("docid=id:<application>:metrics::<name>-<date>", "exclude-fields=application,date"); - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.path("fields"); - - // 'application' and 'date' fields should not appear in JSON - assertNull(fields.get("application")); - assertNull(fields.get("date")); - assertNotNull(fields.get("name")); - assertNotNull(fields.get("value")); - } - - - private String getDocumentOperationJson(String... 
params) throws IOException { - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - addToTuple("application", DataType.CHARARRAY, "testapp", schema, tuple); - addToTuple("name", DataType.CHARARRAY, "clicks", schema, tuple); - addToTuple("date", DataType.CHARARRAY, "20160112", schema, tuple); - addToTuple("value", DataType.CHARARRAY, 3, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation(params); - docOp.setInputSchema(schema); - return docOp.exec(tuple); - } - - - @Test - public void requireThatUDFSupportsSimpleObjectFields() throws IOException { - Schema objectSchema = new Schema(); - Tuple objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("id", DataType.LONG, 123456789L, objectSchema, objectTuple); - addToTuple("url", DataType.CHARARRAY, "example.com", objectSchema, objectTuple); - addToTuple("value", DataType.INTEGER, 123, objectSchema, objectTuple); - - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - addToTuple("object", DataType.TUPLE, objectTuple, objectSchema, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=empty", "simple-object-fields=object"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - JsonNode objectNode = fields.get("object"); - - assertEquals(123456789L, objectNode.get("id").asLong()); - assertEquals("example.com", objectNode.get("url").asText()); - assertEquals(123, objectNode.get("value").asInt()); - } - - - @Test - public void requireThatUDFSupportsBagAsMapFields() throws IOException { - DataBag bag = BagFactory.getInstance().newDefaultBag(); - - Schema objectSchema = new Schema(); - Tuple objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "123456", objectSchema, objectTuple); - addToTuple("value", DataType.INTEGER, 123456, objectSchema, objectTuple); - bag.add(objectTuple); - - objectSchema = new Schema(); - objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "234567", objectSchema, objectTuple); - addToTuple("value", DataType.INTEGER, 234567, objectSchema, objectTuple); - bag.add(objectTuple); - - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - addToTuple("bag", DataType.BAG, bag, objectSchema, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=empty", "bag-as-map-fields=bag"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); - JsonNode fields = root.get("fields"); - JsonNode bagNode = fields.get("bag"); - - assertEquals(123456, bagNode.get("123456").asInt()); - assertEquals(234567, bagNode.get("234567").asInt()); - } - - @Test - public void requireThatUDFPrintIdWhenVerbose() throws IOException { - DataBag bag = BagFactory.getInstance().newDefaultBag(); - - Schema objectSchema = new Schema(); - Tuple objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "123456", objectSchema, objectTuple); - addToTuple("value", DataType.INTEGER, 123456, objectSchema, objectTuple); - bag.add(objectTuple); - - objectSchema = new Schema(); - objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "234567", objectSchema, objectTuple); - addToTuple("value", DataType.INTEGER, 
234567, objectSchema, objectTuple); - bag.add(objectTuple); - - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - addToTuple("bag", DataType.BAG, bag, objectSchema, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=7654321", "bag-as-map-fields=bag","verbose=true"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - assertTrue(outContent.toString().contains("Processing docId: 7654321")); - } - - @Test - public void requireThatUDFVerboseSetToFalseByDefault() throws IOException { - DataBag bag = BagFactory.getInstance().newDefaultBag(); - - Schema objectSchema = new Schema(); - Tuple objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "123456", objectSchema, objectTuple); - addToTuple("value", DataType.INTEGER, 123456, objectSchema, objectTuple); - bag.add(objectTuple); - - objectSchema = new Schema(); - objectTuple = TupleFactory.getInstance().newTuple(); - addToTuple("key", DataType.CHARARRAY, "234567", objectSchema, objectTuple); - addToTuple("value", DataType.INTEGER, 234567, objectSchema, objectTuple); - bag.add(objectTuple); - - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - addToTuple("bag", DataType.BAG, bag, objectSchema, schema, tuple); - - VespaDocumentOperation docOp = new VespaDocumentOperation("docid=7654321", "bag-as-map-fields=bag"); - docOp.setInputSchema(schema); - String json = docOp.exec(tuple); - - assertEquals("", outContent.toString()); - } - - private void addToTuple(String alias, byte type, Object value, Schema schema, Tuple tuple) { - schema.add(new Schema.FieldSchema(alias, type)); - tuple.append(value); - } - - - private void addToTuple(String alias, byte type, Object value, Schema schemaInField, Schema schema, Tuple tuple) - throws FrontendException { - schema.add(new Schema.FieldSchema(alias, schemaInField, type)); - tuple.append(value); - } - - private void addToBagWithSchema(String alias, byte type, Tuple value, Schema schemaInField, Schema schema,DataBag bag) - throws FrontendException { - schema.add(new Schema.FieldSchema(alias, schemaInField, type)); - bag.add(value); - } -} diff --git a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaQueryTest.java b/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaQueryTest.java deleted file mode 100644 index a0b549a737f..00000000000 --- a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaQueryTest.java +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.vespa.hadoop.pig; - -import com.sun.net.httpserver.HttpServer; -import com.yahoo.vespa.hadoop.util.MockQueryHandler; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.data.Tuple; -import org.junit.jupiter.api.Test; - -import java.net.InetSocketAddress; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class VespaQueryTest { - - @Test - public void requireThatQueriesAreReturnedCorrectly() throws Exception { - runQueryTest("src/test/pig/query.pig", createQueryHandler(""), 18901); - } - - @Test - public void requireThatQueriesAreReturnedCorrectlyWithAlternativeJsonRoot() throws Exception { - runQueryTest("src/test/pig/query_alt_root.pig", createQueryHandler("children"), 18902); - } - - private void runQueryTest(String script, MockQueryHandler queryHandler, int port) throws Exception { - final String endpoint = "http://localhost:" + port; - - HttpServer server = HttpServer.create(new InetSocketAddress(port), 0); - server.createContext("/", queryHandler); - server.start(); - - PigServer ps = setup(script, endpoint); - - Iterator<Tuple> recommendations = ps.openIterator("recommendations"); - while (recommendations.hasNext()) { - Tuple tuple = recommendations.next(); - - String userid = (String) tuple.get(0); - Integer rank = (Integer) tuple.get(1); - String docid = (String) tuple.get(2); - Double relevance = (Double) tuple.get(3); - String fieldId = (String) tuple.get(4); - String fieldContent = (String) tuple.get(5); - - MockQueryHandler.MockQueryHit hit = queryHandler.getHit(userid, rank); - assertEquals(docid, hit.id); - assertEquals(relevance, hit.relevance, 1e-3); - assertEquals(fieldId, hit.fieldId); - assertEquals(fieldContent, hit.fieldContent); - } - - if (server != null) { - server.stop(0); - } - - } - - private PigServer setup(String script, String endpoint) throws Exception { - Configuration conf = new HdfsConfiguration(); - Map<String, String> parameters = new HashMap<>(); - parameters.put("ENDPOINT", endpoint); - - PigServer ps = new PigServer(ExecType.LOCAL, conf); - ps.setBatchOn(); - ps.registerScript(script, parameters); - - return ps; - } - - private MockQueryHandler createQueryHandler(String childNode) { - MockQueryHandler queryHandler = new MockQueryHandler(childNode); - - List<String> userIds = Arrays.asList("5", "104", "313"); - - int hitsPerUser = 3; - for (int i = 0; i < hitsPerUser * userIds.size(); ++i) { - String id = "" + (i+1); - String userId = userIds.get(i / hitsPerUser); - queryHandler.newHit(). - setId("id::::" + id). - setRelevance(1.0 - (i % hitsPerUser) * 0.1). - setFieldSddocname("doctype"). - setFieldId("" + id). - setFieldDate("2016060" + id). - setFieldContent("Content for user " + userId + " hit " + i % hitsPerUser + "..."). - add(userId); - } - - return queryHandler; - } - -} diff --git a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaStorageTest.java b/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaStorageTest.java deleted file mode 100644 index 3183c770bc7..00000000000 --- a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/pig/VespaStorageTest.java +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.vespa.hadoop.pig; - -import com.yahoo.vespa.hadoop.mapreduce.util.VespaConfiguration; -import com.yahoo.vespa.hadoop.mapreduce.util.VespaCounters; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.mapred.Counters; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecJob; -import org.apache.pig.tools.pigstats.JobStats; -import org.apache.pig.tools.pigstats.PigStats; -import org.apache.pig.tools.pigstats.mapreduce.MRJobStats; -import org.junit.jupiter.api.Test; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -public class VespaStorageTest { - - @Test - public void requireThatPremadeOperationsFeedSucceeds() throws Exception { - assertAllDocumentsOk("src/test/pig/feed_operations.pig"); - } - - - @Test - public void requireThatPremadeMultilineOperationsFeedSucceeds() throws Exception { - assertAllDocumentsOk("src/test/pig/feed_multiline_operations.pig"); - } - - - @Test - public void requireThatPremadeOperationsWithJsonLoaderFeedSucceeds() throws Exception { - assertAllDocumentsOk("src/test/pig/feed_operations_with_json_loader.pig"); - } - - @Test - public void requireThatPremadeOperationsWithJsonLoaderFeedAndNonLegacyClientSucceeds() throws Exception { - Configuration conf = new HdfsConfiguration(); - conf.set(VespaConfiguration.USE_SSL, Boolean.TRUE.toString()); - assertAllDocumentsOk("src/test/pig/feed_operations_with_json_loader.pig", conf); - } - - @Test - public void requireThatCreateOperationsFeedSucceeds() throws Exception { - assertAllDocumentsOk("src/test/pig/feed_create_operations.pig"); - } - - - @Test - public void requireThatCreateOperationsShortFormFeedSucceeds() throws Exception { - assertAllDocumentsOk("src/test/pig/feed_create_operations_short_form.pig"); - } - - - @Test - public void requireThatFeedVisitDataSucceeds() throws Exception { - assertAllDocumentsOk("src/test/pig/feed_visit_data.pig"); - } - - - private PigServer setup(String script, Configuration conf) throws Exception { - if (conf == null) { - conf = new HdfsConfiguration(); - } - conf.setIfUnset(VespaConfiguration.DRYRUN, "true"); - conf.setIfUnset(VespaConfiguration.ENDPOINT, "dummy-endpoint"); - - // Parameter substitutions - can also be set by configuration - Map<String, String> parameters = new HashMap<>(); - parameters.put("ENDPOINT", "endpoint-does-not-matter-in-dryrun,another-endpoint-that-does-not-matter"); - - PigServer ps = new PigServer(ExecType.LOCAL, conf); - ps.setBatchOn(); - ps.registerScript(script, parameters); - - return ps; - } - - - private void assertAllDocumentsOk(String script) throws Exception { - assertAllDocumentsOk(script, null); - } - - - private void assertAllDocumentsOk(String script, Configuration conf) throws Exception { - PigServer ps = setup(script, conf); - List<ExecJob> jobs = ps.executeBatch(); - PigStats stats = jobs.get(0).getStatistics(); - for (JobStats js : stats.getJobGraph()) { - Counters hadoopCounters = ((MRJobStats)js).getHadoopCounters(); - assertNotNull(hadoopCounters); - VespaCounters counters = VespaCounters.get(hadoopCounters); - assertEquals(10, counters.getDocumentsSent()); - assertEquals(0, counters.getDocumentsFailed()); - assertEquals(10, counters.getDocumentsOk()); - } - } - -} diff --git 
a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/util/MockQueryHandler.java b/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/util/MockQueryHandler.java deleted file mode 100644 index 64c160ea14c..00000000000 --- a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/util/MockQueryHandler.java +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hadoop.util; - -import com.fasterxml.jackson.core.JsonEncoding; -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonGenerator; -import com.sun.net.httpserver.HttpExchange; -import com.sun.net.httpserver.HttpHandler; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.net.URI; -import java.net.URLDecoder; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class MockQueryHandler implements HttpHandler { - - private final Map<String, List<MockQueryHit>> hitMap; - private final String childNode; - - public MockQueryHandler(String childNode) { - this.hitMap = new HashMap<>(); - this.childNode = childNode; - } - - public void handle(HttpExchange t) throws IOException { - URI uri = t.getRequestURI(); - String query = uri.getQuery(); - String response = null; - - // Parse query - extract "query" element - if (query != null) { - String params[] = query.split("[&]"); - for (String param : params) { - int i = param.indexOf('='); - String name = param.substring(0, i); - String value = URLDecoder.decode(param.substring(i + 1), "UTF-8"); - - if ("query".equalsIgnoreCase(name)) { - response = getResponse(URLDecoder.decode(param.substring(i + 1), "UTF-8")); - } - } - } - - t.sendResponseHeaders(200, response == null ? 0 : response.length()); - OutputStream os = t.getResponseBody(); - os.write(response == null ? 
"".getBytes() : response.getBytes()); - os.close(); - - } - - public MockQueryHit getHit(String query, Integer rank) { - if (!hitMap.containsKey(query)) { - return null; - } - if (rank >= hitMap.get(query).size()) { - return null; - } - return hitMap.get(query).get(rank); - } - - public MockQueryHit newHit() { - return new MockQueryHit(this); - } - - public void addHit(String query, MockQueryHit hit) { - if (!hitMap.containsKey(query)) { - hitMap.put(query, new ArrayList<>()); - } - hitMap.get(query).add(hit); - } - - private String getResponse(String query) throws IOException { - List<MockQueryHit> hits = hitMap.get(query); - if (hits == null) { - return null; - } - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - JsonGenerator g = new JsonFactory().createGenerator(out, JsonEncoding.UTF8); - - writeResultStart(g, hits.size()); - for (MockQueryHit hit : hits) { - writeHit(g, hit); - } - writeResultsEnd(g); - g.close(); - - return out.toString(); - } - - private void writeHit(JsonGenerator g, MockQueryHit hit) throws IOException { - g.writeStartObject(); - - g.writeFieldName("id"); - g.writeString(hit.id); - - g.writeFieldName("relevance"); - g.writeNumber(hit.relevance); - - g.writeFieldName("fields"); - g.writeStartObject(); - - g.writeFieldName("sddocname"); - g.writeString(hit.fieldSddocname); - - g.writeFieldName("date"); - g.writeString(hit.fieldDate); - - g.writeFieldName("content"); - g.writeString(hit.fieldContent); - - g.writeFieldName("id"); - g.writeString(hit.fieldId); - - g.writeEndObject(); - g.writeEndObject(); - } - - private void writeResultStart(JsonGenerator g, int count) throws IOException { - g.writeStartObject(); - g.writeFieldName("root"); - - g.writeStartObject(); - - g.writeFieldName("id"); - g.writeString("toplevel"); - - g.writeFieldName("relevance"); - g.writeNumber(1); - - g.writeFieldName("fields"); - g.writeStartObject(); - g.writeFieldName("totalCount"); - g.writeNumber(count); - g.writeEndObject(); - - g.writeFieldName("coverage"); - g.writeStartObject(); - g.writeFieldName("coverage"); - g.writeNumber(100); - // ... 
more stuff here usually - g.writeEndObject(); - - g.writeFieldName("children"); - g.writeStartArray(); - - if (!childNode.isEmpty()) { - g.writeStartObject(); - g.writeFieldName(childNode); - g.writeStartArray(); - } - } - - private void writeResultsEnd(JsonGenerator g) throws IOException { - if (!childNode.isEmpty()) { - g.writeEndArray(); - g.writeEndObject(); - } - g.writeEndArray(); - g.writeEndObject(); - g.writeEndObject(); - } - - public static class MockQueryHit { - - private final MockQueryHandler handler; - - public String id; - public Double relevance; - public String fieldSddocname; - public String fieldDate; - public String fieldContent; - public String fieldId; - - private MockQueryHit(MockQueryHandler handler) { - this.handler = handler; - } - - public void add(String query) { - handler.addHit(query, this); - } - - public MockQueryHit setId(String id) { - this.id = id; - return this; - } - - public MockQueryHit setRelevance(Double relevance) { - this.relevance = relevance; - return this; - } - - public MockQueryHit setFieldSddocname(String fieldSddocname) { - this.fieldSddocname = fieldSddocname; - return this; - } - - public MockQueryHit setFieldDate(String fieldDate) { - this.fieldDate = fieldDate; - return this; - } - - public MockQueryHit setFieldContent(String fieldContent) { - this.fieldContent = fieldContent; - return this; - } - - public MockQueryHit setFieldId(String fieldId) { - this.fieldId = fieldId; - return this; - } - } - -} diff --git a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/util/TupleToolsTest.java b/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/util/TupleToolsTest.java deleted file mode 100644 index b4ccbdf2183..00000000000 --- a/vespa-hadoop/src/test/java/com/yahoo/vespa/hadoop/util/TupleToolsTest.java +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.vespa.hadoop.util; - -import com.yahoo.vespa.hadoop.mapreduce.util.TupleTools; -import org.apache.pig.data.DataType; -import org.apache.pig.data.Tuple; -import org.apache.pig.data.TupleFactory; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.junit.jupiter.api.Test; - -import java.io.IOException; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class TupleToolsTest { - - @Test - public void requireThatTupleToStringHandlesSimpleTypes() throws IOException { - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - addToTuple("id", DataType.CHARARRAY, "123", schema, tuple); - addToTuple("rank", DataType.INTEGER, 1, schema, tuple); - - String template = "Id is <id> and rank is <rank>"; - String result = TupleTools.toString(schema, tuple, template); - - assertEquals("Id is 123 and rank is 1", result); - } - - - private void addToTuple(String alias, byte type, Object value, Schema schema, Tuple tuple) { - schema.add(new Schema.FieldSchema(alias, type)); - tuple.append(value); - } - - @Test - public void requireThatTupleToStringHandlesStringCharacters() throws IOException { - Schema schema = new Schema(); - Tuple tuple = TupleFactory.getInstance().newTuple(); - - addToTuple("id", DataType.CHARARRAY, "_!@#$%^&*()", schema, tuple); - addToTuple("rank", DataType.INTEGER, 1, schema, tuple); - - String template = "Id is <id> and rank is <rank>"; - String result = TupleTools.toString(schema, tuple, template); - - assertEquals("Id is _!@#$%^&*() and rank is 1", result); - } - -} diff --git a/vespa-hadoop/src/test/pig/feed_create_operations.pig b/vespa-hadoop/src/test/pig/feed_create_operations.pig deleted file mode 100644 index 4583c095133..00000000000 --- a/vespa-hadoop/src/test/pig/feed_create_operations.pig +++ /dev/null @@ -1,24 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Create valid Vespa put operations -DEFINE VespaPutOperation - com.yahoo.vespa.hadoop.pig.VespaDocumentOperation( - 'operation=put', - 'docid=id:<application>:metrics::<name>-<date>' - ); - --- By default, VespaStorage assumes it's feeding valid Vespa operations -DEFINE VespaStorage - com.yahoo.vespa.hadoop.pig.VespaStorage(); - --- Load tabular data -metrics = LOAD 'src/test/resources/tabular_data.csv' AS (date:chararray, name:chararray, value:int, application:chararray); - --- Transform tabular data to a Vespa document operation JSON format -metrics = FOREACH metrics GENERATE VespaPutOperation(*); - --- Store into Vespa -STORE metrics INTO '$ENDPOINT' USING VespaStorage(); - - diff --git a/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig b/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig deleted file mode 100644 index 0f0e63d843a..00000000000 --- a/vespa-hadoop/src/test/pig/feed_create_operations_short_form.pig +++ /dev/null @@ -1,19 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Transform tabular data to a Vespa document operation JSON format --- as part of storing the data. 
-DEFINE VespaStorage - com.yahoo.vespa.hadoop.pig.VespaStorage( - 'create-document-operation=true', - 'operation=put', - 'docid=id:<application>:metrics::<name>-<date>' - ); - --- Load tabular data -metrics = LOAD 'src/test/resources/tabular_data.csv' AS (date:chararray, name:chararray, value:int, application:chararray); - --- Store into Vespa -STORE metrics INTO '$ENDPOINT' USING VespaStorage(); - - diff --git a/vespa-hadoop/src/test/pig/feed_multiline_operations.pig b/vespa-hadoop/src/test/pig/feed_multiline_operations.pig deleted file mode 100644 index 1971270cbdc..00000000000 --- a/vespa-hadoop/src/test/pig/feed_multiline_operations.pig +++ /dev/null @@ -1,15 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Define short name for VespaJsonLoader -DEFINE VespaJsonLoader com.yahoo.vespa.hadoop.pig.VespaSimpleJsonLoader(); - --- Define short name for VespaStorage -DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); - --- Load data - one column for json data -metrics = LOAD 'src/test/resources/operations_multiline_data.json' USING VespaJsonLoader() AS (data:chararray); - --- Store into Vespa -STORE metrics INTO '$ENDPOINT' USING VespaStorage(); - diff --git a/vespa-hadoop/src/test/pig/feed_operations.pig b/vespa-hadoop/src/test/pig/feed_operations.pig deleted file mode 100644 index 48873fde87a..00000000000 --- a/vespa-hadoop/src/test/pig/feed_operations.pig +++ /dev/null @@ -1,11 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Define short name for VespaStorage -DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); - --- Load data - one column for json data -metrics = LOAD 'src/test/resources/operations_data.json' AS (data:chararray); - --- Store into Vespa -STORE metrics INTO '$ENDPOINT' USING VespaStorage(); diff --git a/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig b/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig deleted file mode 100644 index da58fe3c678..00000000000 --- a/vespa-hadoop/src/test/pig/feed_operations_with_json_loader.pig +++ /dev/null @@ -1,14 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Define short name for VespaJsonLoader -DEFINE VespaJsonLoader com.yahoo.vespa.hadoop.pig.VespaSimpleJsonLoader(); - --- Define short name for VespaStorage -DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); - --- Load data - one column for json data -metrics = LOAD 'src/test/resources/operations_data.json' USING VespaJsonLoader() AS (data:chararray); - --- Store into Vespa -STORE metrics INTO '$ENDPOINT' USING VespaStorage(); diff --git a/vespa-hadoop/src/test/pig/feed_operations_xml.pig b/vespa-hadoop/src/test/pig/feed_operations_xml.pig deleted file mode 100644 index 4e5057f4909..00000000000 --- a/vespa-hadoop/src/test/pig/feed_operations_xml.pig +++ /dev/null @@ -1,11 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
--- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Define short name for VespaStorage -DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); - --- Load data - one column for xml data -data = LOAD 'src/test/resources/operations_data.xml' AS (data:chararray); - --- Store into Vespa -STORE data INTO '$ENDPOINT' USING VespaStorage(); diff --git a/vespa-hadoop/src/test/pig/feed_visit_data.pig b/vespa-hadoop/src/test/pig/feed_visit_data.pig deleted file mode 100644 index 59d144b53dc..00000000000 --- a/vespa-hadoop/src/test/pig/feed_visit_data.pig +++ /dev/null @@ -1,12 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Define short name for VespaStorage -DEFINE VespaStorage com.yahoo.vespa.hadoop.pig.VespaStorage(); - --- Load data - one column for json data -metrics = LOAD 'src/test/resources/visit_data.json' AS (data:chararray); - --- Store into Vespa -STORE metrics INTO '$ENDPOINT' USING VespaStorage(); - diff --git a/vespa-hadoop/src/test/pig/query.pig b/vespa-hadoop/src/test/pig/query.pig deleted file mode 100644 index 96caa5cd0c4..00000000000 --- a/vespa-hadoop/src/test/pig/query.pig +++ /dev/null @@ -1,19 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Define Vespa query for retrieving blog posts -DEFINE BlogPostRecommendations - com.yahoo.vespa.hadoop.pig.VespaQuery( - 'query=$ENDPOINT/search?query=<userid>&hits=100', - 'schema=rank:int,id:chararray,relevance:double,fields/id:chararray,fields/content:chararray' - ); - --- Load data from a local file -users = LOAD 'src/test/resources/user_ids.csv' AS (userid:chararray); -users = FILTER users BY userid IS NOT null; - --- Run a set of queries against Vespa -recommendations = FOREACH users GENERATE userid, FLATTEN(BlogPostRecommendations(*)); - --- Output recommendations -DUMP recommendations; diff --git a/vespa-hadoop/src/test/pig/query_alt_root.pig b/vespa-hadoop/src/test/pig/query_alt_root.pig deleted file mode 100644 index 2884b4a600f..00000000000 --- a/vespa-hadoop/src/test/pig/query_alt_root.pig +++ /dev/null @@ -1,20 +0,0 @@ --- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
--- REGISTER vespa-hadoop.jar -- Not needed in tests - --- Define Vespa query for retrieving blog posts -DEFINE BlogPostRecommendations - com.yahoo.vespa.hadoop.pig.VespaQuery( - 'query=$ENDPOINT/search?query=<userid>&hits=100', - 'rootnode=root/children/children', - 'schema=rank:int,id:chararray,relevance:double,fields/id:chararray,fields/content:chararray' - ); - --- Load data from a local file -users = LOAD 'src/test/resources/user_ids.csv' AS (userid:chararray); -users = FILTER users BY userid IS NOT null; - --- Run a set of queries against Vespa -recommendations = FOREACH users GENERATE userid, FLATTEN(BlogPostRecommendations(*)); - --- Output recommendations -DUMP recommendations; diff --git a/vespa-hadoop/src/test/resources/operations_data.json b/vespa-hadoop/src/test/resources/operations_data.json deleted file mode 100644 index 5af436dbfe7..00000000000 --- a/vespa-hadoop/src/test/resources/operations_data.json +++ /dev/null @@ -1,10 +0,0 @@ -{"put":"id:testapp:metric::clicks-2015110414","fields":{"date":"2015110414","name":"clicks","value":1,"application":"testapp"}} -{"fields":{"date":"2015110416","name":"clicks","value":5,"application":"testapp"},"put":"id:testapp:metric::clicks-2015110416"} -{"put":"id:testapp:metric::clicks-2015110415","fields":{"date":"2015110415","name":"clicks","value":2,"application":"testapp"}} -{"put":"id:testapp:metric::clicks-2015110417","fields":{"date":"2015110417","name":"clicks","value":3,"application":"testapp"}} -{"put":"id:testapp:metric::clicks-2015110418","fields":{"date":"2015110418","name":"clicks","value":6,"application":"testapp"}} -{"put":"id:testapp:metric::clicks-2015110419","fields":{"date":"2015110419","name":"clicks","value":3,"application":"testapp"}} -{"put":"id:testapp:metric::clicks-2015110420","fields":{"date":"2015110420","name":"clicks","value":4,"application":"testapp"}} -{"put":"id:testapp:metric::clicks-2015110421","fields":{"date":"2015110421","name":"clicks","value":2,"application":"testapp"}} -{"fields":{"date":"2015110422","name":"clicks","value":5,"application":"testapp"},"condition":"metrics==0","put":"id:testapp:metric::clicks-2015110422"} -{"put":"id:testapp:metric::clicks-2015110423","fields":{"date":"2015110423","name":"clicks","value":1,"application":"testapp"}} diff --git a/vespa-hadoop/src/test/resources/operations_data.xml b/vespa-hadoop/src/test/resources/operations_data.xml deleted file mode 100644 index db02b6bee73..00000000000 --- a/vespa-hadoop/src/test/resources/operations_data.xml +++ /dev/null @@ -1,14 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<!-- Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
--> -<vespafeed> - <document documenttype="music" documentid="id:music:music::http://music.yahoo.com/a-ha/Scoundrel+Days"> <url>http://music.yahoo.com/a-ha/Scoundrel+Days</url> <title><![CDATA[Scoundrel Days]]></title> <artist><![CDATA[a-ha]]></artist> <year>0</year> <popularity>290</popularity> </document> - <document documenttype="music" documentid="id:music:music::http://music.yahoo.com/Accept/Restless+And+Wild"> <url>http://music.yahoo.com/Accept/Restless+And+Wild</url> <title><![CDATA[Restless And Wild]]></title> <artist><![CDATA[Accept]]></artist> <year>0</year> <popularity>75</popularity> </document> - <document documenttype="music" documentid="id:music:music::http://music.yahoo.com/Accept/Staying+A+Life"> <url>http://music.yahoo.com/Accept/Staying+A+Life</url> <title><![CDATA[Staying A Life]]></title> <artist><![CDATA[Accept]]></artist> <year>1985</year> <popularity>77</popularity> </document> - <document documenttype="music" documentid="id:music:music::http://music.yahoo.com/Alice+In+Chains/Dirt"> <url>http://music.yahoo.com/Alice+In+Chains/Dirt</url> <title><![CDATA[Dirt]]></title> <artist><![CDATA[Alice In Chains]]></artist> <year>1992</year> <popularity>114</popularity> </document> - <document documenttype="music" documentid="id:music:music::http://music.yahoo.com/Alice+In+Chains/Live"> <url>http://music.yahoo.com/Alice+In+Chains/Live</url> <title><![CDATA[Live]]></title> <artist><![CDATA[Alice In Chains]]></artist> <year>1990</year> <popularity>363</popularity> </document> - <document documenttype="music" documentid="id:music:music::http://music.yahoo.com/Amy+MacDonald/This+Is+The+Life"> <url>http://music.yahoo.com/Amy+MacDonald/This+Is+The+Life</url> <title><![CDATA[This Is The Life]]></title> <artist><![CDATA[Amy MacDonald]]></artist> <year>2007</year> <popularity>355</popularity> </document> - <document documenttype="music" documentid="id:music:music::http://music.yahoo.com/Ane+Brun/Duets"> <url>http://music.yahoo.com/Ane+Brun/Duets</url> <title><![CDATA[Duets]]></title> <artist><![CDATA[Ane Brun]]></artist> <year>0</year> <popularity>255</popularity> </document> - <update documenttype="music" documentid="id:music:music::http://music.yahoo.com/bobdylan/BestOf"><assign field="title">The Best of Bob Dylan</assign><add field="tracks"><item>Man Of Constant Sorrow</item></add></update> - <remove documentid="id:music:music::http://music.yahoo.com/Aqpop/Beautifully+Smart" /> - <document documenttype="music" documentid="id:music:music::http://music.yahoo.com/Annuals/Be+He+Me"> <url>http://music.yahoo.com/Annuals/Be+He+Me</url> <title><![CDATA[Be He Me]]></title> <artist><![CDATA[Annuals]]></artist> <year>0</year> <popularity>207</popularity> </document> -</vespafeed> diff --git a/vespa-hadoop/src/test/resources/operations_multiline_data.json b/vespa-hadoop/src/test/resources/operations_multiline_data.json deleted file mode 100644 index 2b51698d9b7..00000000000 --- a/vespa-hadoop/src/test/resources/operations_multiline_data.json +++ /dev/null @@ -1,93 +0,0 @@ -[ - { - "put": "id:testapp:metric::clicks-2015110414", - "fields": { - "date": "2015110414", - "name": "clicks", - "value": 1, - "application": "testapp" - } - }, - { - "fields": { - "date": "2015110416", - "name": "clicks", - "value": 5, - "application": "testapp" - }, - "put": "id:testapp:metric::clicks-2015110416" - }, - { - "put": "id:testapp:metric::clicks-2015110415", - "fields": { - "date": "2015110415", - "name": "clicks", - "value": 2, - "application": "testapp" - } - }, - { - "put": 
"id:testapp:metric::clicks-2015110417", - "fields": { - "date": "2015110417", - "name": "clicks", - "value": 3, - "application": "testapp" - } - }, - { - "put": "id:testapp:metric::clicks-2015110418", - "fields": { - "date": "2015110418", - "name": "clicks", - "value": 6, - "application": "testapp" - } - }, - { - "put": "id:testapp:metric::clicks-2015110419", - "fields": { - "date": "2015110419", - "name": "clicks", - "value": 3, - "application": "testapp" - } - }, - { - "put": "id:testapp:metric::clicks-2015110420", - "fields": { - "date": "2015110420", - "name": "clicks", - "value": 4, - "application": "testapp" - } - }, - { - "put": "id:testapp:metric::clicks-2015110421", - "fields": { - "date": "2015110421", - "name": "clicks", - "value": 2, - "application": "testapp" - } - }, - { - "fields": { - "date": "2015110422", - "name": "clicks", - "value": 5, - "application": "testapp" - }, - "condition": "metrics==0", - "put": "id:testapp:metric::clicks-2015110422" - }, - { - "put": "id:testapp:metric::clicks-2015110423", - "fields": { - "date": "2015110423", - "name": "clicks", - "value": 1, - "application": "testapp" - } - } -] diff --git a/vespa-hadoop/src/test/resources/tabular_data.csv b/vespa-hadoop/src/test/resources/tabular_data.csv deleted file mode 100644 index 541597998e9..00000000000 --- a/vespa-hadoop/src/test/resources/tabular_data.csv +++ /dev/null @@ -1,11 +0,0 @@ -2015110414 clicks 1 testapp -2015110415 clicks 2 testapp -2015110416 clicks 5 testapp -2015110417 clicks 3 testapp -2015110418 clicks 6 testapp -2015110419 clicks 3 testapp -2015110420 clicks 4 testapp -2015110421 clicks 2 testapp -2015110422 clicks 5 testapp -2015110423 clicks 1 testapp - diff --git a/vespa-hadoop/src/test/resources/user_ids.csv b/vespa-hadoop/src/test/resources/user_ids.csv deleted file mode 100644 index 5875a3b9a7c..00000000000 --- a/vespa-hadoop/src/test/resources/user_ids.csv +++ /dev/null @@ -1,4 +0,0 @@ -5 -104 -313 - diff --git a/vespa-hadoop/src/test/resources/visit_data.json b/vespa-hadoop/src/test/resources/visit_data.json deleted file mode 100644 index 947b9326cc8..00000000000 --- a/vespa-hadoop/src/test/resources/visit_data.json +++ /dev/null @@ -1,10 +0,0 @@ -{"id":"id:testapp:metric::clicks-2015110414","fields":{"date":"2015110414","name":"clicks","value":1,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110415","fields":{"date":"2015110415","name":"clicks","value":2,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110416","fields":{"date":"2015110416","name":"clicks","value":4,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110417","fields":{"date":"2015110417","name":"clicks","value":3,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110418","fields":{"date":"2015110418","name":"clicks","value":6,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110419","fields":{"date":"2015110419","name":"clicks","value":3,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110420","fields":{"date":"2015110420","name":"clicks","value":4,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110421","fields":{"date":"2015110421","name":"clicks","value":2,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110422","fields":{"date":"2015110422","name":"clicks","value":7,"application":"testapp"}} -{"id":"id:testapp:metric::clicks-2015110423","fields":{"date":"2015110423","name":"clicks","value":1,"application":"testapp"}}
\ No newline at end of file