config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.application.validation;

import com.yahoo.config.application.api.ApplicationFile;
import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.model.NullConfigModelRegistry;
import com.yahoo.config.model.api.ApplicationClusterEndpoint;
import com.yahoo.config.model.api.ContainerEndpoint;
import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.api.OnnxModelOptions;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.config.model.deploy.TestProperties;
import com.yahoo.config.model.provision.InMemoryProvisioner;
import com.yahoo.config.model.test.MockApplicationPackage;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.text.Text;
import com.yahoo.vespa.model.VespaModel;
import org.junit.jupiter.api.Test;
import org.xml.sax.SAXException;

import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Exercises {@link JvmHeapSizeValidator} against container clusters that load an ONNX model
 * through a hugging-face-embedder component, using a stubbed model cost.
 *
 * @author bjorncs
 */
class JvmHeapSizeValidatorTest {

    /** Number of bytes used per "GB" when expressing the stubbed model cost. */
    private static final long GIB = 1024L * 1024 * 1024;

    /** URL of the transformer model referenced by every services.xml in this test. */
    private static final String MODEL_URL = "https://my/url/model.onnx";

    @Test
    void fails_on_too_low_jvm_percentage() throws IOException, SAXException {
        // 8 GB nodes with a 7 GB model cost.
        String failure = validationFailureFor(createDeployState(8, 7 * GIB));
        assertTrue(
                failure.contains("Allocated percentage of memory of JVM in cluster 'container' is too low (3% < 15%). Estimated cost of ONNX models is 7.00GB"),
                failure);
    }

    @Test
    void fails_on_too_low_heap_size() throws IOException, SAXException {
        // 2.2 GB nodes with a 1 GB model cost.
        String failure = validationFailureFor(createDeployState(2.2, GIB));
        assertTrue(
                failure.contains("Allocated memory to JVM in cluster 'container' is too low (0.50GB < 0.60GB). Estimated cost of ONNX models is 1.00GB."),
                failure);
    }

    @Test
    void accepts_adequate_heap_size() throws IOException, SAXException {
        assertValidationPasses(createDeployState(8, GIB));
    }

    @Test
    void accepts_services_with_explicit_jvm_size() throws IOException, SAXException {
        // Same application as the generated one, but with an explicit <jvm allocated-memory> element.
        String explicitJvmServices =
                """
                <?xml version="1.0" encoding="utf-8" ?>
                <services version='1.0'>
                    <container version='1.0'>
                        <nodes count="2">
                            <jvm allocated-memory='5%'/>
                            <resources vcpu="4" memory="2Gb" disk="125Gb"/>
                        </nodes>
                        <component id="hf-embedder" type="hugging-face-embedder">
                            <transformer-model url="https://my/url/model.onnx"/>
                            <tokenizer-model path="app/tokenizer.json"/>
                        </component>
                    </container>
                </services>""";
        assertValidationPasses(createDeployState(explicitJvmServices, 2, GIB));
    }

    /**
     * Builds a model from the given deploy state, runs the validator and expects it to throw
     * {@link IllegalArgumentException}.
     *
     * @return the message of the thrown exception
     */
    private static String validationFailureFor(DeployState deployState) throws IOException, SAXException {
        VespaModel model = new VespaModel(new NullConfigModelRegistry(), deployState);
        IllegalArgumentException thrown = assertThrows(
                IllegalArgumentException.class,
                () -> new JvmHeapSizeValidator().validate(model, deployState));
        return thrown.getMessage();
    }

    /** Builds a model from the given deploy state and expects the validator to complete without throwing. */
    private static void assertValidationPasses(DeployState deployState) throws IOException, SAXException {
        VespaModel model = new VespaModel(new NullConfigModelRegistry(), deployState);
        assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState));
    }

    /**
     * Creates a hosted deploy state (dynamic heap size enabled) for the given services.xml.
     *
     * @param servicesXml    content of services.xml for the mock application package
     * @param nodeGb         memory value passed to the provisioner's {@link NodeResources}
     * @param modelCostBytes cost added by the stubbed {@link OnnxModelCost} per registered model URI
     */
    private static DeployState createDeployState(String servicesXml, double nodeGb, long modelCostBytes) {
        var applicationPackage = new MockApplicationPackage.Builder()
                .withServices(servicesXml)
                .build();
        var hostResources = new NodeResources(4, nodeGb, 125, 0.3);
        var provisioner = new InMemoryProvisioner(5, hostResources, true);
        var zoneEndpoint = new ContainerEndpoint("container", ApplicationClusterEndpoint.Scope.zone, List.of("c.example.com"));
        var hostedProperties = new TestProperties().setHostedVespa(true).setDynamicHeapSize(true);

        DeployState.Builder builder = new DeployState.Builder();
        builder.applicationPackage(applicationPackage)
               .modelHostProvisioner(provisioner)
               .endpoints(Set.of(zoneEndpoint))
               .properties(hostedProperties)
               .onnxModelCost(new ModelCostDummy(modelCostBytes));
        return builder.build();
    }

    /**
     * Creates a deploy state for a two-node container cluster whose requested memory is
     * {@code nodeGb}, formatted into the services.xml template below.
     */
    private static DeployState createDeployState(double nodeGb, long modelCostBytes) {
        String template =
                """
                <?xml version="1.0" encoding="utf-8" ?>
                <services version='1.0'>
                    <container version='1.0'>
                        <nodes count="2">
                            <resources vcpu="4" memory="%fGb" disk="125Gb"/>
                        </nodes>
                        <component id="hf-embedder" type="hugging-face-embedder">
                            <transformer-model url="https://my/url/model.onnx"/>
                            <tokenizer-model path="app/tokenizer.json"/>
                        </component>
                    </container>
                </services>""";
        return createDeployState(Text.format(template, nodeGb), nodeGb, modelCostBytes);
    }

    /**
     * Stub that acts as both the {@link OnnxModelCost} and its calculator: every model registered
     * by URI adds a fixed cost to the running total, while models registered by file are ignored.
     */
    private static class ModelCostDummy implements OnnxModelCost, OnnxModelCost.Calculator {

        /** Sum of the costs of all URI-registered models so far. */
        private final AtomicLong accumulatedBytes = new AtomicLong();

        /** Cost added for each URI-registered model. */
        private final long bytesPerModel;

        ModelCostDummy(long modelCost) {
            this.bytesPerModel = modelCost;
        }

        @Override
        public Calculator newCalculator(ApplicationPackage appPkg, ApplicationId applicationId) {
            return this;
        }

        @Override
        public Map<String, ModelInfo> models() {
            return Map.of();
        }

        @Override
        public void setRestartOnDeploy() {
            // Intentionally a no-op in this stub.
        }

        @Override
        public boolean restartOnDeploy() {
            return false;
        }

        @Override
        public long aggregatedModelCostInBytes() {
            return accumulatedBytes.get();
        }

        @Override
        public void registerModel(ApplicationFile path) {
            // File-based models contribute nothing to the total.
        }

        @Override
        public void registerModel(ApplicationFile path, OnnxModelOptions onnxModelOptions) {
            // File-based models contribute nothing to the total.
        }

        @Override
        public void registerModel(URI uri) {
            // Only the model URL used in the test services.xml is expected here.
            assertEquals(MODEL_URL, uri.toString());
            accumulatedBytes.addAndGet(bytesPerModel);
        }

        @Override
        public void registerModel(URI uri, OnnxModelOptions onnxModelOptions) {
            // Options are ignored; accounting is the same as for the single-argument overload.
            registerModel(uri);
        }

    }

}