summaryrefslogtreecommitdiffstats
path: root/config-proxy/src/main/java/com/yahoo/vespa/config/proxy/UrlDownloadRpcServer.java
blob: 3c24cb58cffc218f1c7cde00706348cb59c74c92 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.config.proxy;

import com.yahoo.concurrent.DaemonThreadFactory;
import com.yahoo.jrt.Method;
import com.yahoo.jrt.Request;
import com.yahoo.jrt.StringValue;
import com.yahoo.jrt.Supervisor;
import com.yahoo.log.LogLevel;
import com.yahoo.text.Utf8;
import com.yahoo.vespa.defaults.Defaults;
import net.jpountz.xxhash.XXHashFactory;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.file.Files;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.logging.Logger;

import static com.yahoo.vespa.config.UrlDownloader.DOES_NOT_EXIST;
import static com.yahoo.vespa.config.UrlDownloader.HTTP_ERROR;
import static com.yahoo.vespa.config.UrlDownloader.INTERNAL_ERROR;

/**
 * An RPC server that handles URL download requests.
 *
 * @author lesters
 */
public class UrlDownloadRpcServer {
    private final static Logger log = Logger.getLogger(UrlDownloadRpcServer.class.getName());

    private static final String CONTENTS_FILE_NAME = "contents";
    private static final String LAST_MODFIED_FILE_NAME = "lastmodified";

    private final File downloadBaseDir;
    private final ExecutorService rpcDownloadExecutor = Executors.newFixedThreadPool(Math.max(8, Runtime.getRuntime().availableProcessors()),
                                                                                     new DaemonThreadFactory("Rpc download executor"));

    UrlDownloadRpcServer(Supervisor supervisor) {
        supervisor.addMethod(new Method("url.waitFor", "s", "s", this, "download")
                                    .methodDesc("get path to url download")
                                    .paramDesc(0, "url", "url")
                                    .returnDesc(0, "path", "path to file"));
        downloadBaseDir = new File(Defaults.getDefaults().underVespaHome("var/db/vespa/download"));
    }

    @SuppressWarnings({"UnusedDeclaration"})
    public final void download(Request req) {
        req.detach();
        rpcDownloadExecutor.execute(() -> downloadFile(req));
    }

    private void downloadFile(Request req) {
        String url = req.parameters().get(0).asString();
        File downloadDir = new File(this.downloadBaseDir, urlToDirName(url));

        try {
            URL website = new URL(url);
            HttpURLConnection connection = (HttpURLConnection) website.openConnection();
            setIfModifiedSince(connection, downloadDir);  // don't download if we already have the file

            if (connection.getResponseCode() == 200) {
                log.log(LogLevel.INFO, "Downloading URL '" + url + "'");
                downloadFile(req, connection, downloadDir);

            } else if (connection.getResponseCode() == 304) {
                log.log(LogLevel.INFO, "URL '" + url + "' already downloaded (server response: 304)");
                req.returnValues().add(new StringValue(new File(downloadDir, CONTENTS_FILE_NAME).getAbsolutePath()));

            } else {
                log.log(LogLevel.ERROR, "Download of URL '" + url + "' got server response: " + connection.getResponseCode());
                req.setError(HTTP_ERROR, String.valueOf(connection.getResponseCode()));
            }

        } catch (Throwable e) {
            log.log(LogLevel.ERROR, "Download of URL '" + url + "' got exception: " + e.getMessage());
            req.setError(INTERNAL_ERROR, "Download of URL '" + url + "' internal error: " + e.getMessage());
        }
        req.returnRequest();
    }

    private static void downloadFile(Request req, HttpURLConnection connection, File downloadDir) throws IOException {
        long start = System.currentTimeMillis();
        String url = connection.getURL().toString();
        Files.createDirectories(downloadDir.toPath());
        File contentsPath = new File(downloadDir, CONTENTS_FILE_NAME);
        try (ReadableByteChannel rbc = Channels.newChannel(connection.getInputStream())) {
            try (FileOutputStream fos = new FileOutputStream((contentsPath.getAbsolutePath()))) {
                fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);

                if (contentsPath.exists() && contentsPath.length() > 0) {
                    writeLastModifiedTimestamp(downloadDir, connection.getLastModified());
                    req.returnValues().add(new StringValue(contentsPath.getAbsolutePath()));
                    log.log(LogLevel.DEBUG, () -> "URL '" + url + "' available at " + contentsPath);
                } else {
                    log.log(LogLevel.ERROR, "Downloaded URL '" + url + "' not found, returning error");
                    req.setError(DOES_NOT_EXIST, "Downloaded '" + url + "' not found");
                }
            }
        }
        long end = System.currentTimeMillis();
        log.log(LogLevel.INFO, String.format("Download of URL '%s' done in %.3f seconds", url,  (end-start) / 1000.0));
    }

    private static String urlToDirName(String uri) {
        return String.valueOf(XXHashFactory.nativeInstance().hash64().hash(ByteBuffer.wrap(Utf8.toBytes(uri)), 0));
    }

    private static void setIfModifiedSince(HttpURLConnection connection, File downloadDir) throws IOException {
        File contents = new File(downloadDir, CONTENTS_FILE_NAME);
        if (contents.exists() && contents.length() > 0) {
            long lastModified = readLastModifiedTimestamp(downloadDir);
            if (lastModified > 0) {
                connection.setIfModifiedSince(lastModified);
            }
        }
    }

    private static long readLastModifiedTimestamp(File downloadDir) throws IOException {
        File lastModified = new File(downloadDir, LAST_MODFIED_FILE_NAME);
        if (lastModified.exists() && lastModified.length() > 0) {
            try (BufferedReader br = new BufferedReader(new FileReader(lastModified))) {
                String timestamp = br.readLine();
                return Long.parseLong(timestamp);
            }
        }
        return 0;
    }

    private static void writeLastModifiedTimestamp(File downloadDir, long timestamp) throws IOException {
        File lastModified = new File(downloadDir, LAST_MODFIED_FILE_NAME);
        try (BufferedWriter lastModifiedWriter = new BufferedWriter(new FileWriter(lastModified.getAbsolutePath()))) {
            lastModifiedWriter.write(Long.toString(timestamp));
        }
    }

}