Fork me on GitHub

Ivan-Site.com

Interact with S3 Without Temp Files

There is plenty of documentation and there are many examples around uploading and downloading files to/from S3. Often you don't actually want to keep the files you upload or download, and want to delete them as soon as that process is done. You can easily do this using temp files like this:

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;

import java.io.*;

class S3TempFileTest {

    private static final String S3_FILE_PREFIX = "s3test";
    private static final String S3_FILE_SUFFIX = ".tmp";
    private static final String S3_BUCKET_NAME = "bucket";
    private static final String S3_KEY_NAME = "key";

    private static final AmazonS3 AMAZON_S3 = new AmazonS3Client();

    /**
     * Uploads a temp file to S3, removing the file afterwards.
     *
     * @throws IOException if the temp file cannot be created
     */
    public void testUploadWithTempFile() throws IOException {
        File tempFile = File.createTempFile(S3_FILE_PREFIX, S3_FILE_SUFFIX);
        // writeContent(tempFile)
        try {
            AMAZON_S3.putObject(S3_BUCKET_NAME, S3_KEY_NAME, tempFile);
        } finally {
            // File.delete() reports failure via its return value; fall back to
            // deleteOnExit() so a failed delete does not silently leak the file.
            if (!tempFile.delete()) {
                tempFile.deleteOnExit();
            }
        }
    }

    /**
     * Downloads an S3 object into a temp file, removing the file afterwards.
     *
     * @throws IOException if the temp file cannot be created
     */
    public void testDownloadWithTempFile() throws IOException {
        File tempFile = File.createTempFile(S3_FILE_PREFIX, S3_FILE_SUFFIX);
        try {
            AMAZON_S3.getObject(new GetObjectRequest(S3_BUCKET_NAME, S3_KEY_NAME), tempFile);
            // process(tempFile)
        } finally {
            // Same leak guard as in the upload path.
            if (!tempFile.delete()) {
                tempFile.deleteOnExit();
            }
        }
    }
}

However, code like this is not easily testable, since it interacts with the file system, and it is also not the most efficient approach for small files, because writing to the hard disk is slow compared to in-memory I/O.

Here is an example on how you could do it all in memory without using temp files. The example uses Jackson-serialized JSON content, but it applies to any kind of file.

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.util.IOUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.*;
import java.nio.charset.StandardCharsets;

class S3StreamJacksonTest {
    private static final String S3_BUCKET_NAME = "bucket";
    private static final String S3_KEY_NAME = "key";
    private static final String CONTENT_TYPE = "application/json";

    private static final AmazonS3 AMAZON_S3 = new AmazonS3Client();
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
    private static final TestObject TEST_OBJECT = new TestObject("test", 123, 456L);

    /**
     * Serializes {@link #TEST_OBJECT} to JSON and uploads it to S3 entirely in memory.
     *
     * @throws JsonProcessingException if Jackson cannot serialize the object
     */
    public void testUploadWithStream() throws JsonProcessingException {
        String fileContentString = OBJECT_MAPPER.writeValueAsString(TEST_OBJECT);
        byte[] fileContentBytes = fileContentString.getBytes(StandardCharsets.UTF_8);
        InputStream fileInputStream = new ByteArrayInputStream(fileContentBytes);
        ObjectMetadata metadata = new ObjectMetadata();
        metadata.setContentType(CONTENT_TYPE);
        // When uploading from a stream the content length must be set explicitly,
        // otherwise the SDK has to buffer the stream to determine it.
        metadata.setContentLength(fileContentBytes.length);
        PutObjectRequest putObjectRequest = new PutObjectRequest(
                S3_BUCKET_NAME, S3_KEY_NAME, fileInputStream, metadata);
        AMAZON_S3.putObject(putObjectRequest);
    }

    /**
     * Downloads the S3 object and deserializes it in memory.
     *
     * @throws IOException if the download or JSON deserialization fails
     */
    public void testDownloadWithStream() throws IOException {
        // S3Object is Closeable: if it is not closed, the underlying object
        // content stream (and its HTTP connection) is leaked. Use
        // try-with-resources to guarantee cleanup.
        try (S3Object s3Object =
                     AMAZON_S3.getObject(new GetObjectRequest(S3_BUCKET_NAME, S3_KEY_NAME))) {
            String fileContent = IOUtils.toString(s3Object.getObjectContent());
            TestObject testObject = OBJECT_MAPPER.readValue(fileContent, TestObject.class);
            // process(testObject)
        }
    }
}

The downside of this approach is that you have to set the object metadata yourself, whereas you would get it for free with the temp file approach. I also would not try this with large files: since the entire file content is held in memory, at some point performance will start to suffer (I haven't done any benchmarks to find that threshold).

Posted Fri 06 November 2015 by Ivan Dyedov in Java (Java, AWS)