Stream uploading file to S3 on Node.js using formidable and (knox or aws-sdk)

Using AWS S3's multipartUpload (s3-upload-stream as working module) and node-formidable's readable stream, you can pipe the stream to upload like this:

var formidable = require('formidable');
var http = require('http');
var util = require('util');
var AWS      = require('aws-sdk');
var config = require('./config');
var s3 = new AWS.S3({
    accessKeyId: config.get('S3_ACCESS_KEY'),
    secretAccessKey: config.get('S3_SECRET_KEY'),
    apiVersion: '2006-03-01'
});
var s3Stream = require('s3-upload-stream')(s3);
var bucket = 'bucket-name';
var key = 'abcdefgh';


http.createServer(function(req, res) {

    if (req.url == '/upload' && req.method.toLowerCase() == 'post') {

        var form = new formidable.IncomingForm();
        form.on('progress', function(bytesReceived, bytesExpected) {
            //console.log('onprogress', parseInt( 100 * bytesReceived / bytesExpected ), '%');
        });

        form.on('error', function(err) {
            console.log('err',err);
        });

        // This 'end' is for the client to finish uploading
        // upload.on('uploaded') is when the uploading is
        // done on AWS S3
        form.on('end', function() {
            console.log('ended!!!!', arguments);
        });

        form.on('aborted', function() {
            console.log('aborted', arguments);
        });

        form.onPart = function(part) {
            console.log('part',part);
            // part looks like this
            //    {
            //        readable: true,
            //        headers:
            //        {
            //            'content-disposition': 'form-data; name="upload"; filename="00video38.mp4"',
            //            'content-type': 'video/mp4'
            //        },
            //        name: 'upload',
            //            filename: '00video38.mp4',
            //        mime: 'video/mp4',
            //        transferEncoding: 'binary',
            //        transferBuffer: ''
            //    }

            var start = new Date().getTime();
            var upload = s3Stream.upload({
                "Bucket": bucket,
                "Key": part.filename
            });

            // Optional configuration
            //upload.maxPartSize(20971520); // 20 MB
            upload.concurrentParts(5);

            // Handle errors.
            upload.on('error', function (error) {
                console.log('errr',error);
            });
            upload.on('part', function (details) {
                console.log('part',details);
            });
            upload.on('uploaded', function (details) {
                var end = new Date().getTime();
                console.log('it took',end-start);
                console.log('uploaded',details);
            });

            // Maybe you could add compress like
            // part.pipe(compress).pipe(upload)
            part.pipe(upload);
        };

        form.parse(req, function(err, fields, files) {
            res.writeHead(200, {'content-type': 'text/plain'});
            res.write('received upload:\n\n');
            res.end(util.inspect({fields: fields, files: files}));
        });
        return;
    }

    // show a file upload form
    res.writeHead(200, {'content-type': 'text/html'});
    res.end(
        '<form action="/upload" enctype="multipart/form-data" method="post">'+
        '<input type="text" name="title"><br>'+
        '<input type="file" name="upload" multiple="multiple"><br>'+
        '<input type="submit" value="Upload">'+
        '</form>'
    );
}).listen(8080);

Well, according to the creator of Formidable, direct streaming to Amazon S3 is impossible :

The S3 API requires you to provide the size of new files when creating them. This information is not available for multipart/form-data files until they have been fully received. This means streaming is impossible.

Indeed, form.bytesExpected refers to the size of the whole form, and not the size of the single file.

The data must therefore either hit the memory or the disk on the server first before being uploaded to S3.


Since this post is so old and I believe streaming directly is now supported, I spent a lot of time reading out of date answers on this topic...

If it helps anyone I was able to stream from the client to s3 directly without the need for installing packages:

https://gist.github.com/mattlockyer/532291b6194f6d9ca40cb82564db9d2a

The server assumes req is a stream object, in my case a File object was used in xhr(send) which will send binary data in modern browsers.

const fileUploadStream = (req, res) => {
  //get "body" args from header
  const { id, fn } = JSON.parse(req.get('body'));
  const Key = id + '/' + fn; //upload to s3 folder "id" with filename === fn
  const params = {
    Key,
    Bucket: bucketName, //set somewhere
    Body: req, //req is a stream
  };
  s3.upload(params, (err, data) => {
    if (err) {
      res.send('Error Uploading Data: ' + JSON.stringify(err) + '\n' + JSON.stringify(err.stack));
    } else {
      res.send(Key);
    }
  });
};

Yes it breaks convention but if you look at the gist it's much cleaner than anything else I found relying on other packages.

+1 for pragmatism and thanks to @SalehenRahman for his help.