How do I download a file with Node.js without using third-party libraries?
I don't need anything special. I only want to download a file from a given URL, and then save it to a given directory.
You can create an HTTP GET request and pipe its response into a writable file stream:
const http = require('http'); // or 'https' for https:// URLs
const fs = require('fs');

const file = fs.createWriteStream("file.jpg");
const request = http.get("http://i3.ytimg.com/vi/J---aiyznGQ/mqdefault.jpg", function(response) {
  response.pipe(file);

  // after download completed close filestream
  file.on("finish", () => {
    file.close();
    console.log("Download Completed");
  });
});
If you want to support gathering information on the command line, like specifying a target file, directory, or URL, check out something like Commander.
A more detailed explanation is available at https://sebhastian.com/nodejs-download-file/.
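If it helps, here is a minimal sketch of that Commander idea; the option names and defaults are placeholders I picked for illustration, and it assumes a Commander version (v5+) that exposes the { program } export:

// Hypothetical CLI sketch using Commander; flag names and defaults are illustrative only.
const { program } = require('commander');
const http = require('http'); // or 'https' for https:// URLs
const fs = require('fs');

program
  .requiredOption('-u, --url <url>', 'URL of the file to download')
  .option('-o, --output <path>', 'destination file path', 'file.jpg');

program.parse(process.argv);
const { url, output } = program.opts();

const file = fs.createWriteStream(output);
http.get(url, (response) => {
  response.pipe(file);
  file.on('finish', () => file.close(() => console.log('Download Completed')));
});

You would then run it roughly like node download.js --url http://example.com/file.jpg -o out.jpg (flag names per the sketch above).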
Don't forget to handle errors! The following code is based on Augusto Roman's answer.
var http = require('http');
var fs = require('fs');

var download = function(url, dest, cb) {
  var file = fs.createWriteStream(dest);
  var request = http.get(url, function(response) {
    response.pipe(file);
    file.on('finish', function() {
      file.close(cb); // close() is async, call cb after close completes.
    });
  }).on('error', function(err) { // Handle errors
    fs.unlink(dest); // Delete the file async. (But we don't check the result)
    if (cb) cb(err.message);
  });
};
Is download() itself pipeable?
If I wanted to call download(), how would I do it? What would I place as the cb argument? I have download('someURI', '/some/destination', cb) but don't understand what to put in cb.
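For what it's worth, a minimal usage sketch for the download() above (the URL and path are placeholders): cb simply receives an error message on failure and nothing on success.

// Usage sketch for the download() above; cb gets an error message or undefined.
download('http://example.com/file.jpg', './file.jpg', function (err) {
  if (err) {
    console.error('Download failed:', err);
  } else {
    console.log('Download finished');
  }
});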
As Michelle Tilley said, but with the appropriate control flow:
var http = require('http');
var fs = require('fs');

var download = function(url, dest, cb) {
  var file = fs.createWriteStream(dest);
  http.get(url, function(response) {
    response.pipe(file);
    file.on('finish', function() {
      file.close(cb);
    });
  });
}
Without waiting for the finish event, naive scripts may end up with an incomplete file.
Edit: Thanks to @Augusto Roman for pointing out that cb should be passed to file.close(), not called explicitly.
Speaking of handling errors, it's even better to listen for request errors too. I'd also validate by checking the response code; here only a 200 (response.statusCode == 200) is considered a success, but other codes might be acceptable.
const fs = require('fs');
const http = require('http');

const download = (url, dest, cb) => {
  const file = fs.createWriteStream(dest);

  const request = http.get(url, (response) => {
    // check if response is success
    if (response.statusCode !== 200) {
      return cb('Response status was ' + response.statusCode);
    }

    response.pipe(file);
  });

  // close() is async, call cb after close completes
  file.on('finish', () => file.close(cb));

  // check for request error too
  request.on('error', (err) => {
    fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
  });

  file.on('error', (err) => { // Handle errors
    fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
  });
};
Despite the relative simplicity of this code, I would advise using the request module, as it handles many more protocols (hello HTTPS!) that aren't natively supported by http.
That would be done like so:
const fs = require('fs');
const request = require('request');

const download = (url, dest, cb) => {
  const file = fs.createWriteStream(dest);
  const sendReq = request.get(url);

  // verify response code
  sendReq.on('response', (response) => {
    if (response.statusCode !== 200) {
      return cb('Response status was ' + response.statusCode);
    }

    sendReq.pipe(file);
  });

  // close() is async, call cb after close completes
  file.on('finish', () => file.close(cb));

  // check for request errors
  sendReq.on('error', (err) => {
    fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
  });

  file.on('error', (err) => { // Handle errors
    fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
  });
};
EDIT: To make it work with https, change const http = require('http'); to const http = require('https');
If response.statusCode !== 200, the cb on finish will never be called.
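One way to address that, sketched here as an adaptation rather than the original answer's code: destroy the stream and remove the empty file on a non-200 status, so the callback still fires exactly once and nothing is left waiting on 'finish'.

// Sketch adapted from the answer above: clean up on a non-200 status.
const fs = require('fs');
const http = require('http');

const download = (url, dest, cb) => {
  const file = fs.createWriteStream(dest);

  const request = http.get(url, (response) => {
    if (response.statusCode !== 200) {
      file.destroy();     // never end() the stream, so 'finish' will not fire
      response.resume();  // drain the response so the socket is freed
      return fs.unlink(dest, () => cb('Response status was ' + response.statusCode));
    }
    response.pipe(file);
  });

  file.on('finish', () => file.close(cb));
  request.on('error', (err) => fs.unlink(dest, () => cb(err.message)));
  file.on('error', (err) => fs.unlink(dest, () => cb(err.message)));
};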
gfxmonk's answer has a very tight data race between the callback and the file.close() completing. file.close() actually takes a callback that is called when the close has completed. Otherwise, immediate uses of the file may fail (very rarely!).
A complete solution is:
var http = require('http');
var fs = require('fs');

var download = function(url, dest, cb) {
  var file = fs.createWriteStream(dest);
  var request = http.get(url, function(response) {
    response.pipe(file);
    file.on('finish', function() {
      file.close(cb); // close() is async, call cb after close completes.
    });
  });
}
Without waiting for the finish event, naive scripts may end up with an incomplete file. Without scheduling the cb callback via close, you may get a race between accessing the file and the file actually being ready.
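To make the point concrete, a small usage sketch (placeholder URL and path): the file is only touched inside cb, i.e. after close() has completed.

// Usage sketch: only touch the file once cb fires, after close() has completed.
download('http://example.com/a.jpg', '/tmp/a.jpg', function (err) {
  if (err) return console.error('Download failed:', err);
  fs.stat('/tmp/a.jpg', function (statErr, stats) {
    if (!statErr) console.log('Saved', stats.size, 'bytes');
  });
});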
Does this still work if the var request = assignment is removed?
Maybe node.js has changed, but it seems there are some problems with the other solutions (using node v8.1.2):
You don't need to call file.close() in the finish event. By default fs.createWriteStream is set to autoClose: https://nodejs.org/api/fs.html#fs_fs_createwritestream_path_options
file.close() should be called on error. Maybe this is not needed when the file is deleted (unlink()), but normally it is: https://nodejs.org/api/stream.html#stream_readable_pipe_destination_options
The temp file is not deleted on statusCode !== 200
fs.unlink() without a callback is deprecated (outputs a warning)
If the dest file exists, it is overwritten
Below is a modified solution (using ES6 and promises) which handles these problems.
const http = require("http");
const fs = require("fs");
function download(url, dest) {
return new Promise((resolve, reject) => {
const file = fs.createWriteStream(dest, { flags: "wx" });
const request = http.get(url, response => {
if (response.statusCode === 200) {
response.pipe(file);
} else {
file.close();
fs.unlink(dest, () => {}); // Delete temp file
reject(`Server responded with ${response.statusCode}: ${response.statusMessage}`);
}
});
request.on("error", err => {
file.close();
fs.unlink(dest, () => {}); // Delete temp file
reject(err.message);
});
file.on("finish", () => {
resolve();
});
file.on("error", err => {
file.close();
if (err.code === "EEXIST") {
reject("File already exists");
} else {
fs.unlink(dest, () => {}); // Delete temp file
reject(err.message);
}
});
});
}
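A small usage sketch for the function above (URL and destination are placeholders):

// Usage sketch for the promise-based download() above.
download('http://example.com/picture.png', './picture.png')
  .then(() => console.log('Download finished'))
  .catch((reason) => console.error('Download failed:', reason));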
const https = require("https");
for const http = require("http");
From the docs for fs.createWriteStream(): "If autoClose is set to true (default behavior) on 'error' or 'finish' the file descriptor will be closed automatically." So there is no need to manually close the file on error.
For those who came in search of an ES6-style, promise-based way, I guess it would be something like:
var http = require('http');
var fs = require('fs');

function pDownload(url, dest) {
  var file = fs.createWriteStream(dest);
  return new Promise((resolve, reject) => {
    var responseSent = false; // flag to make sure that response is sent only once.
    http.get(url, response => {
      response.pipe(file);
      file.on('finish', () => {
        file.close(() => {
          if (responseSent) return;
          responseSent = true;
          resolve();
        });
      });
    }).on('error', err => {
      if (responseSent) return;
      responseSent = true;
      reject(err);
    });
  });
}

//example
pDownload(url, fileLocation)
  .then(() => console.log('downloaded file no issues...'))
  .catch(e => console.error('error while downloading', e));
The responseSent flag caused, for some reason I haven't had the time to investigate, my file to be downloaded incompletely. No errors popped up, but the .txt file I was populating had half of the rows that needed to be there. Removing the logic for the flag fixed it. Just wanted to point that out in case someone has issues with this approach. Still, +1.
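For reference, a sketch of what the simplified version without the flag looks like (my own sketch, not the commenter's exact code):

// Sketch of pDownload without the responseSent flag, as the comment suggests.
var http = require('http');
var fs = require('fs');

function pDownload(url, dest) {
  var file = fs.createWriteStream(dest);
  return new Promise((resolve, reject) => {
    http.get(url, (response) => {
      response.pipe(file);
      file.on('finish', () => file.close(() => resolve()));
    }).on('error', (err) => reject(err));
  });
}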
Based on the other answers above and some subtle issues, here is my attempt.
Check the file does not exist before hitting the network by using fs.access.
Only create the fs.createWriteStream if you get a 200 OK status code. This reduces the amount of fs.unlink commands required to tidy up temporary file handles.
Even on a 200 OK we can still possibly reject due to an EEXIST "file already exists" (imagine another process created the file whilst we were doing network calls).
Recursively call download if you get a 301 Moved Permanently or 302 Found (Moved Temporarily) redirect, following the link location provided in the header.
The issue with some of the other answers recursively calling download was that they called resolve(download) instead of download(...).then(() => resolve()), so the Promise would return before the download actually finished. This way the nested chain of promises resolves in the correct order.
It might seem cool to clean up the temp file asynchronously, but I chose to reject only after that completed too, so I know that everything, start to finish, is done when this promise resolves or rejects.
const https = require('https');
const fs = require('fs');

/**
 * Download a resource from `url` to `dest`.
 * @param {string} url - Valid URL to attempt download of resource
 * @param {string} dest - Valid path to save the file.
 * @returns {Promise<void>} - Returns asynchronously when successfully completed download
 */
function download(url, dest) {
  return new Promise((resolve, reject) => {
    // Check file does not exist yet before hitting network
    fs.access(dest, fs.constants.F_OK, (err) => {
      if (err === null) return reject('File already exists'); // bail out before making the request

      const request = https.get(url, response => {
        if (response.statusCode === 200) {
          const file = fs.createWriteStream(dest, { flags: 'wx' });
          file.on('finish', () => resolve());
          file.on('error', err => {
            file.close();
            if (err.code === 'EEXIST') reject('File already exists');
            else fs.unlink(dest, () => reject(err.message)); // Delete temp file
          });
          response.pipe(file);
        } else if (response.statusCode === 302 || response.statusCode === 301) {
          // Recursively follow redirects, only a 200 will resolve.
          download(response.headers.location, dest).then(() => resolve());
        } else {
          reject(`Server responded with ${response.statusCode}: ${response.statusMessage}`);
        }
      });

      request.on('error', err => {
        reject(err.message);
      });
    });
  });
}
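And a usage sketch for this version (URL and destination are placeholders):

// Usage sketch for the redirect-following download() above.
download('https://example.com/photo.jpg', './photo.jpg')
  .then(() => console.log('Download finished'))
  .catch((reason) => console.error('Download failed:', reason));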
Solution with a timeout, to prevent memory leaks:
The following code is based on Brandon Tilley's answer:
var http = require('http'),
    fs = require('fs');

var request = http.get("http://example12345.com/yourfile.html", function(response) {
  if (response.statusCode === 200) {
    var file = fs.createWriteStream("copy.html");
    response.pipe(file);
  }
  // Add timeout.
  request.setTimeout(12000, function () {
    request.abort();
  });
});
Don't create the file when you get an error, and prefer to use a timeout to close your request after X seconds.
http.get("http://example.com/yourfile.html",function(){})
http.get
. The memory leak is only if the file take too long to be downloaded.
I think you can use the child_process module and the curl command.
const cp = require('child_process');

let download = async function(uri, filename) {
  let command = `curl -o ${filename} '${uri}'`;
  let result = cp.execSync(command);
};

async function test() {
  await download('http://zhangwenning.top/20181221001417.png', './20181221001417.png');
}

test();
In addition, when you want to download large or multiple files, you can use the cluster module to make use of more CPU cores.
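A rough sketch of that idea, assuming a hypothetical list of URLs and that curl is available on the system; one worker is forked per file:

// Rough sketch: one worker per file (hypothetical URL list); each worker shells out to curl.
const cluster = require('cluster');
const cp = require('child_process');

const files = [
  ['http://example.com/a.png', './a.png'],
  ['http://example.com/b.png', './b.png'],
];

if (cluster.isMaster) {
  // Fork one worker per file, passing the index through the environment.
  files.forEach((_, index) => cluster.fork({ FILE_INDEX: index }));
} else {
  const [uri, filename] = files[Number(process.env.FILE_INDEX)];
  cp.execSync(`curl -o ${filename} '${uri}'`); // assumes curl is installed
  process.exit(0);
}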
Modern version (ES6, Promises, Node 12.x+), works for https/http. It also supports 302 and 301 redirects. I decided not to use third-party libraries since this can easily be done with the standard Node.js libs.
// download.js
import fs from 'fs'
import https from 'https'
import http from 'http'
import { basename } from 'path'
import { URL } from 'url'

const TIMEOUT = 10000

function download (url, dest) {
  const uri = new URL(url)
  if (!dest) {
    dest = basename(uri.pathname)
  }
  const pkg = url.toLowerCase().startsWith('https:') ? https : http

  return new Promise((resolve, reject) => {
    const request = pkg.get(uri.href).on('response', (res) => {
      if (res.statusCode === 200) {
        const file = fs.createWriteStream(dest, { flags: 'wx' })
        res
          .on('end', () => {
            file.end()
            // console.log(`${uri.pathname} downloaded to: ${path}`)
            resolve()
          })
          .on('error', (err) => {
            file.destroy()
            fs.unlink(dest, () => reject(err))
          }).pipe(file)
      } else if (res.statusCode === 302 || res.statusCode === 301) {
        // Recursively follow redirects, only a 200 will resolve.
        download(res.headers.location, dest).then(() => resolve())
      } else {
        reject(new Error(`Download request failed, response status: ${res.statusCode} ${res.statusMessage}`))
      }
    })

    request.setTimeout(TIMEOUT, function () {
      request.abort()
      reject(new Error(`Request timeout after ${TIMEOUT / 1000.0}s`))
    })
  })
}

export default download
Kudos to Andrey Tkachenko for his gist, which I modified.
Include it in another file and use it like this:
const download = require('./download.js')

const url = 'https://raw.githubusercontent.com/replace-this-with-your-remote-file'

console.log('Downloading ' + url)

async function run() {
  console.log('Downloading file')
  try {
    await download(url, 'server')
    console.log('Download done')
  } catch (e) {
    console.log('Download failed')
    console.log(e.message)
  }
}

run()
Vince Yuan's code is great, but it seems there is something wrong.
var http = require('http');
var fs = require('fs');

function download(url, dest, callback) {
  var file = fs.createWriteStream(dest);
  var request = http.get(url, function (response) {
    response.pipe(file);
    file.on('finish', function () {
      file.close(callback); // close() is async, call callback after close completes.
    });
    file.on('error', function (err) {
      fs.unlink(dest); // Delete the file async. (But we don't check the result)
      if (callback)
        callback(err.message);
    });
  });
}
const http = require('http');
const fs = require('fs');

const download = (url, path) => new Promise((resolve, reject) => {
  http.get(url, response => {
    const statusCode = response.statusCode;

    if (statusCode !== 200) {
      return reject('Download error!');
    }

    const writeStream = fs.createWriteStream(path);
    response.pipe(writeStream);

    writeStream.on('error', () => reject('Error writing to file!'));
    writeStream.on('finish', () => writeStream.close(resolve));
  });
}).catch(err => console.error(err));
I prefer request() because you can use both http and https with it.
request('http://i3.ytimg.com/vi/J---aiyznGQ/mqdefault.jpg')
.pipe(fs.createWriteStream('cat.jpg'))
"As of Feb 11th 2020, request is fully deprecated. No new changes are expected to land. In fact, none have landed for some time."
✅ So if you use pipeline, it would close all other streams and make sure that there are no memory leaks.
Working example:
const http = require('http');
const { pipeline } = require('stream');
const fs = require('fs');

const file = fs.createWriteStream('./file.jpg');

http.get('http://via.placeholder.com/150/92c952', response => {
  pipeline(
    response,
    file,
    err => {
      if (err) console.error('Pipeline failed.', err);
      else console.log('Pipeline succeeded.');
    }
  );
});
From my answer to "What's the difference between .pipe and .pipeline on streams".
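If you're on Node 15 or later, there's also a promise flavour of the same idea via stream/promises; a minimal sketch, reusing the placeholder URL from the example above:

// Sketch using the promise-based pipeline from 'stream/promises' (Node 15+).
const http = require('http');
const fs = require('fs');
const { pipeline } = require('stream/promises');

http.get('http://via.placeholder.com/150/92c952', async (response) => {
  try {
    await pipeline(response, fs.createWriteStream('./file.jpg'));
    console.log('Pipeline succeeded.');
  } catch (err) {
    console.error('Pipeline failed.', err);
  }
});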
You can use https://github.com/douzi8/ajax-request#download
request.download('http://res.m.ctrip.com/html5/Content/images/57.png',
  function(err, res, body) {}
);
Isn't ajax-request a third-party library?
Download using a promise, which resolves to a readable stream. Extra logic is included to handle redirects.
var http = require('http');
var promise = require('bluebird');
var url = require('url');
var fs = require('fs');
var assert = require('assert');

function download(option) {
  assert(option);
  if (typeof option == 'string') {
    option = url.parse(option);
  }

  return new promise(function(resolve, reject) {
    var req = http.request(option, function(res) {
      if (res.statusCode == 200) {
        resolve(res);
      } else {
        if (res.statusCode === 301 && res.headers.location) {
          resolve(download(res.headers.location));
        } else {
          reject(res.statusCode);
        }
      }
    })
    .on('error', function(e) {
      reject(e);
    })
    .end();
  });
}

download('http://localhost:8080/redirect')
  .then(function(stream) {
    try {
      var writeStream = fs.createWriteStream('holyhigh.jpg');
      stream.pipe(writeStream);
    } catch (e) {
      console.error(e);
    }
  });
Using the http2 Module
I saw answers using the http, https, and request modules. I'd like to add one using yet another native NodeJS module that supports either the http or https protocol:
Solution
I've referenced the official NodeJS API, as well as some of the other answers on this question for something I'm doing. The following was the test I wrote to try it out, which worked as intended:
import * as fs from 'fs';
import * as _path from 'path';
import * as http2 from 'http2';

/* ... */

async function download( host, query, destination )
{
    return new Promise
    (
        ( resolve, reject ) =>
        {
            // Connect to client:
            const client = http2.connect( host );
            client.on( 'error', error => reject( error ) );

            // Prepare a write stream:
            const fullPath = _path.join( fs.realpathSync( '.' ), destination );
            const file = fs.createWriteStream( fullPath, { flags: "wx" } );
            file.on( 'error', error => reject( error ) );

            // Create a request:
            const request = client.request( { [':path']: query } );

            // On initial response handle non-success (!== 200) status error:
            request.on
            (
                'response',
                ( headers/*, flags*/ ) =>
                {
                    if( headers[':status'] !== 200 )
                    {
                        file.close();
                        fs.unlink( fullPath, () => {} );
                        reject( new Error( `Server responded with ${headers[':status']}` ) );
                    }
                }
            );

            // Set encoding for the payload:
            request.setEncoding( 'utf8' );

            // Write the payload to file:
            request.on( 'data', chunk => file.write( chunk ) );

            // Handle ending the request
            request.on
            (
                'end',
                () =>
                {
                    file.close();
                    client.close();
                    resolve( { result: true } );
                }
            );

            /*
                You can use request.setTimeout( 12000, () => {} ) for aborting
                after period of inactivity
            */

            // Fire off [flush] the request:
            request.end();
        }
    );
}
Then, for example:
/* ... */

let downloaded = await download( 'https://gitlab.com', '/api/v4/...', 'tmp/tmpFile' );
if( downloaded.result )
{
    // Success!
}
// ...
External References
https://nodejs.org/api/http2.html#http2_client_side_example
https://nodejs.org/api/http2.html#http2_clienthttp2session_request_headers_options
EDIT Information
The solution was originally written for TypeScript, with the function as a class method. Without noting this, the solution would not have worked for the presumed JavaScript user without the proper function declaration, which our contributor has promptly added. Thanks!
download.js (i.e. /project/utils/download.js)
const fs = require('fs');
const request = require('request');

const download = (uri, filename, callback) => {
  request.head(uri, (err, res, body) => {
    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};
module.exports = { download };
app.js
...
// part of imports
const { download } = require('./utils/download');
...
// add this function wherever
download('https://imageurl.com', 'imagename.jpg', () => {
  console.log('done');
});
If you are using Express, use the res.download() method; otherwise, use the fs module.
app.get('/read-android', function(req, res) {
  var file = "/home/sony/Documents/docs/Android.apk";
  res.download(file);
});

(or)

function readApp(req, res) {
  var file = req.fileName,
      filePath = "/home/sony/Documents/docs/";

  fs.exists(filePath, function(exists) {
    if (exists) {
      res.writeHead(200, {
        "Content-Type": "application/octet-stream",
        "Content-Disposition": "attachment; filename=" + file
      });
      fs.createReadStream(filePath + file).pipe(res);
    } else {
      res.writeHead(400, { "Content-Type": "text/plain" });
      res.end("ERROR File does NOT Exists.ipa");
    }
  });
}
Path: img/, type: .jpg, with a random unique filename:
function resim(url) {
  var http = require("http");
  var fs = require("fs");

  var sayi = Math.floor(Math.random() * 10000000000);
  var uzanti = ".jpg";
  var file = fs.createWriteStream("img/" + sayi + uzanti);

  var request = http.get(url, function(response) {
    response.pipe(file);
  });

  return sayi + uzanti;
}
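A usage sketch (placeholder URL); note that the function returns the generated name immediately, before the download has actually finished:

// Usage sketch: the name comes back right away; the file under img/ fills in asynchronously.
var savedName = resim('http://example.com/picture.jpg');
console.log('Will be saved as img/' + savedName);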
Just to point out: without a library this can be buggy. Here are a few issues:
It can't handle HTTP redirection, like this URL https://calibre-ebook.com/dist/portable, which is binary.
The http module can't handle an https URL; you will get Protocol "https:" not supported.
Here are my suggestions:
Call a system tool like wget or curl (see the sketch just below this list).
Use some tool like node-wget-promise, which is also very simple to use: var wget = require('node-wget-promise'); wget('http://nodejs.org/images/logo.svg');
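A minimal sketch of the first suggestion, shelling out to wget through child_process (it assumes wget is installed; the output filename is a placeholder, the URL is the one mentioned above):

// Sketch for the first suggestion: shell out to wget (must be installed on the system).
const { execFile } = require('child_process');

execFile('wget', ['-O', './portable.zip', 'https://calibre-ebook.com/dist/portable'], (err) => {
  if (err) return console.error('wget failed:', err);
  console.log('Download done');
});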
Writing my own solution, since the existing ones didn't fit my requirements.
What this covers:
HTTPS download (switch package to http for HTTP downloads)
Promise based function
Handle forwarded path (status 302)
Browser header - required on a few CDNs
Filename from URL (as well as hardcoded)
Error handling
It's typed, it's safer. Feel free to drop the types if you're working with plain JS (no Flow, no TS) or convert to a .d.ts file.
index.js
import httpsDownload from './httpsDownload';
httpsDownload('https://example.com/file.zip', './');
httpsDownload.[js|ts]
import https from "https";
import fs from "fs";
import path from "path";
function download(
url: string,
folder?: string,
filename?: string
): Promise<void> {
return new Promise((resolve, reject) => {
const req = https
.request(url, { headers: { "User-Agent": "javascript" } }, (response) => {
if (response.statusCode === 302 && response.headers.location != null) {
download(
buildNextUrl(url, response.headers.location),
folder,
filename
)
.then(resolve)
.catch(reject);
return;
}
const file = fs.createWriteStream(
buildDestinationPath(url, folder, filename)
);
response.pipe(file);
file.on("finish", () => {
file.close();
resolve();
});
})
.on("error", reject);
req.end();
});
}
function buildNextUrl(current: string, next: string) {
const isNextUrlAbsolute = RegExp("^(?:[a-z]+:)?//").test(next);
if (isNextUrlAbsolute) {
return next;
} else {
const currentURL = new URL(current);
const fullHost = `${currentURL.protocol}//${currentURL.hostname}${
currentURL.port ? ":" + currentURL.port : ""
}`;
return `${fullHost}${next}`;
}
}
function buildDestinationPath(url: string, folder?: string, filename?: string) {
return path.join(folder ?? "./", filename ?? generateFilenameFromPath(url));
}
function generateFilenameFromPath(url: string): string {
const urlParts = url.split("/");
return urlParts[urlParts.length - 1] ?? "";
}
export default download;
const http = require('http');
const fs = require('fs');

function download(url, dest, cb) {
  var request = http.get(url, function (response) {
    const settings = {
      flags: 'w',
      encoding: 'utf8',
      fd: null,
      mode: 0o666,
      autoClose: true
    };

    // response.pipe(fs.createWriteStream(dest, settings));
    var file = fs.createWriteStream(dest, settings);
    response.pipe(file);

    file.on('finish', function () {
      let okMsg = {
        text: `File downloaded successfully`
      };
      cb(okMsg);
      file.end();
    });
  }).on('error', function (err) { // Handle errors
    fs.unlink(dest); // Delete the file async. (But we don't check the result)
    let errorMsg = {
      text: `Error in file downloading: ${err.message}`
    };
    if (cb) cb(errorMsg);
  });
}
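A usage sketch for this variant (placeholder URL and path); the callback receives a message object in both the success and the error case:

// Usage sketch: cb always receives an object with a text property.
download('http://example.com/notes.txt', './notes.txt', (msg) => {
  console.log(msg.text);
});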
var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback) {
  request.head(uri, function(err, res, body) {
    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
};

download('https://www.cryptocompare.com/media/19684/doge.png', 'icons/taskks12.png', function() {
  console.log('done');
});
Here's yet another way to handle it without a third-party dependency, and also checking for redirects:
var https = require('https');
var fs = require('fs');

var download = function(url, dest, cb) {
  var file = fs.createWriteStream(dest);
  https.get(url, function(response) {
    if ([301, 302].indexOf(response.statusCode) !== -1) {
      // Follow the redirect instead of piping this response.
      return download(response.headers.location, dest, cb);
    }

    response.pipe(file);
    file.on('finish', function() {
      file.close(cb); // close() is async, call cb after close completes.
    });
  });
}
You can try using res.redirect to the https file download URL, and then it will download the file.
Like: res.redirect('https://static.file.com/file.txt');
I've found this approach to be the most helpful, especially when it comes to PDFs and random other files.
import fs from "fs";
fs.appendFile("output_file_name.ext", fileDataInBytes, (err) => {
if (err) throw err;
console.log("File saved!");
});
I suggest you use res.download, as follows:
app.get('/download', function(req, res) {
  const file = `${__dirname}/folder/abc.csv`;
  res.download(file); // Set disposition and send it.
});
var requestModule=require("request");
requestModule(filePath).pipe(fs.createWriteStream('abc.zip'));
node.js:201
  throw e; // process.nextTick error, or 'error' event on first tick
        ^
Error: connect ECONNREFUSED
    at errnoException (net.js:646:11)
    at Object.afterConnect [as oncomplete] (net.js:637:18)
For https URLs you must use the https module, otherwise it will throw an error. Also note that createWriteStream will set autoClose to true, and readable.pipe will call end() on the writable when the readable ends.