Compress directory to tar.gz with Commons Compress
I followed this solution and it worked until I was processing a larger set of files and it randomly crashes after processing 15000 - 16000 files. the following line is leaking file handlers:
IOUtils.copy(new FileInputStream(f), tOut);
and the code crashed with a "Too many open files" error at the OS level The following minor change fix the problem:
FileInputStream in = new FileInputStream(f);
IOUtils.copy(in, tOut);
in.close();
I haven't figured out what exactly was going wrong but a scouring of google caches I found a working example. Sorry for the tumbleweed!
public void CreateTarGZ()
throws FileNotFoundException, IOException
{
try {
System.out.println(new File(".").getAbsolutePath());
dirPath = "parent/childDirToCompress/";
tarGzPath = "archive.tar.gz";
fOut = new FileOutputStream(new File(tarGzPath));
bOut = new BufferedOutputStream(fOut);
gzOut = new GzipCompressorOutputStream(bOut);
tOut = new TarArchiveOutputStream(gzOut);
addFileToTarGz(tOut, dirPath, "");
} finally {
tOut.finish();
tOut.close();
gzOut.close();
bOut.close();
fOut.close();
}
}
private void addFileToTarGz(TarArchiveOutputStream tOut, String path, String base)
throws IOException
{
File f = new File(path);
System.out.println(f.exists());
String entryName = base + f.getName();
TarArchiveEntry tarEntry = new TarArchiveEntry(f, entryName);
tOut.putArchiveEntry(tarEntry);
if (f.isFile()) {
IOUtils.copy(new FileInputStream(f), tOut);
tOut.closeArchiveEntry();
} else {
tOut.closeArchiveEntry();
File[] children = f.listFiles();
if (children != null) {
for (File child : children) {
System.out.println(child.getName());
addFileToTarGz(tOut, child.getAbsolutePath(), entryName + "/");
}
}
}
}
I ended up doing the following:
public URL createTarGzip() throws IOException {
Path inputDirectoryPath = ...
File outputFile = new File("/path/to/filename.tar.gz");
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile);
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream);
GzipCompressorOutputStream gzipOutputStream = new GzipCompressorOutputStream(bufferedOutputStream);
TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream(gzipOutputStream)) {
tarArchiveOutputStream.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
tarArchiveOutputStream.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
List<File> files = new ArrayList<>(FileUtils.listFiles(
inputDirectoryPath,
new RegexFileFilter("^(.*?)"),
DirectoryFileFilter.DIRECTORY
));
for (int i = 0; i < files.size(); i++) {
File currentFile = files.get(i);
String relativeFilePath = new File(inputDirectoryPath.toUri()).toURI().relativize(
new File(currentFile.getAbsolutePath()).toURI()).getPath();
TarArchiveEntry tarEntry = new TarArchiveEntry(currentFile, relativeFilePath);
tarEntry.setSize(currentFile.length());
tarArchiveOutputStream.putArchiveEntry(tarEntry);
tarArchiveOutputStream.write(IOUtils.toByteArray(new FileInputStream(currentFile)));
tarArchiveOutputStream.closeArchiveEntry();
}
tarArchiveOutputStream.close();
return outputFile.toURI().toURL();
}
}
This takes care of the some of the edge cases that come up in the other solutions.