Java - Read file and split into multiple files
import java.io.*;
import java.util.Scanner;
public class split {
public static void main(String args[])
{
try{
// Reading file and getting no. of files to be generated
String inputfile = "C:/test.txt"; // Source File Name.
double nol = 2000.0; // No. of lines to be split and saved in each output file.
File file = new File(inputfile);
Scanner scanner = new Scanner(file);
int count = 0;
while (scanner.hasNextLine())
{
scanner.nextLine();
count++;
}
System.out.println("Lines in the file: " + count); // Displays no. of lines in the input file.
double temp = (count/nol);
int temp1=(int)temp;
int nof=0;
if(temp1==temp)
{
nof=temp1;
}
else
{
nof=temp1+1;
}
System.out.println("No. of files to be generated :"+nof); // Displays no. of files to be generated.
//---------------------------------------------------------------------------------------------------------
// Actual splitting of file into smaller files
FileInputStream fstream = new FileInputStream(inputfile); DataInputStream in = new DataInputStream(fstream);
BufferedReader br = new BufferedReader(new InputStreamReader(in)); String strLine;
for (int j=1;j<=nof;j++)
{
FileWriter fstream1 = new FileWriter("C:/New Folder/File"+j+".txt"); // Destination File Location
BufferedWriter out = new BufferedWriter(fstream1);
for (int i=1;i<=nol;i++)
{
strLine = br.readLine();
if (strLine!= null)
{
out.write(strLine);
if(i!=nol)
{
out.newLine();
}
}
}
out.close();
}
in.close();
}catch (Exception e)
{
System.err.println("Error: " + e.getMessage());
}
}
}
a clean solution to edit.
this solution involves loading the entire file into memory.
set all line of a file in List<String> rowsOfFile;
edit maxSizeFile
to choice max size of a single file splitted
public void splitFile(File fileToSplit) throws IOException {
long maxSizeFile = 10000000 // 10mb
StringBuilder buffer = new StringBuilder((int) maxSizeFile);
int sizeOfRows = 0;
int recurrence = 0;
String fileName;
List<String> rowsOfFile;
rowsOfFile = Files.readAllLines(fileToSplit.toPath(), Charset.defaultCharset());
for (String row : rowsOfFile) {
buffer.append(row);
numOfRow++;
sizeOfRows += row.getBytes(StandardCharsets.UTF_8).length;
if (sizeOfRows >= maxSizeFile) {
fileName = generateFileName(recurrence);
File newFile = new File(fileName);
try (PrintWriter writer = new PrintWriter(newFile)) {
writer.println(buffer.toString());
}
recurrence++;
sizeOfRows = 0;
buffer = new StringBuilder();
}
}
// last rows
if (sizeOfRows > 0) {
fileName = generateFileName(recurrence);
File newFile = createFile(fileName);
try (PrintWriter writer = new PrintWriter(newFile)) {
writer.println(buffer.toString());
}
}
Files.delete(fileToSplit.toPath());
}
method to generate Name of file:
public String generateFileName(int numFile) {
String extension = ".txt";
return "myFile" + numFile + extension;
}
Though its a old question but for reference I am listing out the code which I used to split large files to any sizes and it works with any Java versions above 1.4 .
Sample Split and Join blocks were like below:
public void join(String FilePath) {
long leninfile = 0, leng = 0;
int count = 1, data = 0;
try {
File filename = new File(FilePath);
//RandomAccessFile outfile = new RandomAccessFile(filename,"rw");
OutputStream outfile = new BufferedOutputStream(new FileOutputStream(filename));
while (true) {
filename = new File(FilePath + count + ".sp");
if (filename.exists()) {
//RandomAccessFile infile = new RandomAccessFile(filename,"r");
InputStream infile = new BufferedInputStream(new FileInputStream(filename));
data = infile.read();
while (data != -1) {
outfile.write(data);
data = infile.read();
}
leng++;
infile.close();
count++;
} else {
break;
}
}
outfile.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public void split(String FilePath, long splitlen) {
long leninfile = 0, leng = 0;
int count = 1, data;
try {
File filename = new File(FilePath);
//RandomAccessFile infile = new RandomAccessFile(filename, "r");
InputStream infile = new BufferedInputStream(new FileInputStream(filename));
data = infile.read();
while (data != -1) {
filename = new File(FilePath + count + ".sp");
//RandomAccessFile outfile = new RandomAccessFile(filename, "rw");
OutputStream outfile = new BufferedOutputStream(new FileOutputStream(filename));
while (data != -1 && leng < splitlen) {
outfile.write(data);
leng++;
data = infile.read();
}
leninfile += leng;
leng = 0;
outfile.close();
count++;
}
} catch (Exception e) {
e.printStackTrace();
}
}
Complete java code available here in File Split in Java Program link.
Since one file can be very large, each split file could be large as well.
Example:
Source File Size: 5GB
Num Splits: 5: Destination
File Size: 1GB each (5 files)
There is no way to read this large split chunk in one go, even if we have such a memory. Basically for each split we can read a fix size byte-array
which we know should be feasible in terms of performance as well memory.
NumSplits: 10 MaxReadBytes: 8KB
public static void main(String[] args) throws Exception
{
RandomAccessFile raf = new RandomAccessFile("test.csv", "r");
long numSplits = 10; //from user input, extract it from args
long sourceSize = raf.length();
long bytesPerSplit = sourceSize/numSplits ;
long remainingBytes = sourceSize % numSplits;
int maxReadBufferSize = 8 * 1024; //8KB
for(int destIx=1; destIx <= numSplits; destIx++) {
BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split."+destIx));
if(bytesPerSplit > maxReadBufferSize) {
long numReads = bytesPerSplit/maxReadBufferSize;
long numRemainingRead = bytesPerSplit % maxReadBufferSize;
for(int i=0; i<numReads; i++) {
readWrite(raf, bw, maxReadBufferSize);
}
if(numRemainingRead > 0) {
readWrite(raf, bw, numRemainingRead);
}
}else {
readWrite(raf, bw, bytesPerSplit);
}
bw.close();
}
if(remainingBytes > 0) {
BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split."+(numSplits+1)));
readWrite(raf, bw, remainingBytes);
bw.close();
}
raf.close();
}
static void readWrite(RandomAccessFile raf, BufferedOutputStream bw, long numBytes) throws IOException {
byte[] buf = new byte[(int) numBytes];
int val = raf.read(buf);
if(val != -1) {
bw.write(buf);
}
}