How to sum file sizes from ls-like log output with Bytes, KiB, MiB and GiB units
You can turn the input into a single arithmetic expression before sending it to bc:
# Turn every "<number> <unit>" line into a "<number> * <factor> + " term,
# join all terms into one expression and let bc evaluate it.
printf '%s\n' \
  '0 Bytes' \
  '3.9 KiB' \
  '201 Bytes' \
  '2.0 KiB' \
  '2.7 MiB' \
  '1.3 GiB' |
  sed -e 's/Bytes//' \
      -e 's/KiB/* 1024/' \
      -e 's/MiB/* 1024 * 1024/' \
      -e 's/GiB/* 1024 * 1024 * 1024/' \
      -e 's/$/ + /' |
  # Glue all terms onto a single line.
  tr -d '\n' |
  # Swap the dangling "+ " for the newline that terminates the expression.
  sed 's/+ $/\n/' |
  bc
When your sed
doesn't support \n
, you can try replacing the '\n' with a real newline like
# A literal newline inside the replacement must be preceded by a backslash,
# otherwise sed reports an unterminated `s' command.
sed 's/+ $/\
/'
or append the final newline with an echo after parsing (and move the removal of the trailing + from the last sed into the first sed command):
# Same conversion as a single expression for bc, but the "+ " is stripped from
# the last line by sed itself and the terminating newline comes from echo.
{
  printf '%s\n' \
    '0 Bytes' \
    '3.9 KiB' \
    '201 Bytes' \
    '2.0 KiB' \
    '2.7 MiB' \
    '1.3 GiB' |
    sed -e 's/Bytes//' \
        -e 's/KiB/* 1024/' \
        -e 's/MiB/* 1024 * 1024/' \
        -e 's/GiB/* 1024 * 1024 * 1024/' \
        -e 's/$/ + /' \
        -e '$s/+ //' |
    tr -d '\n'
  echo
} | bc
For input like the one described:
2016-10-14 14:52:09 0 Bytes folder/
2020-04-18 05:19:04 201 Bytes folder/file1.txt
2019-10-16 00:32:44 201 Bytes folder/file2.txt
2019-08-26 06:29:46 201 Bytes folder/file3.txt
2020-07-08 16:13:56 411 Bytes folder/file4.txt
2020-04-18 03:03:34 201 Bytes folder/file5.txt
2019-10-16 08:27:11 1.1 KiB folder/file6.txt
2019-10-16 10:13:52 201 Bytes folder/file7.txt
2019-10-16 08:44:35 920 Bytes folder/file8.txt
2019-02-17 14:43:10 590 Bytes folder/file9.txt
You could use a table of units that you'd like to be able to decode:
# Build the lookup table that maps a unit name to its size in bytes.
# Exponentiation uses "^", the operator defined by POSIX awk; "**" is an
# extension that is not available in every awk implementation.
BEGIN {
    # Plain byte counts.
    unit["Bytes"] = 1;

    # Decimal (SI) prefixes: powers of 1000.
    unit["kB"] = 10^3;
    unit["MB"] = 10^6;
    unit["GB"] = 10^9;
    unit["TB"] = 10^12;
    unit["PB"] = 10^15;
    unit["EB"] = 10^18;
    unit["ZB"] = 10^21;
    unit["YB"] = 10^24;

    # Upper-case "KB" is decoded as 1024 bytes, unlike lower-case "kB" above.
    unit["KB"] = 1024;

    # Binary (IEC) prefixes: powers of 1024.
    unit["KiB"] = 1024^1;
    unit["MiB"] = 1024^2;
    unit["GiB"] = 1024^3;
    unit["TiB"] = 1024^4;
    unit["PiB"] = 1024^5;
    unit["EiB"] = 1024^6;
    unit["ZiB"] = 1024^7;
    unit["YiB"] = 1024^8;
}
Then just sum it up in the main loop:
# Runs for every input record: field 3 holds the numeric size and field 4 its
# unit. Known units are converted to bytes and added to the running total.
{
    if ($4 in unit) {
        total += $3 * unit[$4];
    } else {
        # Diagnostics go to stderr so they never mix with the result
        # that is printed on stdout.
        printf("ERROR: Can't decode unit at line %d: %s\n", NR, $0) > "/dev/stderr";
    }
}
And print the result at the end:
# Print the accumulated total, scaled to the largest binary unit that keeps
# the value at or above 1 (falling back to plain Bytes).
# Exponentiation uses the POSIX "^" operator instead of the "**" extension.
END {
    # Index i is the name of the unit worth 1024^i bytes.
    binaryunits[0] = "Bytes";
    binaryunits[1] = "KiB";
    binaryunits[2] = "MiB";
    binaryunits[3] = "GiB";
    binaryunits[4] = "TiB";
    binaryunits[5] = "PiB";
    binaryunits[6] = "EiB";
    binaryunits[7] = "ZiB";
    binaryunits[8] = "YiB";

    # Walk from the largest unit downwards; "i == 0" guarantees that the
    # loop ends at Bytes even when the total is smaller than 1.
    for (i = 8;; --i) {
        if (total >= 1024^i || i == 0) {
            printf("%.3f %s\n", total / (1024^i), binaryunits[i]);
            break;
        }
    }
}
Output:
3.957 KiB
Note that you can add a shebang to the beginning of the awk script to make it directly executable, so that you don't have to wrap it in a bash script:
#!/usr/bin/awk -f
Use numfmt
to convert the human-readable sizes to plain byte counts.
# Sum the sizes of an ls-like listing whose 3rd and 4th columns hold
# "<number> <unit>" (Bytes, KiB, MiB, GiB).
# The heredoc delimiter is quoted so the sample data is taken literally.
cat <<'EOF' |
2016-10-14 14:52:09 0 Bytes folder/
2020-04-18 05:19:04 201 Bytes folder/file1.txt
2019-10-16 00:32:44 201 Bytes folder/file2.txt
2019-08-26 06:29:46 201 Bytes folder/file3.txt
2020-07-08 16:13:56 411 Bytes folder/file4.txt
2020-04-18 03:03:34 201 Bytes folder/file5.txt
2019-10-16 08:27:11 1.1 KiB folder/file6.txt
2019-10-16 10:13:52 201 Bytes folder/file7.txt
2019-10-16 08:44:35 920 Bytes folder/file8.txt
2019-02-17 14:43:10 590 Bytes folder/file9.txt
2019-02-17 14:43:10 3.9 KiB folder/file9.txt
2019-02-17 14:43:10 2.7 MiB folder/file9.txt
2019-02-17 14:43:10 1.3 GiB folder/file9.txt
EOF
# extract 3rd and 4th column
tr -s ' ' | cut -d' ' -f3,4 |
# Glue number and unit together, then reduce the unit to the suffix numfmt
# understands: drop "Bytes", then the "B" of KiB/MiB/GiB.
sed 's/ //; s/Bytes//; s/B//' |
# Convert to plain byte counts; LC_ALL=C pins "." as the decimal separator
# regardless of the caller's locale.
LC_ALL=C numfmt --from=auto |
# Sum; "%.0f" prints the total as a plain integer (never in scientific
# notation) and yields 0 when there is no input at all.
awk '{s+=$1}END{printf "%.0f\n", s}'
outputs the sum.