How to sum file sizes from an ls-like output log with Bytes, KiB, MiB, GiB units

You can turn the input into an arithmetic expression before sending it to bc:

# Sample sizes, one "<number> <unit>" pair per line.
printf '%s\n' \
  '0   Bytes' \
  '3.9 KiB' \
  '201 Bytes' \
  '2.0 KiB' \
  '2.7 MiB' \
  '1.3 GiB' |
  # Drop "Bytes", turn each binary unit into its multiplier and append
  # " + " to every line so the lines can be chained into one expression.
  sed -e 's/Bytes//' \
      -e 's/KiB/* 1024/' \
      -e 's/MiB/* 1024 * 1024/' \
      -e 's/GiB/* 1024 * 1024 * 1024/' \
      -e 's/$/ + /' |
  # Join all lines into a single line.
  tr -d '\n' |
  # Swap the dangling "+ " at the very end for a terminating newline.
  sed -e 's/+ $/\n/' |
  # Evaluate the resulting expression.
  bc

If your sed doesn't support \n in the replacement, you can try replacing the '\n' with a real newline, like this:

# Replace the trailing "+ " with a newline. A literal newline inside the
# replacement part of s/// has to be escaped with a backslash; without the
# backslash sed rejects the command as a syntax error.
sed 's/+ $/\
/'

or add an echo after parsing (and move the removal of the last + from the last sed into the first sed command):

# Same sum without needing "\n" in a sed replacement: the first sed already
# strips the "+ " from the last line ($ address), and a plain echo supplies
# the final newline after tr has deleted all the others.
{
  sed -e 's/Bytes//' \
      -e 's/KiB/* 1024/' \
      -e 's/MiB/* 1024 * 1024/' \
      -e 's/GiB/* 1024 * 1024 * 1024/' \
      -e 's/$/ + /' \
      -e '$s/+ //' <<'EOF' | tr -d '\n'
0   Bytes
3.9 KiB
201 Bytes
2.0 KiB
2.7 MiB
1.3 GiB
EOF
  echo
} | bc

For input like the one described:

2016-10-14 14:52:09    0 Bytes folder/
2020-04-18 05:19:04  201 Bytes folder/file1.txt
2019-10-16 00:32:44  201 Bytes folder/file2.txt
2019-08-26 06:29:46  201 Bytes folder/file3.txt
2020-07-08 16:13:56  411 Bytes folder/file4.txt
2020-04-18 03:03:34  201 Bytes folder/file5.txt
2019-10-16 08:27:11    1.1 KiB folder/file6.txt
2019-10-16 10:13:52  201 Bytes folder/file7.txt
2019-10-16 08:44:35  920 Bytes folder/file8.txt
2019-02-17 14:43:10  590 Bytes folder/file9.txt

You could use a table of units that you'd like to be able to decode:

# Lookup table: unit suffix -> number of bytes in one such unit.
# Exponentiation uses "^", the operator defined by POSIX awk; "**" is an
# extension that not every awk implementation accepts.
BEGIN {
    unit["Bytes"] = 1;

    # Decimal units: powers of 1000.
    unit["kB"] = 10^3;
    unit["MB"] = 10^6;
    unit["GB"] = 10^9;
    unit["TB"] = 10^12;
    unit["PB"] = 10^15;
    unit["EB"] = 10^18;
    unit["ZB"] = 10^21;
    unit["YB"] = 10^24;

    # Binary units: powers of 1024. "KB" is treated the same as "KiB".
    unit["KB"] = 1024;
    unit["KiB"] = 1024^1;
    unit["MiB"] = 1024^2;
    unit["GiB"] = 1024^3;
    unit["TiB"] = 1024^4;
    unit["PiB"] = 1024^5;
    unit["EiB"] = 1024^6;
    unit["ZiB"] = 1024^7;
    unit["YiB"] = 1024^8;
}

Then just sum it up in the main loop:

# Runs for every input line: field 3 is the size, field 4 its unit.
# A known unit is converted to bytes and added to total. Anything else is
# reported on stderr so the diagnostic cannot mix with the result on stdout.
{
    if ($4 in unit)
        total += $3 * unit[$4];
    else
        printf("ERROR: Can't decode unit at line %d: %s\n", NR, $0) > "/dev/stderr";
}

And print the result at the end:

# Print the accumulated total, scaled to the largest binary unit it reaches.
# Exponentiation uses "^", the operator defined by POSIX awk; "**" is an
# extension that not every awk implementation accepts.
END {
    binaryunits[0] = "Bytes";
    binaryunits[1] = "KiB";
    binaryunits[2] = "MiB";
    binaryunits[3] = "GiB";
    binaryunits[4] = "TiB";
    binaryunits[5] = "PiB";
    binaryunits[6] = "EiB";
    binaryunits[7] = "ZiB";
    binaryunits[8] = "YiB";

    # Walk down from the largest unit until total is at least one of that
    # unit; index 0 (Bytes) is the fallback for totals below 1 KiB.
    for (i = 8; i > 0 && total < 1024^i; --i)
        ;

    printf("%.3f %s\n", total / 1024^i, binaryunits[i]);
}

Output:

3.957 KiB

Note that you can add a shebang to the beginning of the awk script to make it possible to run it on its own, so that you won't have to put it in a bash script:

#!/usr/bin/awk -f
# The line above has to be the very first line of the awk script file.

Use numfmt to convert those numbers.

# Sample log; the quoted delimiter keeps the shell from expanding anything
# inside the here-document.
cat <<'EOF' |
2016-10-14 14:52:09    0 Bytes folder/
2020-04-18 05:19:04  201 Bytes folder/file1.txt
2019-10-16 00:32:44  201 Bytes folder/file2.txt
2019-08-26 06:29:46  201 Bytes folder/file3.txt
2020-07-08 16:13:56  411 Bytes folder/file4.txt
2020-04-18 03:03:34  201 Bytes folder/file5.txt
2019-10-16 08:27:11    1.1 KiB folder/file6.txt
2019-10-16 10:13:52  201 Bytes folder/file7.txt
2019-10-16 08:44:35  920 Bytes folder/file8.txt
2019-02-17 14:43:10  590 Bytes folder/file9.txt
2019-02-17 14:43:10  3.9 KiB  folder/file9.txt
2019-02-17 14:43:10  2.7 MiB folder/file9.txt
2019-02-17 14:43:10  1.3 GiB folder/file9.txt
EOF
# Glue the size (3rd column) and its unit (4th column) into one token that
# numfmt understands: "Bytes" is dropped and the trailing "B" is removed,
# so "201 Bytes" becomes "201" and "1.1 KiB" becomes "1.1Ki".
awk '{ unit = $4; sub(/^Bytes$/, "", unit); sub(/B$/, "", unit); print $3 unit }' |
# Convert to plain byte counts.
numfmt --from=auto |
# Sum. printf "%.0f" always prints the total as a plain integer; a bare
# print can switch to exponent notation for large totals on some awk
# implementations, and prints an empty line when there is no input.
awk '{ sum += $1 } END { printf "%.0f\n", sum }'

outputs the sum.

Tags:

Bash

Awk

Ls