bash add/append new columns from other files
You may use this awk
:
# Print every name from name.txt followed by its value from each data file
# (0 when the data file has no row for that name). The data files are read
# first and cached; name.txt is read last and drives the output.
awk '
NF != 2 {
    # name.txt row: start with the name, then add one column per data file.
    # ARGV[1] .. ARGV[ARGC-2] are the data files; the last argument is name.txt.
    row = $1
    for (k = 1; k < ARGC - 1; k++)
        row = row OFS (cache[ARGV[k], $1] + 0)
    print row
    next
}
{
    # data-file row (exactly two fields): remember the value per (file, name).
    cache[FILENAME, $1] = $2
}' {x,y,z}.txt name.txt
A 1 1 0
B 0 4 2
C 3 0 0
D 2 0 2
E 0 3 0
F 0 0 1
Adding one more way of doing it. Could you please try following, written and tested with shown samples. IMHO should work in any awk
, though I only have 3.1 version of GNU awk
only. This is very simple and usual way, create an array in first(major) Input_file's reading then later on in each file add 0
whoever element of that array is NOT found in that specific Input_file, tested with small given samples only.
awk '
# Merge the 2nd column of each data file onto the names listed in the first
# file, writing 0 where a data file has no row for a name.
#
# arr[name]   accumulated columns for that name; each column is stored with a
#             leading OFS, so the value is printed right after the name.
# order[1..total]  names in the order they appear in the first file.
# found[name] names that had a row in the data file currently being read.
# dataFiles   number of data files whose first line has been seen.
#
# Limitation: a completely empty data file produces no records, so it cannot
# be detected here and contributes no column.

# Append a 0 column to every name that had no row in the file just finished.
# i is declared as an extra parameter so that it is local to the function.
function checkArray(array,    i){
  for(i in array){
    if(!(i in found)){ array[i]=array[i] OFS "0" }
  }
}
FNR==NR{
  # First file: register each name once and remember its position, because
  # the traversal order of for-in is unspecified.
  if(!($0 in arr)){ order[++total]=$0 }
  arr[$0]
  next
}
FNR==1{
  # Start of a data file. From the 2nd data file onwards the previous file is
  # complete, so pad it unconditionally (even if it matched no name at all).
  if(dataFiles++){
    checkArray(arr)
    delete found
  }
}
{
  if($1 in arr){
    arr[$1]=(arr[$1] OFS $2)
    found[$1]
    next
  }
}
END{
  # Pad for the last data file, but only if at least one was read.
  if(dataFiles){ checkArray(arr) }
  for(n=1;n<=total;n++){
    # No comma here: the stored value already begins with OFS.
    print order[n] arr[order[n]]
  }
}
' name.txt x.txt y.txt z.txt
Explanation: Adding detailed explanation for above.
awk '                                    ##Starting awk program from here.
function checkArray(array,    i){        ##Creating a function named checkArray; i is an extra parameter so it stays local.
  for(i in array){                       ##Traversing through array here.
    if(!(i in found)){ array[i]=array[i] OFS "0" } ##If key is NOT in found then append a 0 to that specific value.
  }
}
FNR==NR{                                 ##FNR==NR is TRUE only while the first file (name.txt) is being read.
  if(!($0 in arr)){ order[++total]=$0 }  ##Remembering each name once, in file order, since for-in order is unspecified.
  arr[$0]                                ##Creating array named arr with index of current line.
  next                                   ##next will skip all further statements from here.
}
FNR==1{                                  ##This is the first line of a data file.
  if(dataFiles++){                       ##TRUE from the 2nd data file onwards, meaning the previous file is finished.
    checkArray(arr)                      ##Appending a 0 for every name missing from the previous file.
    delete found                         ##Deleting found array to get rid of previous values.
  }
}
{
  if($1 in arr){                         ##Checking condition if 1st field is present in arr.
    arr[$1]=(arr[$1] OFS $2)             ##Appending 2nd field value to arr with index of $1.
    found[$1]                            ##Adding 1st field to found as an index here.
    next                                 ##next will skip all further statements from here.
  }
}
END{                                     ##Starting END block of this program from here.
  if(dataFiles){ checkArray(arr) }       ##Appending a 0 for names missing from the last data file, if any file was read.
  for(n=1;n<=total;n++){                 ##Traversing through names in the order of name.txt here.
    print order[n] arr[order[n]]         ##Printing name and its value; no comma since the value already starts with OFS.
  }
}
' name.txt x.txt y.txt z.txt             ##Mentioning Input_file names here.
Yes, you can do it, and yes, awk
is the tool. Using arrays and your normal file line number (FNR
file number of records) and total lines (NR
records) you can read all letters from names.txt
into the a[]
array, then keeping track of the file number in the variable fno
, you can add all the additions from x.txt
and then before processing the first line of the next file (y.txt
), loop over all letters seen in the in the last file, and for those not seen place a 0
, then continue processing as normal. Repeat for each additional file.
Further line-by-line explanation is shown in the comments:
awk '
# Merge the 2nd column of each data file onto the names listed in the first
# file, writing 0 where a data file has no row for a name.
# Limitation: a completely empty data file produces no records, so it
# contributes no column.
FNR==NR {                       # first file (name.txt)
    if (!($1 in a))             # remember each name once, in file order,
        names[++n] = $1         #   because "for (i in a)" order is unspecified
    a[$1] = ""                  # fill array with names as index
    fno = 1                     # set file number counter
    next                        # get next record (line)
}
FNR == 1 { fno++ }              # first line in file, increment file count
fno > 2 && FNR == 1 {           # file no. 3+ (not run on x.txt)
    for (i in a)                # loop over names
        if (!(i in seen))       # if not seen in the previous file
            a[i] = a[i]" "0     # append 0
    delete seen                 # delete seen array
}
$1 in a {                       # if line begins with a known name
    a[$1] = a[$1]" "$2          # append second field
    seen[$1]++                  # add name to seen array
}
END {
    if (fno > 1)                # pad only if at least one data file was read
        for (i in a)            # place zeros for last column
            if (!(i in seen))
                a[i] = a[i]" "0
    for (k = 1; k <= n; k++)    # print results in name.txt order
        print names[k] a[names[k]]
}' name.txt x.txt y.txt z.txt
Example Use/Output
Just copy the above, and middle-mouse-paste into an xterm with the current directory containing your files and you will receive:
A 1 1 0
B 0 4 2
C 3 0 0
D 2 0 2
E 0 3 0
F 0 0 1
Creating a Self-Contained Script
If you would like to create a script to run instead of pasting at the command line, you simply include the contents (without surrounding in single-quotes) and then make the file executable. For example, you include the interpreter as the first line and the contents as follows:
#!/usr/bin/awk -f
# Merge the 2nd column of each data file onto the names listed in the first
# file, writing 0 where a data file has no row for a name.
# Usage: ./names.awk name.txt x.txt y.txt z.txt
# Limitation: a completely empty data file produces no records, so it
# contributes no column.
FNR==NR {                       # first file (name.txt)
    if (!($1 in a))             # remember each name once, in file order,
        names[++n] = $1         #   because "for (i in a)" order is unspecified
    a[$1] = ""                  # fill array with names as index
    fno = 1                     # set file number counter
    next                        # get next record (line)
}
FNR == 1 { fno++ }              # first line in file, increment file count
fno > 2 && FNR == 1 {           # file no. 3+ (not run on x.txt)
    for (i in a)                # loop over names
        if (!(i in seen))       # if not seen in the previous file
            a[i] = a[i]" "0     # append 0
    delete seen                 # delete seen array
}
$1 in a {                       # if line begins with a known name
    a[$1] = a[$1]" "$2          # append second field
    seen[$1]++                  # add name to seen array
}
END {
    if (fno > 1)                # pad only if at least one data file was read
        for (i in a)            # place zeros for last column
            if (!(i in seen))
                a[i] = a[i]" "0
    for (k = 1; k <= n; k++)    # print results in name.txt order
        print names[k] a[names[k]]
}
awk
will process the filenames given as arguments in the order given.
Example Use/Output
Using the script file (I put it in names.awk
and then used chmod +x names.awk
to make it executable), you would then do:
$ ./names.awk name.txt x.txt y.txt z.txt
A 1 1 0
B 0 4 2
C 3 0 0
D 2 0 2
E 0 3 0
F 0 0 1
Let me know if you have further questions.