Looping through multiple arrays & concatenating values in pandas

Assuming `Items` is a column of lists, explode the lists so that each item gets its own row; counting is then a simple `isin` check per category, summed over the original index. I'd suggest a different output from the one you asked for: it gets across the same information but is much easier to work with in the future.

Example

import pandas as pd

# Sample data: each row holds a list of item codes.
df = pd.DataFrame({'Items': [['X1', 'Y1', 'Z1'], ['X2', 'Z3'], ['X3'],
                             ['X1', 'X2'], ['Y2', 'Y4', 'Z2', 'Y5', 'Z3'],
                             ['X2', 'X3', 'Y1', 'Y2', 'Z2', 'Z4', 'X1']]})
# Item codes that make up each category.
X = ['X1', 'X2', 'X3', 'X4', 'X5']
Y = ['Y1', 'Y2', 'Y3', 'Y4', 'Y5']
Z = ['Z1', 'Z2', 'Z3', 'Z4', 'Z5']

# One row per individual item; the original row label is repeated in the index.
s = df.explode('Items')['Items']

# For each category, flag the items that belong to it and total the flags per
# original row label.  `Series.sum(level=0)` was removed in pandas 2.0;
# `groupby(level=0).sum()` is the replacement and also works on older versions.
pd.concat([s.isin(members).groupby(level=0).sum().rename(name)
           for name, members in [('X', X), ('Y', Y), ('Z', Z)]], axis=1).astype(int)
#   X  Y  Z
#0  1  1  1
#1  1  0  1
#2  1  0  0
#3  2  0  0
#4  0  3  2
#5  3  2  2

To get your output, mask the 0s and append the column names to the values. Then we string-join each row to get the result. Here I use an `apply` for simplicity, alignment and NaN handling, but there are other slightly faster alternatives.

# Per-row count of items belonging to each category.  `Series.sum(level=0)` was
# removed in pandas 2.0, so the per-row total uses `groupby(level=0).sum()`.
res = pd.concat([s.isin(members).groupby(level=0).sum().rename(name)
                 for name, members in [('X', X), ('Y', Y), ('Z', Z)]], axis=1).astype(int)

# Render the counts as text: a count of exactly 1 becomes '' (so only the
# column name will show) and a count of 0 is masked to NaN so it can be
# dropped when joining.
res = res.astype(str).replace('1', '').where(res.ne(0))
# Append each column's name to the values in that column, e.g. '2' -> '2X'.
res = res.add(res.columns, axis=1)

# The assignment aligns on the index: `res` is indexed by the original row
# labels because of the `.groupby(level=0).sum()` above.
df['Category'] = res.apply(lambda x: ' & '.join(x.dropna()), axis=1)
#                          Items      Category
#0                  [X1, Y1, Z1]     X & Y & Z
#1                      [X2, Z3]         X & Z
#2                          [X3]             X
#3                      [X1, X2]            2X
#4          [Y2, Y4, Z2, Y5, Z3]       3Y & 2Z
#5  [X2, X3, Y1, Y2, Z2, Z4, X1]  3X & 2Y & 2Z

Setup

# Sample frame: every row stores its items as one comma-separated string.
df = pd.DataFrame({
    'Items': [
        'X1,Y1,Z1',
        'X2,Z3',
        'X3',
        'X1,X2',
        'Y2,Y4,Z2,Y5,Z3',
        'X2,X3,Y1,Y2,Z2,Z4,X1',
    ],
})

# Item codes that make up each category: the letter followed by 1 through 5.
X = [f'X{i}' for i in range(1, 6)]
Y = [f'Y{i}' for i in range(1, 6)]
Z = [f'Z{i}' for i in range(1, 6)]

`Counter`

from collections import Counter

# Map every individual item code to the letter of the category it belongs to.
M = {**dict.fromkeys(X, 'X'), **dict.fromkeys(Y, 'Y'), **dict.fromkeys(Z, 'Z')}

# Separator used between the per-category parts of a label.
cat = ' & '.join


def num(x):
    """Return the count prefix for a label: '' for a count of 1, else the count."""
    return '' if x == 1 else x


def fmt(c):
    """Join a category -> count mapping into text such as '3X & 2Y & 2Z'."""
    return cat(f'{num(v)}{k}' for k, v in c.items())


def cnt(x):
    """Count the categories of the comma-separated item codes in string ``x``.

    Codes with no entry in ``M`` are skipped; looking them up with ``M.get``
    would tally them under ``None`` and put a literal 'None' in the label.
    """
    return Counter(M[item] for item in x.split(',') if item in M)


df.assign(Category=[*map(fmt, map(cnt, df.Items))])

                  Items      Category
0              X1,Y1,Z1     X & Y & Z
1                 X2,Z3         X & Z
2                    X3             X
3                 X1,X2            2X
4        Y2,Y4,Z2,Y5,Z3       3Y & 2Z
5  X2,X3,Y1,Y2,Z2,Z4,X1  3X & 2Y & 2Z

OLD STUFF

`pandas.Series.str.get_dummies` and `groupby`

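First convert the definitions of X, Y, and Z into one dictionary, then use that as the argument for `groupby` on `axis=1`. (On pandas 2.1 and later, where `axis=1` in `groupby` is deprecated, group the transposed frame and transpose the result back instead.)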

# Map every individual item code to the letter of the category it belongs to.
M = {**dict.fromkeys(X, 'X'), **dict.fromkeys(Y, 'Y'), **dict.fromkeys(Z, 'Z')}

# Build one 0/1 indicator column per item code, then total the indicator
# columns by category.  `groupby(..., axis=1)` is deprecated since pandas 2.1,
# so group the rows of the transposed frame and transpose the result back.
counts = df.Items.str.get_dummies(',').T.groupby(M).sum().T
counts

   X  Y  Z
0  1  1  1
1  1  0  1
2  1  0  0
3  2  0  0
4  0  3  2
5  3  2  2

Add the desired column
Work in progress: I don't like this solution.

def fmt(row):
    """Build a label such as '3X & 2Y' from a row of per-category counts.

    ``row`` must provide ``.items()`` yielding (label, count) pairs.  Counts
    that are not greater than 0 are left out, and a count equal to 1 is shown
    as the bare label with no number in front.
    """
    parts = []
    for label, count in row.items():
        if not count > 0:
            continue
        prefix = "" if count == 1 else count
        parts.append(f'{prefix}{label}')
    return ' & '.join(parts)

# Format every row of `counts` into its label, then show the frame with the
# labels attached as a new `Category` column (`df` itself is not modified).
category_labels = counts.apply(fmt, axis=1)
df.assign(Category=category_labels)

                  Items      Category
0              X1,Y1,Z1     X & Y & Z
1                 X2,Z3         X & Z
2                    X3             X
3                 X1,X2            2X
4        Y2,Y4,Z2,Y5,Z3       3Y & 2Z
5  X2,X3,Y1,Y2,Z2,Z4,X1  3X & 2Y & 2Z

NOT TO BE TAKEN SERIOUSLY

This is not to be taken seriously because I'm leveraging a quirk of your contrived example, and there is no way you should depend on the first character of your values being the thing that differentiates them.

from operator import itemgetter

# Total the per-item indicator columns by the first character of each item
# code.  `groupby(..., axis=1)` is deprecated since pandas 2.1, so group the
# rows of the transposed frame and transpose the result back.
df.Items.str.get_dummies(',').T.groupby(itemgetter(0)).sum().T

   X  Y  Z
0  1  1  1
1  1  0  1
2  1  0  0
3  2  0  0
4  0  3  2
5  3  2  2

Create your dataframe

import pandas as pd

# Sample data: one list of item codes per row.
item_lists = [
    ['X1', 'Y1', 'Z1'],
    ['X2', 'Z3'],
    ['X3'],
    ['X1', 'X2'],
    ['Y2', 'Y4', 'Z2', 'Y5', 'Z3'],
    ['X2', 'X3', 'Y1', 'Y2', 'Z2', 'Z4', 'X1'],
]
df = pd.DataFrame({'Items': item_lists})

explode

# Give every list element its own row; the original row label is repeated in
# the index for each element.
df_exp = df.explode(column='Items')

def check_if_in_set(item, set):
    """Return 1 when ``item`` is contained in ``set`` and 0 otherwise.

    NOTE: the second parameter shadows the builtin ``set`` inside this
    function; the name is kept so existing keyword callers keep working.
    """
    return int(item in set)

# Category name -> set of item codes in that category.  Deliberately not named
# `dict`: rebinding that name hides the builtin type for the rest of the
# session and breaks any later `dict.fromkeys(...)` / `dict(...)` call.
categories = {'X': {'X1', 'X2', 'X3', 'X4', 'X5'},
              'Y': {'Y1', 'Y2', 'Y3', 'Y4', 'Y5'},
              'Z': {'Z1', 'Z2', 'Z3', 'Z4', 'Z5'}}

# Add one 0/1 indicator column per category.  `isin` tests the whole column at
# once instead of calling a Python function for every row.
for name, members in categories.items():
    df_exp[name] = df_exp['Items'].isin(members).astype('int64')

groupby

# Collapse the exploded frame back to one row per index label: gather the
# items into a list and total each category's indicator column.
# Keys are the output column names; values are (source column, aggregation).
aggregations = {
    'Items_list': ('Items', list),
    'X_count': ('X', 'sum'),
    'y_count': ('Y', 'sum'),
    'Z_count': ('Z', 'sum'),
}
df_exp.groupby(df_exp.index).agg(**aggregations)

                     Items_list  X_count  y_count  Z_count
0                  [X1, Y1, Z1]        1        1        1
1                      [X2, Z3]        1        0        1
2                          [X3]        1        0        0
3                      [X1, X2]        2        0        0
4          [Y2, Y4, Z2, Y5, Z3]        0        3        2
5  [X2, X3, Y1, Y2, Z2, Z4, X1]        3        2        2

Looping through multiple arrays & concatenating values in pandas

Example

Setup

`Counter`

OLD STUFF

`pandas.Series.str.get_dummies` and `groupby`

NOT TO BE TAKEN SERIOUSLY

Tags:

Python

Pandas

Arrays

Related

Recent Posts

Looping through multiple arrays & concatenating values in pandas

Example

Setup

Counter

OLD STUFF

pandas.Series.str.get_dummies and groupby

NOT TO BE TAKEN SERIOUSLY

Tags:

Python

Pandas

Arrays

Related

`Counter`

`pandas.Series.str.get_dummies` and `groupby`