Pandas - Duplicate Row based on condition
I think you need to get the rows whose `Date Completed` value occurs only once, and then `concat` those rows to the original DataFrame:
# duplicated(keep=False) flags every row of a repeated date, so the negated
# mask keeps only the dates that occur exactly once.
occurs_once = ~df['Date Completed'].duplicated(keep=False)
df1 = df.loc[occurs_once, ['Date Completed']]
print(df1)
Date Completed
3 2/1/17
# Append the single-occurrence rows, then sort so each copy sits next to its
# original. mergesort is stable, so the original row stays ahead of its
# appended copy; the default quicksort does not guarantee the order of ties.
# NOTE(review): if the column holds strings the sort is lexicographic --
# convert with pd.to_datetime first if chronological order matters.
df = pd.concat([df, df1], ignore_index=True).sort_values('Date Completed', kind='mergesort')
# Number the rows within each date, starting from 1.
df['PathID'] = df.groupby('Date Completed').cumcount() + 1
# Broadcast the per-date maximum PathID back onto every row. The string alias
# 'max' is used because recent pandas versions warn when given the builtin max.
df['MaxPathID'] = df.groupby('Date Completed')['PathID'].transform('max')
print(df)
Date Completed PathID MaxPathID
0 1/31/17 1 3
1 1/31/17 2 3
2 1/31/17 3 3
3 2/1/17 1 2
6 2/1/17 2 2
4 2/2/17 1 2
5 2/2/17 2 2
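EDIT: If the DataFrame has more columns, select the whole rows instead of only the `Date Completed` column: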
# Show the input DataFrame, which now carries the extra columns a and b.
print (df)
Date Completed a b
0 1/31/17 4 5
1 1/31/17 3 5
2 1/31/17 6 3
3 2/1/17 7 9
4 2/2/17 2 0
5 2/2/17 6 7
# Keep the complete rows (all columns) whose date occurs exactly once.
occurs_once = ~df['Date Completed'].duplicated(keep=False)
df1 = df[occurs_once]
# Alternative: index with the mask's underlying NumPy array instead of the Series
#df1 = df[~df['Date Completed'].duplicated(keep=False).values]
print(df1)
Date Completed a b
3 2/1/17 7 9
# Append the single-occurrence rows and sort so each copy lands next to its
# original. mergesort is stable, so the original row is guaranteed to precede
# its appended copy; the default quicksort leaves the order of ties unspecified.
df = pd.concat([df, df1], ignore_index=True).sort_values('Date Completed', kind='mergesort')
print(df)
Date Completed a b
0 1/31/17 4 5
1 1/31/17 3 5
2 1/31/17 6 3
3 2/1/17 7 9
6 2/1/17 7 9
4 2/2/17 2 0
5 2/2/17 6 7