How to use IEnumerable.GroupBy comparing multiple properties between elements?
First, let the `Site` class be defined as follows (for debugging / demonstration):
/// <summary>
/// A stretch of a route, described by the route identifier and the
/// mileposts at which the stretch starts and ends.
/// </summary>
public class Site {
    /// <summary>Creates a site with all fields left at their default values.</summary>
    public Site() { }

    /// <summary>Identifier of the route this site belongs to.</summary>
    public string RouteId;

    /// <summary>Milepost at which the site starts.</summary>
    public decimal StartMilepost;

    /// <summary>Milepost at which the site ends.</summary>
    public decimal EndMilepost;

    /// <summary>
    /// Renders the site as "RouteId Start..End"; used by the demo output.
    /// </summary>
    public override string ToString() {
        return $"{RouteId} {StartMilepost}..{EndMilepost}";
    }
}
Well, as you can see, we have to break the rules: equality must be transitive, i.e. whenever A equals B and B equals C, then A must equal C. That is not the case in your example. However, if we sort the sites by `StartMilepost`, we can, technically, implement `IEqualityComparer<Site>` like this:
/// <summary>
/// Treats two sites as "equal" when they are on the same route and one of them
/// starts inside (or exactly at the end of) the other one's milepost range.
/// </summary>
/// <remarks>
/// This relation is deliberately NOT transitive: A may equal B and B may equal C
/// while A does not equal C, so results depend on the order in which elements
/// are compared.
/// NOTE(review): GroupBy presumably compares each element against the stored key
/// of a group (its first member) rather than its most recent member - confirm
/// that runs of three or more touching sites are grouped as intended.
/// </remarks>
public class MySiteEqualityComparer : IEqualityComparer<Site> {
    /// <summary>
    /// Returns true for the same reference, false when exactly one side is null
    /// or the routes differ, and otherwise true when either site's
    /// StartMilepost lies within the other's [StartMilepost, EndMilepost] range.
    /// </summary>
    public bool Equals(Site x, Site y) {
        if (ReferenceEquals(x, y)) {
            return true;
        }

        if (x == null || y == null) {
            return false;
        }

        if (x.RouteId != y.RouteId) {
            return false;
        }

        // y begins somewhere within x (bounds inclusive).
        bool yStartsWithinX = x.StartMilepost <= y.StartMilepost && y.StartMilepost <= x.EndMilepost;
        // x begins somewhere within y (bounds inclusive).
        bool xStartsWithinY = y.StartMilepost <= x.StartMilepost && x.StartMilepost <= y.EndMilepost;

        return yStartsWithinX || xStartsWithinY;
    }

    /// <summary>
    /// Hashes by RouteId only (0 for a null site or null route), so that any
    /// two sites on the same route share a hash code and reach <see cref="Equals(Site, Site)"/>.
    /// </summary>
    public int GetHashCode(Site obj) {
        return obj?.RouteId?.GetHashCode() ?? 0;
    }
}
Then call `GroupBy` as usual. Please note that `OrderBy` is required, since the order of comparison matters here. Suppose we have:
A = {RouteId="X", StartMilepost=0.00m, EndMilepost=1.00m}
B = {RouteId="X", StartMilepost=1.00m, EndMilepost=2.00m}
C = {RouteId="X", StartMilepost=2.00m, EndMilepost=3.00m}
Here A == B and B == C (so for the order A, B, C all items will be in the same group), but A != C (and thus the order A, C, B will end up with 3 groups).
Code:
// Sample data: two routes, each containing both touching and isolated sites.
var sites = new List<Site> {
    new Site { RouteId = "A", StartMilepost = 0.00m, EndMilepost = 1.00m },
    new Site { RouteId = "A", StartMilepost = 1.00m, EndMilepost = 2.00m },
    new Site { RouteId = "A", StartMilepost = 5.00m, EndMilepost = 7.00m },
    new Site { RouteId = "B", StartMilepost = 3.00m, EndMilepost = 5.00m },
    new Site { RouteId = "B", StartMilepost = 11.00m, EndMilepost = 13.00m },
    new Site { RouteId = "B", StartMilepost = 13.00m, EndMilepost = 14.00m },
};

// Split by route first, then cluster each route's sites with the custom comparer.
var result = sites
    .GroupBy(site => site.RouteId)
    .Select(route => route
        // The sort is mandatory: MySiteEqualityComparer is not transitive,
        // so the outcome depends on the order in which sites are compared.
        .OrderBy(site => site.StartMilepost)
        .GroupBy(site => site, new MySiteEqualityComparer())
        .ToArray())
    .ToArray();

// Print one line per cluster, with the cluster's sites separated by "; ".
var report = string.Join(
    Environment.NewLine,
    result
        .SelectMany(route => route)
        .Select(cluster => string.Join("; ", cluster)));

Console.Write(report);
Outcome:
A 0.00..1.00; A 1.00..2.00
A 5.00..7.00
B 3.00..5.00
B 11.00..13.00; B 13.00..14.00
Here are a couple of implementations where the order of the `Site`s does not matter. You can use the LINQ `Aggregate` function:
// Groups the sites of each route into lists of contiguous sites and returns
// all of those lists as one flat sequence (IEnumerable<List<Site>>).
//
// A site is attached to a group when some site already in that group has an
// EndMilepost exactly equal to the new site's StartMilepost (overlaps that are
// not an exact match do not count). A site joins the first group that matches;
// existing groups are never merged with each other.
return sites.GroupBy(x => x.RouteId)
    .SelectMany(x =>
    {
        // Groups that have been closed so far for this route.
        var groupedSites = new List<List<Site>>();

        // Visit the sites in milepost order so that a site's predecessor (the
        // site ending where it starts) has always been seen before it. Without
        // the sort the grouping depended on the order of the input sequence.
        // Assumes StartMilepost <= EndMilepost for every site - TODO confirm.
        x.OrderBy(site => site.StartMilepost)
            .ThenBy(site => site.EndMilepost)
            .Aggregate(new List<Site>(), (contiguous, next) =>
            {
                // First site of the route, or it continues the group being built.
                if (contiguous.Count == 0 || contiguous.Any(y => y.EndMilepost == next.StartMilepost))
                {
                    contiguous.Add(next);
                    return contiguous;
                }

                // Otherwise try to continue one of the groups that were closed earlier.
                var groupMatchIndex = groupedSites.FindIndex(y => y.Any(z => z.EndMilepost == next.StartMilepost));
                if (groupMatchIndex >= 0)
                {
                    groupedSites[groupMatchIndex].Add(next);
                    return contiguous;
                }

                // No match anywhere: close the current group and start a new one.
                groupedSites.Add(contiguous);
                return new List<Site> { next };
            },
            // The group still being built when the input runs out is closed here.
            final => { groupedSites.Add(final); return final; });

        return groupedSites;
    });
Alternatively, just with `foreach`:
// Groups the sites of each route into lists of contiguous sites and returns
// all of those lists as one flat sequence (IEnumerable<List<Site>>).
//
// A site is attached to a group when some site already in that group has an
// EndMilepost exactly equal to the new site's StartMilepost (overlaps that are
// not an exact match do not count). A site joins the first group that matches;
// existing groups are never merged with each other.
return sites.GroupBy(x => x.RouteId)
    .SelectMany(x =>
    {
        // Groups that have been closed so far for this route.
        var groupedSites = new List<List<Site>>();
        // The group currently being built.
        var aggList = new List<Site>();

        // Visit the sites in milepost order so that a site's predecessor (the
        // site ending where it starts) has always been seen before it. Without
        // the sort the grouping depended on the order of the input sequence.
        // Assumes StartMilepost <= EndMilepost for every site - TODO confirm.
        var ordered = x.OrderBy(site => site.StartMilepost)
            .ThenBy(site => site.EndMilepost);

        foreach (var item in ordered)
        {
            // First site of the route, or it continues the group being built.
            if (aggList.Count == 0 || aggList.Any(y => y.EndMilepost == item.StartMilepost))
            {
                aggList.Add(item);
                continue;
            }

            // Otherwise try to continue one of the groups that were closed earlier.
            var groupMatchIndex = groupedSites.FindIndex(y => y.Any(z => z.EndMilepost == item.StartMilepost));
            if (groupMatchIndex > -1)
            {
                groupedSites[groupMatchIndex].Add(item);
                continue;
            }

            // No match anywhere: close the current group and start a new one.
            groupedSites.Add(aggList);
            aggList = new List<Site> { item };
        }

        // Close the group that was still being built when the input ran out.
        groupedSites.Add(aggList);
        return groupedSites;
    });