PostgreSQL Group By Sum

This type of problem requires a recursive CTE (or similar functionality). Here is an example:

with recursive t as (
      select id, length(content) as len,
             row_number() over (order by id) as seqnum
      from test 
     ),
     cte(id, len, ids, seqnum, grp) as (
      select id, len, len as cumelen, t.id::text, 1::int as seqnum, 1 as grp
      from t
      where seqnum = 1
      union all
      select t.id,
             t.len,
             (case when cte.cumelen >= 100 then t.len else cte.cumelen + t.len end) as cumelen,
             (case when cte.cumelen >= 100 then t.id::text else cte.ids || ',' || t.id::text end) as ids,
             t.seqnum
             (case when cte.cumelen >= 100 then cte.grp + 1 else cte.grp end) as ids,
      from t join
           cte
           on cte.seqnum = t.seqnum - 1
     )
select grp, max(ids)
from cte
group by grp;

Here is a small working example:

with recursive test as (
      select 1 as id, 'abcd'::text as content union all
      select 2 as id, 'abcd'::text as content union all
      select 3 as id, 'abcd'::text as content 
     ),
     t as (
      select id, length(content) as len,
             row_number() over (order by id) as seqnum
      from test 
     ),
     cte(id, len, cumelen, ids, seqnum, grp) as (
      select id, len, len as cumelen, t.id::text, 1::int as seqnum, 1 as grp
      from t
      where seqnum = 1
      union all
      select t.id,
             t.len,
             (case when cte.cumelen >= 5 then t.len else cte.cumelen + t.len end) as cumelen,
             (case when cte.cumelen >= 5 then t.id::text else cte.ids || ',' || t.id::text end) as ids,
             t.seqnum::int,
             (case when cte.cumelen >= 5 then cte.grp + 1 else cte.grp end)
      from t join
           cte
           on cte.seqnum = t.seqnum - 1
     )
select grp, max(ids)
from cte
group by grp;

Using stored functions allows to avoid (sometime) the head-breaking queries.

create or replace function fn_foo(ids out int[], characters out int) returns setof record language plpgsql as $$
declare
  r record;
  threshold int := 100;
begin
  ids := '{}'; characters := 0;
  for r in (
    select id, coalesce(length(content),0) as lng
    from test order by id)
  loop
    characters := characters + r.lng;
    ids := ids || r.id;
    if characters > threshold then
      return next;
      ids := '{}'; characters := 0;
    end if;
  end loop;
  if ids <> '{}' then
    return next;
  end if;
end $$;

select * from fn_foo();

╔═══════╤════════════╗
║  ids  │ characters ║
╠═══════╪════════════╣
║ {1,2} │        129 ║
║ {3,4} │        113 ║
║ {5}   │        120 ║
║ {6,7} │        172 ║
║ {8}   │         35 ║
╚═══════╧════════════╝
(5 rows)

Tags:

Sql

Postgresql