Prevent and/or detect cycles in postgres
To answer my own question, I came up with a trigger that prevents this:
CREATE OR REPLACE FUNCTION detect_cycle() RETURNS TRIGGER AS
$func$
DECLARE
loops INTEGER;
BEGIN
EXECUTE 'WITH RECURSIVE search_graph(id, parentid, name, depth, path, cycle) AS (
SELECT g.id, g.parentid, g.name, 1,
ARRAY[g.id],
false
FROM node g
UNION ALL
SELECT g.id, g.parentid, g.name, sg.depth + 1,
path || g.id,
g.id = ANY(path)
FROM node g, search_graph sg
WHERE g.id = sg.parentid AND NOT cycle
)
SELECT count(*) FROM search_graph where cycle = TRUE' INTO loops;
IF loops > 0 THEN
RAISE EXCEPTION 'Loop detected!';
ELSE
RETURN NEW;
END IF;
END
$func$ LANGUAGE plpgsql;
CREATE TRIGGER detect_cycle_after_update
AFTER UPDATE ON node
FOR EACH ROW EXECUTE PROCEDURE detect_cycle();
So, if you try to create a loop, like in the question:
UPDATE node SET parentid = 2 WHERE id = 1;
You get an EXCEPTION
:
ERROR: Loop detected!
Your trigger simplified and optimized, should be considerably faster:
CREATE OR REPLACE FUNCTION detect_cycle()
RETURNS TRIGGER
LANGUAGE plpgsql AS
$func$
BEGIN
IF EXISTS (
WITH RECURSIVE search_graph(parentid, path, cycle) AS ( -- relevant columns
-- check ahead, makes 1 step less
SELECT g.parentid, ARRAY[g.id, g.parentid], (g.id = g.parentid)
FROM node g
WHERE g.id = NEW.id -- only test starting from new row
UNION ALL
SELECT g.parentid, sg.path || g.parentid, g.parentid = ANY(sg.path)
FROM search_graph sg
JOIN node g ON g.id = sg.parentid
WHERE NOT sg.cycle
)
SELECT FROM search_graph
WHERE cycle
LIMIT 1 -- stop evaluation at first find
)
THEN
RAISE EXCEPTION 'Loop detected!';
ELSE
RETURN NEW;
END IF;
END
$func$;
You don't need dynamic SQL, you don't need to count, you don't need all the columns and you don't need to test the whole table for every single row.
CREATE TRIGGER detect_cycle_after_update
AFTER INSERT OR UPDATE ON node
FOR EACH ROW EXECUTE PROCEDURE detect_cycle();
An INSERT
like this has to be prohibited, too:
INSERT INTO node (id, name,parentid) VALUES (8,'D',9), (9,'E',8);