我实际上也在解决同样的问题。遗憾的是，我认为不存在非常高效的解决方案——至少仅靠 SQL 很难做到。只需去掉 `distinct` 以及用于剔除重叠结果的 `not exists` 子查询，就能观察到工作集有多大。话虽如此，下面的解决方案是可行的。
/* remove objects left over from a previous run so the script can be re-executed */
drop table if exists foobar;
drop function if exists addset(int[], int);
/* edge list: each row links vertex src to vertex dst */
create table foobar (
    src integer,
    dst integer
);
/* Add element a to array v, treating v as a set.
   Returns the distinct elements of v together with a, sorted ascending.
   Not strictly necessary, but convenient for the recursive query. */
create function addset(v int[], a int) returns int[]
as $$
begin
    /* distinct is what makes this a set operation: without it, adding a
       value that is already present in v would store it twice */
    return (
        select array_agg(distinct e order by e)
        from unnest(v || a) as f(e)
    );
end
$$ language plpgsql;
/* sample edges; within every pair the smaller vertex id is listed first */
insert into foobar (src, dst)
values
    (1, 2),
    (1, 3),
    (2, 3),
    (3, 4),
    (4, 5),
    (6, 7),
    (6, 8);
/* Use a recursive query to grow every edge into a larger vertex set,
   then keep only the sets that are not contained in a bigger one. */
with recursive foo_union (v) as (
    /* starter sets: one two-element array per edge (a separate CTE could
       serve as the starter, but that is not necessary here) */
    select array[src, dst] as v
    from foobar
    union
    /* Extend a set with the dst of any edge whose src is already a member
       (edges are only followed from src to dst). Plain union, not union all:
       addset returns sorted arrays, so the same set reached along different
       paths is an identical row and is discarded instead of being expanded
       again. */
    select addset(u.v, f.dst) as v
    from foo_union as u
    inner join foobar as f
        on f.src = any(u.v)
    where not f.dst = any(u.v)
)
select distinct a.v
from foo_union as a
where not exists (
    /* eliminate the many overlapping results: drop a set when another,
       different set contains all of its elements */
    select *
    from foo_union as b
    where b.v @> a.v
      and b.v != a.v
);
但需要再次强调，这种做法对于较大的数据集完全不切实际；其他任何解决方案都需要采用迭代的方式。