filmov
tv
How can I get a hash of an entire table in postgresql?

Показать описание
The bounty expires in 2 days. Answers to this question are eligible for a +50 reputation bounty. Dima Tisnek is looking for a canonical answer:
Some standard solution would be great, SQL or possibly some postgres extension...
I would like a fairly efficient way to condense an entire table to a hash value.
I have some tools that generate entire data tables, which can then be used to generate further tables, and so on. I'm trying to implement a simplistic build system to coordinate build runs and avoid repeating work. I want to be able to record hashes of the input tables so that I can later check whether they have changed. Building a table takes minutes or hours, so spending several seconds building hashes is acceptable.
Solution Hints:
-- Fingerprint of the whole table foo: gather every row into a single array,
-- render that array as text and take its md5. The rows are ordered by id (the
-- table's primary key) inside the aggregate so the result does not depend on
-- the order in which rows happen to be scanned.
SELECT md5(array_agg(f.* ORDER BY f.id)::text)
FROM foo AS f;
-- Fingerprint of the whole table my_table: md5 every row's text form, then md5
-- the text form of the array of those row hashes.
-- The row hashes are ordered inside the aggregate, by the hash itself, so the
-- result is deterministic without knowing any key column: it does not rely on
-- a subquery's ORDER BY surviving aggregation, nor on the first column being
-- unique. COLLATE "C" keeps the ordering independent of the database locale.
-- Returns NULL when my_table is empty (array_agg over no rows is NULL).
SELECT md5(array_agg(r.row_hash ORDER BY r.row_hash COLLATE "C")::varchar)
FROM (
    SELECT md5((t.*)::varchar) AS row_hash
    FROM my_table AS t
) AS r;
-- State-transition function for text concatenation: appends the next value
-- to the text accumulated so far.
--   $1  text accumulated so far (NULL when nothing has been accumulated yet)
--   $2  next value to append
-- Returns the new accumulated text.
create function pg_concat(text, text) returns text as $$
begin
    -- Nothing accumulated yet: the incoming value becomes the state.
    if $1 is null then
        return $2;
    end if;
    -- Skip a NULL input; `$1 || NULL` evaluates to NULL and would throw away
    -- everything accumulated so far.
    if $2 is null then
        return $1;
    end if;
    return $1 || $2;
end;
$$ language plpgsql;
-- Final function for the pg_concat aggregate: hands the accumulated text back
-- unchanged.
--   $1  accumulated text
create function pg_concat_fin(text)
returns text
language plpgsql
as $$
declare
    acc alias for $1;
begin
    return acc;
end;
$$;
-- Aggregate over text values: the state is a text value advanced by the
-- pg_concat(text, text) function, and pg_concat_fin(text) produces the result.
-- No initial condition is given, so the state starts out as NULL.
create aggregate pg_concat(text) (
    stype     = text,
    sfunc     = pg_concat,
    finalfunc = pg_concat_fin
);
-- Chained-hash step: md5 of the running hash ($1) followed by the next value
-- ($2). Returns NULL if either argument is NULL.
create function zz_concat(text, text) returns text as
$$ select md5($1 || $2); $$ language sql;

-- Aggregate that folds text values into one md5: the state starts as the empty
-- string and is replaced by zz_concat(state, value) for each input row, so
-- only one hash-sized string is held at a time.
create aggregate zz_hashagg(text) (
    sfunc = zz_concat,
    stype = text,
    initcond = ''
);

-- Usage: hash of the whole table example. The function and aggregate above
-- must exist before this statement runs. Rows are fed in id order because the
-- chained hash depends on input order.
select zz_hashagg(cast((example.*) as text) order by id) from example;
-- Order-independent hash of table foo.
-- NOTE(review): both select lists were empty in the original statement, so it
-- returned zero-column rows and no hash at all. The select lists below are a
-- reconstruction that fits the visible FROM / GROUP BY structure; confirm
-- against the intended design.
--   1. each row is hashed: md5 of its text form, converted to a 16-byte bytea
--      (the '\x' prefix makes the hex digest parse as bytea hex input);
--   2. rows are bucketed by the first 3 bytes of that hash; each bucket's row
--      hashes are concatenated in sorted order and hashed again;
--   3. the bucket hashes are concatenated in bucket order and hashed once more.
-- Returns NULL when foo is empty.
select
    md5(string_agg(t.bucket_hash, '' order by t.bucket))
from(
    select
        substring(row_hash for 3) as bucket,
        md5(string_agg(row_hash, ''::bytea order by row_hash)) as bucket_hash
    from
        foo f
        cross join lateral(select ('\x' || md5(f::text))::bytea row_hash) c
    group by substring(row_hash for 3)
) t
;
Some standard solution would be great, SQL or possibly some postgres extension...
I would like a fairly efficient way to condense an entire table to a hash value.
I have some tools that generate entire data tables, which can then be used to generate further tables, and so on. I'm trying to implement a simplistic build system to coordinate build runs and avoid repeating work. I want to be able to record hashes of the input tables so that I can later check whether they have changed. Building a table takes minutes or hours, so spending several seconds building hashes is acceptable.
Solution Hints:
-- Fingerprint of the whole table foo: gather every row into a single array,
-- render that array as text and take its md5. The rows are ordered by id (the
-- table's primary key) inside the aggregate so the result does not depend on
-- the order in which rows happen to be scanned.
SELECT md5(array_agg(f.* ORDER BY f.id)::text)
FROM foo AS f;
-- Fingerprint of the whole table my_table: md5 every row's text form, then md5
-- the text form of the array of those row hashes.
-- The row hashes are ordered inside the aggregate, by the hash itself, so the
-- result is deterministic without knowing any key column: it does not rely on
-- a subquery's ORDER BY surviving aggregation, nor on the first column being
-- unique. COLLATE "C" keeps the ordering independent of the database locale.
-- Returns NULL when my_table is empty (array_agg over no rows is NULL).
SELECT md5(array_agg(r.row_hash ORDER BY r.row_hash COLLATE "C")::varchar)
FROM (
    SELECT md5((t.*)::varchar) AS row_hash
    FROM my_table AS t
) AS r;
-- State-transition function for text concatenation: appends the next value
-- to the text accumulated so far.
--   $1  text accumulated so far (NULL when nothing has been accumulated yet)
--   $2  next value to append
-- Returns the new accumulated text.
create function pg_concat(text, text) returns text as $$
begin
    -- Nothing accumulated yet: the incoming value becomes the state.
    if $1 is null then
        return $2;
    end if;
    -- Skip a NULL input; `$1 || NULL` evaluates to NULL and would throw away
    -- everything accumulated so far.
    if $2 is null then
        return $1;
    end if;
    return $1 || $2;
end;
$$ language plpgsql;
-- Final function for the pg_concat aggregate: hands the accumulated text back
-- unchanged.
--   $1  accumulated text
create function pg_concat_fin(text)
returns text
language plpgsql
as $$
declare
    acc alias for $1;
begin
    return acc;
end;
$$;
-- Aggregate over text values: the state is a text value advanced by the
-- pg_concat(text, text) function, and pg_concat_fin(text) produces the result.
-- No initial condition is given, so the state starts out as NULL.
create aggregate pg_concat(text) (
    stype     = text,
    sfunc     = pg_concat,
    finalfunc = pg_concat_fin
);
-- Chained-hash step: md5 of the running hash ($1) followed by the next value
-- ($2). Returns NULL if either argument is NULL.
create function zz_concat(text, text) returns text as
$$ select md5($1 || $2); $$ language sql;

-- Aggregate that folds text values into one md5: the state starts as the empty
-- string and is replaced by zz_concat(state, value) for each input row, so
-- only one hash-sized string is held at a time.
create aggregate zz_hashagg(text) (
    sfunc = zz_concat,
    stype = text,
    initcond = ''
);

-- Usage: hash of the whole table example. The function and aggregate above
-- must exist before this statement runs. Rows are fed in id order because the
-- chained hash depends on input order.
select zz_hashagg(cast((example.*) as text) order by id) from example;
-- Order-independent hash of table foo.
-- NOTE(review): both select lists were empty in the original statement, so it
-- returned zero-column rows and no hash at all. The select lists below are a
-- reconstruction that fits the visible FROM / GROUP BY structure; confirm
-- against the intended design.
--   1. each row is hashed: md5 of its text form, converted to a 16-byte bytea
--      (the '\x' prefix makes the hex digest parse as bytea hex input);
--   2. rows are bucketed by the first 3 bytes of that hash; each bucket's row
--      hashes are concatenated in sorted order and hashed again;
--   3. the bucket hashes are concatenated in bucket order and hashed once more.
-- Returns NULL when foo is empty.
select
    md5(string_agg(t.bucket_hash, '' order by t.bucket))
from(
    select
        substring(row_hash for 3) as bucket,
        md5(string_agg(row_hash, ''::bytea order by row_hash)) as bucket_hash
    from
        foo f
        cross join lateral(select ('\x' || md5(f::text))::bytea row_hash) c
    group by substring(row_hash for 3)
) t
;