select version();
version
----------------------------------------------------------------------------------------------------
PostgreSQL 10devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 6.3.0-8) 6.3.0 20170221, 64-bit
(1 ligne)
Temps : 0,204 ms
set max_parallel_workers_per_gather=0;
SET
Temps : 0,139 ms
create unlogged table ventes_agg(dtv timestamp, id_prod integer, mtv integer) ;
CREATE TABLE
Temps : 25,879 ms
WITH serie(i) AS (SELECT generate_series(157680000,1,-1))
INSERT INTO ventes_agg
(SELECT current_timestamp - (ceil(i/5) || ' seconds')::interval,
trunc(random() * 200 + 1), trunc(random() * 100 + 1) from serie);
INSERT 0 157680000
Time: 658855,601 ms (10:58,856)
\d+
Liste des relations
Schéma | Nom | Type | Propriétaire | Taille | Description
--------+------------+-------+--------------+---------+-------------
public | ventes_agg | table | postgres | 6660 MB |
(1 ligne)
analyze ventes_agg;
ANALYZE
Time: 3941,324 ms (00:03,941)
explain
select id_prod, sum(mtv)
from ventes_agg
where id_prod in(5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199)
group by id_prod order by 2 desc
fetch first 3 rows only;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=10470817.39..10470817.40 rows=3 width=12)
-> Sort (cost=10470817.39..10470817.89 rows=200 width=12)
Sort Key: (sum(mtv)) DESC
-> HashAggregate (cost=10470812.81..10470814.81 rows=200 width=12)
Group Key: id_prod
-> Seq Scan on ventes_agg (cost=0.00..10313132.68 rows=31536026 width=8)
Filter: (id_prod = ANY ('{5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199}'::integer[]))
(7 lignes)
Temps : 351,372 ms
select id_prod, sum(mtv)
from ventes_agg
where id_prod in(5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199)
group by id_prod order by 2 desc
fetch first 3 rows only;
id_prod | sum
---------+----------
130 | 39914265
171 | 39913866
135 | 39899958
(3 lignes)
1ere exe : Time: 220962,871 ms (03:40,963)
2eme exe : Time: 54895,406 ms (00:54,895)
3eme exe : Time: 54204,300 ms (00:54,204)
create index ventes_agg_i1 on ventes_agg(id_prod);
CREATE INDEX
Time: 428682,229 ms (07:08,682)
explain
select id_prod, sum(mtv)
from ventes_agg
where id_prod in(5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199)
group by id_prod order by 2 desc
fetch first 3 rows only;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=3461068.38..3461068.39 rows=3 width=12)
-> Sort (cost=3461068.38..3461068.88 rows=200 width=12)
Sort Key: (sum(mtv)) DESC
-> HashAggregate (cost=3461063.80..3461065.80 rows=200 width=12)
Group Key: id_prod
-> Bitmap Heap Scan on ventes_agg (cost=558898.80..3303383.80 rows=31536000 width=8)
Recheck Cond: (id_prod = ANY ('{5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199}'::integer[]))
-> Bitmap Index Scan on ventes_agg_i1 (cost=0.00..551014.80 rows=31536000 width=0)
Index Cond: (id_prod = ANY ('{5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199}'::integer[]))
(9 lignes)
Temps : 48,855 ms
select id_prod, sum(mtv)
from ventes_agg
where id_prod in(5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199)
group by id_prod order by 2 desc
fetch first 3 rows only;
id_prod | sum
---------+----------
130 | 39914265
171 | 39913866
135 | 39899958
(3 lignes)
1ere exe : Time: 146560,763 ms (02:26,561)
2eme exe : Time: 56605,836 ms (00:56,606)
3eme exe : Time: 58195,223 ms (00:58,195)
drop index ventes_agg_i1;
DROP INDEX
Temps : 273,678 ms
set max_parallel_workers_per_gather=3;
SET
Temps : 0,154 ms
explain
select id_prod, sum(mtv)
from ventes_agg
where id_prod in(5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199)
group by id_prod order by 2 desc
fetch first 3 rows only;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=3956159.25..3956159.26 rows=3 width=12)
-> Sort (cost=3956159.25..3956159.75 rows=200 width=12)
Sort Key: (sum(mtv)) DESC
-> Finalize GroupAggregate (cost=3956150.17..3956156.67 rows=200 width=12)
Group Key: id_prod
-> Sort (cost=3956150.17..3956151.67 rows=600 width=12)
Sort Key: id_prod
-> Gather (cost=3956060.48..3956122.48 rows=600 width=12)
Workers Planned: 3
-> Partial HashAggregate (cost=3955060.48..3955062.48 rows=200 width=12)
Group Key: id_prod
-> Parallel Seq Scan on ventes_agg (cost=0.00..3904195.97 rows=10172903 width=8)
Filter: (id_prod = ANY ('{5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199}'::integer[]))
(13 lignes)
Temps : 40,151 ms
select id_prod, sum(mtv)
from ventes_agg
where id_prod in(5,11,14,21,24,31,36,42,43,51,57,61,68,72,78,81,87,92,94,101,108,110,113,120,122,130,135,141,149,150,157,161,168,170,171,182,183,191,197,199)
group by id_prod order by 2 desc
fetch first 3 rows only;
id_prod | sum
---------+----------
130 | 39914265
171 | 39913866
135 | 39899958
(3 lignes)
1ere exe : Time: 14945,747 ms (00:14,946)
2eme exe : Time: 14815,274 ms (00:14,815)
3eme exe : Time: 15181,102 ms (00:15,181)
La requête prenait autour de 55 secondes au départ avec les données en cache. Un index B-Tree, même utilisé de manière spéciale, nétait pas pertinent puisque le temps dexécution restait dans les mêmes eaux. En le supprimant et en utilisant toutes les ressources de la machine virtuelle grâce au parallélisme, le temps a pu être abaissé autour des 15 secondes. Le parallélisme est à manier avec précaution. Il est coûteux et doit être envisagé en dernier ressort. Mais, parfois, cest la seule option pour gagner du temps sur une requête particulière. A noter que dès PostgreSQL 9.6, le parallélisme peut sappliquer sur les balayages complets de table mais aussi sur les jointures et les aggrégations.