sql - Sélectionnez la première ligne dans chaque groupe GROUP BY

Mots clés : sqlsqlitepostgresqlgroup-bygreatest-n-per-groupsql

meilleur 4 Réponses sql - Sélectionnez la première ligne dans chaque groupe GROUP BY

vote vote

97

SELECT DISTINCT ON (customer)        id, customer, total FROM   purchases ORDER  BY customer, total DESC, id;
SELECT DISTINCT ON (2)        id, customer, total FROM   purchases ORDER  BY 2, 3 DESC, 1; 
... ORDER  BY customer, total DESC NULLS LAST, id;
CREATE INDEX purchases_3c_idx ON purchases (customer, total DESC, id); 
vote vote

80

WITH summary AS (     SELECT p.id,             p.customer,             p.total,             ROW_NUMBER() OVER(PARTITION BY p.customer                                   ORDER BY p.total DESC) AS rank       FROM PURCHASES p)  SELECT *    FROM summary  WHERE rank = 1 
  SELECT MIN(x.id),  -- change to MAX if you want the highest          x.customer,           x.total     FROM PURCHASES x     JOIN (SELECT p.customer,                  MAX(total) AS max_total             FROM PURCHASES p         GROUP BY p.customer) y ON y.customer = x.customer                               AND y.max_total = x.total GROUP BY x.customer, x.total 
vote vote

74

CREATE TABLE purchases (   id          serial , customer_id int  -- REFERENCES customer , total       int  -- could be amount of money in Cent , some_column text -- to make the row bigger, more realistic ); 
INSERT INTO purchases (customer_id, total, some_column)    -- insert 200k rows SELECT (random() * 10000)::int             AS customer_id  -- 10k customers      , (random() * random() * 100000)::int AS total           , 'note: ' || repeat('x', (random()^2 * random() * random() * 500)::int) FROM   generate_series(1,200000) g;  ALTER TABLE purchases ADD CONSTRAINT purchases_id_pkey PRIMARY KEY (id);  DELETE FROM purchases WHERE random() > 0.9; -- some dead rows  INSERT INTO purchases (customer_id, total, some_column) SELECT (random() * 10000)::int             AS customer_id  -- 10k customers      , (random() * random() * 100000)::int AS total           , 'note: ' || repeat('x', (random()^2 * random() * random() * 500)::int) FROM   generate_series(1,20000) g;  -- add 20k to make it ~ 200k  CREATE INDEX purchases_3c_idx ON purchases (customer_id, total DESC, id);  VACUUM ANALYZE purchases; 
CREATE TABLE customer AS SELECT customer_id, 'customer_' || customer_id AS customer FROM   purchases GROUP  BY 1 ORDER  BY 1;  ALTER TABLE customer ADD CONSTRAINT customer_customer_id_pkey PRIMARY KEY (customer_id);  VACUUM ANALYZE customer; 
               what                | bytes/ct | bytes_pretty | bytes_per_row -----------------------------------+----------+--------------+---------------  core_relation_size                | 20496384 | 20 MB        |           102  visibility_map                    |        0 | 0 bytes      |             0  free_space_map                    |    24576 | 24 kB        |             0  table_size_incl_toast             | 20529152 | 20 MB        |           102  indexes_size                      | 10977280 | 10 MB        |            54  total_size_incl_toast_and_indexes | 31506432 | 30 MB        |           157  live_rows_in_text_representation  | 13729802 | 13 MB        |            68  ------------------------------    |          |              |  row_count                         |   200045 |              |  live_tuples                       |   200045 |              |  dead_tuples                       |    19955 |              | 
WITH cte AS (    SELECT id, customer_id, total         , row_number() OVER(PARTITION BY customer_id ORDER BY total DESC) AS rn    FROM   purchases    ) SELECT id, customer_id, total FROM   cte WHERE  rn = 1; 
SELECT id, customer_id, total FROM   (    SELECT id, customer_id, total         , row_number() OVER(PARTITION BY customer_id ORDER BY total DESC) AS rn    FROM   purchases    ) sub WHERE  rn = 1; 
SELECT DISTINCT ON (customer_id)        id, customer_id, total FROM   purchases ORDER  BY customer_id, total DESC, id; 
WITH RECURSIVE cte AS (    (  -- parentheses required    SELECT id, customer_id, total    FROM   purchases    ORDER  BY customer_id, total DESC    LIMIT  1    )    UNION ALL    SELECT u.*    FROM   cte c    ,      LATERAL (       SELECT id, customer_id, total       FROM   purchases       WHERE  customer_id > c.customer_id  -- lateral reference       ORDER  BY customer_id, total DESC       LIMIT  1       ) u    ) SELECT id, customer_id, total FROM   cte ORDER  BY customer_id; 
SELECT l.* FROM   customer c ,      LATERAL (    SELECT id, customer_id, total    FROM   purchases    WHERE  customer_id = c.customer_id  -- lateral reference    ORDER  BY total DESC    LIMIT  1    ) l; 
SELECT (array_agg(id ORDER BY total DESC))[1] AS id      , customer_id      , max(total) AS total FROM   purchases GROUP  BY customer_id; 
1. 273.274 ms   2. 194.572 ms   3. 111.067 ms   4.  92.922 ms  -- ! 5.  37.679 ms  -- winner 6. 189.495 ms 
1. 288.006 ms 2. 223.032 ms   3. 107.074 ms   4.  78.032 ms  -- ! 5.  33.944 ms  -- winner 6. 211.540 ms   
1. 381.573 ms 2. 311.976 ms 3. 124.074 ms  -- winner 4. 710.631 ms 5. 311.976 ms 6. 421.679 ms 
1. 103 ms 2. 103 ms   3.  23 ms  -- winner   4.  71 ms   5.  22 ms  -- winner 6.  81 ms   
1. 127 ms 2. 126 ms   3.  36 ms  -- winner   4. 620 ms   5. 145 ms 6. 203 ms   
1. 526 ms 2. 527 ms   3. 127 ms 4.   2 ms  -- winner ! 5.   1 ms  -- winner ! 6. 356 ms   
1. 535 ms 2. 529 ms   3. 132 ms 4. 108 ms  -- ! 5.  71 ms  -- winner 6. 376 ms   
1.  691 ms 2.  684 ms   3.  234 ms  -- winner 4. 4669 ms 5. 1089 ms 6. 1264 ms   
A: 567.218 ms B: 386.673 ms 
A: 249.136 ms B:  55.111 ms 
A:   0.143 ms B:   0.072 ms 
CREATE INDEX purchases_3c_idx ON purchases (customer, total DESC, id); 
1A: 277.953 ms   1B: 193.547 ms  2A: 249.796 ms -- special index not used   2B:  28.679 ms  3A:   0.120 ms   3B:   0.048 ms 
vote vote

67

SELECT  customer,         (array_agg(id ORDER BY total DESC))[1],         max(total) FROM purchases GROUP BY customer 

Questions similaires