Таблица | Кортежей | Размер |
---|---|---|
genres | 19 | 48 kB |
links | 40 110 | 2 944 kB |
movie_genres | 74 229 | 4 856 kB |
movie_tags | 668 953 | 49 152 kB |
movies | 40 110 | 3 400 kB |
rating | 24 404 096 | 1 971 200 kB |
tags | 49 657 | 5 344 kB |
Общий размер: ~1995MB
Запрос для примера: вывести все комедии про зомби по рейтингу.
-- Naive version: all tables (including the ~24M-row ratings) are joined
-- before filtering and aggregation.
-- NOTE(review): aliases are swapped relative to table names (gm = movie_genres,
-- tm = movie_tags) — legal, but confusing to read.
select m.id, m.title, avg(r.rating)
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
join ratings r on (r.movie_id = m.id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
group by m.id, m.title
order by avg(r.rating) desc;
-- Byte-identical repeat of the previous query — presumably duplicated on a
-- second slide (e.g. to show an EXPLAIN ANALYZE run); confirm intent.
select m.id, m.title, avg(r.rating)
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
join ratings r on (r.movie_id = m.id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
group by m.id, m.title
order by avg(r.rating) desc;
-- Optimized version: first shrink the movie set with the selective
-- genre/tag filters, and only then join the large ratings table once
-- per surviving movie.
select m.id, m.title, avg(r.rating)
from (
-- Selective inner query: the handful of zombie comedies.
-- (grouping by m.id alone is valid in PostgreSQL since id is the PK)
select m.id, m.title
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
group by m.id
) m
join ratings r on (r.movie_id = m.id)
group by m.id, m.title
order by avg(r.rating) desc;
-- Byte-identical repeat of the optimized query above — presumably a second
-- slide showing its plan/timing; confirm intent.
select m.id, m.title, avg(r.rating)
from (
select m.id, m.title
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
group by m.id
) m
join ratings r on (r.movie_id = m.id)
group by m.id, m.title
order by avg(r.rating) desc;
-- Three ways to express "employees whose department exists".
-- 1) EXISTS: stops at the first matching department row.
SELECT E.*
FROM Employee E WHERE EXISTS (
SELECT *
FROM Department D WHERE D.DepartmentID = E.DepartmentID
);
-- 2) IN over a subquery. NOTE(review): semantics differ from EXISTS when the
-- subquery can yield NULLs (the NOT IN variant especially) — confirm data.
SELECT E.*
FROM Employee E WHERE E.DepartmentID IN (
SELECT DepartmentID
FROM Department D
);
-- 3) Plain INNER JOIN. NOTE(review): duplicates employee rows if
-- DepartmentID is not unique in Department — confirm the key.
SELECT E.*
FROM Employee E
JOIN Department D ON (E.DepartmentID = D.DepartmentID);
Удалось ускорить в 93.90 раз! Победа?
Заменим 'Zombie%' на 'Comedy%' и снова более 10 секунд :(
-- Same optimized query, but with the low-selectivity pattern the slide text
-- describes ("replace 'Zombie%' with 'Comedy%'"). The original line read
-- 'ZombieComedy%', which contradicts the prose (it would be MORE selective,
-- not less) — fixed to 'Comedy%'.
select m.id, m.title, avg(r.rating)
from (
select m.id, m.title
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
where lower(g.name) = lower('Comedy')
-- Non-selective tag filter: many tags match, so the "optimization" no
-- longer helps and the query is slow again (>10 s per the slide).
and lower(t.name) like lower('Comedy%')
group by m.id
) m
join ratings r on (r.movie_id = m.id)
group by m.id, m.title
order by avg(r.rating) desc;
-- Denormalization: keep a running sum/count of ratings on movies itself so
-- queries never need to touch the huge ratings table.
alter table movies
add column rating_sum float default 0 not null,
add column rating_cnt int default 0 not null;
-- One-off backfill from the existing ratings.
update movies m
set
rating_cnt = r.rating_cnt,
rating_sum = r.rating_sum
from (
select movie_id, count(*) as rating_cnt, sum(rating) as rating_sum
from ratings
group by movie_id
) r where (m.id = r.movie_id);
create trigger ...;
-- Query against the denormalized columns: no join to ratings at all, so the
-- runtime no longer depends on how selective the tag pattern is.
-- Fixes vs original: named alias instead of positional "order by 3", and the
-- missing statement terminator added.
select m.id, m.title, m.rating_sum / m.rating_cnt as avg_rating
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
and m.rating_cnt > 0  -- guard against division by zero
group by m.id
order by avg_rating desc;
-- Full-table DELETE: row-by-row, WAL for every row — slow on large tables.
DELETE FROM films;
Для удаления всех записей из таблицы/таблиц есть отдельная команда
-- TRUNCATE reclaims the table in O(1); CASCADE also truncates referencing tables.
TRUNCATE TABLE films CASCADE;
Особенности:
-- Exact COUNT(*) must scan the table (or an index) — expensive on big tables.
SELECT COUNT(*) FROM films;
Если нужно ориентировочное количество записей в таблице, то можно выполнить запрос вида:
-- Cheap approximation from the planner statistics in the catalog
-- (accuracy depends on how recently ANALYZE/VACUUM ran).
SELECT reltuples::bigint AS estimate
FROM pg_class
WHERE oid = 'films'::regclass;
-- Exact count of a query's result.
-- Fix: PostgreSQL requires an alias on a subquery in FROM — the original
-- failed with "subquery in FROM must have an alias".
SELECT COUNT(*) FROM (
SELECT * FROM tags WHERE name LIKE 'Zombie%'
) AS t;
Если нужно ориентировочное количество записей в результате выполнения запроса, то можно получить их из плана выполнения:
-- EXPLAIN (without ANALYZE) only plans the query; the "rows=" figure in the
-- top plan node is the planner's estimate of the result size.
EXPLAIN SELECT * FROM tags WHERE name LIKE 'Comedy%';
-- Returns the planner's row estimate for an arbitrary query text by parsing
-- the "rows=N" attribute out of its EXPLAIN output.
-- Returns NULL if no estimate is found. The query is planned, never executed.
CREATE FUNCTION count_estimate(query text) RETURNS INTEGER AS
$func$
DECLARE
rec record;
ROWS INTEGER;
BEGIN
-- Iterate over the EXPLAIN output lines; the first line carrying "rows="
-- is the top plan node, i.e. the estimated result cardinality.
FOR rec IN EXECUTE 'EXPLAIN ' || query LOOP
ROWS := SUBSTRING(rec."QUERY PLAN" FROM ' rows=([[:digit:]]+)');
EXIT WHEN ROWS IS NOT NULL;
END LOOP;
RETURN ROWS;
END
$func$ LANGUAGE plpgsql;
-- Usage example; note the doubled single quotes inside the query string.
SELECT count_estimate('SELECT * FROM tags WHERE name LIKE ''Comedy%''');
-- OFFSET pagination: the server still produces and discards the first 2000
-- rows — O(offset) per page.
SELECT * FROM movies
ORDER BY title LIMIT 100 OFFSET 2000;
-- Keyset (seek) pagination: continue from the last title seen — O(page size).
-- NOTE(review): assumes title is unique; ties need a (title, id) tiebreaker.
SELECT * FROM movies
WHERE title > 'Angry Video Game Nerd: The Movie'
ORDER BY title LIMIT 100;
-- Wide-row OFFSET: every skipped row is fetched in full.
SELECT *
FROM movies
ORDER BY title
LIMIT 10 OFFSET 100;
-- "Deferred join": paginate over ids only (index-friendly), then fetch the
-- wide rows just for the selected page.
SELECT m.*
FROM movies m
JOIN (
SELECT id
FROM movies m
ORDER BY title
LIMIT 10 OFFSET 100
) l ON (m.id = l.id)
ORDER BY title;
-- Pre-PG10 partitioning via table inheritance: parent table plus children
-- carrying CHECK constraints the planner uses for partition pruning.
CREATE TABLE test (
id SERIAL PRIMARY KEY,
title TEXT,
created_on TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_test_created_on ON test (created_on);
-- Child created directly with its range CHECK.
CREATE TABLE test_1
(CHECK ( id >= 100 AND id < 200 ))
INHERITS (test);
-- Alternative: clone the parent's definition first, then attach.
CREATE TABLE test_2 ( LIKE test INCLUDING ALL );
ALTER TABLE test_2
INHERIT test,
ADD CONSTRAINT partition_check CHECK (id >= 200 and id < 300);
-- pg_pathman extension: automated range partitioning (pre-declarative era).
CREATE EXTENSION pg_pathman;
CREATE TABLE journal (
id SERIAL,
dt TIMESTAMP NOT NULL,
level INTEGER,
msg TEXT);
CREATE INDEX ON journal(dt);
-- Fill a year of per-minute sample rows.
-- NOTE(review): random() * 6 is implicitly cast to INTEGER (rounds), giving
-- levels 0..6 — confirm the intended level range.
INSERT INTO journal (dt, level, msg)
SELECT g, random() * 6, md5(g::text)
FROM generate_series('2015-01-01'::date, '2015-12-31'::date, '1 minute') as g;
-- Split the populated table into one partition per day.
SELECT create_range_partitions('journal', 'dt',
'2015-01-01'::date, '1 day'::interval);
-- Declarative partitioning (PostgreSQL 10+).
-- Fix: range partition bounds are inclusive FROM and EXCLUSIVE TO, so the
-- original TO (199)/TO (299) silently left ids 199 and 299 without a
-- partition (inserts would fail). TO (200)/TO (300) also matches the
-- inheritance example's CHECKs (id >= 100 AND id < 200, etc.).
CREATE TABLE test (
id SERIAL PRIMARY KEY,
title TEXT,
created_on TIMESTAMPTZ NOT NULL DEFAULT NOW()
) PARTITION BY RANGE (id);
CREATE TABLE test_1 PARTITION OF test FOR VALUES FROM (100) TO (200);
CREATE TABLE test_2 PARTITION OF test FOR VALUES FROM (200) TO (300);
-- PG10 does not cascade index creation to partitions; create them per child.
CREATE INDEX ON test_1 (id);
CREATE INDEX ON test_2 (id);
Уменьшение времени блокировок за счет избавления от крупных запросов вида «обновить всё».
До:
-- "Before": one giant DELETE — holds its row locks and bloats the table for
-- the entire (possibly very long) run.
-- NOTE(review): '>' selects rows NEWER than 90 days; a retention purge would
-- normally delete the OLD rows with '<' — confirm the intended direction.
DELETE FROM work_logs
WHERE created_at > NOW() - interval '90 days';
После (выполнять в цикле):
BEGIN;
-- "After" (run in a loop until 0 rows affected): delete in bounded batches so
-- each transaction holds row locks only briefly.
-- Fix: the original had a stray "DELETE FROM work_logs" line inside the
-- subquery; the inner query must be a plain SELECT picking the next batch.
DELETE FROM work_logs
WHERE id IN (
SELECT id
FROM work_logs
WHERE created_at > NOW() - interval '90 days'  -- kept as in the "before" version; see direction note there
LIMIT 10000
);
COMMIT;
До:
-- "Before": snapshot today's totals, then reset every player in one shot.
-- NOTE(review): PostgreSQL does not allow FOR UPDATE together with GROUP BY —
-- this statement errors; shown here as the problematic pattern.
BEGIN;
INSERT INTO spent_daily
SELECT :day, level, SUM(spent_daily) FROM players
GROUP BY level FOR UPDATE;
-- Unfiltered UPDATE rewrites every row and blocks all concurrent writers.
UPDATE players SET spent_daily = 0;
END;
-- Hot path executed by the application on every purchase.
-- NOTE(review): column is spent_day here vs spent_daily above — confirm schema.
UPDATE players SET
spent_day = spent_day + :delta,
money = money - :delta
WHERE id = :id;
После:
-- "After": no global reset. Each row remembers the day it was last touched
-- (spent_day) plus the last two days' totals, so the nightly job only reads.
BEGIN;
INSERT INTO spent_daily
-- Pick the correct bucket depending on when the row was last updated;
-- rows untouched for 2+ days contribute NULL (ignored by SUM).
SELECT :day, level, SUM(CASE
WHEN spent_day = :day THEN spent_prev
WHEN spent_day = :day - 1 THEN spent_last
END) FROM players;
END;
-- Hot path: lazily roll the per-day buckets forward for just this player.
UPDATE players SET
spent_prev = CASE
WHEN spent_day = :day THEN spent_prev
WHEN spent_day = :day - 1 THEN spent_last
ELSE 0
END,
spent_last = CASE
WHEN spent_day = :day THEN spent_last ELSE 0
END + :delta,
spent_day = :day,
money = money - :delta
WHERE id = :id;
Массовая замена сгенерированных ресурсов на один с мета-данными.
Решение «в лоб»:
-- Brute-force mass replacement: one full UPDATE statement (and one table
-- scan, absent an index on res_id) per old resource id.
UPDATE items SET res_id = 73534, level = 1, meta = 1001
WHERE res_id = 40477;
UPDATE items SET res_id = 73534, level = 1, meta = 1201
WHERE res_id = 40478;
UPDATE items SET res_id = 73534, level = 2, meta = 1031
WHERE res_id = 40479;
…
UPDATE items SET res_id = 73534, level = 80, meta = 7641
WHERE res_id = 70477;
Более красивое решение:
-- Staging table for the single-statement rewrite below.
-- Fix: meta holds the integer meta-values (1001 … 7641) assigned to
-- items.meta; the original declared it BYTEA, which does not match the
-- integer literals being inserted and assigned.
CREATE TEMPORARY TABLE tmp_items (
res_id INT,
level SMALLINT,
meta INT
);
-- Load the (level, meta, old res_id) mapping into the staging table.
-- Fixes: the original inserted into "items" (wrong target) and relied on
-- implicit column order, which did not match the value tuples; an explicit
-- column list aligns level/meta/res_id with the data.
INSERT INTO tmp_items (level, meta, res_id) VALUES
(1, 1001, 40477),
(1, 1201, 40478),
…
(80, 7641, 70477);
-- One set-based UPDATE joined to the staging table replaces the whole series
-- of per-id statements above.
UPDATE items i
SET
res_id = 73534,
level = t.level,
meta = t.meta
FROM tmp_items t
WHERE i.res_id = t.res_id;
Решение «в лоб»:
// Naive JDBC insert: one network round trip per row (100 execute() calls).
String sql = "INSERT INTO foo (name, description) VALUES (?, ?)";
try (PreparedStatement ps = db.prepareStatement(sql)) {
for (int i = 0; i < 100; ++i) {
ps.setString(1, "name-" + i);
ps.setString(2, "description-" + i);
ps.execute();
}
}
Более производительное решение:
// Batched variant: rows are queued with addBatch() and shipped in one
// executeBatch() call, amortizing the round-trip cost.
String sql = "INSERT INTO foo (name, description) VALUES (?, ?)";
try (PreparedStatement ps = db.prepareStatement(sql)) {
for (int i = 0; i < 100; ++i) {
ps.setString(1, "name-" + i);
ps.setString(2, "description-" + i);
ps.addBatch();
}
ps.executeBatch();
}
Решение без покемонов:
// Same batched insert, additionally telling the driver not to fetch
// auto-generated keys (Statement.NO_GENERATED_KEYS) — skips the extra
// result processing when the keys are not needed.
String sql = "INSERT INTO foo (name, description) VALUES (?, ?)";
try (PreparedStatement ps = db.prepareStatement(sql,
Statement.NO_GENERATED_KEYS)) {
for (int i = 0; i < 100; ++i) {
ps.setString(1, "name-" + i);
ps.setString(2, "description-" + i);
ps.addBatch();
}
ps.executeBatch();
}
Загрузка данных через COPY гораздо быстрее:
-- Bulk load via COPY: a single command with minimal per-row overhead —
-- much faster than row-by-row INSERTs.
-- NOTE(review): server-side COPY FROM reads a file visible to the postgres
-- server process (path is resolved on the server); client-side loads use
-- psql's \copy or COPY FROM STDIN — confirm which is intended here.
COPY movie_tags (
user_id,
movie_id,
tag,
created)
FROM 'ml-latest/tags.csv'
WITH (
DELIMITER ',',
FORMAT CSV,
HEADER true,
ESCAPE '"',
ENCODING 'UTF-8'
);
-- Pessimistic locking: FOR UPDATE takes the row lock up front, blocking
-- concurrent writers for the whole transaction.
BEGIN;
SELECT * FROM player WHERE id = 42 FOR UPDATE;
...
UPDATE player SET money = 100500 WHERE id = 42;
COMMIT;
-- Optimistic locking: read without a lock, then make the write conditional
-- on the version still matching; 0 rows updated means a concurrent change.
BEGIN;
SELECT * FROM player WHERE id = 42;
COMMIT;
...
BEGIN;
UPDATE player SET money = 100500, ver = 13
WHERE id = 42 AND ver = 12;
COMMIT;
-- Unfiltered UPDATE: locks every row; any per-row writer may collide with it.
UPDATE players SET spent_daily = 0;
BEGIN;
UPDATE player SET money = 100500 WHERE id = 42;
COMMIT;
-- SET LOCAL lock_timeout: fail fast (here after 100 ms) instead of queuing
-- behind a long-running lock; LOCAL scopes it to this transaction only.
BEGIN;
SET LOCAL lock_timeout = '100ms';
UPDATE player SET money = 100500 WHERE id = 42;
COMMIT;
-- Plain CREATE INDEX blocks writes to the table for the duration of the build.
CREATE INDEX sales_quantity_index
ON sales_table (quantity);
-- CONCURRENTLY builds without blocking writes (slower, cannot run inside a
-- transaction block, and leaves an INVALID index behind if it fails).
CREATE INDEX CONCURRENTLY sales_quantity_index
ON sales_table (quantity);
-- Staged migration of ratings.created from integer epoch seconds to
-- timestamptz: add a nullable column (cheap), backfill, then swap names.
-- Fix: original misspelled the type as TIMESTEMPTZ in the backfill cast.
ALTER TABLE ratings ADD COLUMN created_new TIMESTAMPTZ NULL;
-- Backfill: epoch + N seconds. This still rewrites every row in one
-- statement; batch it on very large tables.
UPDATE ratings SET
created_new = 'epoch'::TIMESTAMPTZ
+ created * INTERVAL '1 second';
ALTER TABLE ratings DROP COLUMN created;
ALTER TABLE ratings RENAME COLUMN created_new TO created;
ALTER TABLE ratings ALTER COLUMN created SET NOT NULL;
-- Single-statement variant: ALTER TYPE ... USING rewrites the whole table
-- under an ACCESS EXCLUSIVE lock (blocks reads and writes for the duration).
-- Fix: original misspelled the type as TIMESTEMPTZ in the USING expression.
ALTER TABLE ratings
ALTER COLUMN created SET DATA TYPE TIMESTAMPTZ
USING 'epoch'::TIMESTAMPTZ + created * INTERVAL '1 second';