From 35e63fa3f5472f847044499ce6d2c6a44220ce07 Mon Sep 17 00:00:00 2001
From: Omar Roth <omarroth@hotmail.com>
Date: Tue, 9 Oct 2018 08:40:29 -0500
Subject: [PATCH] Use materialized views for subscription feeds

---
 config/config.yml                |  1 +
 src/invidious.cr                 | 72 +++++++++++++++++++-------------
 src/invidious/helpers/helpers.cr |  1 +
 src/invidious/helpers/utils.cr   |  6 +++
 src/invidious/jobs.cr            | 11 +++++
 5 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/config/config.yml b/config/config.yml
index 577968f0..309be236 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -10,3 +10,4 @@ db:
 full_refresh: false
 https_only: false
 geo_bypass: true
+update_feeds: true
diff --git a/src/invidious.cr b/src/invidious.cr
index a73054fc..4ba4f960 100644
--- a/src/invidious.cr
+++ b/src/invidious.cr
@@ -98,6 +98,12 @@ spawn do
   end
 end
 
+if CONFIG.update_feeds # only start the feed job when `update_feeds` is enabled in the config
+  spawn do # update_feeds loops forever, so it gets a fiber of its own
+    update_feeds(PG_DB)
+  end
+end
+
 decrypt_function = [] of {name: String, value: Int32}
 spawn do
   update_decrypt_function do |function|
@@ -475,9 +481,8 @@ get "/search" do |env|
   user = env.get? "user"
   if user
     user = user.as(User)
-    ucids = user.subscriptions
+    view_name = "subscriptions_#{sha256(user.email)[0..7]}"
   end
-  ucids ||= [] of String
 
   channel = nil
   content_type = "all"
@@ -514,14 +519,19 @@ get "/search" do |env|
   if channel
     count, videos = channel_search(search_query, page, channel)
   elsif subscriptions
-    videos = PG_DB.query_all("SELECT id,title,published,updated,ucid,author FROM (
+    if view_name
+      videos = PG_DB.query_all("SELECT id,title,published,updated,ucid,author FROM (
       SELECT *,
-      to_tsvector(channel_videos.title) ||
-      to_tsvector(channel_videos.author)
+      to_tsvector(#{view_name}.title) ||
+      to_tsvector(#{view_name}.author)
       as document
-      FROM channel_videos WHERE ucid IN (#{arg_array(ucids, 3)})
-      ) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;", [search_query, (page - 1) * 20] + ucids, as: ChannelVideo)
-    count = videos.size
+      FROM #{view_name}
+      ) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;", search_query, (page - 1) * 20, as: ChannelVideo)
+      count = videos.size
+    else
+      videos = [] of ChannelVideo
+      count = 0
+    end
   else
     begin
       search_params = produce_search_params(sort: sort, date: date, content_type: content_type,
@@ -799,6 +809,12 @@ post "/login" do |env|
 
       PG_DB.exec("INSERT INTO users VALUES (#{args})", user_array)
 
+      view_name = "subscriptions_#{sha256(user.email)[0..7]}"
+      PG_DB.exec("CREATE MATERIALIZED VIEW #{view_name} AS \
+        SELECT * FROM channel_videos WHERE \
+        ucid = ANY ((SELECT subscriptions FROM users WHERE email = '#{user.email}')::text[]) \
+      ORDER BY published DESC;")
+
       if Kemal.config.ssl || CONFIG.https_only
         secure = true
       else
@@ -1364,6 +1380,8 @@ get "/feed/subscriptions" do |env|
 
     notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email,
       as: Array(String))
+    view_name = "subscriptions_#{sha256(user.email)[0..7]}"
+
     if preferences.notifications_only && !notifications.empty?
       args = arg_array(notifications)
 
@@ -1386,39 +1404,34 @@ get "/feed/subscriptions" do |env|
     else
       if preferences.latest_only
         if preferences.unseen_only
-          ucids = arg_array(user.subscriptions)
           if user.watched.empty?
             watched = "'{}'"
           else
-            watched = arg_array(user.watched, user.subscriptions.size + 1)
+            watched = arg_array(user.watched)
           end
 
-          videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \
-            ucid IN (#{ucids}) AND id NOT IN (#{watched}) ORDER BY ucid, published DESC",
-            user.subscriptions + user.watched, as: ChannelVideo)
+          videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} WHERE \
+            id NOT IN (#{watched}) ORDER BY ucid, published DESC",
+            user.watched, as: ChannelVideo)
         else
-          args = arg_array(user.subscriptions)
-          videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \
-          ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo)
+          videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name}", as: ChannelVideo)
         end
 
         videos.sort_by! { |video| video.published }.reverse!
       else
         if preferences.unseen_only
-          ucids = arg_array(user.subscriptions, 3)
           if user.watched.empty?
             watched = "'{}'"
           else
-            watched = arg_array(user.watched, user.subscriptions.size + 3)
+            watched = arg_array(user.watched, 3)
           end
 
-          videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{ucids}) \
-          AND id NOT IN (#{watched}) ORDER BY published DESC LIMIT $1 OFFSET $2",
-            [limit, offset] + user.subscriptions + user.watched, as: ChannelVideo)
+          videos = PG_DB.query_all("SELECT * FROM #{view_name} WHERE \
+          id NOT IN (#{watched}) LIMIT $1 OFFSET $2",
+            [limit, offset] + user.watched, as: ChannelVideo)
         else
-          args = arg_array(user.subscriptions, 3)
-          videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \
-          ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo)
+          videos = PG_DB.query_all("SELECT * FROM #{view_name} \
+          ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo)
         end
       end
 
@@ -1576,15 +1589,14 @@ get "/feed/private" do |env|
   latest_only ||= 0
   latest_only = latest_only == 1
 
+  view_name = "subscriptions_#{sha256(user.email)[0..7]}"
+
   if latest_only
-    args = arg_array(user.subscriptions)
-    videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \
-    ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo)
+    videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} ORDER BY ucid, published DESC", as: ChannelVideo)
     videos.sort_by! { |video| video.published }.reverse!
   else
-    args = arg_array(user.subscriptions, 3)
-    videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \
-  ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo)
+    videos = PG_DB.query_all("SELECT * FROM #{view_name} \
+    ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo)
   end
 
   sort = env.params.query["sort"]?
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index ac48534a..46e1e931 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -15,6 +15,7 @@ class Config
     hmac_key:     String?,
     full_refresh: Bool,
     geo_bypass:   Bool,
+    update_feeds: Bool,
   })
 end
 
diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr
index f6478d00..7ff2e622 100644
--- a/src/invidious/helpers/utils.cr
+++ b/src/invidious/helpers/utils.cr
@@ -238,3 +238,9 @@ def write_var_int(value : Int)
 
   return bytes
 end
+
+def sha256(text) # Returns the SHA256 digest of `text` as a hex string.
+  digest = OpenSSL::Digest.new("SHA256")
+  digest << text # feed the input into the digest
+  return digest.hexdigest
+end
diff --git a/src/invidious/jobs.cr b/src/invidious/jobs.cr
index e1c4e2d3..14ddf679 100644
--- a/src/invidious/jobs.cr
+++ b/src/invidious/jobs.cr
@@ -104,6 +104,33 @@ def refresh_videos(db)
   end
 end
 
+# Background job that keeps the per-user subscription feeds current.
+#
+# Loops forever: each pass reads every email from `users` and refreshes that
+# user's `subscriptions_<first 8 hex chars of sha256(email)>` materialized view.
+# Never returns, so call it from a dedicated fiber.
+def update_feeds(db)
+  loop do
+    users = db.query_all("SELECT email FROM users", as: String)
+
+    users.each do |email|
+      view_name = "subscriptions_#{sha256(email)[0..7]}"
+
+      begin
+        db.exec("REFRESH MATERIALIZED VIEW #{view_name}")
+      rescue ex
+        # A failed refresh (e.g. the view does not exist for this user) must
+        # not escape the loop: an unhandled exception would end the job and
+        # leave every other user's feed stale as well.
+        STDERR.puts "update_feeds: REFRESH #{view_name} failed: #{ex.message}"
+      end
+    end
+
+    # Give other fibers a turn before starting the next pass.
+    Fiber.yield
+  end
+end
+
 def pull_top_videos(config, db)
   if config.dl_api_key
     DetectLanguage.configure do |dl_config|