|
|
|
# Cached channel metadata, one row per channel in the `channels` table.
# NOTE: declaration order matters — `to_a` emits values in this order and
# callers zip them positionally into `INSERT INTO channels VALUES (...)`,
# and `$3` in the upsert below refers to `updated` by position.
class InvidiousChannel
  add_mapping({
    id:      String, # channel UCID (primary key)
    author:  String, # channel display name
    updated: Time,   # when this row was last refreshed
  })
end
|
|
|
|
|
|
|
|
# One video belonging to a channel, one row per video in the
# `channel_videos` table.
# NOTE: declaration order matters — `to_a` emits values in this order, and
# the upsert SQL below addresses columns positionally ($2 = title, $3 =
# published, $4 = updated, $5 = ucid, $6 = author).
class ChannelVideo
  add_mapping({
    id:        String, # video id (primary key)
    title:     String, # video title
    published: Time,   # publish time reported by YouTube
    updated:   Time,   # when this row was last written
    ucid:      String, # owning channel's UCID
    author:    String, # owning channel's display name
  })
end
|
|
|
|
|
|
|
|
# Return a channel, serving from the `channels` table when possible.
#
# If the channel is cached it is returned as-is, unless `refresh` is true
# and the cached row is more than 10 minutes old, in which case it is
# re-fetched from YouTube and upserted. Unknown channels are always
# fetched and inserted.
#
# id              - channel UCID
# client          - HTTP client pointed at YouTube
# db              - database connection
# refresh         - re-fetch stale cached rows when true
# pull_all_videos - forwarded to `fetch_channel` (full backfill vs RSS-only)
#
# Raises (via `fetch_channel`) when the channel is deleted or invalid.
def get_channel(id, client, db, refresh = true, pull_all_videos = true)
  if db.query_one?("SELECT EXISTS (SELECT true FROM channels WHERE id = $1)", id, as: Bool)
    channel = db.query_one("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)

    # Only hit YouTube again when the cached row has gone stale.
    if refresh && Time.now - channel.updated > 10.minutes
      channel = fetch_channel(id, client, db, pull_all_videos)
      channel_array = channel.to_a
      args = arg_array(channel_array)

      # Row already exists: on conflict only bump `updated` ($3 by position).
      db.exec("INSERT INTO channels VALUES (#{args}) \
      ON CONFLICT (id) DO UPDATE SET updated = $3", channel_array)
    end
  else
    channel = fetch_channel(id, client, db, pull_all_videos)
    # Serialize once and reuse (the original called `channel.to_a` twice).
    channel_array = channel.to_a
    args = arg_array(channel_array)

    db.exec("INSERT INTO channels VALUES (#{args})", channel_array)
  end

  return channel
end
|
|
|
|
|
|
|
|
# Fetch a channel's metadata (and its videos) from YouTube and persist the
# videos into `channel_videos`, firing user notifications for new uploads.
#
# ucid            - channel UCID
# client          - HTTP client pointed at YouTube
# db              - database connection
# pull_all_videos - when false, only the ~15 videos in the channel's RSS
#                   feed are processed; when true, every videos-tab page is
#                   walked and videos deleted upstream are pruned locally.
#
# Returns the channel as an `InvidiousChannel` (not yet written to the
# `channels` table — the caller persists it).
# Raises "Deleted or invalid channel" when the RSS feed has no title.
def fetch_channel(ucid, client, db, pull_all_videos = true)
  rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body
  rss = XML.parse_html(rss)

  author = rss.xpath_node(%q(//feed/title))
  if !author
    raise "Deleted or invalid channel"
  end
  author = author.content

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  auto_generated = false
  if author.ends_with?(" - Topic") ||
     {"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author
    auto_generated = true
  end

  if !pull_all_videos
    rss.xpath_nodes("//feed/entry").each do |entry|
      # NOTE(review): this block reassigns `author` and `ucid` from the feed
      # entry, shadowing the outer values for the rest of the method — the
      # final InvidiousChannel uses whatever the last entry set. Confirm
      # this is intentional before relying on it.
      video_id = entry.xpath_node("videoid").not_nil!.content
      title = entry.xpath_node("title").not_nil!.content
      published = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z", Time::Location.local)
      updated = Time.parse(entry.xpath_node("updated").not_nil!.content, "%FT%X%z", Time::Location.local)
      author = entry.xpath_node("author/name").not_nil!.content
      ucid = entry.xpath_node("channelid").not_nil!.content

      video = ChannelVideo.new(video_id, title, published, Time.now, ucid, author)

      # Append the video id to the notifications of every subscriber who has
      # not refreshed since it was published and doesn't already have it.
      db.exec("UPDATE users SET notifications = notifications || $1 \
      WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, ucid)

      video_array = video.to_a
      args = arg_array(video_array)
      db.exec("INSERT INTO channel_videos VALUES (#{args}) \
      ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
      updated = $4, ucid = $5, author = $6", video_array)
    end
  else
    page = 1
    ids = [] of String

    loop do
      url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
      response = client.get(url)
      json = JSON.parse(response.body)

      if json["content_html"]? && !json["content_html"].as_s.empty?
        document = XML.parse_html(json["content_html"].as_s)
        nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
      else
        break
      end

      if auto_generated
        videos = extract_videos(nodeset)
      else
        videos = extract_videos(nodeset, ucid)
        # Scraped results can carry stale/missing attribution; force the
        # channel's own ucid and author onto every video.
        videos.each { |video| video.ucid = ucid }
        videos.each { |video| video.author = author }
      end

      count = nodeset.size
      videos = videos.map { |video| ChannelVideo.new(video.id, video.title, video.published, Time.now, video.ucid, video.author) }

      videos.each do |video|
        ids << video.id

        # FIXME: Red videos don't provide published date, so the best we can do is ignore them
        if Time.now - video.published > 1.minute
          db.exec("UPDATE users SET notifications = notifications || $1 \
          WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, video.ucid)

          video_array = video.to_a
          args = arg_array(video_array)
          db.exec("INSERT INTO channel_videos VALUES (#{args}) ON CONFLICT (id) DO UPDATE SET title = $2, \
          published = $3, updated = $4, ucid = $5, author = $6", video_array)
        end
      end

      # A full page holds 30 items; a short page means we've reached the end.
      if count < 30
        break
      end

      page += 1
    end

    # When a video is deleted from a channel, we find and remove it here
    db.exec("DELETE FROM channel_videos * WHERE NOT id = ANY ('{#{ids.map { |id| %("#{id}") }.join(",")}}') AND ucid = $1", ucid)
  end

  channel = InvidiousChannel.new(ucid, author, Time.now)

  return channel
end
|
|
|
|
|
|
|
|
# Build the `/browse_ajax` continuation URL for a channel's videos tab.
#
# ucid           - channel UCID
# page           - 1-based page number (30 items per page)
# auto_generated - for auto-generated channels YouTube pages by a monthly
#                  timestamp cursor instead of a page number, so the page
#                  number is converted into an epoch timestamp.
#
# The continuation token is a hand-rolled binary blob (presumably a
# serialized protobuf message — the \x12/\x1a/\x7a bytes look like
# field tags; TODO confirm against YouTube's innertube schema), base64-
# and URL-encoded twice (inner "meta" blob, then the outer envelope).
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil)
  if auto_generated
    # 2018-05-08, the reference month the timestamp cursor counts from.
    seed = Time.epoch(1525757349)

    # Advance the cursor to the current month, then step back one month
    # per page.
    until seed >= Time.now
      seed += 1.month
    end
    timestamp = seed - (page - 1).months

    page = "#{timestamp.epoch}"
    switch = "\x36"
  else
    page = "#{page}"
    switch = "\x00"
  end

  # Inner blob: tab name plus flag bytes, then the page/timestamp as a
  # length-prefixed string (field \x7a).
  meta = "\x12\x06videos"
  meta += "\x30\x02"
  meta += "\x38\x01"
  meta += "\x60\x01"
  meta += "\x6a\x00"
  meta += "\xb8\x01\x00"
  meta += "\x20#{switch}"
  meta += "\x7a"
  meta += page.size.to_u8.unsafe_chr
  meta += page

  meta = Base64.urlsafe_encode(meta)
  meta = URI.escape(meta)

  # Outer envelope: length-prefixed ucid (field \x12) and encoded meta
  # (field \x1a), wrapped with a fixed 5-byte header.
  continuation = "\x12"
  continuation += ucid.size.to_u8.unsafe_chr
  continuation += ucid
  continuation += "\x1a"
  continuation += meta.size.to_u8.unsafe_chr
  continuation += meta

  continuation = continuation.size.to_u8.unsafe_chr + continuation
  continuation = "\xe2\xa9\x85\xb2\x02" + continuation

  continuation = Base64.urlsafe_encode(continuation)
  continuation = URI.escape(continuation)

  url = "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"

  return url
end
|
|
|
|
|
|
|
|
# Scrape a channel's "About" page for basic metadata.
#
# ucid - either a legacy /user/ name or a channel UCID; the /user/ URL is
#        tried first and /channel/ is used as a fallback.
#
# Returns {author, canonical_ucid, auto_generated, sub_count}.
# Raises "User does not exist." when neither URL yields a channel page.
def get_about_info(ucid)
  client = make_client(YT_URL)

  about = client.get("/user/#{ucid}/about?disable_polymer=1")
  about = XML.parse_html(about.body)

  # No channel title on the /user/ page — retry as a channel id.
  if !about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a))
    about = client.get("/channel/#{ucid}/about?disable_polymer=1")
    about = XML.parse_html(about.body)
  end

  if !about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a))
    raise "User does not exist."
  end

  sub_count = about.xpath_node(%q(//span[contains(text(), "subscribers")]))
  if sub_count
    # NOTE: String#delete removes every listed *character* (",", " " and the
    # letters of "subscribers"), leaving only the digits — it is not a
    # substring removal.
    sub_count = sub_count.content.delete(", subscribers").to_i?
  end
  sub_count ||= 0

  author = about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a)).not_nil!.content
  # Canonical link yields the real UCID even when we were given a /user/ name.
  ucid = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"].split("/")[-1]

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  auto_generated = false
  if about.xpath_node(%q(//ul[@class="about-custom-links"]/li/a[@title="Auto-generated by YouTube"])) ||
     about.xpath_node(%q(//span[@class="qualified-channel-title-badge"]/span[@title="Auto-generated by YouTube"]))
    auto_generated = true
  end

  return {author, ucid, auto_generated, sub_count}
end
|
|
|
|
|
|
|
|
# Fetch up to 60 videos (two 30-item pages) from a channel's videos tab.
#
# ucid           - channel UCID
# page           - 1-based "60-video page"; mapped to YouTube pages
#                  (2*page - 1) and (2*page)
# auto_generated - selects the continuation-token flavor and parser
# proxies        - Hash of region => proxy list, used to retry a
#                  short/region-blocked page through other regions
#
# Returns {videos, count} where count is a rough total estimate (30 per
# page that still advertises a "load more" widget).
def get_60_videos(ucid, page, auto_generated, proxies)
  count = 0
  videos = [] of SearchVideo

  client = make_client(YT_URL)

  2.times do |i|
    # i = 0, 1 -> YouTube pages 2*page - 1 and 2*page.
    url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated)
    response = client.get(url)
    json = JSON.parse(response.body)

    if json["content_html"]? && !json["content_html"].as_s.empty?
      document = XML.parse_html(json["content_html"].as_s)
      nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

      # A present, non-empty "load more" widget implies another full page.
      if !json["load_more_widget_html"]?.try &.as_s.empty?
        count += 30
      end

      # A short page that still claims more content is usually a
      # region-restricted listing; retry it through regional proxies.
      if !json["load_more_widget_html"]?.try &.as_s.empty? && nodeset.size < 30
        bypass_channel = Channel(XML::NodeSet | Nil).new

        proxies.each do |region, list|
          spawn do
            # NOTE(review): `nodeset` and `json` here are the method-level
            # variables captured by every fiber, so concurrent fibers race
            # on them — confirm before refactoring; behavior is preserved
            # as-is.
            list.each do |proxy|
              begin
                proxy_client = HTTPClient.new(YT_URL)
                proxy_client.read_timeout = 10.seconds
                proxy_client.connect_timeout = 10.seconds

                proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port])
                proxy_client.set_proxy(proxy)

                proxy_response = proxy_client.get(url)
                json = JSON.parse(proxy_response.body)

                document = XML.parse_html(json["content_html"].as_s)
                nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

                if nodeset.size == 30
                  bypass_channel.send(nodeset)
                  break
                end
              rescue ex
                # Best-effort: a dead proxy just falls through to the next.
              end
            end

            if nodeset.size != 30
              bypass_channel.send(nil)
            end
          end
        end

        # Take the first full page any region produced; drain the rest.
        proxies.size.times do
          response = bypass_channel.receive
          if response
            nodeset = response
            break
          end
        end
      end

      if auto_generated
        videos += extract_videos(nodeset)
      else
        videos += extract_videos(nodeset, ucid)
      end
    else
      break
    end
  end

  return videos, count
end
|