|
|
|
# Cached channel metadata, one row per channel in the `channels` table.
# NOTE: declaration order matters — `to_a` emits values in this order and
# callers zip them positionally into `INSERT INTO channels VALUES (...)`,
# and `$3` in the upsert below refers to `updated` by position.
class InvidiousChannel
  add_mapping({
    id:      String, # channel UCID (primary key)
    author:  String, # channel display name
    updated: Time,   # when this row was last refreshed
  })
end
|
|
|
|
|
|
|
|
# One video belonging to a channel, one row per video in the
# `channel_videos` table.
# NOTE: declaration order matters — `to_a` emits values in this order, and
# the upsert SQL below addresses columns positionally ($2 = title, $3 =
# published, $4 = updated, $5 = ucid, $6 = author).
class ChannelVideo
  add_mapping({
    id:        String, # video id (primary key)
    title:     String, # video title
    published: Time,   # publish time reported by YouTube
    updated:   Time,   # when this row was last written
    ucid:      String, # owning channel's UCID
    author:    String, # owning channel's display name
  })
end
|
|
|
|
|
|
|
|
# Return a channel, serving from the `channels` table when possible.
#
# If the channel is cached it is returned as-is, unless `refresh` is true
# and the cached row is more than 10 minutes old, in which case it is
# re-fetched from YouTube and upserted. Unknown channels are always
# fetched and inserted.
#
# id              - channel UCID
# client          - HTTP client pointed at YouTube
# db              - database connection
# refresh         - re-fetch stale cached rows when true
# pull_all_videos - forwarded to `fetch_channel` (full backfill vs RSS-only)
#
# Raises (via `fetch_channel`) when the channel is deleted or invalid.
def get_channel(id, client, db, refresh = true, pull_all_videos = true)
  if db.query_one?("SELECT EXISTS (SELECT true FROM channels WHERE id = $1)", id, as: Bool)
    channel = db.query_one("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)

    # Only hit YouTube again when the cached row has gone stale.
    if refresh && Time.now - channel.updated > 10.minutes
      channel = fetch_channel(id, client, db, pull_all_videos)
      channel_array = channel.to_a
      args = arg_array(channel_array)

      # Row already exists: on conflict only bump `updated` ($3 by position).
      db.exec("INSERT INTO channels VALUES (#{args}) \
      ON CONFLICT (id) DO UPDATE SET updated = $3", channel_array)
    end
  else
    channel = fetch_channel(id, client, db, pull_all_videos)
    # Serialize once and reuse (the original called `channel.to_a` twice).
    channel_array = channel.to_a
    args = arg_array(channel_array)

    db.exec("INSERT INTO channels VALUES (#{args})", channel_array)
  end

  return channel
end
|
|
|
|
|
|
|
|
# Fetch a channel's metadata (and its videos) from YouTube and persist the
# videos into `channel_videos`, firing user notifications for new uploads.
#
# ucid            - channel UCID
# client          - HTTP client pointed at YouTube
# db              - database connection
# pull_all_videos - when false, only the ~15 videos in the channel's RSS
#                   feed are processed; when true, every videos-tab page is
#                   walked and videos deleted upstream are pruned locally.
#
# Returns the channel as an `InvidiousChannel` (not yet written to the
# `channels` table — the caller persists it).
# Raises "Deleted or invalid channel" when the RSS feed has no title.
def fetch_channel(ucid, client, db, pull_all_videos = true)
  rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body
  rss = XML.parse_html(rss)

  author = rss.xpath_node(%q(//feed/title))
  if !author
    raise "Deleted or invalid channel"
  end
  author = author.content

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  auto_generated = false
  if author.ends_with?(" - Topic") ||
     {"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author
    auto_generated = true
  end

  if !pull_all_videos
    rss.xpath_nodes("//feed/entry").each do |entry|
      # NOTE(review): this block reassigns `author` and `ucid` from the feed
      # entry, shadowing the outer values for the rest of the method — the
      # final InvidiousChannel uses whatever the last entry set. Confirm
      # this is intentional before relying on it.
      video_id = entry.xpath_node("videoid").not_nil!.content
      title = entry.xpath_node("title").not_nil!.content
      published = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z", Time::Location.local)
      updated = Time.parse(entry.xpath_node("updated").not_nil!.content, "%FT%X%z", Time::Location.local)
      author = entry.xpath_node("author/name").not_nil!.content
      ucid = entry.xpath_node("channelid").not_nil!.content

      video = ChannelVideo.new(video_id, title, published, Time.now, ucid, author)

      # Append the video id to the notifications of every subscriber who has
      # not refreshed since it was published and doesn't already have it.
      db.exec("UPDATE users SET notifications = notifications || $1 \
      WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, ucid)

      video_array = video.to_a
      args = arg_array(video_array)
      db.exec("INSERT INTO channel_videos VALUES (#{args}) \
      ON CONFLICT (id) DO UPDATE SET title = $2, published = $3, \
      updated = $4, ucid = $5, author = $6", video_array)
    end
  else
    page = 1
    ids = [] of String

    loop do
      url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
      response = client.get(url)
      json = JSON.parse(response.body)

      if json["content_html"]? && !json["content_html"].as_s.empty?
        document = XML.parse_html(json["content_html"].as_s)
        nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
      else
        break
      end

      if auto_generated
        videos = extract_videos(nodeset)
      else
        videos = extract_videos(nodeset, ucid)
        # Scraped results can carry stale/missing attribution; force the
        # channel's own ucid and author onto every video.
        videos.each { |video| video.ucid = ucid }
        videos.each { |video| video.author = author }
      end

      count = nodeset.size
      videos = videos.map { |video| ChannelVideo.new(video.id, video.title, video.published, Time.now, video.ucid, video.author) }

      videos.each do |video|
        ids << video.id

        # FIXME: Red videos don't provide published date, so the best we can do is ignore them
        if Time.now - video.published > 1.minute
          db.exec("UPDATE users SET notifications = notifications || $1 \
          WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, video.ucid)

          video_array = video.to_a
          args = arg_array(video_array)
          db.exec("INSERT INTO channel_videos VALUES (#{args}) ON CONFLICT (id) DO UPDATE SET title = $2, \
          published = $3, updated = $4, ucid = $5, author = $6", video_array)
        end
      end

      # A full page holds 30 items; a short page means we've reached the end.
      if count < 30
        break
      end

      page += 1
    end

    # When a video is deleted from a channel, we find and remove it here
    db.exec("DELETE FROM channel_videos * WHERE NOT id = ANY ('{#{ids.map { |id| %("#{id}") }.join(",")}}') AND ucid = $1", ucid)
  end

  channel = InvidiousChannel.new(ucid, author, Time.now)

  return channel
end
|
|
|
|
|
|
|
|
# Build the `/browse_ajax` continuation URL for a channel's videos tab.
#
# ucid           - channel UCID
# page           - 1-based page number (30 items per page)
# auto_generated - for auto-generated channels YouTube pages by a monthly
#                  timestamp cursor instead of a page number, so the page
#                  number is converted into an epoch timestamp.
#
# The continuation token is a hand-rolled binary blob (presumably a
# serialized protobuf message — the \x12/\x1a/\x7a bytes look like
# field tags; TODO confirm against YouTube's innertube schema), base64-
# and URL-encoded twice (inner "meta" blob, then the outer envelope).
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil)
  if auto_generated
    # 2018-05-08, the reference month the timestamp cursor counts from.
    seed = Time.epoch(1525757349)

    # Advance the cursor to the current month, then step back one month
    # per page.
    until seed >= Time.now
      seed += 1.month
    end
    timestamp = seed - (page - 1).months

    page = "#{timestamp.epoch}"
    switch = "\x36"
  else
    page = "#{page}"
    switch = "\x00"
  end

  # Inner blob: tab name plus flag bytes, then the page/timestamp as a
  # length-prefixed string (field \x7a).
  meta = "\x12\x06videos"
  meta += "\x30\x02"
  meta += "\x38\x01"
  meta += "\x60\x01"
  meta += "\x6a\x00"
  meta += "\xb8\x01\x00"
  meta += "\x20#{switch}"
  meta += "\x7a"
  meta += page.size.to_u8.unsafe_chr
  meta += page

  meta = Base64.urlsafe_encode(meta)
  meta = URI.escape(meta)

  # Outer envelope: length-prefixed ucid (field \x12) and encoded meta
  # (field \x1a), wrapped with a fixed 5-byte header.
  continuation = "\x12"
  continuation += ucid.size.to_u8.unsafe_chr
  continuation += ucid
  continuation += "\x1a"
  continuation += meta.size.to_u8.unsafe_chr
  continuation += meta

  continuation = continuation.size.to_u8.unsafe_chr + continuation
  continuation = "\xe2\xa9\x85\xb2\x02" + continuation

  continuation = Base64.urlsafe_encode(continuation)
  continuation = URI.escape(continuation)

  url = "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"

  return url
end
|
|
|
|
|
|
|
|
# Scrape a channel's "About" page for basic metadata.
#
# ucid - either a legacy /user/ name or a channel UCID; the /user/ URL is
#        tried first and /channel/ is used as a fallback.
#
# Returns {author, canonical_ucid, auto_generated, sub_count}.
# Raises "User does not exist." when neither URL yields a channel page.
def get_about_info(ucid)
  client = make_client(YT_URL)

  about = client.get("/user/#{ucid}/about?disable_polymer=1")
  about = XML.parse_html(about.body)

  # No channel title on the /user/ page — retry as a channel id.
  if !about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a))
    about = client.get("/channel/#{ucid}/about?disable_polymer=1")
    about = XML.parse_html(about.body)
  end

  if !about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a))
    raise "User does not exist."
  end

  sub_count = about.xpath_node(%q(//span[contains(text(), "subscribers")]))
  if sub_count
    # NOTE: String#delete removes every listed *character* (",", " " and the
    # letters of "subscribers"), leaving only the digits — it is not a
    # substring removal.
    sub_count = sub_count.content.delete(", subscribers").to_i?
  end
  sub_count ||= 0

  author = about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a)).not_nil!.content
  # Canonical link yields the real UCID even when we were given a /user/ name.
  ucid = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"].split("/")[-1]

  # Auto-generated channels
  # https://support.google.com/youtube/answer/2579942
  auto_generated = false
  if about.xpath_node(%q(//ul[@class="about-custom-links"]/li/a[@title="Auto-generated by YouTube"])) ||
     about.xpath_node(%q(//span[@class="qualified-channel-title-badge"]/span[@title="Auto-generated by YouTube"]))
    auto_generated = true
  end

  return {author, ucid, auto_generated, sub_count}
end
|
|
|
|
|
|
|
|
# Fetch up to 60 videos (two 30-item pages) from a channel's videos tab.
#
# ucid           - channel UCID
# page           - 1-based "60-video page"; mapped to YouTube pages
#                  (2*page - 1) and (2*page)
# auto_generated - selects the continuation-token flavor and parser
# proxies        - Hash of region => proxy list, used to retry a
#                  short/region-blocked page through other regions
#
# Returns {videos, count} where count is a rough total estimate (30 per
# page that still advertises a "load more" widget).
def get_60_videos(ucid, page, auto_generated, proxies)
  count = 0
  videos = [] of SearchVideo

  client = make_client(YT_URL)

  2.times do |i|
    # i = 0, 1 -> YouTube pages 2*page - 1 and 2*page.
    url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated)
    response = client.get(url)
    json = JSON.parse(response.body)

    if json["content_html"]? && !json["content_html"].as_s.empty?
      document = XML.parse_html(json["content_html"].as_s)
      nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

      # A present, non-empty "load more" widget implies another full page.
      if !json["load_more_widget_html"]?.try &.as_s.empty?
        count += 30
      end

      # A short page that still claims more content is usually a
      # region-restricted listing; retry it through regional proxies.
      if !json["load_more_widget_html"]?.try &.as_s.empty? && nodeset.size < 30
        bypass_channel = Channel(XML::NodeSet | Nil).new

        proxies.each do |region, list|
          spawn do
            # NOTE(review): `nodeset` and `json` here are the method-level
            # variables captured by every fiber, so concurrent fibers race
            # on them — confirm before refactoring; behavior is preserved
            # as-is.
            list.each do |proxy|
              begin
                proxy_client = HTTPClient.new(YT_URL)
                proxy_client.read_timeout = 10.seconds
                proxy_client.connect_timeout = 10.seconds

                proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port])
                proxy_client.set_proxy(proxy)

                proxy_response = proxy_client.get(url)
                json = JSON.parse(proxy_response.body)

                document = XML.parse_html(json["content_html"].as_s)
                nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

                if nodeset.size == 30
                  bypass_channel.send(nodeset)
                  break
                end
              rescue ex
                # Best-effort: a dead proxy just falls through to the next.
              end
            end

            if nodeset.size != 30
              bypass_channel.send(nil)
            end
          end
        end

        # Take the first full page any region produced; drain the rest.
        proxies.size.times do
          response = bypass_channel.receive
          if response
            nodeset = response
            break
          end
        end
      end

      if auto_generated
        videos += extract_videos(nodeset)
      else
        videos += extract_videos(nodeset, ucid)
      end
    else
      break
    end
  end

  return videos, count
end
|