2018-08-15 15:22:36 +00:00
|
|
|
# Metadata describing one playlist.
#
# The fields are declared through `add_mapping` (defined outside this
# section of the file). `fetch_playlist` creates instances with
# `Playlist.new`, passing the values positionally in exactly the order
# listed here, so the order of the entries must not be changed.
class Playlist
  add_mapping({
    # Text of the playlist page's header title, stripped of spaces/newlines.
    title: String,
    # Playlist id as used in the request URL.
    id: String,
    # Text of the first link in the playlist header details.
    author: String,
    # "src" of the channel profile image, or "" when the page has none.
    author_thumbnail: String,
    # Third "/"-separated segment of the author link's href.
    ucid: String,
    # Second value returned by `html_to_content` for the description node.
    description: String,
    # First value returned by `html_to_content` for the description node.
    description_html: String,
    # Number parsed from the second header detail item.
    video_count: Int32,
    # Number parsed from the third header detail item (0 when it has no digits).
    views: Int64,
    # Result of `decode_date` applied to the fourth header detail item.
    updated: Time,
  })
end
|
|
|
|
|
|
|
|
# One video entry of a playlist.
#
# The fields are declared through `add_mapping` (defined outside this
# section of the file). `extract_playlist` creates instances with
# `PlaylistVideo.new`, passing the values positionally in exactly the order
# listed here, so the order of the entries must not be changed.
class PlaylistVideo
  add_mapping({
    # Text of the video link, stripped of spaces/newlines.
    title: String,
    # First 11 characters of the video link's href after "/watch?v=".
    id: String,
    # Text of the owner link, or "" when the row has none.
    author: String,
    # Third "/"-separated segment of the owner link's href, or "".
    ucid: String,
    # Result of `decode_length_seconds`, or 0 when the row shows no duration.
    length_seconds: Int32,
    # `extract_playlist` fills this with `Time.now` at extraction time.
    published: Time,
    # Ids of the playlists the video was found in.
    playlists: Array(String),
    # Position of the video: start index of the page plus the row offset.
    index: Int32,
  })
end
|
|
|
|
|
2018-09-22 19:13:10 +00:00
|
|
|
# Fetches one page of videos for the playlist `plid`.
#
# * `plid` - playlist id; passed to `produce_playlist_url` or interpolated
#   into the playlist page URL.
# * `page` - page number, where page 1 starts at index 0 and every further
#   page advances the start index by 100.
# * `video_count` - total number of videos in the playlist; selects which
#   of the two request styles below is used.
#
# Playlists with more than 100 videos are requested through the URL built
# by `produce_playlist_url`, whose JSON response carries the rows in its
# "content_html" entry. Smaller playlists are read from the regular
# playlist page, which only provides the first page; any later page yields
# an empty array.
#
# Returns the result of `extract_playlist` (or an empty array of
# `PlaylistVideo`). Raises "Playlist is empty" when "content_html" is
# missing or empty.
def fetch_playlist_videos(plid, page, video_count)
  client = make_client(YT_URL)
  row_query = %q(.//tr[contains(@class, "pl-video")])

  unless video_count > 100
    # Everything fits on the first page, so later pages have no content.
    return [] of PlaylistVideo if page > 1

    page_html = client.get("/playlist?list=#{plid}&gl=US&hl=en&disable_polymer=1").body
    rows = XML.parse_html(page_html).xpath_nodes(row_query)
    return extract_playlist(plid, rows, 0)
  end

  index = (page - 1) * 100
  payload = JSON.parse(client.get(produce_playlist_url(plid, index)).body)

  # A missing entry is treated exactly like an empty one.
  content_html = payload["content_html"]?.try(&.as_s) || ""
  raise "Playlist is empty" if content_html.empty?

  rows = XML.parse_html(content_html).xpath_nodes(row_query)
  return extract_playlist(plid, rows, index)
end
|
|
|
|
|
|
|
|
# Converts the playlist table rows in `nodeset` into `PlaylistVideo`s.
#
# * `plid` - id of the playlist the rows belong to; stored as the single
#   element of each video's playlist list.
# * `nodeset` - the row nodes to convert.
# * `index` - position of the first row within the playlist; each video
#   gets `index` plus its offset inside `nodeset`.
#
# Rows without a "pl-video-title" cell are skipped, but they still consume
# an offset. Rows without an owner link get an empty author and ucid, and
# rows without a duration text get a length of 0.
#
# Returns the array of extracted videos.
def extract_playlist(plid, nodeset, index)
  videos = [] of PlaylistVideo

  nodeset.each_with_index do |row, offset|
    title_cell = row.xpath_node(%q(.//td[@class="pl-video-title"]))
    next unless title_cell

    # The same link supplies both the title text and the video id.
    link = title_cell.xpath_node(%q(.//a)).not_nil!
    title = link.content.strip(" \n")
    id = link["href"].lchop("/watch?v=")[0, 11]

    owner = title_cell.xpath_node(%q(.//div[@class="pl-video-owner"]/a))
    author = owner.try(&.content) || ""
    ucid = owner.try { |node| node["href"].split("/")[2] } || ""

    duration = row.xpath_node(%q(.//td[@class="pl-video-time"]/div/div[1])).try(&.content) || ""
    length_seconds = duration.empty? ? 0 : decode_length_seconds(duration)

    videos << PlaylistVideo.new(
      title,
      id,
      author,
      ucid,
      length_seconds,
      Time.now,
      [plid],
      index + offset,
    )
  end

  videos
end
|
|
|
|
|
|
|
|
# Builds the "/browse_ajax" URL that requests the playlist `id` starting at
# video position `index`.
#
# * `id` - playlist id; an id starting with "UC" has that prefix replaced
#   by "UU" before use.
# * `index` - start position, encoded with `write_var_int`.
#
# The continuation value is assembled as a sequence of marker characters,
# one-character lengths and payloads, then Base64-encoded and URI-escaped.
# NOTE(review): this looks like a hand-written length-prefixed binary
# message; the meaning of the marker bytes is not visible here - confirm
# before changing any of them or the order of the steps.
#
# Returns the relative URL as a string.
def produce_playlist_url(id, index)
  id = "UU#{id.lchop("UC")}" if id.starts_with?("UC")
  browse_id = "VL#{id}"

  # Innermost part: the start position, Base64-encoded without padding.
  offset_token = "\x08#{write_var_int(index).join}"
  offset_token = Base64.urlsafe_encode(offset_token, false)
  offset_token = "PT:#{offset_token}"

  # Wrap it: marker, length, payload.
  inner = "\x7a"
  inner += offset_token.bytes.size.unsafe_chr
  inner += offset_token

  params = URI.escape(Base64.urlsafe_encode(inner))

  # Outer part: the playlist id followed by the wrapped parameters.
  payload = "\x12"
  payload += browse_id.size.unsafe_chr
  payload += browse_id
  payload += "\x1a"
  payload += params.bytes.size.unsafe_chr
  payload += params

  # Prefix the total length, then the fixed leading bytes.
  payload = payload.size.to_u8.unsafe_chr + payload
  payload = "\xe2\xa9\x85\xb2\x02" + payload

  continuation = URI.escape(Base64.urlsafe_encode(payload))

  "/browse_ajax?continuation=#{continuation}"
end
|
|
|
|
|
|
|
|
# Downloads the playlist page for `plid` and extracts its metadata.
#
# * `plid` - playlist id; an id starting with "UC" has that prefix replaced
#   by "UU" before the request is made.
#
# Returns a `Playlist` built from the page header: title, author, author
# thumbnail, ucid, description (plain and HTML), video count, view count
# and last-updated date.
#
# Raises "Invalid playlist." when the response status is not 200 and
# "Playlist does not exist." when the page has no playlist title. The
# header details list and its items are required as well (`not_nil!`).
def fetch_playlist(plid)
  client = make_client(YT_URL)

  plid = "UU#{plid.lchop("UC")}" if plid.starts_with?("UC")

  response = client.get("/playlist?list=#{plid}&hl=en&disable_polymer=1")
  raise "Invalid playlist." if response.status_code != 200

  # This button markup is removed from the page before it is parsed.
  less_button = <<-END_BUTTON
  <button class="yt-uix-button yt-uix-button-size-default yt-uix-button-link yt-uix-expander-head playlist-description-expander yt-uix-inlineedit-ignore-edit" type="button" onclick=";return false;"><span class="yt-uix-button-content"> less <img alt="" src="/yts/img/pixel-vfl3z5WfW.gif">
  </span></button>
  END_BUTTON
  document = XML.parse_html(response.body.gsub(less_button, ""))

  title_node = document.xpath_node(%q(//h1[@class="pl-header-title"]))
  raise "Playlist does not exist." unless title_node
  title = title_node.content.strip(" \n")

  # Prefer the nested description container; fall back to the outer span.
  description_node = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1]))
  description_node ||= document.xpath_node(%q(//span[@class="pl-header-description-text"]))
  description_html, description = html_to_content(description_node)

  details = document.xpath_node(%q(//ul[@class="pl-header-details"])).not_nil!

  # The first detail item links to the author; its href carries the ucid.
  author_link = details.xpath_node(%q(.//li[1]/a)).not_nil!
  author = author_link.content
  author_thumbnail = document.xpath_node(%q(//img[@class="channel-header-profile-image"])).try(&.["src"]) || ""
  ucid = author_link["href"].split("/")[2]

  # `delete` removes every listed character, leaving only the digits.
  video_count = details.xpath_node(%q(.//li[2])).not_nil!.content.delete("videos, ").to_i

  view_text = details.xpath_node(%q(.//li[3])).not_nil!.content.delete("No views, ")
  views = view_text.empty? ? 0_i64 : view_text.to_i64

  updated_text = details.xpath_node(%q(.//li[4])).not_nil!.content
  updated = decode_date(updated_text.lchop("Last updated on ").lchop("Updated "))

  return Playlist.new(
    title,
    plid,
    author,
    author_thumbnail,
    ucid,
    description,
    description_html,
    video_count,
    views,
    updated
  )
end
|