Add support for the new channel layout - part 1 (#3374)
This commit is contained in:
commit
09942dee66
|
@ -5,13 +5,13 @@ CONFIG = Config.from_yaml(File.open("config/config.example.yml"))
|
|||
Spectator.describe "Helper" do
|
||||
describe "#produce_channel_videos_url" do
|
||||
it "correctly produces url for requesting page `x` of a channel's videos" do
|
||||
expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")).to eq("/browse_ajax?continuation=4qmFsgI8EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4&gl=US&hl=en")
|
||||
# expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")).to eq("/browse_ajax?continuation=4qmFsgI8EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4&gl=US&hl=en")
|
||||
#
|
||||
# expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4R0FFPQ%3D%3D&gl=US&hl=en")
|
||||
|
||||
expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4R0FFPQ%3D%3D&gl=US&hl=en")
|
||||
# expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", page: 20)).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUE9PQ%3D%3D&gl=US&hl=en")
|
||||
|
||||
expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", page: 20)).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUE9PQ%3D%3D&gl=US&hl=en")
|
||||
|
||||
expect(produce_channel_videos_url(ucid: "UC-9-kyTW8ZkZNDHQJ6FgpwQ", page: 20, sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQy05LWt5VFc4WmtaTkRIUUo2Rmdwd1EaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUJnQg%3D%3D&gl=US&hl=en")
|
||||
# expect(produce_channel_videos_url(ucid: "UC-9-kyTW8ZkZNDHQJ6FgpwQ", page: 20, sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQy05LWt5VFc4WmtaTkRIUUo2Rmdwd1EaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUJnQg%3D%3D&gl=US&hl=en")
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -1,53 +1,48 @@
|
|||
def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
|
||||
object = {
|
||||
"80226972:embedded" => {
|
||||
"2:string" => ucid,
|
||||
"3:base64" => {
|
||||
"2:string" => "videos",
|
||||
"6:varint" => 2_i64,
|
||||
"7:varint" => 1_i64,
|
||||
"12:varint" => 1_i64,
|
||||
"13:string" => "",
|
||||
"23:varint" => 0_i64,
|
||||
object_inner_2 = {
|
||||
"2:0:embedded" => {
|
||||
"1:0:varint" => 0_i64,
|
||||
},
|
||||
"5:varint" => 50_i64,
|
||||
"6:varint" => 1_i64,
|
||||
"7:varint" => (page * 30).to_i64,
|
||||
"9:varint" => 1_i64,
|
||||
"10:varint" => 0_i64,
|
||||
}
|
||||
|
||||
object_inner_2_encoded = object_inner_2
|
||||
.try { |i| Protodec::Any.cast_json(i) }
|
||||
.try { |i| Protodec::Any.from_json(i) }
|
||||
.try { |i| Base64.urlsafe_encode(i) }
|
||||
.try { |i| URI.encode_www_form(i) }
|
||||
|
||||
object_inner_1 = {
|
||||
"110:embedded" => {
|
||||
"3:embedded" => {
|
||||
"15:embedded" => {
|
||||
"1:embedded" => {
|
||||
"1:string" => object_inner_2_encoded,
|
||||
"2:string" => "00000000-0000-0000-0000-000000000000",
|
||||
},
|
||||
"3:varint" => 1_i64,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
if !v2
|
||||
if auto_generated
|
||||
seed = Time.unix(1525757349)
|
||||
until seed >= Time.utc
|
||||
seed += 1.month
|
||||
end
|
||||
timestamp = seed - (page - 1).months
|
||||
object_inner_1_encoded = object_inner_1
|
||||
.try { |i| Protodec::Any.cast_json(i) }
|
||||
.try { |i| Protodec::Any.from_json(i) }
|
||||
.try { |i| Base64.urlsafe_encode(i) }
|
||||
.try { |i| URI.encode_www_form(i) }
|
||||
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
|
||||
else
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
|
||||
end
|
||||
else
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
|
||||
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
|
||||
"1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
|
||||
"1:varint" => 30_i64 * (page - 1),
|
||||
}))),
|
||||
})))
|
||||
end
|
||||
|
||||
case sort_by
|
||||
when "newest"
|
||||
when "popular"
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x01_i64
|
||||
when "oldest"
|
||||
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x02_i64
|
||||
else nil # Ignore
|
||||
end
|
||||
|
||||
object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
|
||||
object["80226972:embedded"].delete("3:base64")
|
||||
object = {
|
||||
"80226972:embedded" => {
|
||||
"2:string" => ucid,
|
||||
"3:string" => object_inner_1_encoded,
|
||||
"35:string" => "browse-feed#{ucid}videos102",
|
||||
},
|
||||
}
|
||||
|
||||
continuation = object.try { |i| Protodec::Any.cast_json(i) }
|
||||
.try { |i| Protodec::Any.from_json(i) }
|
||||
|
@ -67,10 +62,11 @@ end
|
|||
# Fetches a channel's videos and returns them together with their count.
#
# Parameters:
#  - ucid: ID of the channel whose videos are requested
#  - author: passed through to `extract_videos` alongside `ucid`
#  - page: currently IGNORED (see note below); kept for caller compatibility
#  - auto_generated: forwarded to `get_channel_videos_response`
#  - sort_by: forwarded to `get_channel_videos_response` (default "newest")
#
# Returns a `{count, videos}` tuple, where `count` is `videos.size`.
#
# NOTE: this used to request two consecutive pages (`page * 2 - 1` and
# `page * 2`) and concatenate their results. That pagination is disabled for
# now: only page 1 is requested. The two extra requests have been removed
# because their results were discarded by the assignment below.
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
  initial_data = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by)
  videos = extract_videos(initial_data, author, ucid)

  return videos.size, videos
end
|
||||
|
|
|
@ -17,6 +17,7 @@ private ITEM_PARSERS = {
|
|||
Parsers::PlaylistRendererParser,
|
||||
Parsers::CategoryRendererParser,
|
||||
Parsers::RichItemRendererParser,
|
||||
Parsers::ReelItemRendererParser,
|
||||
}
|
||||
|
||||
record AuthorFallback, name : String, id : String
|
||||
|
@ -369,7 +370,7 @@ private module Parsers
|
|||
end
|
||||
|
||||
# Parses an InnerTube richItemRenderer into a SearchVideo.
|
||||
# Returns nil when the given object isn't a shelfRenderer
|
||||
# Returns nil when the given object isn't a RichItemRenderer
|
||||
#
|
||||
# A richItemRenderer seems to be a simple wrapper for a videoRenderer, used
|
||||
# by the result page for hashtags. It is located inside a continuationItems
|
||||
|
@ -390,6 +391,90 @@ private module Parsers
|
|||
return {{@type.name}}
|
||||
end
|
||||
end
|
||||
|
||||
# Parses an InnerTube reelItemRenderer into a SearchVideo.
# Returns nil when the given object isn't a reelItemRenderer
#
# reelItemRenderer items are used in the new (2022) channel layout,
# in the "shorts" tab.
#
module ReelItemRendererParser
  # Entry point. Returns the parsed `SearchVideo` when `item` has a
  # "reelItemRenderer" key, nil otherwise. `author_fallback` supplies the
  # channel name/ID used when the item itself doesn't carry them.
  def self.process(item : JSON::Any, author_fallback : AuthorFallback)
    if item_contents = item["reelItemRenderer"]?
      return self.parse(item_contents, author_fallback)
    end
  end

  # Builds the `SearchVideo` from the contents of a reelItemRenderer.
  #
  # Raises if "videoId" or the reelPlayerHeaderRenderer path is missing
  # (both are accessed with the raising `[]` / `dig`); every other field
  # falls back to a default value.
  private def self.parse(item_contents, author_fallback)
    video_id = item_contents["videoId"].as_s

    video_details_container = item_contents.dig(
      "navigationEndpoint", "reelWatchEndpoint",
      "overlay", "reelPlayerOverlayRenderer",
      "reelPlayerHeaderSupportedRenderers",
      "reelPlayerHeaderRenderer"
    )

    # Author infos

    author = video_details_container
      .dig?("channelTitleText", "runs", 0, "text")
      .try &.as_s || author_fallback.name

    ucid = video_details_container
      .dig?("channelNavigationEndpoint", "browseEndpoint", "browseId")
      .try &.as_s || author_fallback.id

    # Title & publication date

    title = video_details_container.dig?("reelTitleText")
      .try { |t| extract_text(t) } || ""

    published = video_details_container
      .dig?("timestampText", "simpleText")
      .try { |t| decode_date(t.as_s) } || Time.utc

    # View count

    view_count_text = video_details_container.dig?("viewCountText", "simpleText")
    view_count_text ||= video_details_container
      .dig?("viewCountText", "accessibility", "accessibilityData", "label")

    # Keep only the digits of the text; 0 when nothing parseable remains.
    view_count = view_count_text.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64

    # Duration
    #
    # The duration is read from the item's accessibility label, which is
    # matched against "- [N minute(s) ]N second(s) -".

    a11y_data = item_contents
      .dig?("accessibility", "accessibilityData", "label")
      .try &.as_s || ""

    regex_match = /- (?<min>\d+ minutes? )?(?<sec>\d+ seconds?)+ -/.match(a11y_data)

    # The "min" group is optional, so it is unmatched for videos shorter
    # than a minute. `MatchData#[]` raises for an unmatched group, hence the
    # nilable `[]?` lookup with a fallback of 0.
    minutes = regex_match.try { |m| m["min"]?.try(&.to_i(strict: false)) } || 0
    seconds = regex_match.try { |m| m["sec"]?.try(&.to_i(strict: false)) } || 0

    duration = (minutes*60 + seconds)

    SearchVideo.new({
      title:              title,
      id:                 video_id,
      author:             author,
      ucid:               ucid,
      published:          published,
      views:              view_count,
      description_html:   "",
      length_seconds:     duration,
      live_now:           false,
      premium:            false,
      premiere_timestamp: Time.unix(0),
      author_verified:    false,
    })
  end

  # Name of this parser, i.e. the name of the module itself.
  def self.parser_name
    return {{@type.name}}
  end
end
|
||||
end
|
||||
|
||||
# The following are the extractors for extracting an array of items from
|
||||
|
@ -436,7 +521,18 @@ private module Extractors
|
|||
content = extract_selected_tab(target["tabs"])["content"]
|
||||
|
||||
if section_list_contents = content.dig?("sectionListRenderer", "contents")
|
||||
section_list_contents.as_a.each do |renderer_container|
|
||||
raw_items = unpack_section_list(section_list_contents)
|
||||
elsif rich_grid_contents = content.dig?("richGridRenderer", "contents")
|
||||
raw_items = rich_grid_contents.as_a
|
||||
end
|
||||
|
||||
return raw_items
|
||||
end
|
||||
|
||||
private def self.unpack_section_list(contents)
|
||||
raw_items = [] of JSON::Any
|
||||
|
||||
contents.as_a.each do |renderer_container|
|
||||
renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0]
|
||||
|
||||
# Category extraction
|
||||
|
@ -452,7 +548,6 @@ private module Extractors
|
|||
raw_items << item
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return raw_items
|
||||
end
|
||||
|
@ -525,14 +620,11 @@ private module Extractors
|
|||
end
|
||||
|
||||
# Extracts the raw items of a continuation response.
#
# The item list is looked up in the following locations, first hit wins:
#  1. "continuationItems"
#  2. "gridContinuation" -> "items"
#  3. "richGridContinuation" -> "contents"
#
# Returns an empty array when none of them is present.
#
# NOTE: an earlier if/elsif lookup followed by `return raw_items` used to
# precede this chain, which made the "richGridContinuation" lookup
# unreachable. Only the lookup chain is kept.
private def self.extract(target)
  content = target["continuationItems"]?
  content ||= target.dig?("gridContinuation", "items")
  content ||= target.dig?("richGridContinuation", "contents")

  return content.nil? ? [] of JSON::Any : content.as_a
end
|
||||
|
||||
def self.extractor_name
|
||||
|
|
Loading…
Reference in a new issue