Properly filter movies, playlists, channels from search results

This commit is contained in:
Omar Roth 2018-08-16 08:47:51 -05:00
parent 7c63c759f4
commit ec399f5f7b

View file

@ -302,6 +302,15 @@ def extract_videos(nodeset, ucid = nil)
next next
end end
case node.xpath_node(%q(.//div)).not_nil!["class"]
when .includes? "yt-lockup-movie-vertical-poster"
next
when .includes? "yt-lockup-playlist"
next
when .includes? "yt-lockup-channel"
next
end
title = anchor.content.strip title = anchor.content.strip
id = anchor["href"].lchop("/watch?v=") id = anchor["href"].lchop("/watch?v=")
@ -318,35 +327,25 @@ def extract_videos(nodeset, ucid = nil)
author_id = anchor["href"].split("/")[-1] author_id = anchor["href"].split("/")[-1]
end end
# Skip playlists
if node.xpath_node(%q(.//div[contains(@class, "yt-playlist-renderer")]))
next
end
# Skip movies
if node.xpath_node(%q(.//div[contains(@class, "yt-lockup-movie-top-content")]))
next
end
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li)) metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
if metadata.size == 0 if metadata.size == 1
next # Scheduled livestream
elsif metadata.size == 1
if metadata[0].content.starts_with? "Starts" if metadata[0].content.starts_with? "Starts"
view_count = 0_i64 view_count = 0_i64
published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64) published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
else else
view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64 # Livestream
view_count = metadata[0].content.delete("Streamed, watching").to_i64
published = Time.now published = Time.now
end end
else else
published = decode_date(metadata[0].content) published = decode_date(metadata[0].content)
view_count = metadata[1].content.split(" ")[0] view_count = metadata[1].content.delete("No views,")
if view_count == "No" if view_count.empty?
view_count = 0_i64 view_count = 0_i64
else else
view_count = view_count.delete(",").to_i64 view_count = view_count.to_i64
end end
end end