# Envelope used by the Reddit JSON API: `kind` names the payload type and
# `data` holds the payload itself.
class RedditThing
  JSON.mapping({
    kind: String,
    data: RedditComment | RedditLink | RedditMore | RedditListing,
  })
end

# A single Reddit comment. `replies` is either a nested `RedditThing` or a
# plain String, as declared in the mapping below.
class RedditComment
  # Converts the numeric `created_utc` field to and from `Time`.
  module TimeConverter
    def self.from_json(value : JSON::PullParser) : Time
      # The value is read as a float; truncate to whole seconds. Int64 avoids
      # the Int32 overflow that `to_i` would hit for large timestamps.
      Time.unix(value.read_float.to_i64)
    end

    def self.to_json(value : Time, json : JSON::Builder)
      json.number(value.to_unix)
    end
  end

  JSON.mapping({
    author:      String,
    body_html:   String,
    replies:     RedditThing | String,
    score:       Int32,
    depth:       Int32,
    created_utc: {
      type:      Time,
      converter: RedditComment::TimeConverter,
    },
  })
end

# A Reddit submission (thread).
class RedditLink
  JSON.mapping({
    author:       String,
    score:        Int32,
    subreddit:    String,
    num_comments: Int32,
    id:           String,
    permalink:    String,
    title:        String,
  })
end

# Placeholder Reddit emits for comments that were not included in a response.
class RedditMore
  JSON.mapping({
    children: Array(String),
    count:    Int32,
    depth:    Int32,
  })
end

# A list of `RedditThing`s.
class RedditListing
  JSON.mapping({
    children: Array(RedditThing),
    modhash:  String,
  })
end

# Fetches one page of comments for the video `id` and returns it as a JSON
# string.
#
# * `continuation` - when non-empty it is used as the comment token instead of
#   the one scraped from the watch page.
# * `proxies` - iterated as `|region, _|` pairs; each region gets its own
#   client via `make_client(YT_URL, proxies, region)`.
# * `format` - `"html"` returns `{"contentHtml", "commentCount"}` rendered with
#   `template_youtube_comments`; any other value returns the structured
#   comment JSON. When no comments are available, `"json"` yields
#   `{"comments": []}` and every other value yields the empty HTML form.
#
# Raises "Could not fetch comments" when the response carries no
# `continuationContents`, and raises via `not_nil!` when the session token or
# `itct` cannot be found in the page.
def fetch_youtube_comments(id, continuation, proxies, format)
  watch_path = "/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"

  client = make_client(YT_URL)
  html = client.get(watch_path)
  headers = HTTP::Headers.new
  headers["cookie"] = html.cookies.add_request_headers(headers)["cookie"]
  body = html.body

  # The capture names must match the keys read from the match data below.
  session_token = body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/).not_nil!["session_token"]
  itct = body.match(/itct=(?<itct>[^"]+)"/).not_nil!["itct"]
  ctoken = body.match(/'COMMENTS_TOKEN': "(?<ctoken>[^"]+)"/)

  # NOTE(review): the pattern below (and its twin inside the fiber) is empty in
  # this copy of the file; it looks like markup inside the regex was stripped.
  # Presumably it detected a page that needs a proxy retry - restore the
  # original pattern before relying on this branch.
  if body.match(//)
    # Buffered to the number of senders so fibers that finish after we stop
    # receiving can still complete their send instead of blocking forever.
    bypass_channel = Channel({String, HTTPClient, HTTP::Headers} | Nil).new(proxies.size)

    proxies.each do |proxy_region, _|
      spawn do
        begin
          proxy_client = make_client(YT_URL, proxies, proxy_region)

          response = proxy_client.get(watch_path)
          # Build the cookie header on this fiber's own headers object; using
          # the shared outer `headers` would overwrite the main request's
          # cookie from several fibers.
          proxy_headers = HTTP::Headers.new
          proxy_headers["Cookie"] = response.cookies.add_request_headers(proxy_headers)["cookie"]
          proxy_html = response.body

          if !proxy_html.match(//)
            bypass_channel.send({proxy_html, proxy_client, proxy_headers})
          else
            bypass_channel.send(nil)
          end
        rescue ex
          # Every fiber must report exactly once, otherwise the receive loop
          # below waits for a message that never arrives.
          bypass_channel.send(nil)
        end
      end
    end

    proxies.size.times do
      response = bypass_channel.receive
      if response
        # Switch to the first proxy that produced a usable page.
        html, client, headers = response

        session_token = html.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/).not_nil!["session_token"]
        itct = html.match(/itct=(?<itct>[^"]+)"/).not_nil!["itct"]
        ctoken = html.match(/'COMMENTS_TOKEN': "(?<ctoken>[^"]+)"/)

        break
      end
    end
  end

  if !ctoken
    if format == "json"
      return {"comments" => [] of String}.to_json
    else
      return {"contentHtml" => "", "commentCount" => 0}.to_json
    end
  end
  ctoken = ctoken["ctoken"]

  if !continuation.empty?
    ctoken = continuation
  else
    continuation = ctoken
  end

  post_req = {
    "session_token" => session_token,
  }
  post_req = HTTP::Params.encode(post_req)

  headers["content-type"] = "application/x-www-form-urlencoded"

  headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ=="
  headers["x-spf-previous"] = "https://www.youtube.com#{watch_path}"
  headers["x-spf-referer"] = "https://www.youtube.com#{watch_path}"

  headers["x-youtube-client-name"] = "1"
  headers["x-youtube-client-version"] = "2.20180719"

  response = client.post("/comment_service_ajax?action_get_comments=1&pbj=1&ctoken=#{ctoken}&continuation=#{continuation}&itct=#{itct}&hl=en&gl=US", headers, post_req)
  response = JSON.parse(response.body)

  if !response["response"]["continuationContents"]?
    raise "Could not fetch comments"
  end

  response = response["response"]["continuationContents"]

  # A reply listing nests its items differently from a top-level listing;
  # this flag selects the matching lookups below.
  in_reply_thread = !!response["commentRepliesContinuation"]?

  if in_reply_thread
    body = response["commentRepliesContinuation"]
  else
    body = response["itemSectionContinuation"]
  end

  contents = body["contents"]?
  if !contents
    if format == "json"
      return {"comments" => [] of String}.to_json
    else
      return {"contentHtml" => "", "commentCount" => 0}.to_json
    end
  end

  comments = JSON.build do |json|
    json.object do
      if body["header"]?
        # `delete` removes every listed character, leaving only the digits
        # (and surrounding whitespace) of the count text.
        comment_count = body["header"]["commentsHeaderRenderer"]["countText"]["simpleText"].as_s.delete("Comments,").to_i
        json.field "commentCount", comment_count
      end

      json.field "comments" do
        json.array do
          contents.as_a.each do |node|
            json.object do
              if !in_reply_thread
                node = node["commentThreadRenderer"]
              end

              if node["replies"]?
                node_replies = node["replies"]["commentRepliesRenderer"]
              end

              if !in_reply_thread
                node_comment = node["comment"]["commentRenderer"]
              else
                node_comment = node["commentRenderer"]
              end

              # Plain-text comments carry `simpleText`; anything else is
              # rendered from the `runs` array.
              content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff')
              if content_html
                content_html = HTML.escape(content_html)
              end

              content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a)

              content_html, content = html_to_content(content_html)

              author = node_comment["authorText"]?.try &.["simpleText"]
              author ||= ""

              json.field "author", author
              json.field "authorThumbnails" do
                json.array do
                  node_comment["authorThumbnail"]["thumbnails"].as_a.each do |thumbnail|
                    json.object do
                      json.field "url", thumbnail["url"]
                      json.field "width", thumbnail["width"]
                      json.field "height", thumbnail["height"]
                    end
                  end
                end
              end

              if node_comment["authorEndpoint"]?
                json.field "authorId", node_comment["authorEndpoint"]["browseEndpoint"]["browseId"]
                json.field "authorUrl", node_comment["authorEndpoint"]["browseEndpoint"]["canonicalBaseUrl"]
              else
                json.field "authorId", ""
                json.field "authorUrl", ""
              end

              published = decode_date(node_comment["publishedTimeText"]["runs"][0]["text"].as_s.rchop(" (edited)"))

              json.field "content", content
              json.field "contentHtml", content_html
              json.field "published", published.to_unix
              json.field "publishedText", "#{recode_date(published)} ago"
              json.field "likeCount", node_comment["likeCount"]
              json.field "commentId", node_comment["commentId"]

              if node_replies && !in_reply_thread
                # Strip the label characters; an empty or non-numeric
                # remainder is reported as a single reply.
                reply_count = node_replies["moreText"]["simpleText"].as_s.delete("View all reply replies,")
                if reply_count.empty?
                  reply_count = 1
                else
                  reply_count = reply_count.try &.to_i?
                  reply_count ||= 1
                end

                continuation = node_replies["continuations"].as_a[0]["nextContinuationData"]["continuation"].as_s

                json.field "replies" do
                  json.object do
                    json.field "replyCount", reply_count
                    json.field "continuation", continuation
                  end
                end
              end
            end
          end
        end
      end

      if body["continuations"]?
        continuation = body["continuations"][0]["nextContinuationData"]["continuation"]
        json.field "continuation", continuation
      end
    end
  end

  if format == "html"
    comments = JSON.parse(comments)
    content_html = template_youtube_comments(comments)

    comments = JSON.build do |json|
      json.object do
        json.field "contentHtml", content_html

        if comments["commentCount"]?
          json.field "commentCount", comments["commentCount"]
        else
          json.field "commentCount", 0
        end
      end
    end
  end

  comments
end

# Looks up the Reddit thread for the video `id` and returns
# `{comments, thread}`: the children of the thread's comment listing and the
# `RedditLink` describing the thread.
#
# A 200 search response is treated as a result listing, from which the
# highest-scoring thread is chosen. A 302 response is followed via its
# `Location` header and parsed as the thread itself.
#
# Raises on any other status code, and when the search listing is empty.
def fetch_reddit_comments(id)
  client = make_client(REDDIT_URL)
  headers = HTTP::Headers{"User-Agent" => "web:invidio.us:v0.12.0 (by /u/omarroth)"}

  query = "(url:3D#{id}%20OR%20url:#{id})%20(site:youtube.com%20OR%20site:youtu.be)"
  search_results = client.get("/search.json?q=#{query}", headers)

  if search_results.status_code == 200
    search_results = RedditThing.from_json(search_results.body)

    # `max_by?` returns nil for an empty listing instead of failing with a
    # bare IndexError, so the failure can be reported meaningfully.
    best = search_results.data.as(RedditListing).children.max_by? { |child| child.data.as(RedditLink).score }
    raise "No Reddit threads found for #{id}" if !best
    thread = best.data.as(RedditLink)

    result = client.get("/r/#{thread.subreddit}/comments/#{thread.id}.json?limit=100&sort=top", headers).body
    result = Array(RedditThing).from_json(result)
  elsif search_results.status_code == 302
    result = client.get(search_results.headers["Location"], headers).body
    result = Array(RedditThing).from_json(result)

    thread = result[0].data.as(RedditListing).children[0].data.as(RedditLink)
  else
    raise "Got error code #{search_results.status_code}"
  end

  # Element 0 of the response is the thread listing, element 1 its comments.
  comments = result[1].data.as(RedditListing).children
  return comments, thread
end
# Renders the `"comments"` array of a parsed comment payload into one string.
#
# NOTE(review): the heredoc bodies in this copy of the file contain only the
# interpolations; the surrounding markup appears to have been stripped. They
# are reproduced as found - restore the original templates before shipping.
def template_youtube_comments(comments)
  html = ""

  root = comments["comments"].as_a
  root.each do |child|
    if child["replies"]?
      replies_html = <<-END_HTML
      END_HTML
    end

    # NOTE(review): not referenced by the template as it stands; presumably
    # it was used by markup that is now missing. Kept so the lookups still
    # run (and still raise on malformed input) exactly as before.
    author_thumbnail = "/ggpht#{URI.parse(child["authorThumbnails"][-1]["url"].as_s).full_path}"

    html += <<-END_HTML

    #{child["author"]}

    #{child["contentHtml"]}

    #{recode_date(Time.unix(child["published"].as_i64))} ago | #{number_with_separator(child["likeCount"])}

    #{replies_html}
    END_HTML
  end

  if comments["continuation"]?
    html += <<-END_HTML
    END_HTML
  end

  return html
end

# Renders a list of `RedditThing`s into one string, recursing into each
# comment's replies. Children whose `data` is not a `RedditComment` are
# skipped.
#
# NOTE(review): as in `template_youtube_comments`, the template markup looks
# stripped; in particular both `depth` branches are identical here.
def template_reddit_comments(root)
  html = ""
  root.each do |child|
    if child.data.is_a?(RedditComment)
      child = child.data.as(RedditComment)
      author = child.author
      score = child.score
      body_html = HTML.unescape(child.body_html)

      replies_html = ""
      if child.replies.is_a?(RedditThing)
        replies = child.replies.as(RedditThing)
        replies_html = template_reddit_comments(replies.data.as(RedditListing).children)
      end

      content = <<-END_HTML

      [ - ] #{author} #{number_with_separator(score)} points #{recode_date(child.created_utc)} ago

      #{body_html} #{replies_html}
      END_HTML

      if child.depth > 0
        html += <<-END_HTML
        #{content}
        END_HTML
      else
        html += <<-END_HTML
        #{content}
        END_HTML
      end
    end
  end

  return html
end

# Rewrites every `<a>` in `html`:
# * links to a YouTube host become their path (or the `q` parameter of a
#   `/redirect` link);
# * `#` links become `player.currentTime(...)` handlers, with the seek
#   position decoded from the anchor text.
#
# Returns the document serialized without an XML declaration.
def replace_links(html)
  html = XML.parse_html(html)

  html.xpath_nodes(%q(//a)).each do |anchor|
    # Anchors without an href (for example named anchors) have nothing to
    # rewrite; indexing them with `[]` would raise.
    href = anchor["href"]?
    next if !href

    url = URI.parse(href)

    if {"www.youtube.com", "m.youtube.com", "youtu.be"}.includes?(url.host)
      if url.path == "/redirect"
        params = HTTP::Params.parse(url.query.not_nil!)
        anchor["href"] = params["q"]?
      else
        anchor["href"] = url.full_path
      end
    elsif url.to_s == "#"
      # Try the first decoder and fall back to the second if it raises.
      begin
        length_seconds = decode_length_seconds(anchor.content)
      rescue ex
        length_seconds = decode_time(anchor.content)
      end

      anchor["href"] = "javascript:void(0)"
      anchor["onclick"] = "player.currentTime(#{length_seconds})"
    end
  end

  html = html.to_xml(options: XML::SaveOptions::NO_DECL)
  return html
end

# Makes host-less links in `html` absolute by applying `scheme` and `host`.
# `javascript` links and links ending in `#` are left alone.
#
# When `host` is "www.youtube.com" only the `<body>` element is serialized;
# otherwise the whole document is, without an XML declaration.
def fill_links(html, scheme, host)
  html = XML.parse_html(html)

  html.xpath_nodes("//a").each do |match|
    # Skip anchors that carry no href; `[]` would raise on them.
    href = match["href"]?
    next if !href

    url = URI.parse(href)
    # Reddit links don't have host
    if !url.host && !href.starts_with?("javascript") && !url.to_s.ends_with? "#"
      url.scheme = scheme
      url.host = host
      match["href"] = url
    end
  end

  if host == "www.youtube.com"
    html = html.xpath_node(%q(//body)).not_nil!.to_xml
  else
    html = html.to_xml(options: XML::SaveOptions::NO_DECL)
  end

  return html
end

# Joins the text runs of a comment into one string, HTML-escaping each run's
# text and stripping a trailing U+FEFF from the result.
#
# NOTE(review): the wrappers applied for newline, bold, italics and links are
# all pass-throughs in this copy of the file; presumably the tags were
# stripped. They are kept in place so the markup can be restored.
def content_to_comment_html(content)
  comment_html = content.map do |run|
    text = HTML.escape(run["text"].as_s)

    if run["text"] == "\n"
      text = "\n"
    end

    if run["bold"]?
      text = "#{text}"
    end

    if run["italics"]?
      text = "#{text}"
    end

    if run["navigationEndpoint"]?
      if url = run["navigationEndpoint"]["urlEndpoint"]?.try &.["url"].as_s
        url = URI.parse(url)

        if !url.host || {"m.youtube.com", "www.youtube.com", "youtu.be"}.includes? url.host
          if url.path == "/redirect"
            url = HTTP::Params.parse(url.query.not_nil!)["q"]
          else
            url = url.full_path
          end
        end

        text = %(#{text})
      elsif watch_endpoint = run["navigationEndpoint"]["watchEndpoint"]?
        # Optional lookup: a watch endpoint without a start time must not
        # abort rendering of the whole comment.
        length_seconds = watch_endpoint["startTimeSeconds"]?.try &.as_i
        video_id = watch_endpoint["videoId"].as_s

        text = %(#{text})
      elsif url = run["navigationEndpoint"]["commandMetadata"]?.try &.["webCommandMetadata"]["url"].as_s
        text = %(#{text})
      end
    end

    text
  end.join.rchop('\ufeff')

  return comment_html
end