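# JSON mappings for the parts of Reddit's API used below: a generic
# RedditThing wrapper plus the comment, link, "more" and listing payloads
# that its `data` field can carry.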
class RedditThing
  JSON.mapping({
    kind: String,
    data: RedditComment | RedditLink | RedditMore | RedditListing,
  })
end

class RedditComment
  module TimeConverter
    def self.from_json(value : JSON::PullParser) : Time
      Time.unix(value.read_float.to_i)
    end

    def self.to_json(value : Time, json : JSON::Builder)
      json.number(value.to_unix)
    end
  end

  JSON.mapping({
    author:    String,
    body_html: String,
    replies:   RedditThing | String,
    score:     Int32,
    depth:     Int32,
    created_utc: {
      type:      Time,
      converter: RedditComment::TimeConverter,
    },
  })
end

class RedditLink
  JSON.mapping({
    author:       String,
    score:        Int32,
    subreddit:    String,
    num_comments: Int32,
    id:           String,
    permalink:    String,
    title:        String,
  })
end

class RedditMore
  JSON.mapping({
    children: Array(String),
    count:    Int32,
    depth:    Int32,
  })
end

class RedditListing
  JSON.mapping({
    children: Array(RedditThing),
    modhash:  String,
  })
end
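
# Fetches comments for a video from YouTube's comment_service_ajax endpoint and
# returns them as JSON, or, when format == "html", as pre-rendered HTML plus a
# comment count.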
def fetch_youtube_comments(id, continuation, proxies, format, locale)
  client = make_client(YT_URL)
  html = client.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
  headers = HTTP::Headers.new
  headers["cookie"] = html.cookies.add_request_headers(headers)["cookie"]
  body = html.body

  session_token = body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/).not_nil!["session_token"]
  itct = body.match(/itct=(?<itct>[^"]+)"/).not_nil!["itct"]
  ctoken = body.match(/'COMMENTS_TOKEN': "(?<ctoken>[^"]+)"/)
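
  # If the watch page reports no allowed regions (and the video is not
  # age-gated), try each configured proxy region concurrently and take the
  # first one that can see the page.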
  if body.match(/<meta itemprop="regionsAllowed" content="">/) && !body.match(/player-age-gate-content\">/)
    bypass_channel = Channel({String, HTTPClient, HTTP::Headers} | Nil).new

    proxies.each do |proxy_region, list|
      spawn do
        proxy_client = make_client(YT_URL, proxies, proxy_region)

        response = proxy_client.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
        proxy_headers = HTTP::Headers.new
        proxy_headers["Cookie"] = response.cookies.add_request_headers(headers)["cookie"]
        proxy_html = response.body

        if !proxy_html.match(/<meta itemprop="regionsAllowed" content="">/) && !proxy_html.match(/player-age-gate-content\">/)
          bypass_channel.send({proxy_html, proxy_client, proxy_headers})
        else
          bypass_channel.send(nil)
        end
      end
    end

    proxies.size.times do
      response = bypass_channel.receive
      if response
        html, client, headers = response

        session_token = html.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/).not_nil!["session_token"]
        itct = html.match(/itct=(?<itct>[^"]+)"/).not_nil!["itct"]
        ctoken = html.match(/'COMMENTS_TOKEN': "(?<ctoken>[^"]+)"/)

        break
      end
    end
  end

  if !ctoken
    if format == "json"
      return {"comments" => [] of String}.to_json
    else
      return {"contentHtml" => "", "commentCount" => 0}.to_json
    end
  end
  ctoken = ctoken["ctoken"]

  if !continuation.empty?
    ctoken = continuation
  else
    continuation = ctoken
  end

  post_req = {
    "session_token" => session_token,
  }
  post_req = HTTP::Params.encode(post_req)

  headers["content-type"] = "application/x-www-form-urlencoded"
  headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ=="
  headers["x-spf-previous"] = "https://www.youtube.com/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"
  headers["x-spf-referer"] = "https://www.youtube.com/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"
  headers["x-youtube-client-name"] = "1"
  headers["x-youtube-client-version"] = "2.20180719"

  response = client.post("/comment_service_ajax?action_get_comments=1&pbj=1&ctoken=#{ctoken}&continuation=#{continuation}&itct=#{itct}&hl=en&gl=US", headers, post_req)
  response = JSON.parse(response.body)

  if !response["response"]["continuationContents"]?
    raise translate(locale, "Could not fetch comments")
  end

  response = response["response"]["continuationContents"]
  if response["commentRepliesContinuation"]?
    body = response["commentRepliesContinuation"]
  else
    body = response["itemSectionContinuation"]
  end

  contents = body["contents"]?
  if !contents
    if format == "json"
      return {"comments" => [] of String}.to_json
    else
      return {"contentHtml" => "", "commentCount" => 0}.to_json
    end
  end
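
  # Walk the comment (or reply) renderers and emit a normalized JSON document:
  # commentCount, videoId and an array of comments, each with author info,
  # content, timestamps, like count and an optional replies continuation.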
  comments = JSON.build do |json|
    json.object do
      if body["header"]?
        comment_count = body["header"]["commentsHeaderRenderer"]["countText"]["simpleText"].as_s.delete("Comments,").to_i
        json.field "commentCount", comment_count
      end

      json.field "videoId", id

      json.field "comments" do
        json.array do
          contents.as_a.each do |node|
            json.object do
              if !response["commentRepliesContinuation"]?
                node = node["commentThreadRenderer"]
              end

              if node["replies"]?
                node_replies = node["replies"]["commentRepliesRenderer"]
              end

              if !response["commentRepliesContinuation"]?
                node_comment = node["comment"]["commentRenderer"]
              else
                node_comment = node["commentRenderer"]
              end

              content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff')
              if content_html
                content_html = HTML.escape(content_html)
              end
              content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a)
              content_html, content = html_to_content(content_html)

              author = node_comment["authorText"]?.try &.["simpleText"]
              author ||= ""

              json.field "author", author
              json.field "authorThumbnails" do
                json.array do
                  node_comment["authorThumbnail"]["thumbnails"].as_a.each do |thumbnail|
                    json.object do
                      json.field "url", thumbnail["url"]
                      json.field "width", thumbnail["width"]
                      json.field "height", thumbnail["height"]
                    end
                  end
                end
              end

              if node_comment["authorEndpoint"]?
                json.field "authorId", node_comment["authorEndpoint"]["browseEndpoint"]["browseId"]
                json.field "authorUrl", node_comment["authorEndpoint"]["browseEndpoint"]["canonicalBaseUrl"]
              else
                json.field "authorId", ""
                json.field "authorUrl", ""
              end

              published_text = node_comment["publishedTimeText"]["runs"][0]["text"].as_s
              published = decode_date(published_text.rchop(" (edited)"))

              if published_text.includes?(" (edited)")
                json.field "isEdited", true
              else
                json.field "isEdited", false
              end

              json.field "content", content
              json.field "contentHtml", content_html
              json.field "published", published.to_unix
              json.field "publishedText", translate(locale, "`x` ago", recode_date(published))
              json.field "likeCount", node_comment["likeCount"]
              json.field "commentId", node_comment["commentId"]
              json.field "authorIsChannelOwner", node_comment["authorIsChannelOwner"]

              if node_comment["actionButtons"]["commentActionButtonsRenderer"]["creatorHeart"]?
                hearth_data = node_comment["actionButtons"]["commentActionButtonsRenderer"]["creatorHeart"]["creatorHeartRenderer"]["creatorThumbnail"]
                json.field "creatorHeart" do
                  json.object do
                    json.field "creatorThumbnail", hearth_data["thumbnails"][-1]["url"]
                    json.field "creatorName", hearth_data["accessibility"]["accessibilityData"]["label"]
                  end
                end
              end

              if node_replies && !response["commentRepliesContinuation"]?
                reply_count = node_replies["moreText"]["simpleText"].as_s.delete("View all reply replies,")
                if reply_count.empty?
                  reply_count = 1
                else
                  reply_count = reply_count.try &.to_i?
                  reply_count ||= 1
                end

                continuation = node_replies["continuations"]?.try &.as_a[0]["nextContinuationData"]["continuation"].as_s
                continuation ||= ""

                json.field "replies" do
                  json.object do
                    json.field "replyCount", reply_count
                    json.field "continuation", continuation
                  end
                end
              end
            end
          end
        end
      end

      if body["continuations"]?
        continuation = body["continuations"][0]["nextContinuationData"]["continuation"]
        json.field "continuation", continuation
      end
    end
  end
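
  # For HTML output, render the templated comment markup and wrap it together
  # with the comment count.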
  if format == "html"
    comments = JSON.parse(comments)
    content_html = template_youtube_comments(comments, locale)

    comments = JSON.build do |json|
      json.object do
        json.field "contentHtml", content_html

        if comments["commentCount"]?
          json.field "commentCount", comments["commentCount"]
        else
          json.field "commentCount", 0
        end
      end
    end
  end

  return comments
end
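
# Searches Reddit for threads linking to the given video and returns the
# comments of the highest-scoring thread along with the thread itself.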
def fetch_reddit_comments(id)
  client = make_client(REDDIT_URL)
  headers = HTTP::Headers{"User-Agent" => "web:invidio.us:v0.13.1 (by /u/omarroth)"}

  query = "(url:3D#{id}%20OR%20url:#{id})%20(site:youtube.com%20OR%20site:youtu.be)"
  search_results = client.get("/search.json?q=#{query}", headers)

  if search_results.status_code == 200
    search_results = RedditThing.from_json(search_results.body)

    thread = search_results.data.as(RedditListing).children.sort_by { |child| child.data.as(RedditLink).score }[-1]
    thread = thread.data.as(RedditLink)

    result = client.get("/r/#{thread.subreddit}/comments/#{thread.id}.json?limit=100&sort=top", headers).body
    result = Array(RedditThing).from_json(result)
  elsif search_results.status_code == 302
    result = client.get(search_results.headers["Location"], headers).body
    result = Array(RedditThing).from_json(result)

    thread = result[0].data.as(RedditListing).children[0].data.as(RedditLink)
  else
    raise "Got error code #{search_results.status_code}"
  end

  comments = result[1].data.as(RedditListing).children

  return comments, thread
end
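
# Renders the JSON produced by fetch_youtube_comments into the HTML shown on
# the watch page, including reply and "Load more" continuation links.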
def template_youtube_comments(comments, locale)
  html = ""

  root = comments["comments"].as_a
  root.each do |child|
    if child["replies"]?
      replies_html = <<-END_HTML
      <div id="replies" class="pure-g">
        <div class="pure-u-1-24"></div>
        <div class="pure-u-23-24">
          <p>
            <a href="javascript:void(0)" data-continuation="#{child["replies"]["continuation"]}"
              onclick="get_youtube_replies(this)">#{translate(locale, "View `x` replies", child["replies"]["replyCount"].to_s)}</a>
          </p>
        </div>
      </div>
      END_HTML
    end

    author_thumbnail = "/ggpht#{URI.parse(child["authorThumbnails"][-1]["url"].as_s).full_path}"

    html += <<-END_HTML
    <div class="pure-g">
      <div class="pure-u-4-24 pure-u-md-2-24">
        <img style="width:90%; padding-right:1em; padding-top:1em;" src="#{author_thumbnail}">
      </div>
      <div class="pure-u-20-24 pure-u-md-22-24">
        <p>
          <b>
            <a class="#{child["authorIsChannelOwner"] == true ? "channel-owner" : ""}" href="#{child["authorUrl"]}">#{child["author"]}</a>
          </b>
          <p style="white-space:pre-wrap">#{child["contentHtml"]}</p>
          <span title="#{Time.unix(child["published"].as_i64).to_s(translate(locale, "%A %B %-d, %Y"))}">#{translate(locale, "`x` ago", recode_date(Time.unix(child["published"].as_i64)))} #{child["isEdited"] == true ? translate(locale, "(edited)") : ""}</span>
          |
          <a href="https://www.youtube.com/watch?v=#{comments["videoId"]}&lc=#{child["commentId"]}" title="#{translate(locale, "Youtube permalink of the comment")}">[YT]</a>
          |
          <i class="icon ion-ios-thumbs-up"></i> #{number_with_separator(child["likeCount"])}
    END_HTML

    if child["creatorHeart"]?
      creator_thumbnail = "/ggpht#{URI.parse(child["creatorHeart"]["creatorThumbnail"].as_s).full_path}"

      html += <<-END_HTML
          <span class="creator-heart-container" title="#{translate(locale, "`x` marked it with a ❤", child["creatorHeart"]["creatorName"].as_s)}">
            <div class="creator-heart">
              <img class="creator-heart-background-hearted" src="#{creator_thumbnail}"></img>
              <div class="creator-heart-small-hearted">
                <div class="creator-heart-small-container">🖤</div>
              </div>
            </div>
          </span>
      END_HTML
    end

    html += <<-END_HTML
        </p>
        #{replies_html}
      </div>
    </div>
    END_HTML
  end

  if comments["continuation"]?
    html += <<-END_HTML
    <div class="pure-g">
      <div class="pure-u-1">
        <p>
          <a href="javascript:void(0)" data-continuation="#{comments["continuation"]}"
            onclick="get_youtube_replies(this, true)">#{translate(locale, "Load more")}</a>
        </p>
      </div>
    </div>
    END_HTML
  end

  return html
end
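
# Recursively renders a Reddit comment listing (as parsed by
# fetch_reddit_comments) into nested HTML.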
def template_reddit_comments(root, locale)
  html = ""
  root.each do |child|
    if child.data.is_a?(RedditComment)
      child = child.data.as(RedditComment)
      author = child.author
      score = child.score
      body_html = HTML.unescape(child.body_html)

      replies_html = ""
      if child.replies.is_a?(RedditThing)
        replies = child.replies.as(RedditThing)
        replies_html = template_reddit_comments(replies.data.as(RedditListing).children, locale)
      end

      content = <<-END_HTML
      <p>
        <a href="javascript:void(0)" onclick="toggle_parent(this)">[ - ]</a>
        <b><a href="https://www.reddit.com/user/#{author}">#{author}</a></b>
        #{translate(locale, "`x` points", number_with_separator(score))}
        #{translate(locale, "`x` ago", recode_date(child.created_utc))}
      </p>
      <div>
      #{body_html}
      #{replies_html}
      </div>
      END_HTML

      if child.depth > 0
        html += <<-END_HTML
        <div class="pure-g">
          <div class="pure-u-1-24">
          </div>
          <div class="pure-u-23-24">
            #{content}
          </div>
        </div>
        END_HTML
      else
        html += <<-END_HTML
        <div class="pure-g">
          <div class="pure-u-1">
            #{content}
          </div>
        </div>
        END_HTML
      end
    end
  end

  return html
end
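
# Rewrites anchors in comment HTML: YouTube redirect links are unwrapped to
# their target, other YouTube links are made relative, and bare "#" timestamp
# links become player seek calls.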
def replace_links(html)
  html = XML.parse_html(html)

  html.xpath_nodes(%q(//a)).each do |anchor|
    url = URI.parse(anchor["href"])

    if {"www.youtube.com", "m.youtube.com", "youtu.be"}.includes?(url.host)
      if url.path == "/redirect"
        params = HTTP::Params.parse(url.query.not_nil!)
        anchor["href"] = params["q"]?
      else
        anchor["href"] = url.full_path
      end
    elsif url.to_s == "#"
      begin
        length_seconds = decode_length_seconds(anchor.content)
      rescue ex
        length_seconds = decode_time(anchor.content)
      end

      anchor["href"] = "javascript:void(0)"
      anchor["onclick"] = "player.currentTime(#{length_seconds})"
    end
  end

  html = html.to_xml(options: XML::SaveOptions::NO_DECL)
  return html
end
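
# Makes host-less hrefs (e.g. from Reddit comment markup) absolute by filling
# in the given scheme and host.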
def fill_links(html, scheme, host)
  html = XML.parse_html(html)

  html.xpath_nodes("//a").each do |match|
    url = URI.parse(match["href"])
    # Reddit links don't have host
    if !url.host && !match["href"].starts_with?("javascript") && !url.to_s.ends_with? "#"
      url.scheme = scheme
      url.host = host
      match["href"] = url
    end
  end

  if host == "www.youtube.com"
    html = html.xpath_node(%q(//body)).not_nil!.to_xml
  else
    html = html.to_xml(options: XML::SaveOptions::NO_DECL)
  end

  return html
end
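
# Converts a comment's "runs" (YouTube's rich-text segments) into HTML,
# applying bold/italic markup and turning navigation endpoints into links.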
def content_to_comment_html(content)
  comment_html = content.map do |run|
    text = HTML.escape(run["text"].as_s)

    if run["text"] == "\n"
      text = "<br>"
    end

    if run["bold"]?
      text = "<b>#{text}</b>"
    end

    if run["italics"]?
      text = "<i>#{text}</i>"
    end

    if run["navigationEndpoint"]?
      if url = run["navigationEndpoint"]["urlEndpoint"]?.try &.["url"].as_s
        url = URI.parse(url)

        if !url.host || {"m.youtube.com", "www.youtube.com", "youtu.be"}.includes? url.host
          if url.path == "/redirect"
            url = HTTP::Params.parse(url.query.not_nil!)["q"]
          else
            url = url.full_path
          end
        end

        text = %(<a href="#{url}">#{text}</a>)
      elsif watch_endpoint = run["navigationEndpoint"]["watchEndpoint"]?
        length_seconds = watch_endpoint["startTimeSeconds"]?
        video_id = watch_endpoint["videoId"].as_s

        if length_seconds
          text = %(<a href="javascript:void(0)" onclick="player.currentTime(#{length_seconds})">#{text}</a>)
        else
          text = %(<a href="/watch?v=#{video_id}">#{text}</a>)
        end
      elsif url = run["navigationEndpoint"]["commandMetadata"]?.try &.["webCommandMetadata"]["url"].as_s
        text = %(<a href="#{url}">#{text}</a>)
      end
    end

    text
  end.join.rchop('\ufeff')

  return comment_html
end