# blame timestamp (extraction artifact): 2019-04-03 16:35:58 +00:00
require " ./macros "
# blame timestamp (extraction artifact): 2019-04-10 21:23:37 +00:00
struct Nonce
db_mapping ( {
nonce : String ,
expire : Time ,
} )
end
struct SessionId
db_mapping ( {
id : String ,
email : String ,
issued : String ,
} )
end
# blame timestamp (extraction artifact): 2019-04-15 16:13:09 +00:00
struct Annotation
db_mapping ( {
id : String ,
annotations : String ,
} )
end
# blame timestamp (extraction artifact): 2019-04-03 16:35:58 +00:00
struct ConfigPreferences
module StringToArray
2019-08-15 16:29:55 +00:00
def self . to_json ( value : Array ( String ) , json : JSON :: Builder )
json . array do
value . each do | element |
json . string element
end
end
end
def self . from_json ( value : JSON :: PullParser ) : Array ( String )
begin
result = [ ] of String
value . read_array do
result << HTML . escape ( value . read_string [ 0 , 100 ] )
end
rescue ex
result = [ HTML . escape ( value . read_string [ 0 , 100 ] ) , " " ]
end
result
end
2019-04-03 16:35:58 +00:00
def self . to_yaml ( value : Array ( String ) , yaml : YAML :: Nodes :: Builder )
yaml . sequence do
value . each do | element |
yaml . scalar element
end
end
end
def self . from_yaml ( ctx : YAML :: ParseContext , node : YAML :: Nodes :: Node ) : Array ( String )
begin
unless node . is_a? ( YAML :: Nodes :: Sequence )
node . raise " Expected sequence, not #{ node . class } "
end
result = [ ] of String
2019-04-04 00:04:33 +00:00
node . nodes . each do | item |
unless item . is_a? ( YAML :: Nodes :: Scalar )
node . raise " Expected scalar, not #{ item . class } "
2019-04-03 16:35:58 +00:00
end
2019-08-15 16:29:55 +00:00
result << HTML . escape ( item . value [ 0 , 100 ] )
2019-04-03 16:35:58 +00:00
end
rescue ex
if node . is_a? ( YAML :: Nodes :: Scalar )
2019-08-15 16:29:55 +00:00
result = [ HTML . escape ( node . value [ 0 , 100 ] ) , " " ]
2019-04-03 16:35:58 +00:00
else
result = [ " " , " " ]
end
end
result
end
end
2019-08-15 16:29:55 +00:00
module BoolToString
def self . to_json ( value : String , json : JSON :: Builder )
json . string value
end
def self . from_json ( value : JSON :: PullParser ) : String
begin
result = value . read_string
if result . empty?
CONFIG . default_user_preferences . dark_mode
else
result
end
rescue ex
result = value . read_bool
if result
" dark "
else
" light "
end
end
end
def self . to_yaml ( value : String , yaml : YAML :: Nodes :: Builder )
yaml . scalar value
end
def self . from_yaml ( ctx : YAML :: ParseContext , node : YAML :: Nodes :: Node ) : String
unless node . is_a? ( YAML :: Nodes :: Scalar )
node . raise " Expected sequence, not #{ node . class } "
end
case node . value
when " true "
" dark "
when " false "
" light "
when " "
CONFIG . default_user_preferences . dark_mode
else
node . value
end
end
end
2019-04-03 16:35:58 +00:00
yaml_mapping ( {
2019-05-01 04:39:04 +00:00
annotations : { type : Bool , default : false } ,
annotations_subscribed : { type : Bool , default : false } ,
autoplay : { type : Bool , default : false } ,
captions : { type : Array ( String ) , default : [ " " , " " , " " ] , converter : StringToArray } ,
comments : { type : Array ( String ) , default : [ " youtube " , " " ] , converter : StringToArray } ,
continue : { type : Bool , default : false } ,
continue_autoplay : { type : Bool , default : true } ,
2019-08-15 16:29:55 +00:00
dark_mode : { type : String , default : " " , converter : BoolToString } ,
2019-05-01 04:39:04 +00:00
latest_only : { type : Bool , default : false } ,
listen : { type : Bool , default : false } ,
local : { type : Bool , default : false } ,
locale : { type : String , default : " en-US " } ,
max_results : { type : Int32 , default : 40 } ,
notifications_only : { type : Bool , default : false } ,
2019-08-09 19:09:24 +00:00
player_style : { type : String , default : " invidious " } ,
2019-05-01 04:39:04 +00:00
quality : { type : String , default : " hd720 " } ,
redirect_feed : { type : Bool , default : false } ,
related_videos : { type : Bool , default : true } ,
sort : { type : String , default : " published " } ,
speed : { type : Float32 , default : 1.0_f32 } ,
thin_mode : { type : Bool , default : false } ,
unseen_only : { type : Bool , default : false } ,
video_loop : { type : Bool , default : false } ,
volume : { type : Int32 , default : 100 } ,
2019-04-03 16:35:58 +00:00
} )
end
# blame timestamp (extraction artifact): 2019-03-29 21:30:02 +00:00
struct Config
2019-04-03 16:35:58 +00:00
module ConfigPreferencesConverter
2019-07-18 23:51:10 +00:00
def self . to_yaml ( value : Preferences , yaml : YAML :: Nodes :: Builder )
value . to_yaml ( yaml )
end
2019-04-03 16:35:58 +00:00
def self . from_yaml ( ctx : YAML :: ParseContext , node : YAML :: Nodes :: Node ) : Preferences
Preferences . new ( * ConfigPreferences . new ( ctx , node ) . to_tuple )
end
2019-07-18 23:51:10 +00:00
end
2019-04-03 16:35:58 +00:00
2019-07-18 23:51:10 +00:00
module FamilyConverter
def self . to_yaml ( value : Socket :: Family , yaml : YAML :: Nodes :: Builder )
case value
when Socket :: Family :: UNSPEC
yaml . scalar nil
when Socket :: Family :: INET
yaml . scalar " ipv4 "
when Socket :: Family :: INET6
yaml . scalar " ipv6 "
end
end
def self . from_yaml ( ctx : YAML :: ParseContext , node : YAML :: Nodes :: Node ) : Socket :: Family
if node . is_a? ( YAML :: Nodes :: Scalar )
case node . value . downcase
when " ipv4 "
Socket :: Family :: INET
when " ipv6 "
Socket :: Family :: INET6
else
Socket :: Family :: UNSPEC
end
else
node . raise " Expected scalar, not #{ node . class } "
end
2019-04-03 16:35:58 +00:00
end
end
2019-07-07 14:07:53 +00:00
def disabled? ( option )
case disabled = CONFIG . disable_proxy
when Bool
return disabled
when Array
if disabled . includes? option
return true
else
return false
end
end
end
2018-08-04 20:30:44 +00:00
YAML . mapping ( {
2019-05-21 14:00:35 +00:00
channel_threads : Int32 , # Number of threads to use for crawling videos from channels (for updating subscriptions)
feed_threads : Int32 , # Number of threads to use for updating feeds
db : DBConfig , # Database configuration
2019-04-04 12:49:53 +00:00
full_refresh : Bool , # Used for crawling channels: threads should check all videos uploaded by a channel
https_only : Bool ?, # Used to tell Invidious it is behind a proxy, so links to resources should be https://
hmac_key : String ?, # HMAC signing key for CSRF tokens and verifying pubsub subscriptions
domain : String ?, # Domain to be used for links to resources on the site where an absolute URL is required
use_pubsub_feeds : { type : Bool | Int32 , default : false } , # Subscribe to channels using PubSubHubbub (requires domain, hmac_key)
2019-04-03 16:35:58 +00:00
default_home : { type : String , default : " Top " } ,
feed_menu : { type : Array ( String ) , default : [ " Popular " , " Top " , " Trending " , " Subscriptions " ] } ,
top_enabled : { type : Bool , default : true } ,
captcha_enabled : { type : Bool , default : true } ,
login_enabled : { type : Bool , default : true } ,
registration_enabled : { type : Bool , default : true } ,
statistics_enabled : { type : Bool , default : false } ,
admins : { type : Array ( String ) , default : [ ] of String } ,
external_port : { type : Int32 ?, default : nil } ,
default_user_preferences : { type : Preferences ,
default : Preferences . new ( * ConfigPreferences . from_yaml ( " " ) . to_tuple ) ,
converter : ConfigPreferencesConverter ,
} ,
2019-07-18 23:51:10 +00:00
dmca_content : { type : Array ( String ) , default : [ ] of String } , # For compliance with DMCA, disables download widget using list of video IDs
check_tables : { type : Bool , default : false } , # Check table integrity, automatically try to add any missing columns, create tables, etc.
cache_annotations : { type : Bool , default : false } , # Cache annotations requested from IA, will not cache empty annotations or annotations that only contain cards
banner : { type : String ?, default : nil } , # Optional banner to be displayed along top of page for announcements, etc.
hsts : { type : Bool ?, default : true } , # Enables 'Strict-Transport-Security'. Ensure that `domain` and all subdomains are served securely
disable_proxy : { type : Bool ? | Array ( String ) ?, default : false } , # Disable proxying server-wide: options: 'dash', 'livestreams', 'downloads', 'local'
force_resolve : { type : Socket :: Family , default : Socket :: Family :: UNSPEC , converter : FamilyConverter } , # Connect to YouTube over 'ipv6', 'ipv4'. Will sometimes resolve fix issues with rate-limiting (see https://github.com/ytdl-org/youtube-dl/issues/21729)
2018-08-04 20:30:44 +00:00
} )
end
# blame timestamp (extraction artifact): 2019-05-21 14:00:35 +00:00
struct DBConfig
yaml_mapping ( {
user : String ,
password : String ,
host : String ,
port : Int32 ,
dbname : String ,
} )
end
# blame timestamp (extraction artifact): 2019-03-03 17:51:28 +00:00
def rank_videos ( db , n )
2018-08-04 20:30:44 +00:00
top = [ ] of { Float64 , String }
db . query ( " SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 1000 " ) do | rs |
rs . each do
id = rs . read ( String )
wilson_score = rs . read ( Float64 )
published = rs . read ( Time )
# Exponential decay, older videos tend to rank lower
2019-06-08 00:56:41 +00:00
temperature = wilson_score * Math . exp ( - 0.000005 * ( ( Time . utc - published ) . total_minutes ) )
2018-08-04 20:30:44 +00:00
top << { temperature , id }
end
end
top . sort!
# Make hottest come first
top . reverse!
top = top . map { | a , b | b }
2019-03-03 17:51:28 +00:00
return top [ 0 .. n - 1 ]
2018-08-04 20:30:44 +00:00
end
# blame timestamp (extraction artifact): 2019-06-09 18:48:31 +00:00
def login_req ( f_req )
2018-08-04 20:30:44 +00:00
data = {
2019-06-09 18:48:31 +00:00
# Unfortunately there's not much information available on `bgRequest`; part of Google's BotGuard
2019-07-12 17:04:39 +00:00
# Generally this is much longer (>1250 characters), see also
# https://github.com/ytdl-org/youtube-dl/commit/baf67a604d912722b0fe03a40e9dc5349a2208cb .
2019-06-09 18:48:31 +00:00
# For now this can be empty.
2019-07-12 17:04:39 +00:00
" bgRequest " = > % | [ " identifier " , " " ] | ,
" pstMsg " = > " 1 " ,
" checkConnection " = > " youtube " ,
" checkedDomains " = > " youtube " ,
" hl " = > " en " ,
" deviceinfo " = > % | [ null , null , null , [ ] , null , " US " , null , null , [ ] , " GlifWebSignIn " , null , [ null , null , [ ] ] ] | ,
" f.req " = > f_req ,
2018-08-04 20:30:44 +00:00
" flowName " = > " GlifWebSignIn " ,
" flowEntry " = > " ServiceLogin " ,
2019-07-12 17:04:39 +00:00
# "cookiesDisabled" => "false",
# "gmscoreversion" => "undefined",
# "continue" => "https://accounts.google.com/ManageAccount",
# "azt" => "",
# "bgHash" => "",
2019-06-09 18:48:31 +00:00
}
2018-08-04 20:30:44 +00:00
return HTTP :: Params . encode ( data )
end
# blame timestamp (extraction artifact): 2019-06-08 20:08:27 +00:00
def html_to_content ( description_html : String )
description = description_html . gsub ( / (<br>)|(<br \/ >) / , {
" <br> " : " \n " ,
" <br/> " : " \n " ,
} )
if ! description . empty?
description = XML . parse_html ( description ) . content . strip ( " \n " )
2018-08-10 13:38:31 +00:00
end
2019-06-08 20:08:27 +00:00
return description
2018-08-10 13:38:31 +00:00
end
# blame timestamps (extraction artifact): 2018-08-10 14:44:19 +00:00 / 2019-04-12 21:29:23 +00:00
def extract_videos ( nodeset , ucid = nil , author_name = nil )
videos = extract_items ( nodeset , ucid , author_name )
2018-09-20 14:36:09 +00:00
videos . select! { | item | ! item . is_a? ( SearchChannel | SearchPlaylist ) }
videos . map { | video | video . as ( SearchVideo ) }
end
# blame timestamp (extraction artifact): 2019-02-15 23:28:54 +00:00
def extract_items ( nodeset , ucid = nil , author_name = nil )
2018-08-10 14:44:19 +00:00
# TODO: Make this a 'common', so it makes more sense to be used here
2018-09-20 14:36:09 +00:00
items = [ ] of SearchItem
2018-08-10 14:44:19 +00:00
nodeset . each do | node |
2019-02-15 23:28:54 +00:00
anchor = node . xpath_node ( % q ( . / / h3 [ contains ( @class , " yt-lockup-title " ) ] / a ) )
2018-08-10 14:44:19 +00:00
if ! anchor
next
end
2019-02-15 23:28:54 +00:00
title = anchor . content . strip
id = anchor [ " href " ]
2018-08-10 14:44:19 +00:00
if anchor [ " href " ] . starts_with? " https://www.googleadservices.com "
next
end
2018-09-20 14:36:09 +00:00
anchor = node . xpath_node ( % q ( . / / div [ contains ( @class , " yt-lockup-byline " ) ] / a ) )
2019-02-15 23:28:54 +00:00
if anchor
2018-09-22 15:49:42 +00:00
author = anchor . content . strip
2018-08-10 14:44:19 +00:00
author_id = anchor [ " href " ] . split ( " / " ) [ - 1 ]
end
2019-02-15 23:28:54 +00:00
author || = author_name
author_id || = ucid
author || = " "
author_id || = " "
2018-08-10 14:44:19 +00:00
2019-06-08 20:08:27 +00:00
description_html = node . xpath_node ( % q ( . / / div [ contains ( @class , " yt-lockup-description " ) ] ) ) . try & . to_s || " "
2018-09-19 20:24:19 +00:00
2018-09-23 17:13:08 +00:00
tile = node . xpath_node ( % q ( . / / div [ contains ( @class , " yt-lockup-tile " ) ] ) )
if ! tile
next
end
case tile [ " class " ]
2018-09-20 14:36:09 +00:00
when . includes? " yt-lockup-playlist "
plid = HTTP :: Params . parse ( URI . parse ( id ) . query . not_nil! ) [ " list " ]
2018-08-21 00:25:12 +00:00
2018-09-22 16:14:57 +00:00
anchor = node . xpath_node ( % q ( . / / div [ contains ( @class , " yt-lockup-meta " ) ] / a ) )
2018-09-29 04:12:35 +00:00
2018-09-22 16:14:57 +00:00
if ! anchor
anchor = node . xpath_node ( % q ( . / / ul [ @class = " yt-lockup-meta-info " ] / li / a ) )
end
2018-09-29 04:12:35 +00:00
video_count = node . xpath_node ( % q ( . / / span [ @class = " formatted-video-count-label " ] / b ) )
if video_count
video_count = video_count . content
if video_count == " 50+ "
author = " YouTube "
author_id = " UC-9-kyTW8ZkZNDHQJ6FgpwQ "
video_count = video_count . rchop ( " + " )
end
2019-04-12 21:37:35 +00:00
video_count = video_count . gsub ( / \ D / , " " ) . to_i?
2018-09-20 14:36:09 +00:00
end
video_count || = 0
videos = [ ] of SearchPlaylistVideo
2018-09-22 15:49:42 +00:00
node . xpath_nodes ( % q ( . / / * [ contains ( @class , " yt-lockup-playlist-items " ) ] / li ) ) . each do | video |
2018-09-20 14:36:09 +00:00
anchor = video . xpath_node ( % q ( . / / a ) )
if anchor
2018-09-22 15:49:42 +00:00
video_title = anchor . content . strip
2018-09-20 14:36:09 +00:00
id = HTTP :: Params . parse ( URI . parse ( anchor [ " href " ] ) . query . not_nil! ) [ " v " ]
end
video_title || = " "
id || = " "
2018-09-19 20:24:19 +00:00
2018-09-20 14:36:09 +00:00
anchor = video . xpath_node ( % q ( . / / span / span ) )
if anchor
length_seconds = decode_length_seconds ( anchor . content )
end
length_seconds || = 0
2018-08-10 14:44:19 +00:00
2018-09-20 14:36:09 +00:00
videos << SearchPlaylistVideo . new (
video_title ,
id ,
length_seconds
)
end
2018-08-10 14:44:19 +00:00
2019-03-17 14:00:00 +00:00
playlist_thumbnail = node . xpath_node ( % q ( . / / div / span / img ) ) . try & . [ " data-thumb " ]?
playlist_thumbnail || = node . xpath_node ( % q ( . / / div / span / img ) ) . try & . [ " src " ]
if ! playlist_thumbnail || playlist_thumbnail . empty?
thumbnail_id = videos [ 0 ]? . try & . id
else
thumbnail_id = playlist_thumbnail . match ( / \/ vi \/ (?<video_id>[a-zA-Z0-9_-]{11}) \/ \ w+ \ .jpg / ) . try & . [ " video_id " ]
end
2018-09-20 14:36:09 +00:00
items << SearchPlaylist . new (
title ,
plid ,
author ,
author_id ,
video_count ,
2019-03-17 14:00:00 +00:00
videos ,
thumbnail_id
2018-09-20 14:36:09 +00:00
)
when . includes? " yt-lockup-channel "
2018-09-22 15:49:42 +00:00
author = title . strip
2018-11-28 16:20:52 +00:00
ucid = node . xpath_node ( % q ( . / / button [ contains ( @class , " yt-uix-subscription-button " ) ] ) ) . try & . [ " data-channel-external-id " ]?
ucid || = id . split ( " / " ) [ - 1 ]
2018-09-20 14:36:09 +00:00
author_thumbnail = node . xpath_node ( % q ( . / / div / span / img ) ) . try & . [ " data-thumb " ]?
author_thumbnail || = node . xpath_node ( % q ( . / / div / span / img ) ) . try & . [ " src " ]
2019-04-02 13:51:28 +00:00
if author_thumbnail
author_thumbnail = URI . parse ( author_thumbnail )
author_thumbnail . scheme = " https "
author_thumbnail = author_thumbnail . to_s
end
2018-09-20 14:36:09 +00:00
author_thumbnail || = " "
2019-04-12 21:37:35 +00:00
subscriber_count = node . xpath_node ( % q ( . / / span [ contains ( @class , " yt-subscriber-count " ) ] ) ) . try & . [ " title " ] . gsub ( / \ D / , " " ) . to_i?
2018-09-20 14:36:09 +00:00
subscriber_count || = 0
2019-04-12 21:37:35 +00:00
video_count = node . xpath_node ( % q ( . / / ul [ @class = " yt-lockup-meta-info " ] / li ) ) . try & . content . split ( " " ) [ 0 ] . gsub ( / \ D / , " " ) . to_i?
2018-09-20 14:36:09 +00:00
video_count || = 0
items << SearchChannel . new (
2018-12-15 19:02:53 +00:00
author : author ,
ucid : ucid ,
author_thumbnail : author_thumbnail ,
subscriber_count : subscriber_count ,
video_count : video_count ,
description_html : description_html
2018-09-20 14:36:09 +00:00
)
2018-08-10 14:44:19 +00:00
else
2018-09-20 14:36:09 +00:00
id = id . lchop ( " /watch?v= " )
2018-08-10 14:44:19 +00:00
2018-09-20 14:36:09 +00:00
metadata = node . xpath_nodes ( % q ( . / / div [ contains ( @class , " yt-lockup-meta " ) ] / ul / li ) )
begin
published = decode_date ( metadata [ 0 ] . content . lchop ( " Streamed " ) . lchop ( " Starts " ) )
rescue ex
end
begin
2018-11-04 15:37:12 +00:00
published || = Time . unix ( metadata [ 0 ] . xpath_node ( % q ( . / / span ) ) . not_nil! [ " data-timestamp " ] . to_i64 )
2018-09-20 14:36:09 +00:00
rescue ex
end
2019-06-08 00:56:41 +00:00
published || = Time . utc
2018-09-20 14:36:09 +00:00
begin
view_count = metadata [ 0 ] . content . rchop ( " watching " ) . delete ( " , " ) . try & . to_i64?
rescue ex
end
begin
view_count || = metadata . try & . [ 1 ] . content . delete ( " No views, " ) . try & . to_i64?
rescue ex
end
view_count || = 0 _i64
length_seconds = node . xpath_node ( % q ( . / / span [ @class = " video-time " ] ) )
if length_seconds
length_seconds = decode_length_seconds ( length_seconds . content )
else
length_seconds = - 1
end
live_now = node . xpath_node ( % q ( . / / span [ contains ( @class , " yt-badge-live " ) ] ) )
if live_now
live_now = true
else
live_now = false
end
2018-10-16 16:15:14 +00:00
if node . xpath_node ( % q ( . / / span [ text ( ) = " Premium " ] ) )
premium = true
else
premium = false
end
2019-01-03 02:09:00 +00:00
if ! premium || node . xpath_node ( % q ( . / / span [ contains ( text ( ) , " Free episode " ) ] ) )
2018-10-16 16:15:14 +00:00
paid = false
2019-01-03 02:09:00 +00:00
else
paid = true
2018-10-16 16:15:14 +00:00
end
2019-03-22 17:24:47 +00:00
premiere_timestamp = node . xpath_node ( % q ( . / / ul [ @class = " yt-lockup-meta-info " ] / li / span [ @class = " localized-date " ] ) ) . try & . [ " data-timestamp " ]? . try & . to_i64
if premiere_timestamp
premiere_timestamp = Time . unix ( premiere_timestamp )
end
2018-09-20 14:36:09 +00:00
items << SearchVideo . new (
2018-12-15 19:02:53 +00:00
title : title ,
id : id ,
author : author ,
ucid : author_id ,
published : published ,
views : view_count ,
description_html : description_html ,
length_seconds : length_seconds ,
live_now : live_now ,
paid : paid ,
2019-03-22 17:24:47 +00:00
premium : premium ,
premiere_timestamp : premiere_timestamp
2018-09-20 14:36:09 +00:00
)
end
2018-08-10 14:44:19 +00:00
end
2018-09-20 14:36:09 +00:00
return items
2018-08-10 14:44:19 +00:00
end
# blame timestamp (extraction artifact): 2019-02-15 23:28:54 +00:00
def extract_shelf_items ( nodeset , ucid = nil , author_name = nil )
items = [ ] of SearchPlaylist
nodeset . each do | shelf |
shelf_anchor = shelf . xpath_node ( % q ( . / / h2 [ contains ( @class , " branded-page-module-title " ) ] ) )
if ! shelf_anchor
next
end
title = shelf_anchor . xpath_node ( % q ( . / / span [ contains ( @class , " branded-page-module-title-text " ) ] ) )
if title
title = title . content . strip
end
title || = " "
id = shelf_anchor . xpath_node ( % q ( . / / a ) ) . try & . [ " href " ]
if ! id
next
end
is_playlist = false
videos = [ ] of SearchPlaylistVideo
shelf . xpath_nodes ( % q ( . / / ul [ contains ( @class , " yt-uix-shelfslider-list " ) ] / li ) ) . each do | child_node |
type = child_node . xpath_node ( % q ( . / div ) )
if ! type
next
end
case type [ " class " ]
when . includes? " yt-lockup-video "
is_playlist = true
anchor = child_node . xpath_node ( % q ( . / / h3 [ contains ( @class , " yt-lockup-title " ) ] / a ) )
if anchor
video_title = anchor . content . strip
video_id = HTTP :: Params . parse ( URI . parse ( anchor [ " href " ] ) . query . not_nil! ) [ " v " ]
end
video_title || = " "
video_id || = " "
anchor = child_node . xpath_node ( % q ( . / / span [ @class = " video-time " ] ) )
if anchor
length_seconds = decode_length_seconds ( anchor . content )
end
length_seconds || = 0
videos << SearchPlaylistVideo . new (
video_title ,
video_id ,
length_seconds
)
when . includes? " yt-lockup-playlist "
anchor = child_node . xpath_node ( % q ( . / / h3 [ contains ( @class , " yt-lockup-title " ) ] / a ) )
if anchor
playlist_title = anchor . content . strip
params = HTTP :: Params . parse ( URI . parse ( anchor [ " href " ] ) . query . not_nil! )
plid = params [ " list " ]
end
playlist_title || = " "
plid || = " "
2019-03-17 14:00:00 +00:00
playlist_thumbnail = child_node . xpath_node ( % q ( . / / span / img ) ) . try & . [ " data-thumb " ]?
playlist_thumbnail || = child_node . xpath_node ( % q ( . / / span / img ) ) . try & . [ " src " ]
if ! playlist_thumbnail || playlist_thumbnail . empty?
thumbnail_id = videos [ 0 ]? . try & . id
else
thumbnail_id = playlist_thumbnail . match ( / \/ vi \/ (?<video_id>[a-zA-Z0-9_-]{11}) \/ \ w+ \ .jpg / ) . try & . [ " video_id " ]
end
2019-03-17 23:31:11 +00:00
video_count_label = child_node . xpath_node ( % q ( . / / span [ @class = " formatted-video-count-label " ] ) )
if video_count_label
2019-04-12 21:37:35 +00:00
video_count = video_count_label . content . gsub ( / \ D / , " " ) . to_i?
2019-03-17 23:31:11 +00:00
end
video_count || = 50
2019-02-15 23:28:54 +00:00
items << SearchPlaylist . new (
playlist_title ,
plid ,
author_name ,
ucid ,
2019-03-17 23:31:11 +00:00
video_count ,
2019-03-17 14:00:00 +00:00
Array ( SearchPlaylistVideo ) . new ,
thumbnail_id
2019-02-15 23:28:54 +00:00
)
end
end
if is_playlist
plid = HTTP :: Params . parse ( URI . parse ( id ) . query . not_nil! ) [ " list " ]
items << SearchPlaylist . new (
title ,
plid ,
author_name ,
ucid ,
videos . size ,
2019-03-17 14:00:00 +00:00
videos ,
videos [ 0 ] . try & . id
2019-02-15 23:28:54 +00:00
)
end
end
return items
end
# blame timestamp (extraction artifact): 2019-04-10 21:23:37 +00:00
def analyze_table ( db , logger , table_name , struct_type = nil )
# Create table if it doesn't exist
2019-04-10 22:16:18 +00:00
begin
db . exec ( " SELECT * FROM #{ table_name } LIMIT 0 " )
rescue ex
2019-06-08 01:07:55 +00:00
logger . puts ( " CREATE TABLE #{ table_name } " )
2019-04-10 22:09:36 +00:00
2019-04-10 21:23:37 +00:00
db . using_connection do | conn |
conn . as ( PG :: Connection ) . exec_all ( File . read ( " config/sql/ #{ table_name } .sql " ) )
end
end
if ! struct_type
return
end
struct_array = struct_type . to_type_tuple
column_array = get_column_array ( db , table_name )
column_types = File . read ( " config/sql/ #{ table_name } .sql " ) . match ( / CREATE TABLE public \ . #{ table_name } \ n \ ((?<types>[ \ d \ D]*?) \ ); / )
. try & . [ " types " ] . split ( " , " ) . map { | line | line . strip }
if ! column_types
return
end
struct_array . each_with_index do | name , i |
if name != column_array [ i ]?
if ! column_array [ i ]?
new_column = column_types . select { | line | line . starts_with? name } [ 0 ]
2019-06-08 01:07:55 +00:00
logger . puts ( " ALTER TABLE #{ table_name } ADD COLUMN #{ new_column } " )
2019-04-10 22:09:36 +00:00
db . exec ( " ALTER TABLE #{ table_name } ADD COLUMN #{ new_column } " )
2019-04-10 21:23:37 +00:00
next
end
# Column doesn't exist
if ! column_array . includes? name
new_column = column_types . select { | line | line . starts_with? name } [ 0 ]
db . exec ( " ALTER TABLE #{ table_name } ADD COLUMN #{ new_column } " )
end
# Column exists but in the wrong position, rotate
if struct_array . includes? column_array [ i ]
until name == column_array [ i ]
new_column = column_types . select { | line | line . starts_with? column_array [ i ] } [ 0 ]? . try & . gsub ( " #{ column_array [ i ] } " , " #{ column_array [ i ] } _new " )
# There's a column we didn't expect
if ! new_column
2019-06-08 01:07:55 +00:00
logger . puts ( " ALTER TABLE #{ table_name } DROP COLUMN #{ column_array [ i ] } " )
2019-04-10 22:09:36 +00:00
db . exec ( " ALTER TABLE #{ table_name } DROP COLUMN #{ column_array [ i ] } CASCADE " )
2019-04-10 21:23:37 +00:00
column_array = get_column_array ( db , table_name )
next
end
2019-06-08 01:07:55 +00:00
logger . puts ( " ALTER TABLE #{ table_name } ADD COLUMN #{ new_column } " )
2019-04-10 22:09:36 +00:00
db . exec ( " ALTER TABLE #{ table_name } ADD COLUMN #{ new_column } " )
2019-06-08 01:07:55 +00:00
logger . puts ( " UPDATE #{ table_name } SET #{ column_array [ i ] } _new= #{ column_array [ i ] } " )
2019-04-10 22:09:36 +00:00
db . exec ( " UPDATE #{ table_name } SET #{ column_array [ i ] } _new= #{ column_array [ i ] } " )
2019-06-08 01:07:55 +00:00
logger . puts ( " ALTER TABLE #{ table_name } DROP COLUMN #{ column_array [ i ] } CASCADE " )
2019-04-10 22:09:36 +00:00
db . exec ( " ALTER TABLE #{ table_name } DROP COLUMN #{ column_array [ i ] } CASCADE " )
2019-06-08 01:07:55 +00:00
logger . puts ( " ALTER TABLE #{ table_name } RENAME COLUMN #{ column_array [ i ] } _new TO #{ column_array [ i ] } " )
2019-04-10 22:09:36 +00:00
db . exec ( " ALTER TABLE #{ table_name } RENAME COLUMN #{ column_array [ i ] } _new TO #{ column_array [ i ] } " )
2019-04-10 21:23:37 +00:00
column_array = get_column_array ( db , table_name )
end
else
2019-06-08 01:07:55 +00:00
logger . puts ( " ALTER TABLE #{ table_name } DROP COLUMN #{ column_array [ i ] } CASCADE " )
2019-04-10 22:09:36 +00:00
db . exec ( " ALTER TABLE #{ table_name } DROP COLUMN #{ column_array [ i ] } CASCADE " )
2019-04-10 21:23:37 +00:00
end
end
end
end
class PG:: ResultSet
def field ( index = @column_index )
@fields . not_nil! [ index ]
end
end
def get_column_array ( db , table_name )
column_array = [ ] of String
db . query ( " SELECT * FROM #{ table_name } LIMIT 0 " ) do | rs |
rs . column_count . times do | i |
column = rs . as ( PG :: ResultSet ) . field ( i )
column_array << column . name
end
end
return column_array
end
# blame timestamp (extraction artifact): 2019-04-15 16:13:09 +00:00
# Persists a video's annotation XML in the `annotations` table (gated by
# the `cache_annotations` config flag) so it need not be re-fetched.
#
# Only documents containing at least one "legacy" annotation node — any
# node whose type is not branding/card/drawer — are stored; empty or
# card-only documents are skipped.
#
# db          - database connection
# id          - video id the annotations belong to
# annotations - raw annotation XML string
def cache_annotation(db, id, annotations)
  if !CONFIG.cache_annotations
    return
  end

  body = XML.parse(annotations)
  nodeset = body.xpath_nodes(%q(/document/annotations/annotation))

  # BUG FIX: the original guard was `if nodeset == 0`, comparing the
  # NodeSet itself to an Integer — always false, so the early return for
  # an empty document never fired. Compare the size instead.
  if nodeset.size == 0
    return
  end

  has_legacy_annotations = false
  nodeset.each do |node|
    if !{"branding", "card", "drawer"}.includes? node["type"]?
      has_legacy_annotations = true
      break
    end
  end

  if has_legacy_annotations
    # TODO: Update on conflict?
    db.exec("INSERT INTO annotations VALUES ($1, $2) ON CONFLICT DO NOTHING", id, annotations)
  end
end
# blame timestamps (extraction artifact): 2019-05-19 00:14:58 +00:00 / 2019-06-29 02:17:56 +00:00
def create_notification_stream ( env , config , kemal_config , decrypt_function , topics , connection_channel )
2019-06-03 18:36:49 +00:00
connection = Channel ( PQ :: Notification ) . new ( 8 )
2019-06-03 18:12:06 +00:00
connection_channel . send ( { true , connection } )
2019-05-21 14:01:17 +00:00
locale = LOCALES [ env . get ( " preferences " ) . as ( Preferences ) . locale ]?
since = env . params . query [ " since " ]? . try & . to_i?
2019-06-02 12:41:53 +00:00
id = 0
2019-05-21 14:01:17 +00:00
2019-06-02 12:41:53 +00:00
if topics . includes? " debug "
2019-05-21 14:01:17 +00:00
spawn do
2019-06-03 18:12:06 +00:00
begin
loop do
time_span = [ 0 , 0 , 0 , 0 ]
time_span [ rand ( 4 ) ] = rand ( 30 ) + 5
2019-06-08 00:56:41 +00:00
published = Time . utc - Time :: Span . new ( time_span [ 0 ] , time_span [ 1 ] , time_span [ 2 ] , time_span [ 3 ] )
2019-06-03 18:12:06 +00:00
video_id = TEST_IDS [ rand ( TEST_IDS . size ) ]
2019-06-29 02:17:56 +00:00
video = get_video ( video_id , PG_DB )
2019-06-03 18:12:06 +00:00
video . published = published
response = JSON . parse ( video . to_json ( locale , config , kemal_config , decrypt_function ) )
if fields_text = env . params . query [ " fields " ]?
begin
JSONFilter . filter ( response , fields_text )
rescue ex
env . response . status_code = 400
response = { " error " = > ex . message }
end
2019-05-21 14:01:17 +00:00
end
2019-06-03 18:12:06 +00:00
env . response . puts " id: #{ id } "
env . response . puts " data: #{ response . to_json } "
env . response . puts
env . response . flush
2019-05-21 14:01:17 +00:00
2019-06-03 18:12:06 +00:00
id += 1
2019-06-02 12:41:53 +00:00
2019-06-03 18:12:06 +00:00
sleep 1 . minute
2019-06-16 00:18:36 +00:00
Fiber . yield
2019-06-03 18:12:06 +00:00
end
rescue ex
2019-06-02 12:41:53 +00:00
end
end
end
spawn do
2019-06-03 18:12:06 +00:00
begin
if since
topics . try & . each do | topic |
case topic
when . match ( / UC[A-Za-z0-9_-]{22} / )
PG_DB . query_all ( " SELECT * FROM channel_videos WHERE ucid = $1 AND published > $2 ORDER BY published DESC LIMIT 15 " ,
topic , Time . unix ( since . not_nil! ) , as : ChannelVideo ) . each do | video |
response = JSON . parse ( video . to_json ( locale , config , Kemal . config ) )
if fields_text = env . params . query [ " fields " ]?
begin
JSONFilter . filter ( response , fields_text )
rescue ex
env . response . status_code = 400
response = { " error " = > ex . message }
end
2019-06-02 12:41:53 +00:00
end
2019-06-03 18:12:06 +00:00
env . response . puts " id: #{ id } "
env . response . puts " data: #{ response . to_json } "
env . response . puts
env . response . flush
2019-06-02 12:41:53 +00:00
2019-06-03 18:12:06 +00:00
id += 1
end
else
# TODO
2019-06-02 12:41:53 +00:00
end
2019-05-21 14:01:17 +00:00
end
end
end
2019-06-02 12:41:53 +00:00
end
2019-05-21 14:01:17 +00:00
2019-06-02 12:41:53 +00:00
spawn do
2019-06-03 18:12:06 +00:00
begin
loop do
event = connection . receive
notification = JSON . parse ( event . payload )
topic = notification [ " topic " ] . as_s
video_id = notification [ " videoId " ] . as_s
published = notification [ " published " ] . as_i64
2019-06-03 18:36:49 +00:00
if ! topics . try & . includes? topic
next
end
2019-06-29 02:17:56 +00:00
video = get_video ( video_id , PG_DB )
2019-06-03 18:12:06 +00:00
video . published = Time . unix ( published )
response = JSON . parse ( video . to_json ( locale , config , Kemal . config , decrypt_function ) )
if fields_text = env . params . query [ " fields " ]?
begin
JSONFilter . filter ( response , fields_text )
rescue ex
env . response . status_code = 400
response = { " error " = > ex . message }
end
2019-06-02 12:41:53 +00:00
end
2019-06-03 18:36:49 +00:00
env . response . puts " id: #{ id } "
env . response . puts " data: #{ response . to_json } "
env . response . puts
env . response . flush
2019-06-02 12:41:53 +00:00
2019-06-03 18:36:49 +00:00
id += 1
2019-06-02 12:41:53 +00:00
end
2019-06-03 18:12:06 +00:00
rescue ex
ensure
connection_channel . send ( { false , connection } )
2019-05-21 14:01:17 +00:00
end
2019-06-02 12:41:53 +00:00
end
2019-06-03 18:12:06 +00:00
begin
# Send heartbeat
loop do
2019-06-08 00:56:41 +00:00
env . response . puts " :keepalive #{ Time . utc . to_unix } "
2019-06-03 18:12:06 +00:00
env . response . puts
env . response . flush
sleep ( 20 + rand ( 11 ) ) . seconds
end
rescue ex
ensure
connection_channel . send ( { false , connection } )
2019-05-21 14:01:17 +00:00
end
end
# blame timestamp (extraction artifact): 2019-07-11 12:27:42 +00:00
def extract_initial_data ( body )
initial_data = body . match ( / window \ ["ytInitialData" \ ] = (?<info>.*?); \ n / ) . try & . [ " info " ] || " {} "
if initial_data . starts_with? ( " JSON.parse( \" " )
return JSON . parse ( JSON . parse ( %( {"initial_data":" #{ initial_data [ 12 .. - 3 ] } "} ) ) [ " initial_data " ] . as_s )
else
return JSON . parse ( initial_data )
end
end
# blame timestamp (extraction artifact): 2019-07-18 23:51:10 +00:00
def proxy_file ( response , env )
if response . headers . includes_word? ( " Content-Encoding " , " gzip " )
Gzip :: Writer . open ( env . response ) do | deflate |
response . pipe ( deflate )
end
elsif response . headers . includes_word? ( " Content-Encoding " , " deflate " )
Flate :: Writer . open ( env . response ) do | deflate |
response . pipe ( deflate )
end
else
response . pipe ( env . response )
end
end
class HTTP::Client:: Response
def pipe ( io )
HTTP . serialize_body ( io , headers , @body , @body_io , @version )
end
end
# Supports serialize_body without first writing headers
module HTTP
def self . serialize_body ( io , headers , body , body_io , version )
if body
io << body
elsif body_io
content_length = content_length ( headers )
if content_length
copied = IO . copy ( body_io , io )
if copied != content_length
raise ArgumentError . new ( " Content-Length header is #{ content_length } but body had #{ copied } bytes " )
end
elsif Client :: Response . supports_chunked? ( version )
headers [ " Transfer-Encoding " ] = " chunked "
serialize_chunked_body ( io , body_io )
else
io << body
end
end
end
end
class HTTP:: Client
property family : Socket :: Family = Socket :: Family :: UNSPEC
private def socket
socket = @socket
return socket if socket
hostname = @host . starts_with? ( '[' ) && @host . ends_with? ( ']' ) ? @host [ 1 .. - 2 ] : @host
socket = TCPSocket . new hostname , @port , @dns_timeout , @connect_timeout , @family
socket . read_timeout = @read_timeout if @read_timeout
socket . sync = false
{% if ! flag? ( :without_openssl ) %}
if tls = @tls
socket = OpenSSL :: SSL :: Socket :: Client . new ( socket , context : tls , sync_close : true , hostname : @host )
end
{% end %}
@socket = socket
end
end
class TCPSocket
def initialize ( host , port , dns_timeout = nil , connect_timeout = nil , family = Socket :: Family :: UNSPEC )
Addrinfo . tcp ( host , port , timeout : dns_timeout , family : family ) do | addrinfo |
super ( addrinfo . family , addrinfo . type , addrinfo . protocol )
connect ( addrinfo , timeout : connect_timeout ) do | error |
close
error
end
end
end
end