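# Refreshes every channel in the database, oldest-updated first. The fiber
# limit arrives over max_channel (config.channel_threads), and active_channel
# acts as a counting semaphore so at most max_threads fetches run at once.
# A channel that raises "Deleted or invalid channel" is marked deleted
# rather than retried.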
def refresh_channels(db, logger, config)
  max_channel = Channel(Int32).new

  spawn do
    max_threads = max_channel.receive
    active_threads = 0
    active_channel = Channel(Bool).new

    loop do
      db.query("SELECT id FROM channels ORDER BY updated") do |rs|
        rs.each do
          id = rs.read(String)

          if active_threads >= max_threads
            if active_channel.receive
              active_threads -= 1
            end
          end

          active_threads += 1
          spawn do
            begin
              channel = fetch_channel(id, db, config.full_refresh)

              db.exec("UPDATE channels SET updated = $1, author = $2, deleted = false WHERE id = $3", Time.utc, channel.author, id)
            rescue ex
              if ex.message == "Deleted or invalid channel"
                db.exec("UPDATE channels SET updated = $1, deleted = true WHERE id = $2", Time.utc, id)
              end
              logger.puts("#{id} : #{ex.message}")
            end

            active_channel.send(true)
          end
        end
      end

      sleep 1.minute
      Fiber.yield
    end
  end

  max_channel.send(config.channel_threads)
end

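# Rebuilds the subscriptions_<sha256(email)> materialized view for every user
# whose feed is flagged stale. Views whose columns no longer match
# ChannelVideo, or whose definition lacks the current WHERE clause, are
# dropped and recreated; legacy short-hash view names are renamed in place.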
def refresh_feeds(db, logger, config)
  max_channel = Channel(Int32).new

  spawn do
    max_threads = max_channel.receive
    active_threads = 0
    active_channel = Channel(Bool).new

    loop do
      db.query("SELECT email FROM users WHERE feed_needs_update = true OR feed_needs_update IS NULL") do |rs|
        rs.each do
          email = rs.read(String)
          view_name = "subscriptions_#{sha256(email)}"

          if active_threads >= max_threads
            if active_channel.receive
              active_threads -= 1
            end
          end

          active_threads += 1
          spawn do
            begin
              # Drop outdated views
              column_array = get_column_array(db, view_name)
              ChannelVideo.to_type_tuple.each_with_index do |name, i|
                if name != column_array[i]?
                  logger.puts("DROP MATERIALIZED VIEW #{view_name}")
                  db.exec("DROP MATERIALIZED VIEW #{view_name}")
                  raise "view does not exist"
                end
              end

              if !db.query_one("SELECT pg_get_viewdef('#{view_name}')", as: String).includes? "WHERE ((cv.ucid = ANY (u.subscriptions))"
                logger.puts("Materialized view #{view_name} is out-of-date, recreating...")
                db.exec("DROP MATERIALIZED VIEW #{view_name}")
              end

              db.exec("REFRESH MATERIALIZED VIEW #{view_name}")
              db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
            rescue ex
              # Rename old views
              begin
                legacy_view_name = "subscriptions_#{sha256(email)[0..7]}"

                db.exec("SELECT * FROM #{legacy_view_name} LIMIT 0")
                logger.puts("RENAME MATERIALIZED VIEW #{legacy_view_name}")
                db.exec("ALTER MATERIALIZED VIEW #{legacy_view_name} RENAME TO #{view_name}")
              rescue ex
                begin
                  # While iterating through, we may have an email stored from a deleted account
                  if db.query_one?("SELECT true FROM users WHERE email = $1", email, as: Bool)
                    logger.puts("CREATE #{view_name}")
                    db.exec("CREATE MATERIALIZED VIEW #{view_name} AS #{MATERIALIZED_VIEW_SQL.call(email)}")
                    db.exec("UPDATE users SET feed_needs_update = false WHERE email = $1", email)
                  end
                rescue ex
                  logger.puts("REFRESH #{email} : #{ex.message}")
                end
              end
            end

            active_channel.send(true)
          end
        end
      end

      sleep 5.seconds
      Fiber.yield
    end
  end

  max_channel.send(config.feed_threads)
end

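# (Re)subscribes each channel to push updates via subscribe_pubsub when the
# existing subscription is more than four days old or missing. Note that
# config.use_pubsub_feeds may be either a Bool or an explicit Int32 fiber
# count.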
def subscribe_to_feeds(db, logger, key, config)
  if config.use_pubsub_feeds
    case config.use_pubsub_feeds
    when Bool
      max_threads = config.use_pubsub_feeds.as(Bool).to_unsafe
    when Int32
      max_threads = config.use_pubsub_feeds.as(Int32)
    end
    max_channel = Channel(Int32).new

    spawn do
      max_threads = max_channel.receive
      active_threads = 0
      active_channel = Channel(Bool).new

      loop do
        db.query("SELECT id FROM channels WHERE CURRENT_TIMESTAMP - subscribed > interval '4 days' OR subscribed IS NULL") do |rs|
          rs.each do
            ucid = rs.read(String)

            if active_threads >= max_threads.as(Int32)
              if active_channel.receive
                active_threads -= 1
              end
            end

            active_threads += 1

            spawn do
              begin
                response = subscribe_pubsub(ucid, key, config)

                if response.status_code >= 400
                  logger.puts("#{ucid} : #{response.body}")
                end
              rescue ex
                logger.puts("#{ucid} : #{ex.message}")
              end

              active_channel.send(true)
            end
          end
        end

        sleep 1.minute
        Fiber.yield
      end
    end

    max_channel.send(max_threads.as(Int32))
  end
end

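# Ranks the top 40 videos, fetches each one, and yields the resulting list
# to the caller once a minute. Any failure backs off for a minute and
# retries.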
def pull_top_videos(config, db)
  loop do
    begin
      top = rank_videos(db, 40)
    rescue ex
      sleep 1.minute
      Fiber.yield

      next
    end

    if top.size == 0
      sleep 1.minute
      Fiber.yield

      next
    end

    videos = [] of Video

    top.each do |id|
      begin
        videos << get_video(id, db)
      rescue ex
        next
      end
    end

    yield videos

    sleep 1.minute
    Fiber.yield
  end
end

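# Yields the most recent video from each of the 40 most-subscribed channels,
# newest first, once a minute.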
def pull_popular_videos(db)
  loop do
    videos = db.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE ucid IN \
      (SELECT channel FROM (SELECT UNNEST(subscriptions) AS channel FROM users) AS d \
      GROUP BY channel ORDER BY COUNT(channel) DESC LIMIT 40) \
      ORDER BY ucid, published DESC", as: ChannelVideo).sort_by { |video| video.published }.reverse

    yield videos

    sleep 1.minute
    Fiber.yield
  end
end

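# Re-fetches the signature decrypt function once a minute and yields each
# new version to the caller.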
def update_decrypt_function
  loop do
    begin
      decrypt_function = fetch_decrypt_function
      yield decrypt_function
    rescue ex
      next
    ensure
      sleep 1.minute
      Fiber.yield
    end
  end
end

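# Probes known captcha-triggering endpoints and, when a challenge is served,
# solves it through the anti-captcha.com API, then yields the resulting
# session cookies. Handles both the /das_captcha form and Google's
# /sorry/index interstitial (the latter over QUIC).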
def bypass_captcha(captcha_key, logger)
  loop do
    begin
      {"/watch?v=CvFH_6DNRCY&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999", produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")}.each do |path|
        response = YT_POOL.client &.get(path)

        if response.body.includes?("To continue with your YouTube experience, please fill out the form below.")
          html = XML.parse_html(response.body)
          form = html.xpath_node(%(//form[@action="/das_captcha"])).not_nil!
          site_key = form.xpath_node(%(.//div[@class="g-recaptcha"])).try &.["data-sitekey"]

          inputs = {} of String => String
          form.xpath_nodes(%(.//input[@name])).each do |node|
            inputs[node["name"]] = node["value"]
          end

          headers = response.cookies.add_request_headers(HTTP::Headers.new)

          response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/createTask", body: {
            "clientKey" => CONFIG.captcha_key,
            "task"      => {
              "type"       => "NoCaptchaTaskProxyless",
              "websiteURL" => "https://www.youtube.com/watch?v=CvFH_6DNRCY&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999",
              "websiteKey" => site_key,
            },
          }.to_json).body)

          if response["error"]?
            raise response["error"].as_s
          end

          task_id = response["taskId"].as_i

          loop do
            sleep 10.seconds

            response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/getTaskResult", body: {
              "clientKey" => CONFIG.captcha_key,
              "taskId"    => task_id,
            }.to_json).body)

            if response["status"]?.try &.== "ready"
              break
            elsif response["errorId"]?.try &.as_i != 0
              raise response["errorDescription"].as_s
            end
          end

          inputs["g-recaptcha-response"] = response["solution"]["gRecaptchaResponse"].as_s
          response = YT_POOL.client &.post("/das_captcha", headers, form: inputs)

          yield response.cookies.select { |cookie| cookie.name != "PREF" }
        elsif response.headers["Location"]?.try &.includes?("/sorry/index")
          location = response.headers["Location"].try { |u| URI.parse(u) }
          client = QUIC::Client.new(location.host.not_nil!)
          response = client.get(location.full_path)

          html = XML.parse_html(response.body)
          form = html.xpath_node(%(//form[@action="index"])).not_nil!
          site_key = form.xpath_node(%(.//div[@class="g-recaptcha"])).try &.["data-sitekey"]

          inputs = {} of String => String
          form.xpath_nodes(%(.//input[@name])).each do |node|
            inputs[node["name"]] = node["value"]
          end

          response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/createTask", body: {
            "clientKey" => CONFIG.captcha_key,
            "task"      => {
              "type"       => "NoCaptchaTaskProxyless",
              "websiteURL" => location.to_s,
              "websiteKey" => site_key,
            },
          }.to_json).body)

          if response["error"]?
            raise response["error"].as_s
          end

          task_id = response["taskId"].as_i

          loop do
            sleep 10.seconds

            response = JSON.parse(HTTP::Client.post("https://api.anti-captcha.com/getTaskResult", body: {
              "clientKey" => CONFIG.captcha_key,
              "taskId"    => task_id,
            }.to_json).body)

            if response["status"]?.try &.== "ready"
              break
            elsif response["errorId"]?.try &.as_i != 0
              raise response["errorDescription"].as_s
            end
          end

          inputs["g-recaptcha-response"] = response["solution"]["gRecaptchaResponse"].as_s
          client.close
          client = QUIC::Client.new("www.google.com")
          response = client.post(location.full_path, form: inputs)
          headers = HTTP::Headers{
            "Cookie" => URI.parse(response.headers["location"]).query_params["google_abuse"].split(";")[0],
          }
          cookies = HTTP::Cookies.from_headers(headers)

          yield cookies
        end
      end
    rescue ex
      logger.puts("Exception: #{ex.message}")
    ensure
      sleep 1.minute
      Fiber.yield
    end
  end
end

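# Yields up to 20 candidate proxies per region to the caller once a minute.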
def find_working_proxies(regions)
  loop do
    regions.each do |region|
      proxies = get_proxies(region).first(20)
      proxies = proxies.map { |proxy| {ip: proxy[:ip], port: proxy[:port]} }
      # proxies = filter_proxies(proxies)

      yield region, proxies
    end

    sleep 1.minute
    Fiber.yield
  end
end