Add language detection for top videos
This commit is contained in:
parent
dde7a643e9
commit
997449ab4b
|
@ -15,6 +15,9 @@ dependencies:
|
||||||
pg:
|
pg:
|
||||||
github: will/crystal-pg
|
github: will/crystal-pg
|
||||||
branch: master
|
branch: master
|
||||||
|
detect_language:
|
||||||
|
github: omarroth/detectlanguage-crystal
|
||||||
|
branch: v0.24.2
|
||||||
|
|
||||||
crystal: 0.24.2
|
crystal: 0.24.2
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,8 @@ class Config
|
||||||
port: Int32,
|
port: Int32,
|
||||||
dbname: String,
|
dbname: String,
|
||||||
),
|
),
|
||||||
redirect: Bool,
|
redirect: Bool,
|
||||||
|
dl_api_key: String | Nil,
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -229,7 +230,7 @@ def decrypt_signature(a)
|
||||||
return a.join("")
|
return a.join("")
|
||||||
end
|
end
|
||||||
|
|
||||||
def rank_videos(db, n)
|
def rank_videos(db, n, pool, filter)
|
||||||
top = [] of {Float64, String}
|
top = [] of {Float64, String}
|
||||||
|
|
||||||
db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 10000") do |rs|
|
db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 10000") do |rs|
|
||||||
|
@ -250,8 +251,30 @@ def rank_videos(db, n)
|
||||||
top.reverse!
|
top.reverse!
|
||||||
top = top.map { |a, b| b }
|
top = top.map { |a, b| b }
|
||||||
|
|
||||||
# Return top
|
if filter
|
||||||
return top[0..n - 1]
|
language_list = [] of String
|
||||||
|
top.each do |id|
|
||||||
|
if language_list.size == n
|
||||||
|
break
|
||||||
|
else
|
||||||
|
client = get_client(pool)
|
||||||
|
video = get_video(id, client, db)
|
||||||
|
pool << client
|
||||||
|
|
||||||
|
description = XML.parse(video.description)
|
||||||
|
content = [video.title, description.content].join(" ")
|
||||||
|
|
||||||
|
results = DetectLanguage.detect(content)
|
||||||
|
|
||||||
|
if results[0].language == "en"
|
||||||
|
language_list << id
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
return language_list
|
||||||
|
else
|
||||||
|
return top[0..n - 1]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def make_client(url)
|
def make_client(url)
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
require "detect_language"
|
||||||
require "kemal"
|
require "kemal"
|
||||||
require "option_parser"
|
require "option_parser"
|
||||||
require "pg"
|
require "pg"
|
||||||
|
@ -135,8 +136,14 @@ end
|
||||||
top_videos = [] of Video
|
top_videos = [] of Video
|
||||||
|
|
||||||
spawn do
|
spawn do
|
||||||
|
if CONFIG.dl_api_key
|
||||||
|
DetectLanguage.configure do |config|
|
||||||
|
config.api_key = CONFIG.dl_api_key.not_nil!
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
loop do
|
loop do
|
||||||
top = rank_videos(PG_DB, 40)
|
top = rank_videos(PG_DB, 40, youtube_pool, true)
|
||||||
|
|
||||||
if top.size > 0
|
if top.size > 0
|
||||||
args = arg_array(top)
|
args = arg_array(top)
|
||||||
|
|
Loading…
Reference in a new issue