invidious-mod-JP/src/invidious/search.cr

# One video entry scraped from a search results page by `search`.
#
# The field order below matters: `search` builds instances with positional
# arguments in exactly this order.
#
# Fields, as populated by `search`:
# * `title`            - stripped text of the result's title link.
# * `id`               - title link href with the leading "/watch?v=" removed.
# * `author`           - text of the byline link.
# * `ucid`             - last "/"-separated segment of the byline link href.
# * `published`        - publication time; `Time.now` for single-metadata
#                        entries that are not scheduled ("Starts ...") streams.
# * `view_count`       - parsed view count; 0 for "No views" and for
#                        scheduled streams.
# * `description`      - plain-text form of `description_html`.
# * `description_html` - serialized description node, or "" when absent.
# * `length_seconds`   - decoded duration, or -1 when no duration is shown.
#
# NOTE(review): `add_mapping` is defined outside this file; presumably it
# generates the positional initializer and accessors - confirm.
class SearchVideo
  add_mapping({
    title:            String,
    id:               String,
    author:           String,
    ucid:             String,
    published:        Time,
    view_count:       Int64,
    description:      String,
    description_html: String,
    length_seconds:   Int32,
  })
end

# Fetches one page of search results for `query` and returns the videos
# found on it as an `Array(SearchVideo)`.
#
# * `query`         - search text; URI-escaped before being sent.
# * `page`          - results page number, passed through as `page=`.
# * `search_params` - pre-encoded `sp=` filter token (see
#                     `build_search_params`); defaults to videos only.
#
# The legacy (`disable_polymer=1`) results page is requested and each
# `<li>` of the `item-section` list is inspected. An entry is skipped when
# it has no title link, is an ad, has no byline link, is a playlist, has no
# metadata items, or looks like a movie ("·" in its first metadata item).
#
# Returns an empty array when the response body is empty.
def search(query, page = 1, search_params = build_search_params(content_type: "video"))
  response_body = make_client(YT_URL).get("/results?q=#{URI.escape(query)}&page=#{page}&sp=#{search_params}&disable_polymer=1").body
  return [] of SearchVideo if response_body.empty?

  document = XML.parse_html(response_body)
  results = [] of SearchVideo

  document.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |item|
    title_link = item.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
    next unless title_link

    href = title_link["href"]

    # Sponsored entries link out to the ad service instead of a watch page.
    next if href.starts_with? "https://www.googleadservices.com"

    title = title_link.content.strip
    video_id = href.lchop("/watch?v=")

    byline_link = item.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
    next unless byline_link

    author = byline_link.content
    # The channel id is taken as the last path segment of the byline URL.
    ucid = byline_link["href"].split("/")[-1]

    # Skip playlists
    next if item.xpath_node(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]))

    meta_items = item.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
    next if meta_items.size == 0

    first_meta = meta_items[0].content

    # Skip movies
    next if first_meta.includes? "·"

    if meta_items.size == 1
      if first_meta.starts_with? "Starts"
        # Scheduled livestream: no views yet, start time is in the span's
        # data-timestamp attribute.
        view_count = 0_i64
        published = Time.epoch(meta_items[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
      else
        # Only a count is shown (optionally prefixed with "Streamed ").
        view_count = first_meta.lchop("Streamed ").split(" ")[0].delete(",").to_i64
        published = Time.now
      end
    else
      published = decode_date(first_meta)

      # "No views" has no number to parse.
      views_text = meta_items[1].content.split(" ")[0]
      view_count = views_text == "No" ? 0_i64 : views_text.delete(",").to_i64
    end

    description_node = item.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
    if description_node
      description_html = description_node.to_s
      # Turn line breaks into newlines before stripping the markup.
      with_newlines = description_html.gsub("<br>", "\n").gsub("<br/>", "\n")
      description = XML.parse_html(with_newlines).content.strip("\n ")
    else
      description = ""
      description_html = ""
    end

    # -1 marks entries that show no duration.
    time_node = item.xpath_node(%q(.//span[@class="video-time"]))
    length_seconds = time_node ? decode_length_seconds(time_node.content) : -1

    results << SearchVideo.new(
      title,
      video_id,
      author,
      ucid,
      published,
      view_count,
      description,
      description_html,
      length_seconds,
    )
  end

  results
end

# Builds the encoded `sp=` token that selects sort order and filters for a
# search request.
#
# * `sort_by`      - "relevance", "rating", "upload_date" or "view_count";
#                    anything else raises "No sort <value>".
# * `date`         - "hour", "today", "week", "month" or "year"; other
#                    values add no date filter.
# * `content_type` - "video", "channel", "playlist", "movie" or "show";
#                    other values add no type filter.
# * `duration`     - "short" or "long"; other values add no duration filter.
# * `features`     - any of "hd", "subtitles", "creative_commons", "3d",
#                    "live", "purchased", "4k", "360", "location", "hdr";
#                    an unknown entry raises "Unknown feature <value>".
#
# The raw token is "\x08" plus the sort byte; when any filter is present it
# is followed by "\x12", one length character and the filter bytes. The
# result is URL-safe Base64 encoded and then URI-escaped.
def build_search_params(sort_by = "relevance", date : String = "", content_type : String = "", duration : String = "", features : Array(String) = [] of String)
  sort_code = case sort_by
              when "relevance"   then "\x00"
              when "rating"      then "\x01"
              when "upload_date" then "\x02"
              when "view_count"  then "\x03"
              else
                raise "No sort #{sort_by}"
              end
  header = "\x08" + sort_code

  date_codes = {
    "hour"  => "\x08\x01",
    "today" => "\x08\x02",
    "week"  => "\x08\x03",
    "month" => "\x08\x04",
    "year"  => "\x08\x05",
  }
  type_codes = {
    "video"    => "\x10\x01",
    "channel"  => "\x10\x02",
    "playlist" => "\x10\x03",
    "movie"    => "\x10\x04",
    "show"     => "\x10\x05",
  }
  duration_codes = {
    "short" => "\x18\x01",
    "long"  => "\x18\x02",
  }
  feature_codes = {
    "hd"               => "\x20\x01",
    "subtitles"        => "\x28\x01",
    "creative_commons" => "\x30\x01",
    "3d"               => "\x38\x01",
    "live"             => "\x40\x01",
    "purchased"        => "\x48\x01",
    "4k"               => "\x70\x01",
    "360"              => "\x78\x01",
    "location"         => "\xb8\x01\x01",
    "hdr"              => "\xc8\x01\x01",
  }

  # Unrecognised date/type/duration values contribute nothing; unrecognised
  # features are an error.
  filters = date_codes.fetch(date, "") +
            type_codes.fetch(content_type, "") +
            duration_codes.fetch(duration, "")
  filters += features.map { |feature| feature_codes[feature]? || raise "Unknown feature #{feature}" }.join

  raw_token = header
  unless filters.empty?
    # The filter section is prefixed with its length as a single character.
    raw_token = header + "\x12" + filters.size.to_u8.unsafe_chr + filters
  end

  URI.escape(Base64.urlsafe_encode(raw_token))
end
Major cleanup 2018-08-05 04:07:38 +00:00			`class SearchVideo`
			`add_mapping({`
			`title: String,`
			`id: String,`
			`author: String,`
			`ucid: String,`
			`published: Time,`
			`view_count: Int64,`
			`description: String,`
			`description_html: String,`
			`length_seconds: Int32,`
			`})`
			`end`

Add filters to '/api/v1/search' endpoint 2018-08-04 22:12:58 +00:00			`def search(query, page = 1, search_params = build_search_params(content_type: "video"))`
Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00			`client = make_client(YT_URL)`
Fix method for detecting movies in search results 2018-08-05 23:07:52 +00:00			`html = client.get("/results?q=#{URI.escape(query)}&page=#{page}&sp=#{search_params}&disable_polymer=1").body`
Major cleanup 2018-08-05 04:07:38 +00:00			`if html.empty?`
			`return [] of SearchVideo`
			`end`

Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00			`html = XML.parse_html(html)`
Major cleanup 2018-08-05 04:07:38 +00:00			`videos = [] of SearchVideo`
Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00
Major cleanup 2018-08-05 04:07:38 +00:00			`html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do \|node\|`
			`anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))`
			`if !anchor`
			`next`
			`end`
Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00
Major cleanup 2018-08-05 04:07:38 +00:00			`if anchor["href"].starts_with? "https://www.googleadservices.com"`
Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00			`next`
			`end`

Major cleanup 2018-08-05 04:07:38 +00:00			`title = anchor.content.strip`
Update channel feed 2018-08-05 19:26:41 +00:00			`video_id = anchor["href"].lchop("/watch?v=")`
Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00
Fix searches where items don't have byline 2018-08-05 19:46:21 +00:00			`anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))`
			`if !anchor`
			`next`
			`end`
Major cleanup 2018-08-05 04:07:38 +00:00			`author = anchor.content`
			`author_url = anchor["href"]`
			`ucid = author_url.split("/")[-1]`
Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00
Add fix for playlists in search results 2018-08-07 03:56:21 +00:00			`# Skip playlists`
			`if node.xpath_node(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]))`
			`next`
			`end`

Major cleanup 2018-08-05 04:07:38 +00:00			`metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))`
			`if metadata.size == 0`
			`next`
			`elsif metadata.size == 1`
Add fix for movies in search results 2018-08-07 04:11:37 +00:00			`# Skip movies`
			`if metadata[0].content.includes? "·"`
			`next`
			`end`

Add fix for scheduled livestreams in search results 2018-08-07 13:19:20 +00:00			`if metadata[0].content.starts_with? "Starts"`
			`view_count = 0_i64`
			`published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)`
			`else`
			`view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64`
			`published = Time.now`
			`end`
Fix method for detecting movies in search results 2018-08-05 23:07:52 +00:00			`else`
Add fix for movies in search results 2018-08-05 22:07:17 +00:00			`# Skip movies`
Fix method for detecting movies in search results 2018-08-05 23:07:52 +00:00			`if metadata[0].content.includes? "·"`
Add fix for movies in search results 2018-08-05 22:07:17 +00:00			`next`
			`end`

Major cleanup 2018-08-05 04:07:38 +00:00			`published = decode_date(metadata[0].content)`

Add fix for parsing videos with 1 view 2018-08-05 19:08:39 +00:00			`view_count = metadata[1].content.split(" ")[0]`
Major cleanup 2018-08-05 04:07:38 +00:00			`if view_count == "No"`
			`view_count = 0_i64`
			`else`
			`view_count = view_count.delete(",").to_i64`
			`end`
			`end`

			`description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))`
			`if !description_html`
			`description = ""`
			`description_html = ""`
			`else`
			`description_html = description_html.to_s`
			`description = description_html.gsub("<br>", "\n")`
			`description = description.gsub("<br/>", "\n")`
			`description = XML.parse_html(description).content.strip("\n ")`
			`end`

			`length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))`
			`if length_seconds`
			`length_seconds = decode_length_seconds(length_seconds.content)`
			`else`
			`length_seconds = -1`
			`end`
Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00
Major cleanup 2018-08-05 04:07:38 +00:00			`video = SearchVideo.new(`
			`title,`
Update channel feed 2018-08-05 19:26:41 +00:00			`video_id,`
Major cleanup 2018-08-05 04:07:38 +00:00			`author,`
			`ucid,`
			`published,`
			`view_count,`
			`description,`
			`description_html,`
			`length_seconds,`
			`)`
Split helpers.cr into multiple files 2018-08-04 20:30:44 +00:00
			`videos << video`
			`end`

			`return videos`
			`end`
Add filters to '/api/v1/search' endpoint 2018-08-04 22:12:58 +00:00
			`def build_search_params(sort_by = "relevance", date : String = "", content_type : String = "", duration : String = "", features : Array(String) = [] of String)`
			`head = "\x08"`
			`head += case sort_by`
			`when "relevance"`
			`"\x00"`
			`when "rating"`
			`"\x01"`
			`when "upload_date"`
			`"\x02"`
			`when "view_count"`
			`"\x03"`
			`else`
			`raise "No sort #{sort_by}"`
			`end`

			`body = ""`
			`body += case date`
			`when "hour"`
			`"\x08\x01"`
			`when "today"`
			`"\x08\x02"`
			`when "week"`
			`"\x08\x03"`
			`when "month"`
			`"\x08\x04"`
			`when "year"`
			`"\x08\x05"`
			`else`
			`""`
			`end`

			`body += case content_type`
			`when "video"`
			`"\x10\x01"`
			`when "channel"`
			`"\x10\x02"`
			`when "playlist"`
			`"\x10\x03"`
			`when "movie"`
			`"\x10\x04"`
			`when "show"`
			`"\x10\x05"`
			`else`
			`""`
			`end`

			`body += case duration`
			`when "short"`
			`"\x18\x01"`
			`when "long"`
			`"\x18\x02"`
			`else`
			`""`
			`end`

			`features.each do \|feature\|`
			`body += case feature`
			`when "hd"`
			`"\x20\x01"`
			`when "subtitles"`
			`"\x28\x01"`
			`when "creative_commons"`
			`"\x30\x01"`
			`when "3d"`
			`"\x38\x01"`
			`when "live"`
			`"\x40\x01"`
			`when "purchased"`
			`"\x48\x01"`
			`when "4k"`
			`"\x70\x01"`
			`when "360"`
			`"\x78\x01"`
			`when "location"`
			`"\xb8\x01\x01"`
			`when "hdr"`
			`"\xc8\x01\x01"`
			`else`
			`raise "Unknown feature #{feature}"`
			`end`
			`end`

			`if body.size > 0`
			`token = head + "\x12" + body.size.to_u8.unsafe_chr + body`
			`else`
			`token = head`
			`end`

			`token = Base64.urlsafe_encode(token)`
			`token = URI.escape(token)`

			`return token`
			`end`