• ☆ Yσɠƚԋσʂ ☆@lemmy.ml
    link
    fedilink
    arrow-up
    5
    ·
    2 months ago

    I made a script for grabbing Reddit videos that’s been working pretty well for me. It needs Babashka (https://babashka.org/) to run, and it calls wget and ffmpeg, so those must be installed too.

    #!/usr/bin/env bb
    (require '[clojure.java.shell :refer [sh]]
             '[clojure.string :as string]
             '[cheshire.core :as cheshire]
             '[org.httpkit.client :as http]
             '[clojure.walk :as walk])
    
    (defn http-get
      "Sends a GET request for `url` via http-kit, waits for the response by
      dereferencing it, and returns the value stored under `:body`.
      No other response key (such as the status) is inspected here."
      [url]
      (let [response (deref (http/get url {}))]
        (:body response)))
    
    (defn find-base-url
      "Searches every string nested anywhere inside `data` for one containing a
      `DASH_<digits>.mp4` segment and returns the part of the first such string
      that precedes the segment. Returns nil when no string matches.

      Only strings that really contain the segment are candidates, so a value
      that merely mentions DASH (for instance a `DASHPlaylist.mpd` link) is
      never returned unmodified. Everything from the segment to the end of the
      string is dropped, which also removes a trailing query string such as
      `?source=fallback`."
      [data]
      (let [segment #"DASH_[0-9]+\.mp4"
            ;; tree-seq yields nodes depth-first, so leaf strings appear in the
            ;; same order a walk over the structure would visit them.
            candidate (->> (tree-seq coll? seq data)
                           (filter #(and (string? %) (re-find segment %)))
                           first)]
        ;; (?s) lets `.` span newlines so the whole tail is removed.
        (some-> candidate
                (string/replace #"(?s)DASH_[0-9]+\.mp4.*" ""))))
    
    (defn find-best-quality
      "Picks the highest-quality entry from `names`.

      When `audio?` is truthy only names containing \"audio\" (case-insensitive)
      are considered; otherwise only names that do not contain it. Each name is
      ranked by the number formed from its digits after every `.mp4` has been
      removed (e.g. `DASH_720.mp4` ranks as 720). A name without any digits
      ranks below every numbered name instead of raising
      NumberFormatException. Returns nil when no name qualifies; among equally
      ranked names the one appearing last in `names` is returned."
      [names audio?]
      (let [audio-name? (fn [n] (string/includes? (string/lower-case n) "audio"))
            quality (fn [n]
                      (let [digits (-> n
                                       ;; strip the extension first so its "4"
                                       ;; does not become part of the rank
                                       (string/replace #"\.mp4" "")
                                       (string/replace #"[^0-9]" ""))]
                        (if (empty? digits)
                          -1
                          (Long/parseLong digits))))
            keep-or-drop (if audio? filter remove)]
        (->> names
             (keep-or-drop audio-name?)
             (sort-by quality)
             last)))
    
    (defn find-parts
      "Resolves the best video and audio stream URLs for a post.

      Looks through `data` for maps carrying a `:dash_url` value (the last
      non-nil one found in depth-first order is used), downloads that manifest
      with `http-get`, collects the text of every `<BaseURL>` element, and
      selects the best non-audio and best audio entry with `find-best-quality`.

      Returns a two-element vector `[video-url audio-url]`, each being
      `base-url` with the chosen entry appended. If no entry of a kind exists,
      the corresponding element is just `base-url`.

      Throws ex-info when `data` holds no `:dash_url` or when the manifest
      request yields no body."
      [base-url data]
      (let [manifest-url (->> (tree-seq coll? seq data)
                              (filter map?)
                              (keep :dash_url)
                              last)]
        (when-not manifest-url
          (throw (ex-info "No :dash_url found in post data" {})))
        (let [xml (http-get manifest-url)]
          (when-not xml
            (throw (ex-info "DASH manifest request returned no body"
                            {:url manifest-url})))
          (let [parts (map second (re-seq #"<BaseURL>(.*?)</BaseURL>" xml))
                best-video (find-best-quality parts false)
                best-audio (find-best-quality parts true)]
            [(str base-url best-video) (str base-url best-audio)]))))
    
    (defn filename
      "Returns the portion of `url` that follows its final `/`.
      When `url` contains no `/` the whole string is returned."
      [url]
      (if-some [slash (string/last-index-of url "/")]
        (subs url (inc slash))
        url))
    
    (defn tsname
      "Builds an output file name of the form `video-<millis>.mp4`, where
      `<millis>` is the value of System/currentTimeMillis at call time."
      []
      (let [now-ms (System/currentTimeMillis)]
        (str "video-" now-ms ".mp4")))
    
    ;; Script entry point.
    ;; Takes the post URL from the first command-line argument, fetches the
    ;; post's JSON, resolves the video and audio stream URLs, downloads both
    ;; with wget, and muxes them with ffmpeg into a file named by `tsname`.
    ;; The downloaded intermediate files are removed afterwards, even when a
    ;; step fails.
    (let [post-url (or (first *command-line-args*)
                       (throw (ex-info "Usage: pass the post URL as the first argument" {})))
          ;; Runs an external command and raises on a non-zero exit status, so a
          ;; failed download is reported instead of feeding ffmpeg missing files.
          run-cmd (fn [& args]
                    (let [{:keys [exit err]} (apply sh args)]
                      (when-not (zero? exit)
                        (throw (ex-info (str "Command failed: " (string/join " " args))
                                        {:exit exit :err err})))))
          ;; Drop any query string or fragment so ".json" lands on the path.
          json-url (-> post-url
                       (string/replace #"[?#].*$" "")
                       (str ".json"))
          data (cheshire/decode (http-get json-url) true)
          base-url (or (find-base-url data)
                       (throw (ex-info "No DASH video URL found in post data"
                                       {:url json-url})))
          [video-url audio-url] (find-parts base-url data)
          video-file (filename video-url)
          audio-file (filename audio-url)]
      (try
        ;; -O pins the output name; otherwise wget may save to "<name>.1" when
        ;; a file with that name already exists.
        (run-cmd "wget" "-O" video-file video-url)
        (run-cmd "wget" "-O" audio-file audio-url)
        ;; The audio input is looped indefinitely and -shortest ends the output
        ;; with the shorter (video) stream.
        (run-cmd "ffmpeg" "-i" video-file "-stream_loop" "-1" "-i" audio-file "-shortest" "-map" "0:v:0" "-map" "1:a:0" "-y" (tsname))
        (finally
          ;; -f: the files may not exist if a download failed part-way.
          (sh "rm" "-f" audio-file video-file))))