dotfiles
Seven years' worth of accumulated configuration cruft
dotfiles/scripts/.local/bin/dl
Download raw file: scripts/.local/bin/dl
#!/bin/sh
# This script does most of my data hoarding.
#
# The file doubles as a library: sourcing it only defines the dl_*
# functions.  `set -e` is deliberately not enabled at file scope, because
# it would leak into (and could terminate) the shell that sources the
# file.  The entry-point guard at the bottom of the file enables it when
# the script is executed as `dl`.

# User agent string handed to gallery-dl via --user-agent.
LATEST_CHROME_ON_WINDOWS="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"

# Strip comments and blank lines from data files.
#   $1 - path of the data file to read
# Prints every remaining line on stdout.  A line counts as a comment when
# its first non-blank character is '#', so indented comments are dropped
# as well.
injest() {
    sed '/^[[:space:]]*$/d; /^[[:space:]]*#/d' "$1"
}

########################################################################
### Music From YouTube

# Run ytmu on one URL with a download archive named archived.txt.
#   $1 - URL to hand to ytmu
# The archive path is relative, so it refers to whatever directory the
# caller has changed into.
_ytmu() {
    YTMU_YTDLP_EXTRA_OPTS="--download-archive archived.txt" ytmu "$1"
}

# Download every entry listed in ~/media/.data/youtube_music.txt.
# Each entry is "<url> <directory>", where <directory> is relative to
# ~/media/music and may itself contain spaces.
dl_youtube_music() {
    # `read -r url dir` puts the first whitespace-separated field in url
    # and the untouched remainder of the line in dir, so both fields agree
    # on where the separator is even when the directory contains spaces.
    injest ~/media/.data/youtube_music.txt | while read -r url dir; do
        if [ -z "$dir" ]; then
            # Without a directory the download would land in a path named
            # after the URL itself; report the entry and move on.
            printf 'dl_youtube_music: no directory given for %s, skipping\n' \
                "$url" >&2
            continue
        fi
        mkdir -p "$HOME/media/music/$dir"
        # Subshell keeps the cd local; && prevents downloading into the
        # wrong directory if the cd fails.
        (cd "$HOME/media/music/$dir" && _ytmu "$url")
    done
}

########################################################################
### Artwork

# Run gallery-dl for one URL.
#   $1 - URL to download
#   $2 - destination directory; the download archive is kept in $2/archive
# Returns gallery-dl's exit status.  Positional parameters are used
# directly so no global variables of the caller are overwritten.
gallery_dl_wrapper() {
    gallery-dl --ugoira-conv-copy --write-metadata --write-info-json \
        --write-tags --destination "$2/" \
        --write-unsupported "$HOME/dl_script_gallery-dl_unsupported_urls" \
        --user-agent "$LATEST_CHROME_ON_WINDOWS" \
        --download-archive "$2/archive" "$1"
}

# Download every URL listed in ~/media/.data/art_archive.txt into
# ~/media/gallery-dl.  A failing URL never stops the run; the loop always
# moves on to the next entry.
dl_artwork() {
    injest ~/media/.data/art_archive.txt | while read -r url; do
        gallery_dl_wrapper "$url" "$HOME/media/gallery-dl" || {
            rc=$?
            case $rc in
                # FIXME Tumblr returns 1 for API rate limit
                # exceeded, handle this!
                4) continue ;;  # Cloudflare or Instagram blocking us
                16) continue ;; # Login required or oauth expired
                *)
                    # Any other status is reported rather than silently
                    # dropped; the loop still carries on.
                    printf 'dl_artwork: gallery-dl exited with %s for %s\n' \
                        "$rc" "$url" >&2
                    ;;
            esac
        }
    done
}

########################################################################
### Torrents

# FIXME: write this!
dl_torrents() {
    # Placeholder: nothing is downloaded yet.
    return 0
}

########################################################################
### RFCs

# Mirror the rfcs-text-only rsync module into ~/media/rfc/.
# --delete removes local files that no longer exist upstream.
dl_rfcs() {
    rsync -avz --delete rsync.rfc-editor.org::rfcs-text-only ~/media/rfc/
}

########################################################################
### US Patents

# FIXME Figure out how to grab from https://bulkdata.uspto.gov/
dl_patents() {
    # Placeholder: nothing is downloaded yet.
    return 0
}

########################################################################
### Project Gutenberg

# Mirror the gutenberg-epub rsync module into ~/media/gutenberg.org.
dl_gutenberg() {
    rsync -av --delete aleph.gutenberg.org::gutenberg-epub ~/media/gutenberg.org
}

########################################################################
### Bitsavers

# Mirror the bitsavers rsync module into ~/media/bitsavers.org.
dl_bitsavers() {
    rsync -av --delete rsync://bitsavers.org:/bitsavers/ ~/media/bitsavers.org
}

########################################################################
### Anarchist Library

# Fetch the library's mirror.txt URL list and let wget download every
# entry beneath ~/media/.
dl_anarchist_library() {
    # -f: on an HTTP error curl outputs nothing instead of the server's
    # error page, so wget is never fed HTML as if it were a URL list.
    curl -f https://theanarchistlibrary.org/mirror.txt \
        | wget -x -N -i - -P ~/media/
}

########################################################################
### Miscellaneous Sites

# Print the host part of a URL.
#   $1 - URL; a leading "http://" and/or "https://" is removed and
#        everything from the first "/" onwards is dropped
# Works on the positional parameter only, so no global variable is set
# and no external process is spawned.
_get_domain() {
    set -- "${1#http://}"
    set -- "${1#https://}"
    printf '%s\n' "${1%%/*}"
}

# Recursively archive every URL listed in
# ~/media/.data/sites_to_archive.txt beneath ~/media/sites/.
#   $1 - optional URL; when given, its host is passed to wget as the -D
#        domain list.  When omitted, no -D option is passed at all
#        instead of an empty domain list.
dl_misc_sites() {
    if [ -n "${1:-}" ]; then
        set -- -D "$(_get_domain "$1")"
    else
        set --
    fi
    injest ~/media/.data/sites_to_archive.txt \
        | wget -r -np -k -p -c -N "$@" -i - -P ~/media/sites/
}

########################################################################

# Allows this file to be sourced as a library.
# ${0##*/} is used rather than basename(1): when sourced from a login
# shell $0 is "-bash", which basename would reject as an invalid option.
if [ "${0##*/}" = "dl" ]; then
    set -e
    # dl_youtube_music
    # dl_artwork
    # dl_torrents
    # dl_rfcs
    # dl_patents
    # dl_gutenberg
    # dl_bitsavers
    # dl_anarchist_library
    # dl_misc_sites
fi