#!/usr/bin/env bash
# author: joe.zheng
# version: 24.03.24

# Abort as soon as a command fails outside of a condition.
set -e

# Script name and version. The version is parsed from the "# version:" header
# comment found within the first four lines of this file.
# "$0" is quoted so the script keeps working from a path containing spaces.
SELF="$(basename -- "$0")"
SELF_VERSION="$(sed -n '1,4s/# version: \(.*\)/\1/p' -- "$0")"

# Directory layout, all rooted at DIR_HOME (overridable from the environment).
DIR_HOME="${DIR_HOME:-$(dirname -- "$0")/files}"
DIR_REPO="$DIR_HOME/repo"      # completed downloads, served by the file server
CFG_REPO="$DIR_HOME/repo.conf" # generated nginx configuration
DIR_TEMP="$DIR_HOME/.download" # in-progress downloads

UPSTREAM="$UPSTREAM"         # will try upstream first to download

SVC_NAME="${SVC_NAME:-file}" # container name
SVC_PORT="${SVC_PORT:-9070}" # port number

# Dry-run support: when DRY_RUN is "y", state-changing commands are prefixed
# with "echo" so they are printed instead of executed.
DRY_RUN="${DRY_RUN:-n}"
RUN='' # command prefix for dry run
if [[ "$DRY_RUN" == 'y' ]]; then
  RUN='echo'
fi

PREFIX='> '   # prepended to every message printed by msg/err
DELIMITER='^' # separates alternative sources inside one url argument

# Print the help text to stdout.
# Reads the globals SELF, SELF_VERSION, DIR_TEMP, DIR_REPO, DIR_HOME,
# DELIMITER, DRY_RUN, UPSTREAM, SVC_NAME and SVC_PORT to show the effective
# settings. The here-document is unquoted on purpose so those expand; literal
# "$" and "\" in the examples are therefore backslash-escaped.
function usage() {
  cat <<EOF
Usage: $SELF [command] [args]
  Download files and host a local file server

  Files will be temporarily saved in $DIR_TEMP. Upon a successful download,
  they will be moved to $DIR_REPO. If the download fails, the process will
  terminate; subsequent attempts will resume the partial download.

  It can download files from multiple sources, attempting each sequentially
  until one succeeds. Just add sources in the url delimited by $DELIMITER.

Commands:
  pull [<url>...]  download the files, read from stdin if no url
  get <url>        download first and get the local file path
  serve            start the local file server

Environment variables:
  DRY_RUN:     print out information only if it is "y", default: $DRY_RUN
  DIR_HOME:    root folder for this app, default: $DIR_HOME
  UPSTREAM:    download from upstream first if specified, default: $UPSTREAM
  SVC_NAME:    container name to use, default: $SVC_NAME
  SVC_PORT:    port number to listen, default: $SVC_PORT

HTTP proxy:

  To use as an HTTP proxy, simply prefix your request URL with "/proxy".
  For example, if the service is running at "https://host:port" and you want to
  access "https://target/path/file", format your request as:

    https://host:port/proxy/https://target/path/file

  This process will first check for the file locally. If unavailable, it will
  proxy the request to the remote server and serve it to the client, mimicking
  the behavior of an HTTP proxy.

Examples:

  # Print out how it will be processed
  DRY_RUN=y $SELF pull https://iffi.me/{demo-bot,file-mirror}

  # Download files from remote server
  $SELF pull https://iffi.me/{demo-bot,file-mirror}

  # Get the local file path
  $SELF get https://iffi.me/demo-bot

  # Start the local file server
  $SELF serve

  # Download a file from multiple sources
  f=go1.22.1.linux-amd64.tar.gz && \\
  $SELF pull https://go.dev/dl/\$f^https://golang.google.cn/dl/\$f

  # Download files from remote server, try upstream first
  UPSTREAM=https://iffi.me/proxy $SELF pull https://go.dev/doc/effective_go

  # Download files with a file list
  $SELF pull < file-list.txt

  # Get url through $SELF as an HTTP proxy
  target=https://github.com/joez/dots/raw/master/LICENSE && \\
  curl localhost:$SVC_PORT/proxy/\$target

Version: $SELF_VERSION
EOF
}

# Print an informational message to stdout, prefixed with $PREFIX.
# All arguments are joined with single spaces. printf is used instead of echo
# so that no part of the message can ever be parsed as an echo option.
function msg {
  local IFS=' ' # "$*" joins on the first IFS character
  printf '%s\n' "$PREFIX$*"
}

# Print an error message to stderr, prefixed with $PREFIX.
# All arguments are joined with single spaces. printf is used instead of echo
# so that no part of the message can ever be parsed as an echo option.
function err {
  local IFS=' ' # "$*" joins on the first IFS character
  printf '%s\n' "$PREFIX$*" >&2
}

# Succeed when string $1 contains $2 as a literal substring.
#   $1 - haystack
#   $2 - needle; an empty needle matches every haystack
# The needle is quoted inside the pattern so characters such as "*", "?" or
# "[" are compared literally rather than acting as glob wildcards.
function has() {
  [[ "$1" == *"$2"* ]]
}

# Build the list of sources to try for one download target.
#   $1   - target url (required; the shell aborts with "missing target" if
#          it is empty or unset)
#   $2.. - optional upstream base urls; one trailing "/" is stripped from each
# Prints a single line: "<upstream>/<target>" for every upstream, in order,
# followed by the plain target, all joined with $DELIMITER.
function url_to_try() {
  local target="${1:?missing target}"
  shift
  local sep="$DELIMITER"
  local url=''
  local upstream # keep the loop variable out of the caller's scope

  for upstream in "$@"; do
    url+="${upstream%/}/$target$sep"
  done

  printf '%s\n' "$url$target"
}

# Download every url given as argument into $DIR_REPO.
#   $@ - urls; one argument may hold several alternative sources separated
#        by $DELIMITER, which are tried in order until one succeeds
# Files are fetched with wget into $DIR_TEMP (with -c, so a partial download
# is continued) and moved to $DIR_REPO once complete.
# Prints "OK: <local path>" on stdout for every file that is available.
# Exits the shell with status 1 when all sources of an argument fail.
# Every command that changes state is prefixed with $RUN (dry-run support).
# CAUTION: for simplicity, all the files will be stored
# in one folder, which may have name conflict!
# The caller should ensure the file names are unique
function smart_download() {
  local dir="$DIR_REPO"
  local tmp="$DIR_TEMP"
  local sep="$DELIMITER"
  local i name downloading downloaded c ok
  local -a choices

  $RUN mkdir -p "$dir" "$tmp"
  for i in "$@"; do
    msg "download: $i"
    # all alternative sources are expected to end with the same file name,
    # so the base name of the whole argument is the local file name
    name="$(basename -- "$i")"
    downloading="$tmp/$name"
    downloaded="$dir/$name"

    if [[ -f "$downloaded" ]]; then
      msg "already downloaded"
      echo "OK: $downloaded"
      # move on to the next argument instead of abandoning the rest
      continue
    fi

    if has "$i" "$sep"; then
      msg "has multiple sources"
      # reset for every argument so an earlier success cannot hide a failure
      ok='n'
      IFS="$sep" read -r -a choices <<< "$i"
      for c in "${choices[@]}"; do
        msg "trying: $c"
        if $RUN wget -c -t 3 -T 5 -P "$tmp" "$c"; then
          ok='y'
          break
        fi
      done
      if [[ "$ok" != 'y' ]]; then
        err "all failed"
        exit 1
      fi
    else
      $RUN wget -c -P "$tmp" "$i"
    fi
    $RUN mv "$downloading" "$downloaded"
    echo "OK: $downloaded"
  done
}

# Write the nginx configuration used by the file server container.
#   $1 - path of the configuration file to create (required); missing parent
#        directories are created, an existing file is overwritten
# The here-document delimiter is quoted, so the nginx "$variables" below are
# written literally and never expanded by the shell.
function create_svc_conf {
  local conf="${1:?missing config file path}"

  mkdir -p "$(dirname -- "$conf")"
  cat <<'EOF' > "$conf"
user  nginx;
worker_processes  auto;

error_log  /var/log/nginx/error.log notice;
pid        /var/run/nginx.pid;

events {
    worker_connections  1024;
}

http {
    include       /etc/nginx/mime.types;
    default_type  application/octet-stream;

    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                      '$status $body_bytes_sent "$http_referer" '
                      '"$http_user_agent" "$http_x_forwarded_for"';
    access_log  /var/log/nginx/access.log  main;

    sendfile           on;
    keepalive_timeout  65;

    limit_conn_zone $binary_remote_addr zone=addr:10m;
    limit_req_zone  $binary_remote_addr zone=one:10m rate=1r/s;

    server {
        listen       8080;
        server_name  localhost;
        root         /repo;

        # default security control, very restricted!
        limit_conn addr 2;
        limit_req  zone=one;
        limit_rate 10k;

        # to make it easy for /proxy/http://host/*
        # otherwise the http:// will become http:/
        merge_slashes off;

        # dns resolution for proxy
        resolver 8.8.8.8;
        # pass server name through SNI
        proxy_ssl_server_name on;
        # large proxy buffering for big header
        proxy_buffers         4 512k;
        proxy_buffer_size       256k;
        proxy_busy_buffers_size 512k;

        location / {
            autoindex on;
            autoindex_exact_size off;
            autoindex_localtime on;
            charset utf-8;
        }

        location ~ ^/proxy/?$ {
            return 200 OK;
        }

        # try local file first, and then the original server
        # e.g. /proxy/http://target/path
        # try /path first, then request http://target/path
        location ~ ^/proxy/https?:// {
            rewrite ^/proxy/(.+(/[^/]+))$ $1 break;
            try_files $2 @proxy;
        }
        location @proxy {
            proxy_pass $uri;

            # follow redirects
            proxy_intercept_errors on;
            error_page 301 302 307 = @proxy_redirect;
        }
        location @proxy_redirect {
            set $saved_redirect_location $upstream_http_location;
            proxy_pass $saved_redirect_location;
        }
    }
}
EOF
}

# main
#
# Dispatch on the first argument:
#   (none) | -h | --help | help  print the usage and exit
#   pull [<url>...]              download every url (from stdin if none given)
#   get <url>                    download one url and print its local path
#   serve                        (re)create and start the nginx container

if [[ -z "$1" || "$1" == "-h" || "$1" == "--help" || "$1" == "help" ]]; then
  usage && exit
else
  cmd="$1"
  shift
fi

$RUN mkdir -p "$DIR_HOME" "$DIR_REPO"

if [[ "$cmd" == "pull" ]]; then
  if (( $# > 0 )); then
    # urls given on the command line win: feed them through stdin so both
    # input modes share the loop below. Deciding on the argument count (not
    # on whether stdin is a terminal) keeps arguments working under cron,
    # CI or any other non-interactive caller.
    exec < <(printf '%s\n' "$@")
  fi
  # "|| [[ -n ... ]]" also processes a final line that has no newline
  while IFS= read -r url || [[ -n "$url" ]]; do
    [[ -n "$url" ]] || continue # skip blank lines
    msg "processing $url"
    # $UPSTREAM is left unquoted on purpose: when empty it must add no argument
    # shellcheck disable=SC2086
    smart_download "$(url_to_try "$url" $UPSTREAM)"
  done
elif [[ "$cmd" == "get" ]]; then
  if [[ -z "$1" ]]; then
    err "missing url"
    exit 1
  fi
  # smart_download logs progress on stdout too; keep only the "OK: <path>" line
  # shellcheck disable=SC2086
  downloaded="$(smart_download "$(url_to_try "$1" $UPSTREAM)" | sed -n 's/^OK: *//p')"
  # the exit status reports whether the file is available locally
  [[ -f "$downloaded" ]] && echo "$downloaded"
  exit
elif [[ "$cmd" == "serve" ]]; then
  # docker is run through sudo unless the script already runs as root
  SUDO=""
  if [[ "$(id -u)" != "0" ]]; then
    SUDO="sudo"
  fi

  if [[ ! -f "$CFG_REPO" ]]; then
    msg "create $CFG_REPO"
    $RUN create_svc_conf "$CFG_REPO"
  fi
  # always start from a fresh container so that changed settings take effect
  if $SUDO docker container inspect -f '{{.ID}}' "$SVC_NAME" >/dev/null; then
    msg "stop and delete the old container: $SVC_NAME"
    $RUN $SUDO docker rm -f "$SVC_NAME"
  fi
  msg "create container: $SVC_NAME"
  $RUN $SUDO docker create --name "$SVC_NAME" \
    -v "$(realpath "$DIR_REPO"):/repo" \
    -v "$(realpath "$CFG_REPO"):/etc/nginx/nginx.conf" \
    -p "$SVC_PORT:8080" --restart unless-stopped \
    nginx:1.23.4-alpine
  msg "start container: $SVC_NAME"
  $RUN $SUDO docker start "$SVC_NAME"

  cat <<EOF
# check the local repo
  no_proxy='*' curl localhost:$SVC_PORT
EOF
else
  err "not supported cmd: $cmd"
  exit 1
fi

msg "done"
