From 31c7ea7466813802b4a48a21770c41a722f6945f Mon Sep 17 00:00:00 2001
From: Hisham
Date: Thu, 29 Jul 2021 22:27:02 +1000
Subject: [PATCH] add capability to support non-resolvable upstreams

---
 Dockerfile    |  2 ++
 README.md     | 38 ++++++++++++++++++++------------------
 entrypoint.sh | 24 ++++++++++++++++++++++++
 nginx.conf    |  3 +++
 4 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index c1e0f3f..b1d319d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -63,6 +63,8 @@ EXPOSE 8082
 ENV REGISTRIES="k8s.gcr.io gcr.io quay.io"
 # A space delimited list of registry:user:password to inject authentication for
 ENV AUTH_REGISTRIES="some.authenticated.registry:oneuser:onepassword another.registry:user:password"
+# A space delimited list of domain=IP1,IP2 entries to inject upstream -> server mappings for
+ENV UPSTREAM_MAPPINGS=""
 # Should we verify upstream's certificates? Default to true.
 ENV VERIFY_SSL="true"
 # Enable debugging mode; this inserts mitmproxy/mitmweb between the CONNECT proxy and the caching layer
diff --git a/README.md b/README.md
index 7c6fd67..43bf572 100644
--- a/README.md
+++ b/README.md
@@ -10,17 +10,17 @@ Caches the potentially huge blob/layer requests (for bandwidth/time savings), an
 
 ### NEW: avoiding DockerHub Pull Rate Limits with Caching
 
-Starting November 2nd, 2020, DockerHub will 
-[supposedly](https://www.docker.com/blog/docker-hub-image-retention-policy-delayed-and-subscription-updates/) 
+Starting November 2nd, 2020, DockerHub will
+[supposedly](https://www.docker.com/blog/docker-hub-image-retention-policy-delayed-and-subscription-updates/)
 [start](https://www.docker.com/blog/scaling-docker-to-serve-millions-more-developers-network-egress/)
-[rate-limiting pulls](https://docs.docker.com/docker-hub/download-rate-limit/), 
-also known as the _Docker Apocalypse_. 
+[rate-limiting pulls](https://docs.docker.com/docker-hub/download-rate-limit/),
+also known as the _Docker Apocalypse_.
 The main symptom is `Error response from daemon: toomanyrequests: Too Many Requests. Please see https://docs.docker.com/docker-hub/download-rate-limit/` during pulls.
 Many unknowing Kubernetes clusters will hit the limit, and struggle to configure `imagePullSecrets` and `imagePullPolicy`.
 
-Since version `0.6.0`, this proxy can be configured with the env var `ENABLE_MANIFEST_CACHE=true` which provides 
+Since version `0.6.0`, this proxy can be configured with the env var `ENABLE_MANIFEST_CACHE=true` which provides
 configurable caching of the manifest requests that DockerHub throttles. You can then fine-tune other parameters to your needs.
-Together with the possibility to centrally inject authentication (since 0.3x), this is probably one of the best ways to bring relief to your distressed cluster, while at the same time saving lots of bandwidth and time. 
+Together with the possibility to centrally inject authentication (since 0.3x), this is probably one of the best ways to bring relief to your distressed cluster, while at the same time saving lots of bandwidth and time.
 
 Note: enabling manifest caching, in its default config, effectively makes some tags **immutable**. Use with care. The configuration ENVs are explained in the [Dockerfile](./Dockerfile), relevant parts included below.
 
@@ -51,13 +51,13 @@ ENV MANIFEST_CACHE_DEFAULT_TIME="1h"
 
 ## What?
 
-Essentially, it's a [man in the middle](https://en.wikipedia.org/wiki/Man-in-the-middle_attack): an intercepting proxy based on `nginx`, to which all docker traffic is directed using the `HTTPS_PROXY` mechanism and injected CA root certificates. 
+Essentially, it's a [man in the middle](https://en.wikipedia.org/wiki/Man-in-the-middle_attack): an intercepting proxy based on `nginx`, to which all docker traffic is directed using the `HTTPS_PROXY` mechanism and injected CA root certificates.
 
-The main feature is Docker layer/image caching, including layers served from S3, Google Storage, etc. 
+The main feature is Docker layer/image caching, including layers served from S3, Google Storage, etc.
 
 As a bonus it allows for centralized management of Docker registry credentials, which can in itself be the main feature, eg in Kubernetes environments.
 
-You configure the Docker clients (_err... Kubernetes Nodes?_) once, and then all configuration is done on the proxy -- 
+You configure the Docker clients (_err... Kubernetes Nodes?_) once, and then all configuration is done on the proxy --
 for this to work it requires inserting a root CA certificate into system trusted root certs.
 
 ## master/:latest is unstable/beta
@@ -87,6 +87,8 @@ for this to work it requires inserting a root CA certificate into system trusted
 - `hostname`s listed here should be listed in the REGISTRIES environment as well, so they can be intercepted.
 - Env `AUTH_REGISTRIES_DELIMITER` to change the separator between authentication info. By default, a space: "` `". If you use keys that contain spaces (as with Google Cloud Registry), you should update this variable, e.g. setting it to `AUTH_REGISTRIES_DELIMITER=";;;"`. In that case, `AUTH_REGISTRIES` could contain something like `registry1.com:user1:pass1;;;registry2.com:user2:pass2`.
 - Env `AUTH_REGISTRY_DELIMITER` to change the separator between authentication info *parts*. By default, a colon: "`:`". If you use keys that contain single colons, you should update this variable, e.g. setting it to `AUTH_REGISTRIES_DELIMITER=":::"`. In that case, `AUTH_REGISTRIES` could contain something like `registry1.com:::user1:::pass1 registry2.com:::user2:::pass2`.
+- Env `UPSTREAM_MAPPINGS` to map an upstream hostname directly to one or more server addresses (similar in function to `/etc/hosts` entries, but with round-robin selection across the listed servers).
+  Useful when the configured resolvers cannot resolve an upstream host, e.g. `UPSTREAM_MAPPINGS="registry1=10.0.1.10:443,10.0.1.11 registry2=5.0.1.10"`.
 - Timeouts ENVS - all of them can pe specified to control different timeouts, and if not set, the defaults will be the ones from `Dockerfile`. The directives will be added into `http` block.:
   - SEND_TIMEOUT : see [send_timeout](http://nginx.org/en/docs/http/ngx_http_core_module.html#send_timeout)
   - CLIENT_BODY_TIMEOUT : see [client_body_timeout](http://nginx.org/en/docs/http/ngx_http_core_module.html#client_body_timeout)
@@ -155,10 +157,10 @@ docker run --rm --name docker_registry_proxy -it \
 
 ### Google Container Registry (GCR) auth
 
-For Google Container Registry (GCR), username should be `_json_key` and the password should be the contents of the service account JSON. 
-Check out [GCR docs](https://cloud.google.com/container-registry/docs/advanced-authentication#json_key_file). 
+For Google Container Registry (GCR), username should be `_json_key` and the password should be the contents of the service account JSON.
+Check out [GCR docs](https://cloud.google.com/container-registry/docs/advanced-authentication#json_key_file).
 
-The service account key is in JSON format, it contains spaces ("` `") and colons ("`:`"). 
+The service account key is in JSON format, it contains spaces ("` `") and colons ("`:`").
 
 To be able to use GCR you should set `AUTH_REGISTRIES_DELIMITER` to something different than space (e.g. `AUTH_REGISTRIES_DELIMITER=";;;"`) and `AUTH_REGISTRY_DELIMITER` to something different than a single colon (e.g. `AUTH_REGISTRY_DELIMITER=":::"`).
 
@@ -274,7 +276,7 @@ Since `0.4` there is a separate `-debug` version of the image, which includes `n
 This allows very in-depth debugging. Use sparingly, and definitely not in production.
 
 ```bash
-docker run --rm --name docker_registry_proxy -it 
+docker run --rm --name docker_registry_proxy -it
        -e DEBUG_NGINX=true -e DEBUG=true -e DEBUG_HUB=true -p 0.0.0.0:8081:8081 -p 0.0.0.0:8082:8082 \
        -p 0.0.0.0:3128:3128 -e ENABLE_MANIFEST_CACHE=true \
        -v $(pwd)/docker_mirror_cache:/docker_mirror_cache \
@@ -297,15 +299,15 @@ docker run --rm --name docker_registry_proxy -it
 
 ### Why not use Docker's own registry, which has a mirror feature?
 
-Yes, Docker offers [Registry as a pull through cache](https://docs.docker.com/registry/recipes/mirror/), *unfortunately* 
+Yes, Docker offers [Registry as a pull through cache](https://docs.docker.com/registry/recipes/mirror/), *unfortunately*
 it only covers the DockerHub case. It won't cache images from `quay.io`, `k8s.gcr.io`, `gcr.io`, or any such, including any private registries.
 
-That means that your shiny new Kubernetes cluster is now a bandwidth hog, since every image will be pulled from the 
+That means that your shiny new Kubernetes cluster is now a bandwidth hog, since every image will be pulled from the
 Internet on every Node it runs on, with no reuse.
 
-This is due to the way the Docker "client" implements `--registry-mirror`, it only ever contacts mirrors for images 
+This is due to the way the Docker "client" implements `--registry-mirror`, it only ever contacts mirrors for images
 with no repository reference (eg, from DockerHub).
-When a repository is specified `dockerd` goes directly there, via HTTPS (and also via HTTP if included in a 
+When a repository is specified `dockerd` goes directly there, via HTTPS (and also via HTTP if included in a
 `--insecure-registry` list), thus completely ignoring the configured mirror.
 
 ### Docker itself should provide this.
@@ -315,7 +317,7 @@ Yeah. Docker Inc should do it. So should NPM, Inc. Wonder why they don't. 😼
 ### TODO:
 
 - [x] Basic Docker-for-Mac set-up instructions
-- [x] Basic Docker-for-Windows set-up instructions. 
+- [x] Basic Docker-for-Windows set-up instructions.
 - [ ] Test and make auth work with quay.io, unfortunately I don't have access to it (_hint, hint, quay_)
 - [x] Hide the mitmproxy building code under a Docker build ARG.
 - [ ] "Developer Office" proxy scenario, where many developers on a fast LAN share a proxy for bandwidth and speed savings (already works for pulls, but messes up pushes, which developers tend to use a lot)
diff --git a/entrypoint.sh b/entrypoint.sh
index 32ba88a..e5c1370 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -38,6 +38,30 @@ else
     echo "Not using resolver config, keep existing '$confpath' -- mounted by user?"
 fi
 
+# Generate nginx upstream blocks into a file. Functions similarly to /etc/hosts entries, but with round-robin selection across the listed servers.
+# e.g. when UPSTREAM_MAPPINGS="registry1=10.0.1.10:443,10.0.1.11 registry2=5.0.1.10", the following file is generated:
+# upstream registry1 {
+#   server 10.0.1.10:443;
+#   server 10.0.1.11;
+# }
+# upstream registry2 {
+#   server 5.0.1.10;
+# }
+echo -n "" > /etc/nginx/upstreams.conf
+
+if [ -n "$UPSTREAM_MAPPINGS" ]; then
+
+  for UPSTREAM in ${UPSTREAM_MAPPINGS}; do
+    echo "upstream ${UPSTREAM%=*} {" >> /etc/nginx/upstreams.conf
+    comma_separated_hosts="${UPSTREAM#*=}"
+    hosts=$(echo "$comma_separated_hosts" | tr ',' ' ')
+    for host in ${hosts}; do
+      echo -e "\tserver $host;" >> /etc/nginx/upstreams.conf
+    done
+    echo "}" >> /etc/nginx/upstreams.conf
+  done
+fi
+
 # The list of SAN (Subject Alternative Names) for which we will create a TLS certificate.
 ALLDOMAINS=""
 
diff --git a/nginx.conf b/nginx.conf
index 897628e..bf0142e 100644
--- a/nginx.conf
+++ b/nginx.conf
@@ -78,6 +78,9 @@ http {
 
     gzip off;
 
+    # Entrypoint generates the upstreams.conf config.
+    include /etc/nginx/upstreams.conf;
+
     # Entrypoint generates the proxy_cache_path here, so it is configurable externally.
     include /etc/nginx/conf.d/cache_max_size.conf;
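
Example usage (not part of the patch): a minimal sketch of starting the proxy with `UPSTREAM_MAPPINGS` set, modelled on the `docker run` examples in the project README. The hostname `registry.internal.example`, the IP addresses, and the image name/tag are placeholders/assumptions; adjust them to your environment.

```bash
# Sketch only: hostname, IPs and image tag below are placeholders.
docker run --rm --name docker_registry_proxy -it \
       -p 0.0.0.0:3128:3128 -e ENABLE_MANIFEST_CACHE=true \
       -v $(pwd)/docker_mirror_cache:/docker_mirror_cache \
       -v $(pwd)/docker_mirror_certs:/ca \
       -e REGISTRIES="registry.internal.example k8s.gcr.io gcr.io quay.io" \
       -e UPSTREAM_MAPPINGS="registry.internal.example=10.0.1.10:443,10.0.1.11" \
       rpardini/docker-registry-proxy:0.6.2
```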
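
To confirm what the entrypoint generated, one way is to inspect the file inside the running container and ask nginx to validate the full configuration; the container name here assumes the sketch above.

```bash
# Show the generated upstream blocks (one "upstream <name> { ... }" per mapping).
docker exec docker_registry_proxy cat /etc/nginx/upstreams.conf
# Parse and validate the complete nginx configuration, including the new include.
docker exec docker_registry_proxy nginx -t
```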