diff --git a/README.md b/README.md index 740b85a..0559d14 100644 --- a/README.md +++ b/README.md @@ -44,9 +44,8 @@ Connection: keep-alive ## @todo -- Use default configurations for nodeapp, but replace if user place a custom -- Use default configurations for varnish, but replace if user place a custom -- Use default configurations for nginx, but replace if user place a custom +- Implement SSL +- Implement Crawler via proxies ## Troubleshot diff --git a/configuration/default/crawler/configuration.json b/configuration/default/crawler/configuration.json index 4db9dc8..49c6076 100644 --- a/configuration/default/crawler/configuration.json +++ b/configuration/default/crawler/configuration.json @@ -1,5 +1,8 @@ { + "Cache-Control": "public, max-age=2592001", "debug": true, + "proxyList": [ + ], "allowedDomains": [ "cdn.fititnt.org", "alligo.com.br" diff --git a/configuration/default/nginx/conf.d/default.conf b/configuration/default/nginx/conf.d/default.conf index 19ae7c9..f3a2c67 100644 --- a/configuration/default/nginx/conf.d/default.conf +++ b/configuration/default/nginx/conf.d/default.conf @@ -1,8 +1,19 @@ + +# Use nginx as a cache: 2048 MB shared-memory key zone (add max_size= to cap disk usage), drop entries not accessed for 7 days +proxy_cache_path /tmp/nginx levels=1:2 keys_zone=my_zone:2048m inactive=7d; +#proxy_cache_key "$scheme$request_method$host$request_uri"; +proxy_cache_key "$request_method$host$request_uri"; + +proxy_cache_valid 200 302 30d; +proxy_cache_valid 404 1m; +proxy_cache_valid 401 1m; + server { listen 80; server_name localhost; location / { + proxy_cache my_zone; #proxy_pass http://127.0.0.1:8080; # Varnish, bug, disabled for now proxy_pass http://127.0.0.1:8888; # Crawler proxy_http_version 1.1; diff --git a/configuration/default/nginx/nginx.conf b/configuration/default/nginx/nginx.conf index e4bad8d..fa0f077 100644 --- a/configuration/default/nginx/nginx.conf +++ b/configuration/default/nginx/nginx.conf @@ -19,7 +19,9 @@ http { '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" 
"$http_x_forwarded_for"'; - access_log /var/log/nginx/access.log main; + # Access logging is disabled + access_log off; + #access_log /var/log/nginx/access.log main; sendfile on; #tcp_nopush on; diff --git a/crawler/index.js b/crawler/index.js index a15c508..d76c4a9 100644 --- a/crawler/index.js +++ b/crawler/index.js @@ -112,7 +112,19 @@ function returnInvalidObject(remoteUrl, res) { */ function returnObject(remoteUrl, res) { Conf.debug && console.log("DEBUG: requested " + remoteUrl); - Request(remoteUrl).pipe(res); + + // Proxies are not tested... yet; a per-call wrapper keeps the shared Request binding untouched + var proxiedRequest = Request; + if (Conf.proxyList && Conf.proxyList.length) { + proxiedRequest = Request.defaults({'proxy': Conf.proxyList[Math.floor(Math.random() * Conf.proxyList.length)]}); + } + + proxiedRequest(remoteUrl).on('response', function (upstream) { + upstream.headers['x-cdn'] = 'alligo'; + delete upstream.headers['expires']; + delete upstream.headers['pragma']; + upstream.headers["Cache-Control"] = Conf["Cache-Control"]; + }).pipe(res); } http.createServer(forEachRequest).listen(PORT, function () {