diff --git a/bin/tor2web.sh b/bin/tor2web.sh new file mode 100755 index 00000000..0921df3f --- /dev/null +++ b/bin/tor2web.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +tor2web=./bin/tor2web +#tor2web=tor2web + +datadir=/run/user/$UID/tor2web-data + +if ! [ -e tor2web.conf ]; then + cp -v tor2web.conf.example tor2web.conf +fi + +mkdir -p $datadir +mkdir -p $datadir/certs +mkdir -p $datadir/logs +mkdir -p $datadir/run + +touch $datadir/certs/tor2web-key.pem +touch $datadir/certs/tor2web-cert.pem + +if ! [ -e $datadir/templates/ ]; then + cp -r data/templates/ $datadir/ +fi + +if [ -e $datadir/run/rpc.socket ]; then + rm $datadir/run/rpc.socket +fi + +exec $tor2web -c tor2web.conf --rundir $datadir/run --pidfile $datadir/tor2web.pid --nodaemon diff --git a/tor2web.conf.example b/tor2web.conf.example new file mode 100644 index 00000000..06d77021 --- /dev/null +++ b/tor2web.conf.example @@ -0,0 +1,121 @@ +# Tor2web configuration file +[main] + +listen_port_http = 1582 +listen_port_https = 15443 + +# Unique nodename identifier +# nodename = [UNIQUE_IDENTIFIER] +# nodename = localhost + +# Path to Tor2web data directory +# datadir = /home/tor2web +datadir = /run/user/1000/tor2web-data + +# Debug and logging +# logreqs = False +# debugmode = False +# debugtostdout = False +logreqs = True +debugmode = True +debugtostdout = True + +# Processes (suggested number of cores + 1) +# processes = 5 +# requests_per_process = 100000 +# processes = 1 + +# Ip addresses and ports +# transport = BOTH +# listen_ipv4 = [LISTENING_IPV4_ADDRESS] +# listen_ipv6 = [LISTENING_IPV6_ADDRESS] +# listen_port_http = 80 +# listen_port_https = 443 + +# This is the base hostname for the current tor2web node +# basehost = AUTO +# basehost = localhost + +# This is the SOCKS host and port on which Tor is listening +# sockshost = 127.0.0.1 +# socksport = 9050 +# socksoptimisticdata = True +# sockmaxpersistentperhost = 5 +# sockcachedconnectiontimeout = 240 +# sockretryautomatically = True + +# SSL 
configuration + +# TODO +disable_ssl = True + +# TODO +inject_header = False + +# ssl_key = /home/tor2web/certs/tor2web-key.pem +# ssl_cert = /home/tor2web/certs/tor2web-cert.pem + +# BE SURE TO CONFIGURE THE INTERMEDIATE CA OR YOUR WEB BROWSER WILL RESPOND +# WITH VERY LOUD WARNINGS AND ERRORS +# ssl_intermediate = /home/tor2web/certs/tor2web-intermediate.pem +# TO GENERATE DH Parameters: +# $ cd /home/tor2web/certs/ +# $ openssl dhparam -out tor2web-dh.pem 2048 + +# ssl_dh = /home/tor2web/certs/tor2web-dh.pem +# cipher_list = ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:DHE-RSA-AES128-SHA +# ssl_tofu_cache_size = 100 + +# Access Blocking +# mode = BLOCKLIST +# onion = None +# blockhotlinking = True +# blockhotlinking_exts = [jpg, png, gif] + + +# Add special HTTP headers +# This option makes it possible to add headers to responses sent to clients. +# Do NOT use this unless you are positive you need it. 
+# extra_HTTP_headers_to_response = [ 'Cache-Control: max-age=600', 'Surrogate-Control: max-age=86400' ] + +# Disable the automagical redirect of Tor users to Tor hidden services +# disable_tor_redirection = False + +# If set to True, disables the tor2web disclaimer +# disable_disclaimer = False +disable_disclaimer = True + +# If set to True, disables the tor2web banner +# disable_banner = False + +# If set to True, avoids rewriting visible data (experimental; will result +# in a less functional proxy). Could be useful in relation to DMCA for US law only +# avoid_rewriting_visible_content = True +avoid_rewriting_visible_content = False + +# Mail configuration for automatic exception and user abuse notifications +# smtpuser = [USERNAME] +# smtppass = [PASSWORD] +# smtpmail = [EMAIL] +# smtpmailto_exceptions = [EMAIL_FOR_ABUSES_EXCEPTIONS] +# smtpmailto_notifications = [EMAIL_FOR_ABUSES_NOTIFICATION] +# smtpdomain = [DOMAIN] +# smtpport = [PORT] + +# Exit nodes list refresh period (in seconds) +# exit_node_list_refresh = 600 + +# Enables the automatic fetching of the hashed blocklist +# automatic_blocklist_updates_source = https://ahmia.fi/bannedMD5.txt +# automatic_blocklist_updates_refresh = 600 +# automatic_blocklist_updates_mode = MERGE + +# This publishes the blocklist, which will be available at: /antanistaticmap/lists/blocklist +# publish_blocklist = False + +# List of mirrors shown in Tor2web disclaimer and banner +# An updated list of known mirrors can be found at: https://github.com/globaleaks/tor2web/wiki +# mirror = [tor2web.org, mirror2.tld, mirror3.tld, ...] 
+ +# This allows Tor2web to make use of a simple TCP proxies +# dummyproxy = https://127.0.0.1:8080 diff --git a/tor2web/t2w.py b/tor2web/t2w.py index 8099c28b..a2213fec 100644 --- a/tor2web/t2w.py +++ b/tor2web/t2w.py @@ -22,6 +22,8 @@ import signal import socket from cgi import parse_header +# TODO https://pypi.org/project/legacy-cgi/ +#from legacy_cgi import parse_header from functools import partial from io import BytesIO from random import choice @@ -36,7 +38,7 @@ from twisted.internet.task import LoopingCall from twisted.protocols.policies import WrappingFactory from twisted.python import log, logfile -from twisted.python.compat import networkString, intToBytes +from twisted.python.compat import networkString from twisted.python.failure import Failure from twisted.python.filepath import FilePath from twisted.spread import pb @@ -200,7 +202,8 @@ def processExited(self, reason): def spawnT2W(father, childFDs, fds_https, fds_http): child_env = os.environ.copy() - child_env['T2W_FDS_HTTPS'] = fds_https + # TODO if not config.disable_ssl: + #child_env['T2W_FDS_HTTPS'] = fds_https child_env['T2W_FDS_HTTP'] = fds_http return reactor.spawnProcess(T2WPP(father, childFDs, fds_https, fds_http), @@ -258,6 +261,10 @@ def __init__(self, streamfunction, finished): self._streamfunction = streamfunction def dataReceived(self, data): + # FIXME data is gzip compressed + #print("BodyStreamer dataReceived", "data", len(data), repr(data)) + print("BodyStreamer dataReceived", "data", len(data), data.hex()) + #print("BodyStreamer dataReceived", "len(data)", len(data)) self._streamfunction(data) def connectionLost(self, reason): @@ -309,7 +316,7 @@ def __init__(self, reactor, def request(self, method, uri, headers, bodyProducer=None): for key, values in headers.getAllRawHeaders(): - fixed_values = [re_sub(rexp['w2t'], b'http://\2.onion', value) for value in values] + fixed_values = [re_sub(rexp['w2t'], rb'http://\2.onion', value) for value in values] headers.setRawHeaders(key, 
fixed_values) return client.Agent.request(self, method, uri, headers, bodyProducer) @@ -474,25 +481,44 @@ def add_banner(self, banner, data): """ return data.group(1) + banner + # TODO refactor handleFixPart and handleFixEnd + def handleFixPart(self, data): + print("handleFixPart", "self.obj.server_response_is_gzip", repr(self.obj.server_response_is_gzip)) if self.obj.server_response_is_gzip: data = self.unzip(data) data = self.stream + data + #print("handleFixPart", "len(data)", len(data)) + print("handleFixPart", "data", len(data), repr(data)) + print("handleFixPart", "config.bufsize", config.bufsize) + # handleFixPart config.bufsize 4096 + if len(data) >= config.bufsize * 2: if self.obj.special_content == 'HTML': + # TODO case-insensitive search for "click me + # TODO maybe use a library to sanitize html + data = re_sub(b"", b"", data) data = re_sub(rexp['html_t2w'], - b'\1\2' + self.proto + b'\3.' + self.var['basehost'].encode('utf-8') + self.port.encode( - 'utf-8') + b'\4', data) + rb'\1\2' + self.proto + rb'\3.' + self.var['basehost'].encode('utf-8') + self.port.encode( + 'utf-8') + rb'\4', data) else: + print("handleFixPart not rewriting_visible_content") data = re_sub(rexp['t2w'], - self.proto + b'\2.' + self.var['basehost'].encode('utf-8') + self.port.encode('utf-8'), + self.proto + rb'\2.' + self.var['basehost'].encode('utf-8') + self.port.encode('utf-8'), data) if len(data) >= config.bufsize * 2: @@ -503,24 +529,46 @@ def handleFixPart(self, data): self.stream = data def handleFixEnd(self, data): + + #print("handleFixEnd", "len(data)", len(data)) # 0 + + #print("handleFixEnd", "self.obj.server_response_is_gzip", repr(self.obj.server_response_is_gzip)) + if self.obj.server_response_is_gzip: data = self.unzip(data, True) data = self.stream + data - if len(data) >= config.bufsize * 2: + print("handleFixEnd data", len(data), repr(data)) + + # len(data) == 5628 + # config.bufsize * 2 == 2 * 4096 == 8192 + + #if len(data) >= config.bufsize * 2: # why ?! 
+ if True: if self.obj.special_content == 'HTML': + # TODO case-insensitive search for "", b"", data) - if config.avoid_rewriting_visible_content and self.obj.special_content == 'HTML': data = re_sub(rexp['html_t2w'], - b'\1\2' + self.proto + b'\3.' + self.var['basehost'].encode('utf-8') + self.port.encode( - 'utf-8') + b'\4', data) + rb'\1\2' + self.proto + rb'\3.' + self.var['basehost'].encode('utf-8') + self.port.encode( + 'utf-8') + rb'\4', data) else: + print("handleFixEnd not rewriting_visible_content") data = re_sub(rexp['t2w'], - self.proto + b'\2.' + self.var['basehost'].encode('utf-8') + self.port.encode('utf-8'), + self.proto + rb'\2.' + self.var['basehost'].encode('utf-8') + self.port.encode('utf-8'), data) self.forwardData(self.handleCleartextForwardPart(data, True), True) @@ -542,6 +590,7 @@ def handleCleartextForwardPart(self, data, end=False): return data def handleForwardPart(self, data): + print("handleForwardPart", "self.obj.server_response_is_gzip", repr(self.obj.server_response_is_gzip)) if self.obj.server_response_is_gzip: data = self.handleGzippedForwardPart(data) else: @@ -550,6 +599,7 @@ def handleForwardPart(self, data): self.forwardData(data) def handleForwardEnd(self, data): + print("handleForwardEnd", "self.obj.server_response_is_gzip", repr(self.obj.server_response_is_gzip)) if self.obj.server_response_is_gzip: data = self.handleGzippedForwardPart(data, True) else: @@ -580,10 +630,10 @@ def writeContent(self, data): self.setHeader(b'content-encoding', b'gzip') data = self.zip(data, True) - self.setHeader(b'content-length', intToBytes(len(data))) + self.setHeader(b'content-length', b'%d' % len(data)) self.write(data) else: - self.setHeader(b'content-length', intToBytes(0)) + self.setHeader(b'content-length', b'0') self.finish() @@ -606,13 +656,19 @@ def unzip(self, data, end=False): try: if self.decoderGzip is None: + print("unzip zlib.decompressobj ...") self.decoderGzip = zlib.decompressobj(16 + zlib.MAX_WBITS) + print("unzip 
zlib.decompressobj ok") if data: + print("unzip self.decoderGzip.decompress ...") data1 = self.decoderGzip.decompress(data) + print(f"unzip self.decoderGzip.decompress ok: -> {repr(data1)}") if end: + print("unzip self.decoderGzip.flush ...") data2 = self.decoderGzip.flush() + print(f"unzip self.decoderGzip.flush ok: -> {repr(data2)}") except: pass @@ -706,6 +762,8 @@ def process(self): elif len(config.mirror) == 1: self.var['mirror'] = config.mirror[0] + # TODO if not config.disable_ssl + """ # we serve contents only over HTTPS if not self.isSecure() and (config.transport != 'HTTP'): if config.listen_port_https == 443: @@ -716,6 +774,7 @@ def process(self): self.finish() return + """ # check if the user is using Tor self.obj.client_ip = self.getClientIP() @@ -928,12 +987,16 @@ def process(self): self.proxy_d.addErrback(self.handleError) def cbResponse(self, response): + self.proxy_response = response if 600 <= int(response.code) <= 699: + print("cbResponse", "error", response.code) self.setResponseCode(500) self.var['errorcode'] = int(response.code) - 600 return flattenString(self, templates['error_sock.tpl']).addCallback(self.writeContent) + print("cbResponse", "code", response.code, "length", response.length) + self.setResponseCode(response.code) self.processResponseHeaders(response.headers) @@ -941,10 +1004,13 @@ def cbResponse(self, response): # if there's no response, we're done. 
if not response.length: - self.setHeader(b'content-length', intToBytes(0)) + print("cbResponse", "no response") + self.setHeader(b'content-length', b'0') self.finish() return defer.succeed + print("cbResponse self.obj.special_content", repr(self.obj.special_content)) + finished = defer.Deferred() if self.obj.special_content: finished.addCallback(self.handleFixEnd) @@ -962,20 +1028,37 @@ def handleHeader(self, key, values): # in case of multiple occurrences we evaluate only the first valueLower = values[0].lower() + print("handleHeader", "keyLower", keyLower, "valueLower", valueLower) + + # handleHeader keyLower b'content-encoding' valueLower b'gzip' + if keyLower == b'transfer-encoding' and valueLower == b'chunked': # this header needs to be stripped return elif keyLower == b'content-encoding' and valueLower == b'gzip': + #print("handleHeader", "keyLower", keyLower, "valueLower", valueLower) self.obj.server_response_is_gzip = True # this header needs to be stripped return elif keyLower == b'content-type': + + print("handleHeader content-type", valueLower) + if valueLower.startswith(b'text/html'): self.obj.special_content = 'HTML' elif valueLower.startswith(b'application/javascript'): self.obj.special_content = 'JS' + + # TODO disable javascript + # TODO dont request js in the first place + self.responseHeaders.setRawHeaders(b"content-length", [b"0"]) + self.setResponseCode(500) + self.setHeader(b'content-length', b'0') + self.finish() + return defer.succeed + elif valueLower.startswith(b'text/css'): self.obj.special_content = 'CSS' elif valueLower.startswith(b'text/xml'): @@ -987,12 +1070,12 @@ def handleHeader(self, key, values): return elif keyLower == 'set-cookie': - values = [re_sub(rexp['set_cookie_t2w'], b'domain=\1.' + config.basehost.encode('utf-8') + b'\2', x) for x + values = [re_sub(rexp['set_cookie_t2w'], rb'domain=\1.' + config.basehost.encode('utf-8') + rb'\2', x) for x in values] else: values = [ - re_sub(rexp['t2w'], self.proto + b'\2.' 
+ config.basehost.encode('utf-8') + self.port.encode('utf-8'), + re_sub(rexp['t2w'], self.proto + rb'\2.' + config.basehost.encode('utf-8') + self.port.encode('utf-8'), x) for x in values] self.responseHeaders.setRawHeaders(key, values)