diff --git a/bin/tor2web.sh b/bin/tor2web.sh new file mode 100755 index 00000000..0921df3f --- /dev/null +++ b/bin/tor2web.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +tor2web=./bin/tor2web +#tor2web=tor2web + +datadir=/run/user/$UID/tor2web-data + +if ! [ -e tor2web.conf ]; then + cp -v tor2web.conf.example tor2web.conf +fi + +mkdir -p $datadir +mkdir -p $datadir/certs +mkdir -p $datadir/logs +mkdir -p $datadir/run + +touch $datadir/certs/tor2web-key.pem +touch $datadir/certs/tor2web-cert.pem + +if ! [ -e $datadir/templates/ ]; then + cp -r data/templates/ $datadir/ +fi + +if [ -e $datadir/run/rpc.socket ]; then + rm $datadir/run/rpc.socket +fi + +exec $tor2web -c tor2web.conf --rundir $datadir/run --pidfile $datadir/tor2web.pid --nodaemon diff --git a/tor2web.conf.example b/tor2web.conf.example new file mode 100644 index 00000000..06d77021 --- /dev/null +++ b/tor2web.conf.example @@ -0,0 +1,121 @@ +# Tor2web configuration file +[main] + +listen_port_http = 1582 +listen_port_https = 15443 + +# Unique nodename identifier +# nodename = [UNIQUE_IDENTIFIER] +# nodename = localhost + +# Path to Tor2web data directory +# datadir = /home/tor2web +datadir = /run/user/1000/tor2web-data + +# Debug and logging +# logreqs = False +# debugmode = False +# debugtostdout = False +logreqs = True +debugmode = True +debugtostdout = True + +# Processes (suggested number of cores + 1) +# processes = 5 +# requests_per_process = 100000 +# processes = 1 + +# Ip addresses and ports +# transport = BOTH +# listen_ipv4 = [LISTENING_IPV4_ADDRESS] +# listen_ipv6 = [LISTENING_IPV6_ADDRESS] +# listen_port_http = 80 +# listen_port_https = 443 + +# This is the base hostname for the current tor2web node +# basehost = AUTO +# basehost = localhost + +# This is the SOCKS host and port on which Tor is listening +# sockshost = 127.0.0.1 +# socksport = 9050 +# socksoptimisticdata = True +# sockmaxpersistentperhost = 5 +# sockcachedconnectiontimeout = 240 +# sockretryautomatically = True + +# SSL 
configuration + +# TODO +disable_ssl = True + +# TODO +inject_header = False + +# ssl_key = /home/tor2web/certs/tor2web-key.pem +# ssl_cert = /home/tor2web/certs/tor2web-cert.pem + +# BE SURE TO CONFIGURE THE INTERMEDIATE CA OR YOUR WEB BROWSER WILL RESPOND +# WITH VERY LOUD WARNINGS AND ERRORS +# ssl_intermediate = /home/tor2web/certs/tor2web-intermediate.pem +# TO GENERATE DH Parameters: +# $ cd /home/tor2web/certs/ +# $ openssl dhparam -out tor2web-dh.pem 2048 + +# ssl_dh = /home/tor2web/certs/tor2web-dh.pem +# cipher_list = ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA:DHE-DSS-AES256-SHA:DHE-RSA-AES128-SHA +# ssl_tofu_cache_size = 100 + +# Access Blocking +# mode = BLOCKLIST +# onion = None +# blockhotlinking = True +# blockhotlinking_exts = [jpg, png, gif] + + +# Add special HTTP headers +# This option makes it possible to add headers to responses sent to clients. +# Do NOT use this unless you are positive you need it. 
+# extra_HTTP_headers_to_response = [ 'Cache-Control: max-age=600', 'Surrogate-Control: max-age=86400' ] + +# Disable the automagical redirect of Tor users to Tor HSs +# disable_tor_redirection = False + +# If set to True will disable the tor2web disclaimer +# disable_disclaimer = False +disable_disclaimer = True + +# If set to True will disable the tor2web banner +# disable_banner = False + +# If set to True will avoid rewriting visible data (experimental; will result +# in a less functional proxy). Could be useful in relation to DMCA for US law only +# avoid_rewriting_visible_content = True +avoid_rewriting_visible_content = False + +# Mail configuration for automatic exception and user abuse notifications +# smtpuser = [USERNAME] +# smtppass = [PASSWORD] +# smtpmail = [EMAIL] +# smtpmailto_exceptions = [EMAIL_FOR_ABUSES_EXCEPTIONS] +# smtpmailto_notifications = [EMAIL_FOR_ABUSES_NOTIFICATION] +# smtpdomain = [DOMAIN] +# smtpport = [PORT] + +# Exit nodes list refresh period (in seconds) +# exit_node_list_refresh = 600 + +# Enables the automatic fetching of the hashed blocklist +# automatic_blocklist_updates_source = https://ahmia.fi/bannedMD5.txt +# automatic_blocklist_updates_refresh = 600 +# automatic_blocklist_updates_mode = MERGE + +# This publishes blocklist which will be available at::/antanistaticmap/lists/blocklist +# publish_blocklist = False + +# List of mirrors shown in Tor2web disclaimer and banner +# An updated list of known mirrors can be found at: https://github.com/globaleaks/tor2web/wiki +# mirror = [tor2web.org, mirror2.tld, mirror3.tld, ...] 
+ +# This allows Tor2web to make use of a simple TCP proxies +# dummyproxy = https://127.0.0.1:8080 diff --git a/tor2web/t2w.py b/tor2web/t2w.py index 8099c28b..a2213fec 100644 --- a/tor2web/t2w.py +++ b/tor2web/t2w.py @@ -22,6 +22,8 @@ import signal import socket from cgi import parse_header +# TODO https://pypi.org/project/legacy-cgi/ +#from legacy_cgi import parse_header from functools import partial from io import BytesIO from random import choice @@ -36,7 +38,7 @@ from twisted.internet.task import LoopingCall from twisted.protocols.policies import WrappingFactory from twisted.python import log, logfile -from twisted.python.compat import networkString, intToBytes +from twisted.python.compat import networkString from twisted.python.failure import Failure from twisted.python.filepath import FilePath from twisted.spread import pb @@ -200,7 +202,8 @@ def processExited(self, reason): def spawnT2W(father, childFDs, fds_https, fds_http): child_env = os.environ.copy() - child_env['T2W_FDS_HTTPS'] = fds_https + # TODO if not config.disable_ssl: + #child_env['T2W_FDS_HTTPS'] = fds_https child_env['T2W_FDS_HTTP'] = fds_http return reactor.spawnProcess(T2WPP(father, childFDs, fds_https, fds_http), @@ -258,6 +261,10 @@ def __init__(self, streamfunction, finished): self._streamfunction = streamfunction def dataReceived(self, data): + # FIXME data is gzip compressed + #print("BodyStreamer dataReceived", "data", len(data), repr(data)) + print("BodyStreamer dataReceived", "data", len(data), data.hex()) + #print("BodyStreamer dataReceived", "len(data)", len(data)) self._streamfunction(data) def connectionLost(self, reason): @@ -309,7 +316,7 @@ def __init__(self, reactor, def request(self, method, uri, headers, bodyProducer=None): for key, values in headers.getAllRawHeaders(): - fixed_values = [re_sub(rexp['w2t'], b'http://\2.onion', value) for value in values] + fixed_values = [re_sub(rexp['w2t'], rb'http://\2.onion', value) for value in values] headers.setRawHeaders(key, 
fixed_values) return client.Agent.request(self, method, uri, headers, bodyProducer) @@ -474,25 +481,44 @@ def add_banner(self, banner, data): """ return data.group(1) + banner + # TODO refactor handleFixPart and handleFixEnd + def handleFixPart(self, data): + print("handleFixPart", "self.obj.server_response_is_gzip", repr(self.obj.server_response_is_gzip)) if self.obj.server_response_is_gzip: data = self.unzip(data) data = self.stream + data + #print("handleFixPart", "len(data)", len(data)) + print("handleFixPart", "data", len(data), repr(data)) + print("handleFixPart", "config.bufsize", config.bufsize) + # handleFixPart config.bufsize 4096 + if len(data) >= config.bufsize * 2: if self.obj.special_content == 'HTML': + # TODO case-insensitive search for "
click me + # TODO maybe use a library to sanitize html + data = re_sub(b"