A fork of pappy proxy
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3661 lines
121 KiB

9 years ago
import StringIO
9 years ago
import base64
9 years ago
import bs4
9 years ago
import crochet
import datetime
import gzip
import json
9 years ago
import pygments
9 years ago
import re
9 years ago
import time
9 years ago
import urlparse
import zlib
9 years ago
import weakref
8 years ago
import Queue
9 years ago
8 years ago
from .util import PappyException, printable_data, short_data, PappyStringTransport, sha1, print_traceback
9 years ago
from .requestcache import RequestCache
9 years ago
from .colors import Colors, host_color, path_formatter
8 years ago
from .proxy import ProtocolProxy, make_proxied_connection, get_http_proxy_addr, start_maybe_tls
9 years ago
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_for_mimetype, HttpLexer
9 years ago
from twisted.internet import defer
8 years ago
from autobahn.twisted.websocket import WebSocketServerFactory, WebSocketClientFactory, WebSocketServerProtocol, WebSocketClientProtocol
from autobahn.websocket.protocol import WebSocketProtocol
from autobahn.websocket.compress import PERMESSAGE_COMPRESSION_EXTENSION
9 years ago
9 years ago
# Body encoding identifiers understood by _decode_encoded().
ENCODE_NONE = 0  # body is returned unchanged
ENCODE_DEFLATE = 1  # body is raw deflate data (zlib.decompress with wbits=-15)
ENCODE_GZIP = 2  # body is gunzipped; _decode_encoded() treats every other value this way too
9 years ago
# Path-style identifiers.
# NOTE(review): these are not referenced in the visible part of this file; their
# meaning is only suggested by the names -- confirm against the use sites.
PATH_RELATIVE = 0
PATH_ABSOLUTE = 1
PATH_HOST = 2
9 years ago
# Module-wide database pool: assigned by init() and closed by destruct().
dbpool = None
8 years ago
# Module-wide PappyWebServer instance: created by init(), used by handle_webserver_request().
web_server = None
9 years ago
def init(pool):
    """
    Set up the module-level state of the http module.

    On the first call the given pool becomes the shared ``dbpool`` and the
    internal web server object is created. If ``dbpool`` is already set, the
    existing state is kept.

    :param pool: The ConnectionPool to use to store the request/response objects
    :type pool: SQLite ConnectionPool
    """
    # Imported inside the function, as in the rest of this module's lazy imports
    from .site import PappyWebServer

    global dbpool, web_server

    if dbpool is None:
        dbpool = pool
        web_server = PappyWebServer()
    assert(dbpool)
def destruct():
    """
    Close the module-level database pool.

    Asserts that a pool has been set (see init()) before closing it.
    """
    assert(dbpool)
    dbpool.close()
9 years ago
def _decode_encoded(data, encoding):
    """
    Decode a message body according to one of the ENCODE_* identifiers.

    :param data: The encoded body
    :param encoding: ENCODE_NONE, ENCODE_DEFLATE or ENCODE_GZIP
    :returns: The decoded body. ``data`` itself is returned for ENCODE_NONE.
    """
    if encoding == ENCODE_NONE:
        return data
    if encoding == ENCODE_DEFLATE:
        # wbits=-15 means a raw deflate stream without the zlib header
        return zlib.decompress(data, -15)
    # Every remaining value is handled as gzip
    gz_reader = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data))
    return gz_reader.read()
9 years ago
def _strip_leading_newlines(string):
while (len(string) > 1 and string[0:2] == '\r\n') or \
(len(string) > 0 and string[0] == '\n'):
if len(string) > 1 and string[0:2] == '\r\n':
string = string[2:]
elif len(string) > 0 and string[0] == '\n':
string = string[1:]
return string
def _consume_line(instr):
# returns (line, rest)
l = []
pos = 0
while pos < len(instr):
if instr[pos] == '\n':
if l and l[-1] == '\r':
l = l[:-1]
return (''.join(l), instr[pos+1:])
l.append(instr[pos])
pos += 1
9 years ago
return (instr, '')
###################
## Functions to use
def get_request(url='', url_params=None):
    """
    get_request(url='', url_params=None)

    Create a request object that makes a GET request to the given url with the
    given url params.

    :param url: The url to request
    :type url: string
    :param url_params: Optional url parameters. They are applied with
        ``url_params.from_dict`` only when the dict is non-empty.
    :type url_params: dict
    :returns: The new request
    :rtype: Request
    """
    # The default is None instead of a shared mutable ``{}``; an empty dict and
    # None are both skipped below, so callers see no difference.
    r = Request()
    r.start_line = 'GET / HTTP/1.1'
    r.url = url
    r.headers['Host'] = r.host
    if url_params:
        r.url_params.from_dict(url_params)
    return r
def post_request(url, post_params=None, url_params=None):
    """
    post_request(url, post_params=None, url_params=None)

    Create a request object that makes a POST request to the given url with the
    given post and url params.

    :param url: The url to request
    :type url: string
    :param post_params: Optional post parameters, applied only when non-empty
    :type post_params: dict
    :param url_params: Optional url parameters, applied only when non-empty
    :type url_params: dict
    :returns: The new request
    :rtype: Request
    """
    # Defaults are None instead of shared mutable ``{}`` objects; empty dicts
    # and None are both skipped below, so callers see no difference.
    r = Request()
    r.start_line = 'POST / HTTP/1.1'
    r.url = url
    r.headers['Host'] = r.host
    if url_params:
        r.url_params.from_dict(url_params)
    if post_params:
        r.post_params.from_dict(post_params)
    return r
def repeatable_parse_qs(s):
    """
    Parse a ``&``-separated query string into a RepeatableDict.

    Each ``key=value`` piece is split at its first ``=``. A piece without any
    ``=`` is stored with the whole piece as the key and ``None`` as the value.
    Order and duplicate keys are kept.
    """
    parsed = RepeatableDict()
    for piece in s.split('&'):
        name, sep, value = piece.partition('=')
        # sep is empty when the piece had no '=' at all
        parsed.append(name, value if sep else None)
    return parsed
9 years ago
@crochet.wait_for(timeout=180.0)
@defer.inlineCallbacks
def request_by_id(reqid):
    """
    Load and return the request with the given id.

    The id is converted with ``str()`` before being passed to
    ``Request.load_request``. Wrapped with ``crochet.wait_for`` using a 180
    second timeout.
    """
    loaded = yield Request.load_request(str(reqid))
    defer.returnValue(loaded)
9 years ago
@defer.inlineCallbacks
def async_submit_requests(reqs, mangle=False, save=False, save_in_mem=False, unique_paths=False,
                          unique_path_and_args=False, use_cookie_jar=False, tag=None):
    """
    async_submit_requests(reqs, mangle=False)

    Copies and submits a list of requests at the same time asynchronously.
    Responses/unmangled versions are attached to the copies, not to the
    requests passed in. Prints progress to stdout.

    :param reqs: The requests to copy and submit
    :param mangle: Whether to pass the requests through intercepting macros
    :type mangle: Bool
    :param save: Deep-save every submitted request once all have finished
    :param save_in_mem: Add every submitted request to the in-memory history
        instead of saving it. Cannot be combined with ``save``.
    :param unique_paths: Only submit the first request seen for each path
    :param unique_path_and_args: Only submit the first request seen for each
        full url. Cannot be combined with ``unique_paths``.
    :param use_cookie_jar: Accepted for compatibility; not used by this function
    :param tag: If not None, a tag added to every submitted request
    :raises PappyException: If mutually exclusive options are combined
    :returns: A deferred that fires with the list of submitted requests
    """
    import sys
    from pappyproxy.plugin import add_to_history

    if unique_paths and unique_path_and_args:
        raise PappyException("Cannot use both unique_paths and unique_path_and_args")
    if save and save_in_mem:
        raise PappyException("Cannot use both save and save_in_mem")

    # Filter on the requests we were given, then copy what is left, so that the
    # caller's objects are never submitted or tagged.
    selected = list(reqs)
    if unique_paths or unique_path_and_args:
        endpoints = set()
        unique_reqs = []
        for r in selected:
            endpoint = r.url if unique_path_and_args else r.path
            if endpoint not in endpoints:
                endpoints.add(endpoint)
                unique_reqs.append(r)
        selected = unique_reqs
    to_submit = [r.copy() for r in selected]

    total = len(to_submit)
    print('Submitting %d request(s)' % total)
    sys.stdout.flush()

    # Start everything first so the requests run concurrently
    deferreds = []
    for r in to_submit:
        if tag is not None:
            r.tags.add(tag)
        deferreds.append(r.async_submit(mangle=mangle))

    # Really not the best way to do this. If one request hangs forever the whole thing will
    # just hang in the middle
    dones = 0
    errors = 0
    for d in deferreds:
        try:
            yield d
            dones += 1
        except Exception as e:
            # A failed request is reported and counted, it does not abort the batch
            errors += 1
            print(e)
        finished = dones + errors
        if finished % 30 == 0 or finished == total:
            percent = (float(finished)/total)*100
            if errors > 0:
                print('{0}/{1} complete with {3} errors ({2:.2f}%)'.format(finished, total, percent, errors))
            else:
                print('{0}/{1} complete ({2:.2f}%)'.format(finished, total, percent))
            sys.stdout.flush()

    if save:
        for r in to_submit:
            yield r.async_deep_save()
    elif save_in_mem:
        for r in to_submit:
            add_to_history(r)

    defer.returnValue(to_submit)
9 years ago
@crochet.wait_for(timeout=180.0)
@defer.inlineCallbacks
def submit_requests(*args, **kwargs):
    """
    Call async_submit_requests with the same arguments and return its result.

    Wrapped with ``crochet.wait_for`` using a 180 second timeout.
    """
    result = yield async_submit_requests(*args, **kwargs)
    defer.returnValue(result)
9 years ago
8 years ago
def apply_http_proxy_creds(req):
    """
    Copy the configured upstream HTTP proxy credentials onto a request.

    Does nothing when no http proxy is configured, or when the proxy config
    lacks either ``username`` or ``password``. Otherwise ``req.proxy_creds`` is
    set to a ``(username, password)`` tuple.
    """
    from pappyproxy import pappy

    proxy_conf = pappy.session.config.http_proxy
    if not proxy_conf:
        return
    if 'username' in proxy_conf and 'password' in proxy_conf:
        req.proxy_creds = (proxy_conf['username'], proxy_conf['password'])
@defer.inlineCallbacks
def handle_webserver_request(req):
    """
    Pass ``req`` to the module-level web server and wait for ``handle_request``
    to finish.
    """
    # web_server is only read here, so no ``global`` statement is required
    yield web_server.handle_request(req)
##########
## Classes
9 years ago
class RepeatableDict:
    """
    A dict that retains the order of items inserted and keeps track of
    duplicate values. Can optionally treat keys as case insensitive.
    Custom made for the proxy, so it has strange features
    """

    def __init__(self, from_pairs=None, case_insensitive=False):
        # If efficiency becomes a problem, add a dict that keeps a list by key
        # and use that for getting data. But until then, this stays.
        self._pairs = []
        # Effective keys currently present; kept in sync with _pairs so that
        # ``key in d`` is a set lookup
        self._keys = set()
        self._modify_callback = None
        self.case_insensitive = case_insensitive

        if from_pairs:
            for k, v in from_pairs:
                self.append(k, v)

    def _ef_key(self, key):
        # "effective key", returns key.lower() if we're case insensitive,
        # otherwise it returns the same key
        if self.case_insensitive:
            return key.lower()
        return key

    def _mod_callback(self):
        # Calls the modify callback if we have one
        if self._modify_callback:
            self._modify_callback()

    def __contains__(self, val):
        return self._ef_key(val) in self._keys

    def __getitem__(self, key):
        # Returns the value of the LAST pair stored for the key
        wanted = self._ef_key(key)
        for p in reversed(self._pairs):
            if self._ef_key(p[0]) == wanted:
                return p[1]
        raise KeyError(key)

    def __setitem__(self, key, val):
        # Replaces first instance of `key` and deletes the rest
        self.set_val(key, val)

    def __delitem__(self, key):
        # Raises KeyError (from set.remove) if the key is not present
        self._remove_key(key)
        wanted = self._ef_key(key)
        self._pairs = [p for p in self._pairs if self._ef_key(p[0]) != wanted]
        self._mod_callback()

    def __nonzero__(self):
        return bool(self._pairs)

    # Python 3 spelling of __nonzero__; harmless on Python 2
    __bool__ = __nonzero__

    def _add_key(self, key):
        self._keys.add(self._ef_key(key))

    def _remove_key(self, key):
        self._keys.remove(self._ef_key(key))

    def all_pairs(self):
        """
        A list of all the key/value pairs stored in the dictionary
        """
        return self._pairs[:]

    def append(self, key, val, do_callback=True):
        """
        append(key, val)
        Append a pair to the end of the dictionary. Will add a duplicate if the key already exists.
        """
        # Add a duplicate entry for key
        self._add_key(key)
        self._pairs.append((key, val))
        if do_callback:
            self._mod_callback()

    def set_val(self, key, val, do_callback=True):
        """
        set_val(key, val)
        Set a value in the dictionary. Will replace the first instance of the
        key with the value. If multiple values of the keys are already in the
        dictionary, the duplicates of the key will be removed and the first instance
        of the key will be replaced with the value. If the dictionary is case
        insensitive, it will maintain the original capitalization. This is the same
        behavior as assigning a value via ``d[key] = val``. If the key is not
        present, it will be added to the end of the dict.
        """
        new_pairs = []
        added = False
        wanted = self._ef_key(key)
        self._add_key(key)
        for p in self._pairs:
            if self._ef_key(p[0]) == wanted:
                if not added:
                    # only add the first instance
                    new_pairs.append((key, val))
                    added = True
            else:
                new_pairs.append(p)
        if not added:
            new_pairs.append((key, val))
        self._pairs = new_pairs

        if do_callback:
            self._mod_callback()

    def update(self, key, val, do_callback=True):
        # If key is already in the dict, replace that value with the new value,
        # keeping the capitalization of the key that is already stored
        if key in self:
            wanted = self._ef_key(key)
            for k, v in self.all_pairs():
                if self._ef_key(k) == wanted:
                    self.set_val(k, val, do_callback=do_callback)
                    break
        else:
            self.set_val(key, val, do_callback=do_callback)

    def clear(self, do_callback=True):
        """
        clear()
        Remove all key/value pairs from the dictionary
        """
        self._pairs = []
        # The key set must be emptied too, otherwise ``key in d`` keeps
        # returning True for keys that were just removed
        self._keys = set()
        if do_callback:
            self._mod_callback()

    def all_vals(self, key):
        """
        all_vals(key)
        Return all the values associated with a given key
        """
        wanted = self._ef_key(key)
        return [p[1] for p in self._pairs if self._ef_key(p[0]) == wanted]

    def add_pairs(self, pairs, do_callback=True):
        """
        add_pairs(pairs)
        Add a list of pairs to the dictionary.

        :param pairs: The list of key/value pairs to add
        :type pairs: List of tuples of length 2
        """
        # Materialize first so that one-shot iterables are not exhausted by
        # the key pass before the pairs themselves are stored
        pairs = list(pairs)
        for pair in pairs:
            self._add_key(pair[0])
        self._pairs += pairs
        if do_callback:
            self._mod_callback()

    def from_dict(self, d, do_callback=True):
        """
        from_dict(d)
        Set the RepeatableDict to contain the same items as a normal dictionary.

        :param d: The dictionary to use
        :type d: dict
        """
        self._pairs = list(d.items())
        # Rebuild the key set from scratch: previous keys are gone and the new
        # ones must be visible to ``key in d``
        self._keys = set(self._ef_key(p[0]) for p in self._pairs)
        if do_callback:
            self._mod_callback()

    def sort(self):
        """
        sort()
        Sort the dictionary by the key. Requires that all keys can be compared
        to each other
        """
        # Sorts pairs by key alphabetically. Does not call the modify callback.
        self._pairs = sorted(self._pairs, key=lambda x: x[0])

    def set_modify_callback(self, callback):
        # Add a function to be called whenever an element is added, changed, or
        # deleted. Set to None to remove
        self._modify_callback = callback
class LengthData:
    """
    Collects a message body of a known length.

    ``length`` is the expected number of body units. ``None`` and ``0`` mean
    there is no body, so the object is complete right away. ``-1`` means the
    length is unknown: every chunk passed to add_data is appended and the
    object always reports itself as complete.
    """

    def __init__(self, length=None):
        self.body = ''
        self.complete = False
        self.length = length or 0

        if self.length == 0 or self.length == -1:
            self.complete = True

    def add_data(self, data):
        """
        Append ``data`` to the body.

        For a fixed length, anything beyond the expected length is discarded
        and ``complete`` becomes True once the length is reached.

        :raises PappyException: If the body is already complete and the length
            is not -1
        """
        if self.complete and self.length != -1:
            raise PappyException("Data already complete!")

        if self.length == -1:
            self.body += data
            # The original assigned a throwaway local named ``complete`` here
            self.complete = True
            return

        remaining_length = self.length-len(self.body)
        if len(data) >= remaining_length:
            self.body += data[:remaining_length]
            assert(len(self.body) == self.length)
            self.complete = True
        else:
            self.body += data
9 years ago
class ChunkedData:
    """
    Incremental decoder for a chunked message body.

    Raw data is fed in through add_data. Once the zero-length chunk has been
    read, ``complete`` is True and ``body`` holds the joined chunk contents.
    Until then ``body`` stays empty and the decoded characters seen so far are
    in ``unchunked_data``.
    """

    def __init__(self):
        self.body = ''
        self.complete = False
        self.unchunked_data = []
        # Every raw character received so far and the scan position in it
        self._body = []
        self._pos = 0
        self._state = 0 # 0=reading length, 1=reading data, 2=going over known string
        self._next_state = 0
        self._len_str = ''
        self._chunk_remaining = 0
        self._known_str = ''
        self._known_str_pos = 0

    def add_data(self, data):
        """
        Store more raw data and parse as far as possible.
        """
        self._body.extend(data)
        self.scan_forward()

    def scan_forward(self):
        """
        Run the state machine over the raw data that has not been parsed yet.

        :raises Exception: If the chunk length line or the expected line
            endings are malformed
        """
        # Don't add more data if we're already done
        if self.complete:
            return

        while self._pos < len(self._body):
            ch = self._body[self._pos]

            if self._state == 0:
                # Reading the hex chunk length
                if ch.lower() in '0123456789abcdef':
                    self._len_str += ch
                    self._pos += 1
                    continue
                if ch != '\r':
                    raise Exception("Malformed chunked encoding!")

                # The \r ends the length, save how much chunk to read
                self._chunk_remaining = int(self._len_str, 16)
                if self._chunk_remaining == 0:
                    # A zero length chunk means chunked encoding is done
                    self.complete = True
                    self.body = ''.join(self.unchunked_data)
                    return

                # Expect the \n after the \r, then read the chunk data
                self._known_str = '\n'
                self._state = 2
                self._next_state = 1
                self._len_str = ''
                self._pos += 1

            elif self._state == 1:
                if self._chunk_remaining > 0:
                    # Take the next character of chunk data
                    self.unchunked_data.append(ch)
                    self._chunk_remaining -= 1
                    self._pos += 1
                else:
                    # Chunk finished: expect a newline, then a new length.
                    # The current character is not consumed here.
                    self._known_str = '\r\n'
                    self._next_state = 0
                    self._state = 2

            elif self._state == 2:
                # Walking over a string we expect to see
                if self._known_str[self._known_str_pos] != ch:
                    raise Exception("Unexpected data")
                self._known_str_pos += 1
                self._pos += 1
                # At the end of the expected string, go to the next state
                if self._known_str_pos == len(self._known_str):
                    self._known_str_pos = 0
                    self._state = self._next_state
class ResponseCookie(object):
    """
    A cookie representing a cookie set by a response

    :ivar key: The key of the cookie
    :type key: string
    :ivar val: The value of the cookie
    :type val: string
    :ivar expires: The value of the "expires" attribute
    :type expires: string
    :ivar max_age: The max age of the cookie
    :type max_age: int
    :ivar domain: The domain of the cookie
    :type domain: string
    :ivar path: The path of the cookie
    :type path: string
    :ivar secure: The secure flag of the cookie
    :type secure: Bool
    :ivar http_only: The httponly flag of the cookie
    :type http_only: Bool
    """

    def __init__(self, set_cookie_string=None):
        self._reset()
        if set_cookie_string:
            self._from_cookie(set_cookie_string)

    def _reset(self):
        # Put every field back to its "not set" value
        self.key = None
        self.val = None
        self.expires = None
        self.max_age = None
        self.domain = None
        self.path = None
        self.secure = False
        self.http_only = False

    @property
    def cookie_str(self):
        """
        Returns the full string of the cookie. ie ``foo=bar; secure; path=/``

        :getter: Returns the full string of the cookie.
        :setter: Set the metadata from a cookie string. ie from a ``Set-Cookie`` header
        """
        to_add = ['%s=%s' % (self.key, self.val)]
        if self.expires:
            to_add.append('expires=%s'%self.expires)
        # Compare against None: Max-Age=0 is a real value and must not be dropped
        if self.max_age is not None:
            to_add.append('Max-Age=%d'%self.max_age)
        if self.domain:
            to_add.append('Domain=%s'%self.domain)
        if self.path:
            to_add.append('Path=%s'%self.path)
        if self.secure:
            to_add.append('secure')
        if self.http_only:
            to_add.append('httponly')
        return '; '.join(to_add)

    @cookie_str.setter
    def cookie_str(self, val):
        self._from_cookie(val)

    def _parse_cookie_av(self, cookie_av):
        """
        Apply one attribute of a cookie string (``Path=/``, ``secure``, ...).
        Attribute names are matched case-insensitively and unknown attributes
        are ignored.

        :raises ValueError: If the value of ``Max-Age`` is not an integer
        """
        if '=' in cookie_av:
            key, val = cookie_av.split('=', 1)
            key = key.lstrip().lower()
            if key == 'expires':
                self.expires = val
            elif key == 'max-age':
                self.max_age = int(val)
            elif key == 'domain':
                self.domain = val
            elif key == 'path':
                self.path = val
        else:
            flag = cookie_av.lstrip().lower()
            if flag == 'secure':
                self.secure = True
            elif flag == 'httponly':
                self.http_only = True

    def _from_cookie(self, set_cookie_string):
        """
        Reset all fields, then fill them in from a ``Set-Cookie`` style string.
        """
        self._reset()

        # Everything before the first ';' is the name/value pair, the rest are
        # attributes. Without a ';' the whole string is the pair.
        cookie_pair, sep, rest = set_cookie_string.partition(';')

        # The pair is parsed the same way with or without attributes, so a bare
        # name such as ``foo`` no longer raises ValueError
        if '=' in cookie_pair:
            self.key, self.val = cookie_pair.split('=', 1)
        elif cookie_pair == '' or re.match(r'\s+', cookie_pair):
            self.key = ''
            self.val = ''
        else:
            self.key = cookie_pair
            self.val = ''

        if sep:
            for cookie_av in rest.split(';'):
                self._parse_cookie_av(cookie_av)
8 years ago
class WebSocketMessage(object):
    """
    A class representing a websocket message

    :ivar direction: ``'OUTGOING'`` or ``'INCOMING'`` (these strings are what
        the database code below checks for)
    :ivar contents: The payload of the message
    :ivar time_sent: datetime the message was sent, or None
    :ivar unmangled: The unmangled version of this message, or None
    :ivar parent_request: The request this message belongs to, or None
    :ivar is_binary: Whether the payload is binary
    :ivar msgid: The database id as a string, ``'--'`` when not saved
    """

    DIRECTION_OUTGOING = 0
    DIRECTION_INCOMING = 1

    TYPE_MESSAGE = 0

    def __init__(self, direction=None, contents=None, message_type=None, time_sent=None, is_binary=None):
        # The constructor arguments are now stored; arguments left as None give
        # the same state the constructor always produced.
        self.direction = direction
        self.contents = contents
        if message_type is None:
            message_type = WebSocketMessage.TYPE_MESSAGE
        self.message_type = message_type
        self.time_sent = time_sent
        self.unmangled = None
        self.parent_request = None
        self.is_unmangled_version = False
        self.is_binary = False if is_binary is None else is_binary
        self.msgid = '--'

    def __eq__(self, other):
        # Two messages are equal when payload and binary flag match
        return self.contents == other.contents and self.is_binary == other.is_binary

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__
        return not self.__eq__(other)

    def __copy__(self):
        new_msg = WebSocketMessage()
        new_msg.direction = self.direction
        new_msg.contents = self.contents
        new_msg.message_type = self.message_type
        new_msg.is_binary = self.is_binary
        return new_msg

    def copy(self):
        """
        Returns a copy with the same direction, contents, type and binary flag.
        The id, time sent, parent request and unmangled version are not copied.
        """
        return self.__copy__()

    def duplicate(self):
        """
        Like copy(), but also carries over the id and time sent, and
        duplicates the unmangled version if there is one.
        """
        m = self.copy()
        m.msgid = self.msgid
        m.time_sent = self.time_sent
        if self.unmangled:
            m.unmangled = self.unmangled.duplicate()
        return m

    @defer.inlineCallbacks
    def async_save(self, cust_dbpool=None):
        """
        Save the message to the database. Updates the existing row when
        ``msgid`` is an integer string, inserts a new row otherwise.

        :param cust_dbpool: Pool to use instead of the module-level ``dbpool``
        """
        from .pappy import main_context

        global dbpool
        if cust_dbpool:
            use_dbpool = cust_dbpool
        else:
            use_dbpool = dbpool
        assert(use_dbpool)

        if not self.msgid:
            self.msgid = '--'

        # Only the id check sits inside the try, so that a ValueError or
        # TypeError raised by the UPDATE itself is not mistaken for "not saved
        # yet" and silently turned into an INSERT
        try:
            # Check for intyness
            int(self.msgid)
        except (ValueError, TypeError):
            # Either no id or in-memory
            already_saved = False
        else:
            already_saved = True

        if already_saved:
            yield use_dbpool.runInteraction(self._update)
        else:
            yield use_dbpool.runInteraction(self._insert)
        assert(self.msgid is not None)

        main_context.cache_reset()

    @defer.inlineCallbacks
    def async_deep_save(self):
        """
        Save the unmangled version first (if any), then this message.
        """
        if self.unmangled:
            yield self.unmangled.async_deep_save()
        yield self.async_save()

    def _db_columns(self):
        # Returns the (column, value) pairs shared by _update and _insert
        if self.direction == 'OUTGOING':
            direction = 0
        elif self.direction == 'INCOMING':
            direction = 1
        else:
            direction = -1

        columns = [
            ('is_binary', self.is_binary),
            ('contents', self.contents),
            ('direction', direction),
        ]

        # Unmangled
        if self.unmangled:
            assert(self.unmangled.msgid is not None) # should be saved first
            columns.append(('unmangled_id', self.unmangled.msgid))

        # Message time, stored as seconds since 1970-01-01
        if self.time_sent:
            seconds = (self.time_sent-datetime.datetime(1970,1,1)).total_seconds()
            columns.append(('time_sent', seconds))

        # Parent request
        if self.parent_request:
            assert(self.parent_request.reqid)
            columns.append(('parent_request', self.parent_request.reqid))

        return columns

    def _update(self, txn):
        columns = self._db_columns()
        setnames = ['%s=?' % name for (name, _) in columns]
        queryargs = [value for (_, value) in columns]
        queryargs.append(self.msgid)
        txn.execute(
            """
            UPDATE websocket_messages SET %s WHERE id=?;
            """ % ','.join(setnames),
            tuple(queryargs)
        )

    def _insert(self, txn):
        columns = self._db_columns()
        colnames = [name for (name, _) in columns]
        colvals = [value for (_, value) in columns]
        txn.execute(
            """
            INSERT INTO websocket_messages (%s) VALUES (%s);
            """ % (','.join(colnames), ','.join(['?']*len(colvals))),
            tuple(colvals)
        )
        self.msgid = str(txn.lastrowid)
class WebSocketProxy(object):
    """
    Proxies a websocket connection by running an autobahn server protocol and
    an autobahn client protocol back to back.

    Messages received by the server protocol are wrapped as ``'OUTGOING'``
    WebSocketMessage objects and sent on through the client protocol. Messages
    received by the client protocol are wrapped as ``'INCOMING'`` and sent on
    through the server protocol. Both directions go through mangle_message.

    :param server_transport: Transport given to the server protocol
    :param client_transport: Transport given to the client protocol
    :param client_handshake: The raw http message replayed to the client
        protocol as the handshake response
    :param server_handshake: The raw http message replayed to the server
        protocol as the handshake; also the request that websocket messages
        are attached to
    :param parent_proxy: The http proxy object that supplies macros and the
        save_requests setting
    :param log_id: Optional id passed along to the proxy log function
    """

    def __init__(self, server_transport, client_transport, client_handshake, server_handshake, parent_proxy, log_id=None):
        self.message_callback = None
        self.close_callback = None
        self.log_id = log_id
        self.intercepting_macros = None
        self.parent_http_proxy = parent_proxy

        # The raw http messages from the handshake
        self.client_handshake = client_handshake
        self.server_handshake = server_handshake

        self.server_factory = WebSocketServerFactory()
        self.server_factory.protocol = WebSocketServerProtocol
        self.server_transport = server_transport

        self.client_factory = WebSocketClientFactory()
        self.client_factory.protocol = WebSocketClientProtocol
        self.client_transport = client_transport

        self._fake_handshake()

    def log(self, message, symbol='*', verbosity_level=3):
        """
        Write a message to the proxy log, tagged with log_id when one is set.
        """
        from pappyproxy.proxy import log

        if self.log_id:
            # Was ``id=log_id``, an undefined name that raised NameError
            # whenever a log id had been given
            log(message, id=self.log_id, symbol=symbol, verbosity_level=verbosity_level)
        else:
            log(message, symbol=symbol, verbosity_level=verbosity_level)

    def _build_server_protocol(self):
        self.server_protocol = self.server_factory.buildProtocol(None)
        self.server_protocol.makeConnection(self.server_transport)
        self._patch_server_protocol()

    def _build_client_protocol(self):
        """
        Builds the client protocol then returns the bytes written to the
        transport after creation
        """
        self.client_protocol = self.client_factory.buildProtocol(None)
        # Connect to a string transport first so that whatever the protocol
        # writes on connection is captured instead of sent
        tr = PappyStringTransport()
        self.client_protocol.makeConnection(tr)
        tval = tr.pop_value()
        self.client_protocol.transport = self.client_transport
        self._patch_client_protocol()
        return tval

    def _patch_server_protocol(self):
        self.log("Patching server protocol")
        self._add_hook(self.server_protocol, 'onMessage', '_on_server_message')

    def _patch_client_protocol(self):
        self.log("Patching client protocol")
        self._add_hook(self.client_protocol, 'onMessage', '_on_client_message')

    def _add_hook(self, c, their_method, my_method):
        """
        Make it so that c.their_method(*args, **kwargs) calls self.my_method(*args, **kwargs)
        before calling their method
        """
        old = getattr(c, their_method)

        def patched(*args, **kwargs):
            getattr(self, my_method)(*args, **kwargs)
            old(*args, **kwargs)

        setattr(c, their_method, patched)

    @staticmethod
    def _gen_sec_websocket_accept(token):
        """
        Compute the Sec-WebSocket-Accept value for a Sec-WebSocket-Key: the
        base64 of the sha1 of the key followed by the fixed magic string.
        """
        MAGIC_STRING = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
        concatenated = token + MAGIC_STRING
        hashed = sha1(concatenated)
        reencoded = base64.b64encode(hashed)
        return reencoded

    def _parse_extension_header(self, hd):
        """
        Parses the extension header and returns a list of offers

        :raises PappyException: If an offer for a known compression extension
            cannot be parsed
        """
        offers = []
        extensions = self.server_protocol._parseExtensionsHeader(hd)
        for (extension, params) in extensions:
            if extension in PERMESSAGE_COMPRESSION_EXTENSION:
                PMCE = PERMESSAGE_COMPRESSION_EXTENSION[extension]
                try:
                    offer = PMCE['Offer'].parse(params)
                    offers.append(offer)
                except Exception as e:
                    raise PappyException(str(e))
            else:
                # Unknown extensions are logged and left out of the offers
                self.log("Bad extension: %s" % extension)
        return offers

    def _set_up_ws_client(self, ws_server_factory, ws_client_factory, hs_req, hs_rsp):
        """
        Sets up a WebSocketClientProtocolFactory from a WebSocketServerProtocolFactory

        :param hs_req: Handshake message whose headers supply the websocket
            version and the extension offers
        :param hs_rsp: Not used by this method
        """
        self.log("Setting up websocket client")
        client_settings = {}

        # Common attributes
        skip_attrs = ('logFrames','trackTimings','logOctets')
        for attr in WebSocketProtocol.CONFIG_ATTRS_COMMON:
            if attr not in skip_attrs:
                client_settings[attr] = getattr(ws_server_factory, attr)

        def accept(response):
            # Accept whichever per-message compression the remote server chose.
            # Returns None for any other response type.
            from autobahn.websocket.compress import PerMessageDeflateResponse, \
                PerMessageDeflateResponseAccept, \
                PerMessageBzip2Response, \
                PerMessageBzip2ResponseAccept, \
                PerMessageSnappyResponse, \
                PerMessageSnappyResponseAccept

            if isinstance(response, PerMessageDeflateResponse):
                return PerMessageDeflateResponseAccept(response)
            elif isinstance(response, PerMessageBzip2Response):
                return PerMessageBzip2ResponseAccept(response)
            elif isinstance(response, PerMessageSnappyResponse):
                return PerMessageSnappyResponseAccept(response)

        # Client specific attributes
        client_settings['version'] = int(hs_req.headers['Sec-WebSocket-Version'])
        client_settings['acceptMaskedServerFrames'] = True
        client_settings['maskClientFrames'] = True
        #client_settings['serverConnectionDropTimeout'] = None

        # A handshake without the extensions header offers no extensions,
        # instead of failing with a KeyError
        if 'Sec-WebSocket-Extensions' in hs_req.headers:
            offers = self._parse_extension_header(hs_req.headers['Sec-WebSocket-Extensions'])
        else:
            offers = []
        client_settings['perMessageCompressionOffers'] = offers
        client_settings['perMessageCompressionAccept'] = accept

        # debug
        client_settings['openHandshakeTimeout'] = 8

        self.log("Setting client options: %s" % client_settings)
        ws_client_factory.setProtocolOptions(**client_settings)

    def _fake_handshake(self):
        """
        Replay the stored handshake messages into both protocols so that they
        consider the websocket connection open, without writing the generated
        handshake bytes to the real transports.
        """
        self.log("Performing fake handshake")

        # Set up our server listener
        self._build_server_protocol()
        # Swap in a string transport so the protocol's handshake reply is
        # captured rather than sent
        old_server_transport = self.server_protocol.transport
        self.server_protocol.transport = PappyStringTransport()
        self.log("Sending fake handshake response to server protocol:")
        self.log("pf> %s" % printable_data(self.server_handshake.full_message))
        self.server_protocol.dataReceived(self.server_handshake.full_message)
        tval = self.server_protocol.transport.pop_value()
        self.log("<fp %s" % printable_data(tval))
        assert tval
        self.server_protocol.transport = old_server_transport

        # Set up our client
        self._set_up_ws_client(self.server_factory,
                               self.client_factory,
                               self.server_handshake,
                               self.client_handshake)

        # Perform rest of client handshake
        tval = self._build_client_protocol()
        self.log("Getting handshake request start from client protocol")
        self.log("<fp %s" % printable_data(tval))

        # Revert transport handshake so that any data sent after our response is
        # sent to the real transport
        self.log("Reverting client transport")
        self.log("Sending fake handshake response to client protocol:")
        handshake_req = Request(tval)
        handshake_rsp = self.client_handshake.copy()

        # The client protocol generated its own key, so the accept header of
        # the stored response has to be recomputed to match it
        sec_ws_key = handshake_req.headers['Sec-WebSocket-Key']
        self.log("Generating Sec-WebSocket-Accept for %s" % sec_ws_key)
        sec_ws_accept = WebSocketProxy._gen_sec_websocket_accept(sec_ws_key)
        self.log("Sec-WebSocket-Accept = %s" % sec_ws_accept)
        handshake_rsp.headers['Sec-WebSocket-Accept'] = sec_ws_accept
        self.log("pf> %s" % printable_data(handshake_rsp.full_message))
        self.client_protocol.dataReceived(handshake_rsp.full_message) # Tell it whatever the remote server told us
        self.log("Compressed after handshake? %s" % self.client_protocol._perMessageCompress)
        self.log("Fake handshake complete")

    def _on_server_message(self, payload, is_binary):
        """
        Wrapper to convert arguments to onMessage into an object and call on_server_message
        """
        # Turn into object
        m = WebSocketMessage()
        m.direction = 'OUTGOING'
        m.contents = payload
        m.time_sent = datetime.datetime.utcnow()
        m.is_binary = is_binary
        self.log("rp< (websocket, binary=%s) %s" % (m.is_binary, m.contents))
        self.on_server_message(m)

    def _on_client_message(self, payload, is_binary):
        """
        Wrapper function for when the client protocol gets a data frame
        """
        # Turn into object
        m = WebSocketMessage()
        m.direction = 'INCOMING'
        m.contents = payload
        m.time_sent = datetime.datetime.utcnow()
        m.is_binary = is_binary
        self.log("cp> (websocket, binary=%s) %s" % (m.is_binary, m.contents))
        self.on_client_message(m)

    @defer.inlineCallbacks
    def mangle_message(self, message):
        """
        Pass a message through the mangle macros when the handshake request is
        in scope or custom macros are set, and attach the message that will be
        sent to the handshake request.

        :returns: A deferred firing with ``(message_to_send, dropped)``
        """
        from pappyproxy import context, macros

        # Mangle object
        sendreq = self.server_handshake
        custom_macros = self.parent_http_proxy.custom_macros
        mangle_macros = self.parent_http_proxy.mangle_macros
        client_protocol = self.parent_http_proxy.client_protocol
        save_requests = self.parent_http_proxy.save_requests

        dropped = False
        sendmsg = message
        if context.in_scope(sendreq) or custom_macros:
            self.log("Passing ws message through macros")
            if not custom_macros:
                mangle_macros = client_protocol.factory.get_macro_list()

            if save_requests:
                yield sendreq.async_deep_save()

            (mangmsg, mangled) = yield macros.mangle_websocket_message(message, sendreq, mangle_macros)
            if mangmsg is None:
                self.log("Message dropped")
                dropped = True
            elif mangled:
                self.log("Message mangled")
                # The unmangled version is never sent, so it has no send time
                sendmsg.time_sent = None
                mangmsg.unmangled = sendmsg
                sendmsg = mangmsg
            else:
                self.log("Message not modified")

            # NOTE(review): the nesting of this assignment was not recoverable
            # from the source; it is guarded so that a dropped message (None)
            # cannot raise here. Confirm that mangled messages should be
            # timestamped as well.
            if mangmsg is not None:
                mangmsg.time_sent = datetime.datetime.utcnow()

            if not dropped:
                sendreq.add_websocket_message(sendmsg)
                if save_requests:
                    yield sendreq.async_deep_save()
        else:
            sendreq.add_websocket_message(sendmsg)
            self.log("Request out of scope, passing ws message along unmangled")

        defer.returnValue((sendmsg, dropped))

    @defer.inlineCallbacks
    def on_server_message(self, message):
        """
        Wrapper function for when the server protocol gets a data frame
        """
        # Mangle object
        sendmsg, dropped = yield self.mangle_message(message)

        # Send data frame through client object to remote server
        if not dropped:
            self.send_server_message(sendmsg.contents, sendmsg.is_binary)

    @defer.inlineCallbacks
    def on_client_message(self, message):
        """
        Wrapper function for when the client protocol gets a data frame
        """
        sendmsg, dropped = yield self.mangle_message(message)

        # Send data frame through server object to the client
        if not dropped:
            self.send_client_message(sendmsg.contents, sendmsg.is_binary)

    def client_data_received(self, data):
        # Called when data is received from the client
        # Sends RAW DATA to server protocol. NOT frame data
        self.server_protocol.dataReceived(data)

    def server_data_received(self, data):
        # Called when data is received from the server
        # Sends RAW DATA to client protocol. NOT frame data
        self.client_protocol.dataReceived(data)

    def send_client_message(self, message_data, is_binary=False):
        # Write a message to the client over the actual transport
        self.server_protocol.sendMessage(message_data, is_binary)

    def send_server_message(self, message_data, is_binary=False):
        # Write a message to the remote server over the actual transport
        self.client_protocol.sendMessage(message_data, is_binary)

    def on_client_frame(self, frame_data, is_binary=False):
        # Passes the frame straight to the server protocol; no callback is
        # involved at the moment
        self.server_protocol.sendFrame(frame_data, is_binary)
9 years ago
9 years ago
class HTTPMessage(object):
"""
A base class which represents an HTTP message. It is used to implement
both requests and responses
:ivar complete: When loading data with
:func:`~pappyproxy.http.HTTPMessage.add_line` and
:func:`~pappyproxy.http.HTTPMessage.add_data`, returns whether the message
is complete
:vartype complete: bool
:ivar headers: Headers of the message
:vartype headers: RepeatableDict
:ivar headers_complete: When creating the message with
:func:`~pappyproxy.http.HTTPMessage.add_line` and
:func:`~pappyproxy.http.HTTPMessage.add_data`, returns whether the headers
are complete
:ivar start_line: The start line of the message
:vartype start_line: string
"""
9 years ago
9 years ago
reserved_meta_keys = ['full_message']
9 years ago
"""
Internal class variable. Do not modify.
"""
9 years ago
def __init__(self, full_message=None, update_content_length=False):
    """
    Create a message, optionally parsing it from its full text.

    :param full_message: Full text of the message to load, or None for an
        empty message
    :param update_content_length: Passed on to ``_from_full_message``
    """
    # clear() is what creates all the instance variables
    self.clear()

    self.metadata_unique_keys = tuple()
    if full_message is None:
        return
    self._from_full_message(full_message, update_content_length)
def __eq__(self, other):
    """
    Messages are equal when their full text matches and their metadata
    matches once the keys in ``self.metadata_unique_keys`` are left out.
    """
    if self.full_message != other.full_message:
        return False

    mine = self.get_metadata()
    theirs = other.get_metadata()
    # Keys marked as unique are removed from both sides before comparing
    for unique_key in self.metadata_unique_keys:
        for meta in (mine, theirs):
            if unique_key in meta:
                del meta[unique_key]
    return not (mine != theirs)
def __copy__(self):
if not self.complete:
raise PappyException("Cannot copy incomplete http messages")
retmsg = self.__class__(self.full_message)
9 years ago
retmsg.set_metadata(self.get_metadata(include_unique=False))
9 years ago
return retmsg
def copy(self):
"""
Returns a copy of the request
:rtype: Request
"""
return self.__copy__()
9 years ago
def deepcopy(self):
"""
Returns a deep copy of the message. Implemented by child.
9 years ago
NOINDEX
9 years ago
"""
return self.__deepcopy__()
9 years ago
def clear(self):
"""
Resets all internal data and clears the message
"""
self.complete = False
self.headers = RepeatableDict(case_insensitive=True)
self.headers_complete = False
self.malformed = False
self.start_line = ''
self.reset_metadata()
self._decoded = False
self._encoding_type = ENCODE_NONE
self._first_line = True
self._data_obj = None
self._end_after_headers = False
8 years ago
self._data_buffer = ''
9 years ago
9 years ago
def _from_full_message(self, full_message, update_content_length=False, meta=None):
# Set defaults for metadata
9 years ago
self.clear()
9 years ago
# Get rid of leading CRLF. Not in spec, should remove eventually
full_message = _strip_leading_newlines(full_message)
if full_message == '':
return
8 years ago
self.add_data(full_message, enforce_content_length=(not update_content_length))
9 years ago
if meta:
self.set_metadata(meta)
8 years ago
if update_content_length:
# If we have no body and didn't have a content-length header in the first
# place, don't add one
if 'Content-Length' in self.headers or len(self.body) > 0:
self.headers['Content-Length'] = str(len(self.body))
9 years ago
assert(self.complete)
###############################
## Properties/attribute setters
@property
def headers_section(self):
"""
The raw text of the headers including the extra newline at the end.
:getter: Returns the raw text of the headers including the extra newline at the end.
:type: string
"""
ret = ''
if self.start_line:
ret = self.start_line + '\r\n'
for k, v in self.headers.all_pairs():
ret = ret + "%s: %s\r\n" % (k, v)
if ret:
ret = ret + '\r\n'
return ret
@property
def headers_section_pretty(self):
"""
Same thing as :func:`pappyproxy.http.HTTPMessage.headers_section` except
that the headers are colorized for terminal printing.
"""
8 years ago
to_ret = printable_data(self.headers_section, colors=False)
9 years ago
to_ret = pygments.highlight(to_ret, HttpLexer(), TerminalFormatter())
return to_ret
@property
def body(self):
"""
The data portion of the message
:getter: Returns the data portion of the message
:setter: Set the data of the response and update metadata
:type: string
"""
if self._data_obj:
return self._data_obj.body
else:
return ''
@body.setter
def body(self, val):
self._data_obj = LengthData(len(val))
if len(val) > 0:
self._data_obj.add_data(val)
self._encoding_type = ENCODE_NONE
self.complete = True
self.update_from_body()
@property
def body_pretty(self):
"""
Same thing as :func:`pappy.http.HTTPMessage.body` but the output is
colorized for the terminal.
"""
8 years ago
if self.body == '':
return ''
to_ret = printable_data(self.body, colors=False)
9 years ago
if 'content-type' in self.headers:
try:
lexer = get_lexer_for_mimetype(self.headers['content-type'].split(';')[0])
to_ret = pygments.highlight(to_ret, lexer, TerminalFormatter())
except:
pass
return to_ret
@property
def full_message(self):
"""
The full message including the start line, headers, and body
"""
if self.headers_section == '':
return self.body
else:
return (self.headers_section + self.body)
@property
def full_message_pretty(self):
"""
Same as :func:`pappyproxy.http.HTTPMessage.full_message` except the
output is colorized
"""
8 years ago
if self.body:
return (self.headers_section_pretty + '\r\n' + self.body_pretty)
else:
return self.headers_section_pretty
9 years ago
###############
## Data loading
8 years ago
def add_line(self, line, enforce_content_length=True):
9 years ago
"""
Used for building a message from a Twisted protocol.
Add a line (for status line and headers). Lines must be added in order
and the first line must be the status line. The line should not contain
the trailing carriage return/newline. I do not suggest you use this for
anything.
:param line: The line to add
:type line: string
"""
assert(not self.headers_complete)
if not line and self._first_line:
return
if not line:
self.headers_complete = True
8 years ago
if not self._data_obj:
self._data_obj = LengthData(0)
9 years ago
if self._end_after_headers:
self.complete = True
return
8 years ago
if not enforce_content_length or 'content-length' not in self.headers:
if isinstance(self._data_obj, LengthData):
self._data_obj = LengthData(-1)
9 years ago
self.complete = self._data_obj.complete
self.headers_end()
return
if self._first_line:
self.handle_start_line(line)
self._first_line = False
else:
9 years ago
if ':' in line:
key, val = line.split(':', 1)
val = val.strip()
else:
key = line
val = None
9 years ago
if self.handle_header(key, val):
self.headers.append(key, val, do_callback=False)
8 years ago
def add_data(self, data, enforce_content_length=True):
9 years ago
"""
Used for building a message from a Twisted protocol.
Add data to the message. The data must conform to the content encoding
and transfer encoding given in the headers passed in to
:func:`~pappyproxy.http.HTTPMessage.add_line`. Can be any fragment of the data.
I do not suggest that you use this function ever.
:param data: The data to add
:type data: string
"""
8 years ago
assert not self.complete or not enforce_content_length
self._data_buffer += data
def popline(s):
parts = s.split('\n', 1)
if len(parts) == 1:
return (s, '')
line, rest = (parts[0], ''.join(parts[1:]))
if line[-1] == '\r':
line = line[:-1]
return (line, rest)
# Add headers from the data buffer if headers aren't complete
if not self.headers_complete:
while '\n' in self._data_buffer and not self.headers_complete:
line, self._data_buffer = popline(self._data_buffer)
self.add_line(line, enforce_content_length=enforce_content_length)
# Add the data section if the headers are complete and we have stuff left in the buffer
if self.headers_complete:
if self._data_buffer:
to_add = self._data_buffer
self._data_buffer = ''
self._data_obj.add_data(to_add)
if self._data_obj.complete:
self.complete = True
self.body_complete()
9 years ago
###############
## Data parsing
def handle_header(self, key, val):
"""
Called when a header is loaded into the message. Should not be called
outside of implementation.
:param key: Header key
:type line: string
:param key: Header value
:type line: string
9 years ago
NOINDEX
9 years ago
"""
9 years ago
if val is None:
return True
9 years ago
stripped = False
if key.lower() == 'content-encoding':
if val in ('gzip', 'x-gzip'):
self._encoding_type = ENCODE_GZIP
elif val in ('deflate'):
self._encoding_type = ENCODE_DEFLATE
# We send our requests already decoded, so we don't want a header
# saying it's encoded
if self._encoding_type != ENCODE_NONE:
self._decoded = True
stripped = True
elif key.lower() == 'transfer-encoding' and val.lower() == 'chunked':
self._data_obj = ChunkedData()
self.complete = self._data_obj.complete
self._decoded = True
stripped = True
elif key.lower() == 'content-length':
# We use our own content length
9 years ago
if self._data_obj and self._data_obj.complete:
# We're regenerating or something so we want to base this header
# off our existing body
val = self._data_obj.body
self._data_obj = LengthData(len(val))
if len(val) > 0:
self._data_obj.add_data(val)
self._encoding_type = ENCODE_NONE
self.complete = True
else:
self._data_obj = LengthData(int(val))
9 years ago
return (not stripped)
def handle_start_line(self, start_line):
"""
A handler function for the status line.
9 years ago
NOINDEX
9 years ago
"""
self.start_line = start_line
def headers_end(self):
"""
Called when the headers are complete.
9 years ago
NOINDEX
9 years ago
"""
pass
def body_complete(self):
"""
Called when the body of the message is complete
9 years ago
NOINDEX
9 years ago
"""
9 years ago
try:
self.body = _decode_encoded(self._data_obj.body,
self._encoding_type)
9 years ago
except IOError:
9 years ago
# Screw handling it gracefully, this is the server's fault.
print 'Error decoding request, storing raw data in body instead'
self.body = self._data_obj.body
9 years ago
def update_from_body(self):
"""
Called when the body of the message is modified directly. Should be used
to update metadata that depends on the body of the message.
9 years ago
NOINDEX
9 years ago
"""
if len(self.body) > 0 or 'Content-Length' in self.headers:
self.headers.update('Content-Length', str(len(self.body)), do_callback=False)
def update_from_headers(self):
"""
Called when a header is modified. Should be used to update metadata that
depends on the values of headers.
9 years ago
NOINDEX
9 years ago
"""
pass
###########
## Metadata
# The metadata functions are used so that we only have to make changes in a
# few similar functions which will update all copying, serialization, etc
# functions at the same time.
def get_metadata(self):
"""
Get all the metadata of the message in dictionary form.
Should be implemented in child class.
Should not be invoked outside of implementation!
9 years ago
NOINDEX
9 years ago
"""
pass
def set_metadata(self, data):
"""
Set metadata values based off of a data dictionary.
Should be implemented in child class.
Should not be invoked outside of implementation!
:param data: Metadata to apply
:type line: dict
9 years ago
NOINDEX
9 years ago
"""
pass
def reset_metadata(self):
"""
Reset meta values to default values. Overridden by child class.
Should not be invoked outside of implementation!
9 years ago
NOINDEX
9 years ago
"""
pass
##############
## Serializing
def to_json(self):
"""
Return a JSON encoding of the message that can be used by
:func:`~pappyproxy.http.Message.from_json` to recreate the message.
The ``full_message`` portion is base64 encoded because json doesn't play
nice with binary blobs.
"""
data = {
'full_message': base64.b64encode(self.full_message),
}
9 years ago
9 years ago
metadata = self.get_metadata()
for k, v in metadata.iteritems():
if k in HTTPMessage.reserved_meta_keys:
raise PappyException('A message with %s as a key for a metavalue cannot be encoded into JSON')
data[k] = v
return json.dumps(data)
def from_json(self, json_string):
"""
Update the metadata of the message to match data from
:func:`~pappyproxy.http.Message.to_json`
:param json_string: The JSON data to use
:type json_string: JSON data in a string
"""
data = json.loads(json_string)
full_message = base64.b64decode(data['full_message'])
for k in HTTPMessage.reserved_meta_keys:
if k in data:
del data[k]
self._from_full_message(full_message, meta=data)
# self.update_from_headers()
# self.update_from_body()
8 years ago
9 years ago
class Request(HTTPMessage):
"""
:ivar time_end: The datetime that the request ended.
:vartype time_end: datetime.datetime
:ivar time_start: The datetime that the request was made
:vartype time_start: datetime.datetime
:ivar cookies: Cookies sent with the request
:vartype cookies: RepeatableDict
:ivar fragment: The fragment part of the url (The part that comes after the #)
:vartype fragment: String
:ivar url_params: The url parameters of the request (aka the get parameters)
:vartype url_params: RepeatableDict
:ivar path: The path of the request
:vartype path: String
:ivar port: The port that the request was sent to (or will be sent to)
:vartype port: Integer
:ivar post_params: The post parameters of the request
:vartype post_params: RepeatableDict
:ivar reqid: The request id of the request
:vartype reqid: String
:ivar response: The associated response of this request
:vartype response: Response
:ivar submitted: Whether the request has been submitted
:vartype submitted: Bool
:ivar unmangled: If the request was mangled, the version of the request
before it was mangled.
:vartype unmangled: Request
:ivar verb: The HTTP verb of the request (ie POST, GET)
:vartype verb: String
:ivar version: The HTTP version of the request (ie HTTP/1.1)
:vartype version: String
:ivar tags: Tags associated with the request
:vartype tags: List of Strings
9 years ago
:ivar plugin_data: Data about the request created by plugins. If you modify this, please add your own key to it for your plugin and store all your plugin's data under that key (probably as another dict). For example if you have a plugin called ``foo``, try and store all your data under ``req.plugin_data['foo']``.
:vartype plugin_data: Dict
9 years ago
:ivar path_type: An enum which describes how the path portion of the request should be represented. ``PATH_RELATIVE`` -> normal relative path, ``PATH_ABSOLUTE`` -> The absolute path (including the protocol), ``PATH_HOST`` -> Just the path and the port (Used for CONNECT requests when connecting to an upstream HTTP proxy).
:vartype path_type: Enum
:ivar explicit_port: A flag to indicate that the port should always be included in the URL
"""
9 years ago
9 years ago
cache = RequestCache(100)
"""
The request cache that stores requests in memory for performance
"""
def __init__(self, full_request=None, update_content_length=True,
             port=None, is_ssl=None, host=None, path_type=None,
             proxy_creds=None, explicit_port=False):
    """
    Create a request, optionally parsing ``full_request`` and then
    overriding parsed values with the explicit keyword arguments.

    NOTE(review): ``proxy_creds`` is accepted but never read in this
    constructor - confirm whether it was meant to be applied.
    """
    # Give every instance variable a default value first
    self.clear()

    # Parsing comes after the defaults since some callbacks depend on
    # instance vars
    HTTPMessage.__init__(self, full_request, update_content_length)

    # metadata that is unique to a specific Request instance
    self.metadata_unique_keys = ('reqid',)

    # The dicts exist by now, so their modify callbacks can be attached
    self._set_dict_callbacks()

    # Explicit arguments win over whatever was parsed. is_ssl goes first
    # because its setter may move the port between 80 and 443.
    if is_ssl:
        self.is_ssl = True
    if port:
        self.port = port
    if host:
        self._host = host
    if path_type:
        self.path_type = path_type
    if explicit_port:
        self.explicit_port = explicit_port

    self.is_websocket = False
    self.websocket_messages = []
9 years ago
def __copy__(self):
    """
    Return a new request built from this one's full text and metadata.

    The copy gets no request id. The response and the unmangled request
    are copied as well, and the websocket messages are duplicated.

    :raises PappyException: If the request is not complete
    """
    if not self.complete:
        # A leftover pdb breakpoint used to sit here and would freeze the
        # process in a debugger instead of reporting the error
        raise PappyException("Cannot copy incomplete http messages")
    retreq = self.__class__(self.full_message)
    retreq.set_metadata(self.get_metadata())
    retreq.time_start = self.time_start
    retreq.time_end = self.time_end
    retreq.reqid = None
    if self.response:
        retreq.response = self.response.copy()
    if self.unmangled:
        retreq.unmangled = self.unmangled.copy()
    retreq.set_websocket_messages([m.duplicate() for m in self.websocket_messages])
    return retreq
9 years ago
def duplicate(self):
    """
    Like :func:`copy` but the request id is carried over, and the unmangled
    request and response are duplicated rather than copied.
    """
    clone = self.copy()
    clone.reqid = self.reqid
    if self.unmangled:
        clone.unmangled = self.unmangled.duplicate()
    if self.response:
        clone.response = self.response.duplicate()
    return clone
9 years ago
9 years ago
@property
def rsptime(self):
    """
    The response time of the request

    :getter: Returns ``time_end - time_start``, or None if either is unset
    :type: datetime.timedelta
    """
    if not (self.time_start and self.time_end):
        return None
    return self.time_end - self.time_start
@property
def start_line(self):
    """
    The status line of the request. ie `GET / HTTP/1.1`

    :getter: Returns the status line of the request
    :setter: Sets the status line of the request
    :type: string
    """
    if not (self.verb or self.full_path or self.version):
        return ''

    # How the target is written depends on the path type
    if self.path_type == PATH_ABSOLUTE:
        target = self._url_helper(always_have_path=True)
    elif self.path_type == PATH_HOST:
        target = ':'.join((self.host, str(self.port)))
    else:
        target = self.full_path
    return '%s %s %s' % (self.verb, target, self.version)

@start_line.setter
def start_line(self, val):
    self.handle_start_line(val)
@property
def status_line(self):
    """
    Alias for `pappyproxy.http.Request.start_line`.

    :getter: Returns the status line of the request
    :setter: Sets the status line of the request
    :type: string
    """
    line = self.start_line
    return line

@status_line.setter
def status_line(self, val):
    self.start_line = val
@property
def full_path(self):
    """
    The full path of the request including URL params and fragment.
    ie `/path/to/stuff?foo=bar&baz=something#somewhere`

    :getter: Returns the full path of the request
    :type: string
    """
    pieces = [self.path]
    if self.url_params:
        # Params without a value are written as a bare key
        rendered = [
            key if value is None else '='.join((key, value))
            for key, value in self.url_params.all_pairs()
        ]
        pieces.append('?')
        pieces.append('&'.join(rendered))
    if self.fragment:
        pieces.append('#')
        pieces.append(self.fragment)
    return ''.join(pieces)
9 years ago
@property
def raw_headers(self):
    """
    Alias for Request.headers_section

    :getter: Returns the raw text of the headers including the extra newline at the end.
    :type: string
    """
    section = self.headers_section
    return section
9 years ago
@property
def full_request(self):
    """
    Alias for Request.full_message

    :getter: Returns the full text of the request
    :type: string
    """
    message = self.full_message
    return message
9 years ago
@property
def raw_data(self):
    """
    Alias for Request.body

    :getter: Returns the data portion of the request
    :setter: Set the data of the request and update metadata
    :type: string
    """
    payload = self.body
    return payload

@raw_data.setter
def raw_data(self, val):
    self.body = val
9 years ago
@property
def connect_request(self):
    """
    If the request uses SSL, this will be a request object that can be used
    with an upstream HTTP server to connect to a server using SSL

    :getter: Returns a CONNECT request for this request's host and port,
        or None when the request is not SSL
    """
    if not self.is_ssl:
        return None

    tunnel = Request()
    tunnel.status_line = self.status_line
    tunnel.host = self.host
    tunnel.port = self.port
    tunnel.explicit_port = True
    tunnel.path_type = PATH_HOST
    tunnel.verb = 'CONNECT'

    # Only carry credentials over when both halves are present
    user, password = self.proxy_creds
    if user and password:
        tunnel.proxy_creds = self.proxy_creds
    return tunnel
@property
def proxy_creds(self):
    """
    A username/password tuple representing the username/password to
    authenticate to a proxy server. Sets the ``Proxy-Authorization``
    header. Getter will return (None, None) if no creds exist

    :getter: Returns the username/password tuple used for proxy authorization
    :setter: Sets the username/password tuple used for proxy authorization
    :type: Tuple of two strings: (username, password)
    """
    if 'Proxy-Authorization' in self.headers:
        return Request._parse_basic_auth(self.headers['Proxy-Authorization'])
    return (None, None)

@proxy_creds.setter
def proxy_creds(self, creds):
    user, secret = creds
    self.headers['Proxy-Authorization'] = Request._encode_basic_auth(user, secret)
@staticmethod
def _parse_basic_auth(header):
    """
    Parse a raw basic auth header and return (username, password)

    The scheme word before the first space is discarded and the remainder
    is base64-decoded, then split on the first colon.
    """
    encoded = header.split(' ', 1)[1]
    user, secret = base64.b64decode(encoded).split(':', 1)
    return (user, secret)
@staticmethod
def _encode_basic_auth(username, password):
    """
    Build the value of a basic auth header: ``Basic`` followed by the
    base64 encoding of ``username:password``.
    """
    token = base64.b64encode('%s:%s' % (username, password))
    return 'Basic %s' % token
8 years ago
9 years ago
def _url_helper(self, colored=False, always_have_path=False):
    """
    Build the URL of the request.

    :param colored: Wrap the scheme, host, port and path in terminal colors
    :param always_have_path: Include the path even when it is empty or ``/``
    :returns: The URL as a string
    """
    parts = []

    # Scheme: only plain http gets highlighted when colored
    if self.is_ssl:
        parts.append('https://')
    elif colored:
        parts.extend((Colors.RED, 'http', Colors.ENDC, '://'))
    else:
        parts.append('http://')

    if colored:
        parts.extend((host_color(self.host), self.host, Colors.ENDC))
    else:
        parts.append(self.host)

    # The port is written when it is not the default for the scheme, or
    # when explicit_port asks for it to always be included. (explicit_port
    # used to suppress the port, the opposite of its documented meaning.)
    default_port = 443 if self.is_ssl else 80
    if self.explicit_port or self.port != default_port:
        if colored:
            parts.extend((':', Colors.MAGENTA, str(self.port), Colors.ENDC))
        else:
            parts.append(':%d' % self.port)

    if (self.path and self.path != '/') or always_have_path:
        if colored:
            parts.append(path_formatter(self.path))
        else:
            parts.append(self.path)

    if self.url_params:
        # Value-less params are written as the bare key. Appending the
        # whole tuple here used to make the join below raise TypeError.
        rendered = [
            p[0] if p[1] is None else '='.join(p)
            for p in self.url_params.all_pairs()
        ]
        parts.append('?')
        parts.append('&'.join(rendered))

    if self.fragment:
        parts.append('#%s' % self.fragment)
    return ''.join(parts)
9 years ago
@property
def url(self):
    """
    The full url of the request including url params, protocol, etc.
    ie `https://www.google.com`, `http://foo.fakewebsite.com:1234/path?a=b`.
    When setting the URL, the port, is_ssl, path, url params, host, etc are all
    automatically updated.

    :getter: Returns the url of the request
    :setter: Sets the url of the request and updates metadata
    :type: string
    """
    plain = self._url_helper(False)
    return plain

@url.setter
def url(self, val):
    self._handle_statusline_uri(val)

@property
def url_color(self):
    """
    same as .url, except colored. Used for printing URLs to the terminal.

    :getter: Returns the url of the request
    :type: string
    """
    tinted = self._url_helper(True)
    return tinted
@property
def host(self):
    """
    The host of the request. ie `www.google.com`.

    :getter: Returns the host of the request
    :setter: Changes the host of the request and updates the Host header
    :type: string
    """
    current = self._host
    return current

@host.setter
def host(self, val):
    self._host = val
    # do_callback=False: write the header without firing the modify callback
    self.headers.update('Host', val, do_callback=False)
@property
def is_ssl(self):
    """
    Whether the request is sent over SSL

    :getter: Returns if the request is sent over SSL
    :setter: Sets if the request is sent over SSL
    :type: Bool
    """
    flag = self._is_ssl
    return flag

@is_ssl.setter
def is_ssl(self, val):
    # Only the default port of the other scheme is switched over; any
    # other port is left alone.
    if not val:
        self._is_ssl = False
        if self.port == 443:
            self.port = 80
        return
    self._is_ssl = True
    if self.port == 80:
        self.port = 443
9 years ago
@property
def saved(self):
    """
    If the request is saved in the data file

    :getter: Returns True if the request id can be converted to an integer
    :type: Bool
    """
    try:
        # A missing id (None) raises TypeError here, a non-numeric id
        # raises ValueError
        int(self.reqid)
    except (ValueError, TypeError):
        return False
    return True
@property
def path_tuple(self):
    """
    The path in tuple form starting with the host. For example, path_parts for
    a request to http://www.example.com/foo/bar.php would be::

        ('www.example.com', 'foo', 'bar.php')

    :getter: Returns the path in tuple form
    :type: Tuple
    """
    # Drop the first split element, which is blank because the path
    # starts with /
    segments = self.path.split('/')[1:]
    parts = [self.host]
    parts.extend(segments)
    # A trailing slash leaves an empty last element; remove it
    if parts[-1] == '':
        parts.pop()
    return tuple(parts)
9 years ago
@property
def sort_time(self):
    """
    If the request has a submit time, returns the submit time's unix timestamp.
    Returns 0 otherwise
    """
    if not self.time_start:
        return 0
    return time.mktime(self.time_start.timetuple())
9 years ago
###########
## Metadata
9 years ago
9 years ago
def get_metadata(self, include_unique=True):
    """
    Collect the request's metadata into a dict.

    :param include_unique: When False, keys listed in
        ``metadata_unique_keys`` are left out
    :returns: dict with is_ssl, host, reqid and tags, plus port and
        response_id when available
    """
    meta = {}
    if self.port is not None:
        meta['port'] = self.port
    meta['is_ssl'] = self.is_ssl
    meta['host'] = self.host
    meta['reqid'] = self.reqid
    if self.response:
        meta['response_id'] = self.response.rspid
    meta['tags'] = list(self.tags)

    if not include_unique:
        for unique_key in self.metadata_unique_keys:
            meta.pop(unique_key, None)
    return meta
9 years ago
9 years ago
def set_metadata(self, data):
    """
    Apply the metadata found in ``data``. Keys that are absent are left
    untouched.

    :param data: Metadata to apply
    :type data: dict
    """
    # Order matters: is_ssl may shift the port, so port is applied after it
    for key, attr in (('reqid', 'reqid'),
                      ('is_ssl', 'is_ssl'),
                      ('host', '_host'),
                      ('port', 'port')):
        if key in data:
            setattr(self, attr, data[key])
    if 'tags' in data:
        self.tags = set(data['tags'])
9 years ago
9 years ago
def reset_metadata(self):
    """
    Put the metadata back to its defaults: port 80, no SSL, no request id,
    empty host and no tags.
    """
    # port is assigned before is_ssl because the is_ssl setter reads it
    self.port = 80
    self.is_ssl = False
    self.reqid = None
    self._host = ''
    self.tags = set()
9 years ago
def get_plugin_dict(self, name):
    """
    Get the data dictionary for the given plugin name, creating an empty
    one under that name if it does not exist yet.
    """
    return self.plugin_data.setdefault(name, {})
9 years ago
def clear(self):
    """
    Resets all internal data and clears the request
    """
    HTTPMessage.clear(self)

    # Timing
    self.time_end = None
    self.time_start = None

    # Parsed pieces of the request
    self.cookies = RepeatableDict()
    self.fragment = None
    self.url_params = RepeatableDict()
    self._is_ssl = False
    self.path = ''
    self.post_params = RepeatableDict()

    # Related objects and state
    self.response = None
    self.submitted = False
    self.unmangled = None
    self.verb = ''
    self.version = ''
    self.plugin_data = {}
    self.reset_metadata()
    self.is_unmangled_version = False
    self.path_type = PATH_RELATIVE
    self.explicit_port = False
9 years ago
############################
## Internal update functions
9 years ago
def _set_dict_callbacks(self):
    """
    Attach modify callbacks to the headers, cookies and post_params dicts
    so that editing them updates the rest of the request.
    """
    # NOTE(review): both closures capture ``self`` directly and only wrap
    # it in a weak proxy at call time - confirm that is intended.
    def on_headers_changed():
        target = weakref.proxy(self)
        target.update_from_headers()

    def on_objects_changed():
        target = weakref.proxy(self)
        target._update_from_objects()

    self.headers.set_modify_callback(on_headers_changed)
    for tracked in (self.cookies, self.post_params):
        tracked.set_modify_callback(on_objects_changed)
9 years ago
def update_from_body(self):
    """
    Refresh the data that depends on the body: the base class update runs
    first, then post_params is re-parsed when the content type is
    form-urlencoded.
    """
    HTTPMessage.update_from_body(self)
    is_form = ('content-type' in self.headers and
               'application/x-www-form-urlencoded' in self.headers['content-type'])
    if is_form:
        self.post_params = repeatable_parse_qs(self.body)
        # post_params is a new object, so it needs its callback again
        self._set_dict_callbacks()
9 years ago
def _update_from_objects(self):
    """
    Rewrite the text values that are derived from the cookies and
    post_params objects.

    DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION
    """
    if self.cookies:
        cookie_text = '; '.join(
            '%s=%s' % (name, value) for name, value in self.cookies.all_pairs())
        self.headers.update('Cookie', cookie_text, do_callback=False)
    elif 'Cookie' in self.headers:
        # No cookies left, so the header goes too
        del self.headers['Cookie']

    if self.post_params:
        encoded = ['%s=%s' % (name, value)
                   for name, value in self.post_params.all_pairs()]
        self.headers['Content-Type'] = 'application/x-www-form-urlencoded'
        self.body = '&'.join(encoded)
9 years ago
9 years ago
def update_from_headers(self):
    """
    Rebuild the data that depends on header values: the cookies are reset
    and every header is run through handle_header again.
    """
    self.cookies = RepeatableDict()
    self._set_dict_callbacks()
    for name, value in self.headers.all_pairs():
        self.handle_header(name, value)
9 years ago
8 years ago
def _sort_websocket_messages(self):
    """
    Sort websocket message list by their submission time

    Messages without a ``time_sent`` are keyed as 1970-01-01. The sorted
    result is stored as a new list.
    """
    epoch = datetime.datetime(1970, 1, 1)
    self.websocket_messages = sorted(
        self.websocket_messages,
        key=lambda mes: mes.time_sent if mes.time_sent else epoch)
def add_websocket_message(self, mes):
    """
    Attach a websocket message to this request and re-sort the list.

    :param mes: The message to add; its ``parent_request`` is set to this
        request
    """
    # Right now just insert and sort. We'll do a better structure later
    mes.parent_request = self
    self.websocket_messages.append(mes)
    self._sort_websocket_messages()
def set_websocket_messages(self, msgs):
    """
    Replace the websocket message list with ``msgs`` and sort it.

    :param msgs: List of websocket messages
    """
    self.websocket_messages = msgs
    self._sort_websocket_messages()
###############
## Data parsing
def _process_host(self, hostline):
    """
    Set the host and port from a ``host[:port]`` string. Used only for
    processing the Host header and URL netlocs.

    Only the text after the last colon is treated as a port, and only when
    it is numeric, so bracketed IPv6 literals such as ``[::1]:8080`` or
    ``[::1]`` no longer raise ValueError.

    :param hostline: The host, optionally followed by ``:port``
    :type hostline: string
    :returns: True if the port was explicitly stated
    """
    port_given = False
    host_part, colon, port_part = hostline.rpartition(':')
    if colon and port_part.strip().isdigit():
        self._host = host_part
        self.port = int(port_part)
        if self.port == 443:
            self._is_ssl = True
        port_given = True
    else:
        self._host = hostline
        if not self.port:
            self.port = 80
    # str.strip() returns a new string; the result used to be thrown away
    self._host = self._host.strip()
    return port_given
9 years ago
def _handle_statusline_uri(self, uri):
    """
    Update host, port, SSL flag, path, url params and fragment from a URI.

    :param uri: The URI from the status line, or a full URL
    :type uri: string
    """
    # Without a scheme, prefix '//' so urlparse treats a leading host as
    # the netloc
    if not re.match('(?:^.+)://', uri):
        uri = '//' + uri

    parsed_path = urlparse.urlparse(uri)
    netloc = parsed_path.netloc
    port_given = False
    if netloc:
        port_given = self._process_host(netloc)

    if uri.startswith('https://') or self.port == 443:
        self._is_ssl = True
        if not port_given:
            self.port = 443
    if uri.startswith('http://'):
        self._is_ssl = False

    if not self.port:
        if self.is_ssl:
            self.port = 443
        else:
            self.port = 80

    # An empty path is stored as '/'
    if parsed_path.path:
        self.path = parsed_path.path
    else:
        self.path = '/'

    # url_params and fragment are only overwritten when the URI has them
    if parsed_path.query:
        self.url_params = repeatable_parse_qs(parsed_path.query)
    if parsed_path.fragment:
        self.fragment = parsed_path.fragment
9 years ago
def handle_start_line(self, start_line):
    """
    Parse the first line of the request into verb, URI and version.

    An empty line clears verb, path and version. A two-part line is read
    as verb and version with no URI.

    :raises Exception: If the line has neither two nor three parts
    """
    if start_line == '':
        self.verb = self.path = self.version = ''
        return

    pieces = start_line.split()
    count = len(pieces)
    if count == 3:
        self.verb, uri, self.version = pieces
    elif count == 2:
        self.verb, self.version = pieces
        uri = None
    else:
        raise Exception("Unexpected format of first line of request")

    # Get path using urlparse
    if uri is not None:
        self._handle_statusline_uri(uri)
9 years ago
9 years ago
def handle_header(self, key, val):
    """
    Process one header of the request.

    Runs the base class handling, then parses Cookie headers into
    ``self.cookies``, applies Host headers to the host and port, and
    strips headers whose name starts with ``proxy``.

    :returns: True if the header should be kept, False if it is stripped
    """
    # We may have duplicate headers
    if val is None:
        return True

    if not HTTPMessage.handle_header(self, key, val):
        return False

    name = key.lower()
    keep = True
    if name == 'cookie':
        # We still want the raw key/val for the cookies header
        # because it's still a header.
        # The only whitespace that matters is the space right after the
        # semicolon. If actual implementations mess this up, we could
        # probably strip whitespace around the key/value
        for cookie_str in val.split('; '):
            # A cookie without '=' gets an empty value
            cookie_key, _, cookie_val = cookie_str.partition('=')
            # we want to parse duplicate cookies
            self.cookies.append(cookie_key, cookie_val, do_callback=False)
    elif name == 'host':
        self._process_host(val)
    elif re.match('^proxy.*', name):
        keep = False
    return keep
9 years ago
def body_complete(self):
    """
    Called when the body is complete. After the base class handling,
    post_params is parsed from the body when the content type is exactly
    form-urlencoded.
    """
    HTTPMessage.body_complete(self)
    has_type = 'content-type' in self.headers
    if has_type and self.headers['content-type'] == 'application/x-www-form-urlencoded':
        self.post_params = repeatable_parse_qs(self.body)
        # post_params is a new object, so it needs its callback again
        self._set_dict_callbacks()
#######################
## Data store functions
9 years ago
def save_in_mem(self, cust_cache=None):
    """
    Add the request to a cache if it has no request id yet.

    :param cust_cache: Cache to use instead of ``Request.cache``
    """
    target_cache = cust_cache if cust_cache else Request.cache
    if not self.reqid:
        target_cache.add(self)
9 years ago
@defer.inlineCallbacks
def async_save(self, cust_dbpool=None, cust_cache=None):
    """
    async_save()
    Save/update the request in the data file. Returns a twisted deferred which
    fires when the save is complete.

    :param cust_dbpool: Connection pool to use instead of the module's
        ``dbpool``. When given, ``cust_cache`` is used as the cache.
    :param cust_cache: Cache to add the request to when ``cust_dbpool`` is
        given
    :raises PappyException: If the request is not complete
    :rtype: twisted.internet.defer.Deferred
    """
    from .pappy import main_context

    if not self.complete:
        raise PappyException('Cannot save incomplete messages')

    global dbpool
    if cust_dbpool:
        use_dbpool = cust_dbpool
        use_cache = cust_cache
    else:
        use_dbpool = dbpool
        use_cache = Request.cache
    assert(use_dbpool)

    if not self.reqid:
        self.reqid = '--'

    # Only the id check sits inside the try. Previously the update ran
    # inside it too, so a ValueError/TypeError raised by the update was
    # swallowed and followed by an insert.
    try:
        int(self.reqid)
    except (ValueError, TypeError):
        # Either no id or in-memory
        already_stored = False
    else:
        already_stored = True

    if already_stored:
        yield use_dbpool.runInteraction(self._update)
    else:
        yield use_dbpool.runInteraction(self._insert)
    assert(self.reqid is not None)
    yield use_dbpool.runInteraction(self._update_tags)

    if use_cache:
        use_cache.add(self)
    main_context.cache_reset()
9 years ago
@crochet.wait_for(timeout=180.0)
@defer.inlineCallbacks
def save(self):
    """
    save()
    Save/update the request in the data file.
    Saves the request, its unmangled version, the response, and the unmangled response.
    Cannot be called from inside an async function.
    """
    pending = self.async_deep_save()
    yield pending
@defer.inlineCallbacks
def async_deep_save(self):
    """
    async_deep_save()
    Saves self, unmangled, response, unmangled response, and websocket messages. Returns a deferred
    which fires after everything has been saved.

    :rtype: twisted.internet.defer.Deferred
    """
    rsp = self.response
    if rsp:
        # The unmangled response is saved before the response itself
        if rsp.unmangled:
            yield rsp.unmangled.async_save()
        yield rsp.async_save()

    if self.unmangled:
        yield self.unmangled.async_save()

    for ws_message in self.websocket_messages:
        yield ws_message.async_deep_save()

    # The request itself is saved last
    yield self.async_save()
def _update_tags(self, txn):
# This should never be called on an unsaved or in-memory request
txn.execute(
"""
DELETE FROM tagged WHERE reqid=?;
""",
(self.reqid,)
)
tagids = []
tags_to_add = []
# Find ids that already exist
for tag in self.tags:
txn.execute(
"""
SELECT id, tag FROM tags WHERE tag=?;
""",
(tag,)
)
result = txn.fetchall()
if len(result) == 0:
tags_to_add.append(tag)
else:
tagid = int(result[0][0])
tagids.append(tagid)
# Add new tags
for tag in tags_to_add:
txn.execute(
"""
INSERT INTO tags (tag) VALUES (?);
""",
(tag,)
)
tagids.append(int(txn.lastrowid))
# Tag our request
for tagid in tagids:
txn.execute(
"""
INSERT INTO tagged (reqid, tagid) VALUES (?, ?);
""",
(int(self.reqid), tagid)
)
9 years ago
def _update(self, txn):
# If we don't have an reqid, we're creating a new reuqest row
setnames = ["full_request=?", "port=?"]
queryargs = [self.full_request, self.port]
9 years ago
if self.response:
setnames.append('response_id=?')
assert(self.response.rspid is not None) # should be saved first
queryargs.append(self.response.rspid)
if self.unmangled:
setnames.append('unmangled_id=?')
assert(self.unmangled.reqid is not None) # should be saved first
queryargs.append(self.unmangled.reqid)
if self.time_start:
setnames.append('start_datetime=?')
9 years ago
queryargs.append((self.time_start-datetime.datetime(1970,1,1)).total_seconds())
9 years ago
if self.time_end:
setnames.append('end_datetime=?')
9 years ago
queryargs.append((self.time_end-datetime.datetime(1970,1,1)).total_seconds())
9 years ago
setnames.append('is_ssl=?')
if self.is_ssl:
queryargs.append('1')
else:
queryargs.append('0')
9 years ago
setnames.append('submitted=?')
if self.submitted:
queryargs.append('1')
else:
queryargs.append('0')
9 years ago
setnames.append('host=?')
if self.host:
queryargs.append(self.host)
else:
queryargs.append('')
setnames.append('plugin_data=?')
if self.plugin_data:
queryargs.append(json.dumps(self.plugin_data))
else:
queryargs.append('{}')
9 years ago
queryargs.append(self.reqid)
txn.execute(
"""
UPDATE requests SET %s WHERE id=?;
""" % ','.join(setnames),
tuple(queryargs)
)
def _insert(self, txn):
# If we don't have an reqid, we're creating a new reuqest row
colnames = ["full_request", "port"]
colvals = [self.full_request, self.port]
9 years ago
if self.response and self.response.rspid:
9 years ago
colnames.append('response_id')
colvals.append(self.response.rspid)
9 years ago
if self.unmangled and self.unmangled.reqid:
9 years ago
colnames.append('unmangled_id')
colvals.append(self.unmangled.reqid)
if self.time_start:
colnames.append('start_datetime')
9 years ago
colvals.append((self.time_start-datetime.datetime(1970,1,1)).total_seconds())
9 years ago
if self.time_end:
colnames.append('end_datetime')
9 years ago
colvals.append((self.time_end-datetime.datetime(1970,1,1)).total_seconds())
9 years ago
colnames.append('submitted')
if self.submitted:
colvals.append('1')
else:
colvals.append('0')
colnames.append('is_ssl')
if self.is_ssl:
colvals.append('1')
else:
colvals.append('0')
9 years ago
colnames.append('host')
if self.host:
colvals.append(self.host)
else:
colvals.append('')
colnames.append('plugin_data')
if self.plugin_data:
colvals.append(json.dumps(self.plugin_data))
else:
colvals.append('{}')
9 years ago
txn.execute(
"""
INSERT INTO requests (%s) VALUES (%s);
""" % (','.join(colnames), ','.join(['?']*len(colvals))),
tuple(colvals)
)
self.reqid = str(txn.lastrowid)
9 years ago
assert txn.lastrowid is not None
assert self.reqid is not None
@defer.inlineCallbacks
def delete(self, cust_dbpool=None, cust_cache=None):
    """
    delete()
    Remove this request from the cache in use and, unless it is an in-memory
    request (id starting with ``m``), from the data file along with its tag
    links. Resets ``reqid`` to None afterwards. Related messages (response,
    unmangled version) are not touched.

    :param cust_dbpool: Connection pool to use instead of the module-level pool
    :param cust_cache: Cache to use together with ``cust_dbpool``
    :raises PappyException: If the request has no id
    :rtype: twisted.internet.defer.Deferred
    """
    from .context import reset_context_caches

    if cust_dbpool:
        use_dbpool = cust_dbpool
        use_cache = cust_cache
    else:
        use_dbpool = dbpool
        use_cache = Request.cache

    if self.reqid is None:
        raise PappyException("Cannot delete request with id=None")

    if use_cache:
        # Clean up the cache that was actually selected above, not
        # unconditionally the global one
        use_cache.evict(self.reqid)
        if self.reqid in use_cache.all_ids:
            use_cache.all_ids.remove(self.reqid)
        if self.reqid in use_cache.ordered_ids:
            use_cache.ordered_ids.remove(self.reqid)
        if self.reqid in use_cache.req_times:
            del use_cache.req_times[self.reqid]
        if self.reqid in use_cache.inmem_reqs:
            use_cache.inmem_reqs.remove(self.reqid)
        if self.reqid in use_cache.unmangled_ids:
            use_cache.unmangled_ids.remove(self.reqid)

    reset_context_caches()

    # Ids starting with 'm' have no rows in the data file
    if self.reqid[0] != 'm':
        yield use_dbpool.runQuery(
            """
            DELETE FROM requests WHERE id=?;
            """,
            (self.reqid,)
        )
        yield use_dbpool.runQuery(
            """
            DELETE FROM tagged WHERE reqid=?;
            """,
            (self.reqid,)
        )
    self.reqid = None
9 years ago
@defer.inlineCallbacks
def deep_delete(self):
    """
    deep_delete()
    Delete a request together with everything hanging off it: its unmangled
    version, its response and that response's unmangled version. The request
    itself is deleted last.
    Returns a Twisted deferred.

    :rtype: Deferred
    """
    original_req = self.unmangled
    if original_req:
        yield original_req.delete()

    rsp = self.response
    if rsp:
        if rsp.unmangled:
            yield rsp.unmangled.delete()
        yield rsp.delete()

    yield self.delete()
9 years ago
@staticmethod
def _gen_sql_row(tablename=None):
9 years ago
template = "{pre}full_request, {pre}response_id, {pre}id, {pre}unmangled_id, {pre}start_datetime, {pre}end_datetime, {pre}port, {pre}is_ssl, {pre}host, {pre}plugin_data"
if tablename:
return template.format(pre=('%s.'%tablename))
9 years ago
else:
return template.format(pre='')
8 years ago
@staticmethod
@defer.inlineCallbacks
def _load_request_ws_messages(req):
    # Loads the websocket messages whose parent is req and attaches them to
    # it through set_websocket_messages().
    loaded = yield Request._async_load_ws_by_parent(req.reqid)
    req.set_websocket_messages(loaded)
@staticmethod
@defer.inlineCallbacks
def _from_sql_row(row, cust_dbpool=None, cust_cache=None):
    # Builds a Request from one row selected with the column order of
    # Request._gen_sql_row(), then loads its response, unmangled version,
    # tags and websocket messages. Fires with the finished request.
    #
    # :param row: Row of the requests table
    # :param cust_dbpool: Connection pool to use instead of the module-level pool
    # :param cust_cache: Cache passed along when loading related messages
    from .http import Request

    if cust_dbpool:
        use_dbpool = cust_dbpool
    else:
        use_dbpool = dbpool

    req = Request(row[0])
    if row[1]:
        rsp = yield Response.load_response(str(row[1]),
                                           cust_dbpool=cust_dbpool,
                                           cust_cache=cust_cache)
        req.response = rsp
    if row[3]:
        unmangled_req = yield Request.load_request(str(row[3]),
                                                   cust_dbpool=cust_dbpool,
                                                   cust_cache=cust_cache)
        req.unmangled = unmangled_req
        req.unmangled.is_unmangled_version = True
    # Stored timestamps are seconds since the epoch
    if row[4]:
        req.time_start = datetime.datetime.utcfromtimestamp(row[4])
    if row[5]:
        req.time_end = datetime.datetime.utcfromtimestamp(row[5])
    if row[6] is not None:
        req.port = int(row[6])
    if row[7] == 1:
        req._is_ssl = True
    if row[8]:
        req._host = row[8]
    if row[9]:
        req.plugin_data = json.loads(row[9])
    req.reqid = str(row[2])

    # tags
    tag_rows = yield use_dbpool.runQuery(
        """
        SELECT tg.tag
        FROM tagged tgd, tags tg
        WHERE tgd.tagid=tg.id AND tgd.reqid=?;
        """,
        (req.reqid,)
    )
    req.tags = set()
    for tag_row in tag_rows:
        req.tags.add(tag_row[0])

    # Websocket messages come from the same pool as everything else; going
    # through the pool-less helper would silently read the global data file.
    ws_messages = yield Request._async_load_ws_by_parent(req.reqid,
                                                         cust_dbpool=cust_dbpool)
    req.set_websocket_messages(ws_messages)
    defer.returnValue(req)
9 years ago
@staticmethod
@defer.inlineCallbacks
def load_requests_by_time(first, num, cust_dbpool=None, cust_cache=None):
    """
    load_requests_by_time()
    Load at most ``num`` requests whose start time is not later than that of
    the request with id ``first``, newest first, and return them in a list.
    Returns a deferred which calls back with the list of requests when complete.

    :rtype: twisted.internet.defer.Deferred
    """
    from .http import Request

    if cust_dbpool:
        pool, cache = cust_dbpool, cust_cache
    else:
        pool, cache = dbpool, Request.cache

    if cache:
        # The cache already knows the start time of the reference request
        starttime = cache.req_times[first]
        rows = yield pool.runQuery(
            """
            SELECT %s
            FROM requests
            WHERE start_datetime<=? ORDER BY start_datetime desc LIMIT ?;
            """ % Request._gen_sql_row(), (starttime, num)
        )
    else:
        # Without a cache, look the reference start time up with a self join
        rows = yield pool.runQuery(
            """
            SELECT %s
            FROM requests r1, requests r2
            WHERE r2.id=? AND
            r1.start_datetime<=r2.start_datetime
            ORDER BY start_datetime desc LIMIT ?;
            """ % Request._gen_sql_row('r1'), (first, num)
        )

    loaded = []
    for sql_row in rows:
        one_req = yield Request._from_sql_row(sql_row, cust_dbpool=cust_dbpool, cust_cache=cust_cache)
        loaded.append(one_req)
    defer.returnValue(loaded)
9 years ago
@staticmethod
@defer.inlineCallbacks
def load_requests_by_tag(tag, cust_dbpool=None, cust_cache=None):
    """
    load_requests_by_tag(tag)
    Load all the requests in the data file with a given tag and return them in a list.
    Returns a deferred which calls back with the list of requests when complete.

    :param tag: The tag to look for
    :param cust_dbpool: Connection pool to use instead of the module-level pool
    :param cust_cache: Cache to use together with ``cust_dbpool``
    :rtype: twisted.internet.defer.Deferred
    """
    from .http import Request

    if cust_dbpool:
        use_dbpool = cust_dbpool
    else:
        use_dbpool = dbpool

    # tags
    rows = yield use_dbpool.runQuery(
        """
        SELECT tgd.reqid
        FROM tagged tgd, tags tg
        WHERE tgd.tagid=tg.id AND tg.tag=?;
        """,
        (tag,)
    )
    reqs = []
    for row in rows:
        # load_request returns a deferred, so it has to be yielded to get the
        # request itself. It also inspects the first character of the id, so
        # the id read from the row is passed as a string.
        req = yield Request.load_request(str(row[0]),
                                         cust_dbpool=cust_dbpool,
                                         cust_cache=cust_cache)
        reqs.append(req)
    defer.returnValue(reqs)
@staticmethod
@defer.inlineCallbacks
def load_request(to_load, allow_special=True, use_cache=True, cust_dbpool=None, cust_cache=None):
    """
    load_request(to_load)
    Load a request with the given request id and return it.
    Returns a deferred which calls back with the request when complete.

    An id prefixed with ``u`` yields the unmangled version of the request; an
    id prefixed with ``s`` yields the request with its response replaced by
    the unmangled response. The returned object is always a duplicate.

    :param allow_special: Whether to allow special IDs such as ``u##`` or ``s##``
    :type allow_special: bool
    :param use_cache: Whether to use the cache. If set to false, it will always query the data file to get the request
    :type use_cache: bool
    :rtype: twisted.internet.defer.Deferred
    """
    if cust_dbpool:
        use_dbpool, cache_to_use = cust_dbpool, cust_cache
    else:
        use_dbpool, cache_to_use = dbpool, Request.cache

    if not use_dbpool:
        raise PappyException('No database connection to load from')

    if to_load == '--':
        raise PappyException('Invalid request ID. Wait for it to save first.')

    if not allow_special:
        try:
            int(to_load)
        except (ValueError, TypeError):
            raise PappyException('Cannot load special id %s' % to_load)

    # Work out which flavour of the request was asked for
    prefix = to_load[0]
    ret_unmangled = (prefix == 'u')
    rsp_unmangled = (not ret_unmangled) and (prefix == 's')
    if ret_unmangled or rsp_unmangled:
        loadid = to_load[1:]
    else:
        loadid = to_load

    def pick_result(loaded):
        # Turns the stored request into what the caller asked for
        if ret_unmangled:
            if not loaded.unmangled:
                raise PappyException("Request %s was not mangled"%loaded.reqid)
            return loaded.unmangled.duplicate()
        if not rsp_unmangled:
            return loaded.duplicate()
        if not loaded.response:
            raise PappyException("Request %s does not have a response" % loaded.reqid)
        if not loaded.response.unmangled:
            raise PappyException("Response to request %s was not mangled" % loaded.reqid)
        swapped = loaded.duplicate()
        swapped.response = swapped.response.unmangled
        return swapped

    # Get it through the cache
    if use_cache and cache_to_use:
        # If it's not cached, load_request will be called again and be told
        # not to use the cache.
        cached = yield cache_to_use.get(loadid)
        defer.returnValue(pick_result(cached))

    # Load it from the data file
    rows = yield use_dbpool.runQuery(
        """
        SELECT %s
        FROM requests
        WHERE id=?;
        """ % Request._gen_sql_row(),
        (loadid,)
    )
    if len(rows) != 1:
        raise PappyException("Request with id %s does not exist" % loadid)
    req = yield Request._from_sql_row(rows[0], cust_dbpool=cust_dbpool, cust_cache=cust_cache)
    assert req.reqid == loadid
    if cache_to_use:
        cache_to_use.add(req)
    defer.returnValue(pick_result(req))
9 years ago
8 years ago
@staticmethod
@defer.inlineCallbacks
def _async_load_ws(msgid, cust_dbpool=None):
    # Loads a single websocket message (and, recursively, its unmangled
    # version) by id. Fires with a WebSocketMessage.
    #
    # :param msgid: Id of the row in websocket_messages
    # :param cust_dbpool: Connection pool to use instead of the module-level pool
    # :raises PappyException: If no message has that id
    if cust_dbpool:
        use_dbpool = cust_dbpool
    else:
        use_dbpool = dbpool
    assert(use_dbpool)

    rows = yield use_dbpool.runQuery(
        """
        SELECT id,parent_request,unmangled_id,is_binary,direction,time_sent,contents
        FROM websocket_messages
        WHERE id=?;
        """,
        (int(msgid),)
    )
    if len(rows) == 0:
        raise PappyException("No websocket message with id=%s" % msgid)
    assert(len(rows) == 1)
    row = rows[0]

    m = WebSocketMessage()
    m.msgid = row[0]
    # The parent request (row[1]) is intentionally not loaded here
    if row[2]:
        # Stay on the same pool for the unmangled version
        m.unmangled = yield Request._async_load_ws(str(row[2]),
                                                   cust_dbpool=cust_dbpool)
    # NOTE(review): ">= 0" is true for both 0 and 1, so every non-NULL value
    # is treated as binary. This looks unintended, but the code writing this
    # column is not visible here - confirm before changing.
    if row[3] >= 0:
        m.is_binary = True
    else:
        m.is_binary = False
    if row[4] == 0:
        m.direction = 'OUTGOING'
    else:
        m.direction = 'INCOMING'
    if row[5] is not None:
        m.time_sent = datetime.datetime.utcfromtimestamp(row[5])
    m.contents = row[6]
    defer.returnValue(m)
@staticmethod
@defer.inlineCallbacks
def _async_load_ws_by_parent(reqid, cust_dbpool=None):
    # Loads every websocket message whose parent request is reqid. Fires with
    # a list of WebSocketMessage objects.
    #
    # :param reqid: Id of the parent request (must be convertible to int)
    # :param cust_dbpool: Connection pool to use instead of the module-level pool
    if cust_dbpool:
        use_dbpool = cust_dbpool
    else:
        use_dbpool = dbpool
    assert(use_dbpool)

    rows = yield use_dbpool.runQuery(
        """
        SELECT id
        FROM websocket_messages
        WHERE parent_request=?;
        """,
        (int(reqid),)
    )
    messages = []
    # Yes we could do it in one query but we'll deal with it if it causes
    # performance issues
    for row in rows:
        # Load each message from the pool the ids were read from
        m = yield Request._async_load_ws(row[0], cust_dbpool=cust_dbpool)
        messages.append(m)
    defer.returnValue(messages)
######################
## Submitting Requests
8 years ago
@staticmethod
@defer.inlineCallbacks
def submit_raw_request(message, host, port, is_ssl, mangle=False, int_macros=None):
    """
    Submits a request to the target host and returns a request object

    :param message: The raw request data to send
    :param host: Host to connect to
    :param port: Port to connect to
    :param is_ssl: Whether to connect using SSL
    :param mangle: If true, use the active intercepting macros instead of ``int_macros``
    :param int_macros: Intercepting macros to use. Defaults to none.
    :rtype: twisted.internet.defer.Deferred
    """
    from pappyproxy.plugin import active_intercepting_macros

    if mangle:
        use_int_macros = active_intercepting_macros()
    elif int_macros is None:
        # A fresh list per call rather than a shared mutable default
        use_int_macros = []
    else:
        use_int_macros = int_macros

    proxy_protocol = HTTPProtocolProxy(addr=(host, port, is_ssl),
                                       int_macros=use_int_macros,
                                       save_requests=False)
    d = proxy_protocol.get_next_response()
    try:
        proxy_protocol.client_data_received(message)
        retreq = yield d
    finally:
        # Close the connections even when submitting fails
        proxy_protocol.close_connections()
    defer.returnValue(retreq)
@defer.inlineCallbacks
def async_submit(self, mangle=False):
    """
    async_submit()
    Same as :func:`~pappyproxy.http.Request.submit` but generates deferreds.
    Submits the request using its host, port, etc. and updates its response value
    to the resulting response. The unmangled version, the start/end times and
    the metadata are copied from the submitted request as well.

    :param mangle: Whether to pass the request through active intercepting macros.
    :type mangle: Bool
    :rtype: Twisted deferred
    """
    newreq = yield Request.submit_raw_request(self.full_message, self.host, self.port,
                                              self.is_ssl, mangle=mangle)
    self.unmangled = newreq.unmangled
    self.response = newreq.response
    self.time_start = newreq.time_start
    self.time_end = newreq.time_end
    self.set_metadata(newreq.get_metadata())
@crochet.wait_for(timeout=180.0)
@defer.inlineCallbacks
def submit(self, *args, **kwargs):
    """
    submit()
    Blocking version of ``async_submit``: submits the request using its host,
    port, etc. and updates its response value to the resulting response.
    Cannot be called in async functions.
    If an error is encountered while submitting the request, it is printed
    to the console.
    This is what you should use to submit your requests in macros.
    """
    try:
        yield self.async_submit(*args, **kwargs)
    except Exception as err:
        failure_msg = 'Submitting request to %s failed: %s' % (self.host, str(err))
        print(failure_msg)
9 years ago
9 years ago
class Response(HTTPMessage):
    """
    :ivar cookies: Cookies set by the response
    :vartype cookies: RepeatableDict of ResponseCookie objects
    :ivar response_code: The response code of the response
    :vartype response_code: Integer
    :ivar response_text: The text associated with the response code (ie OK, NOT FOUND, etc)
    :vartype response_text: String
    :ivar rspid: If the response is saved in the data file, the id of the response
    :vartype rspid: String
    :ivar unmangled: If the response was mangled, the unmangled version of the response
    :vartype unmangled: Response
    :ivar version: The version part of the status line (ie HTTP/1.1)
    :vartype version: String
    """

    def __init__(self, full_response=None, update_content_length=True):
        # Resets instance variables
        self.clear()

        # Called after instance vars since some callbacks depend on
        # instance vars
        HTTPMessage.__init__(self, full_response, update_content_length)

        # metadata that is unique to a specific Response instance
        self.metadata_unique_keys = ('rspid',)

        # After message init so that other instance vars are initialized
        self._set_dict_callbacks()

    def __copy__(self):
        # A copy carries the metadata but never the id of the original
        if not self.complete:
            raise PappyException("Cannot copy incomplete http messages")
        retrsp = self.__class__(self.full_message)
        retrsp.set_metadata(self.get_metadata())
        retrsp.rspid = None
        if self.unmangled:
            retrsp.unmangled = self.unmangled.copy()
        return retrsp

    def duplicate(self):
        # Like a copy, but keeps the response id (recursively for unmangled)
        retrsp = self.copy()
        retrsp.rspid = self.rspid
        if self.unmangled:
            retrsp.unmangled = self.unmangled.duplicate()
        return retrsp

    @property
    def raw_headers(self):
        """
        Alias for Response.headers_section

        :getter: Returns the raw text of the headers including the extra newline at the end.
        :type: string
        """
        return self.headers_section

    @property
    def start_line(self):
        """
        The status line of the response. ie `HTTP/1.1 200 OK`

        :getter: Returns the status line of the response
        :setter: Sets the status line of the response
        :type: string
        """
        # Nothing parsed yet
        if not self.version and self.response_code == 0:
            return ''
        if self.response_text == '':
            return '%s %d' % (self.version, self.response_code)
        else:
            return '%s %d %s' % (self.version, self.response_code, self.response_text)

    @start_line.setter
    def start_line(self, val):
        self.handle_start_line(val)

    @property
    def status_line(self):
        # Alias for Response.start_line
        return self.start_line

    @status_line.setter
    def status_line(self, val):
        self.start_line = val

    @property
    def raw_data(self):
        """
        Alias for Response.body

        :getter: Returns the data portion of the response
        :setter: Set the data of the response and update metadata
        :type: string
        """
        return self.body

    @raw_data.setter
    def raw_data(self, val):
        self.body = val

    @property
    def full_response(self):
        """
        The full text of the response including the headers and data.
        Alias for Response.full_message

        :getter: Returns the full text of the response
        :type: string
        """
        return self.full_message

    @property
    def soup(self):
        """
        Returns a beautifulsoup4 object for parsing the html of the response

        :getter: Returns a BeautifulSoup object representing the html of the response
        """
        return bs4.BeautifulSoup(self.body, 'lxml')

    ###########
    ## Metadata

    def get_metadata(self, include_unique=True):
        # Returns the metadata dict; keys listed in metadata_unique_keys are
        # left out when include_unique is false
        data = {}
        data['rspid'] = self.rspid

        if not include_unique:
            for k in self.metadata_unique_keys:
                if k in data:
                    del data[k]
        return data

    def set_metadata(self, data):
        if 'rspid' in data:
            self.rspid = data['rspid']

    def reset_metadata(self):
        self.rspid = None

    def clear(self):
        HTTPMessage.clear(self)
        self.cookies = RepeatableDict()
        self.response_code = 0
        self.response_text = ''
        self.rspid = None
        self.unmangled = None
        self.version = ''

    ############################
    ## Internal update functions

    def _set_dict_callbacks(self):
        # Add callbacks to dicts
        # NOTE(review): both closures capture self directly, so the
        # weakref.proxy created inside them does not prevent a reference
        # cycle - confirm whether that was the intent.
        def f1():
            obj = weakref.proxy(self)
            obj.update_from_headers()

        def f2():
            obj = weakref.proxy(self)
            obj._update_from_objects()

        self.headers.set_modify_callback(f1)
        self.cookies.set_modify_callback(f2)

    def update_from_body(self):
        HTTPMessage.update_from_body(self)

    def _update_from_objects(self):
        # Updates headers from objects
        # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION

        # Cookies
        new_headers = RepeatableDict()
        cookies_added = False
        for pair in self.headers.all_pairs():
            if pair[0].lower() == 'set-cookie':
                # If we haven't added our cookies, add them all. Otherwise
                # strip the header (do nothing)
                if not cookies_added:
                    # Add all our cookies here
                    for k, c in self.cookies.all_pairs():
                        new_headers.append('Set-Cookie', c.cookie_str)
                    cookies_added = True
            else:
                new_headers.append(pair[0], pair[1])

        if not cookies_added:
            # Add all our cookies to the end
            for k, c in self.cookies.all_pairs():
                new_headers.append('Set-Cookie', c.cookie_str)

        self.headers = new_headers
        self._set_dict_callbacks()

    def update_from_headers(self):
        # Rebuilds the cookie dict from the Set-Cookie headers
        self.cookies = RepeatableDict()
        self._set_dict_callbacks()
        for k, v in self.headers.all_pairs():
            if k.lower() == 'set-cookie':
                # Parse the cookie
                cookie = ResponseCookie(v)
                self.cookies.append(cookie.key, cookie, do_callback=False)

    ###############
    ## Data parsing

    def handle_start_line(self, start_line):
        # Parses "<version> <code>[ <text>]" into the instance variables. An
        # empty line resets them.
        if start_line == '':
            self.response_code = 0
            self.version = ''
            self.response_text = ''
            return

        self._first_line = False
        if len(start_line.split(' ')) > 2:
            self.version, self.response_code, self.response_text = \
                start_line.split(' ', 2)
        else:
            self.version, self.response_code = start_line.split(' ', 1)
            self.response_text = ''
        self.response_code = int(self.response_code)

        # 304, 204 and 1xx responses are treated as ending after the headers.
        # Floor division keeps the 1xx test an integer comparison.
        if self.response_code == 304 or self.response_code == 204 or \
           self.response_code // 100 == 1:
            self._end_after_headers = True

    def handle_header(self, key, val):
        # Returns whether the header should be kept. Set-Cookie headers are
        # additionally parsed into self.cookies.
        if val is None:
            return True

        keep = HTTPMessage.handle_header(self, key, val)
        if not keep:
            return False

        if key.lower() == 'set-cookie':
            cookie = ResponseCookie(val)
            self.cookies.append(cookie.key, cookie, do_callback=False)
        return True

    ####################
    ## Cookie management

    def add_cookie(self, cookie):
        """
        Add a :class:`pappyproxy.http.ResponseCookie` to the response.

        .. warning::
            This will add duplicate cookies. If you want to add a cookie you're not sure exists,
            use :func:`~pappyproxy.http.Response.set_cookie`
        """
        self.cookies.append(cookie.key, cookie)

    def set_cookie(self, cookie):
        """
        Set a cookie in the response. ``cookie`` must be a :class:`pappyproxy.http.ResponseCookie`
        """
        self.cookies[cookie.key] = cookie

    def set_cookie_kv(self, key, val):
        """
        Set a cookie by key and value. Will not have path, secure, etc set at all.
        """
        cookie = ResponseCookie()
        cookie.key = key
        cookie.val = val
        self.cookies[cookie.key] = cookie

    def delete_cookie(self, key):
        """
        Delete a cookie from the response by its key
        """
        del self.cookies[key]

    #######################
    ## Database interaction

    @defer.inlineCallbacks
    def async_save(self, cust_dbpool=None, cust_cache=None):
        """
        async_save()
        Save/update just the response in the data file. Returns a twisted deferred which
        fires when the save is complete. It is suggested that you use
        :func:`~pappyproxy.http.Request.async_deep_save` instead to save responses.

        :param cust_dbpool: Connection pool to use instead of the module-level pool
        :param cust_cache: Accepted for symmetry with Request.async_save; not used here
        :raises PappyException: If the response is not complete
        :rtype: twisted.internet.defer.Deferred
        """
        if not self.complete:
            raise PappyException('Cannot save incomplete messages')

        if cust_dbpool:
            use_dbpool = cust_dbpool
        else:
            use_dbpool = dbpool
        assert(use_dbpool)

        # Only the id check is guarded so that a ValueError/TypeError coming
        # out of the update itself is not mistaken for "no row yet"
        try:
            int(self.rspid)
        except (ValueError, TypeError):
            already_stored = False
        else:
            already_stored = True

        if already_stored:
            yield use_dbpool.runInteraction(self._update)
        else:
            yield use_dbpool.runInteraction(self._insert)
        assert(self.rspid is not None)

    # Right now responses without requests are unviewable, so there is no
    # blocking save() for responses.

    def _update(self, txn):
        # Overwrites the existing responses row identified by self.rspid
        setnames = ["full_response=?"]
        queryargs = [self.full_response]
        if self.unmangled:
            setnames.append('unmangled_id=?')
            assert(self.unmangled.rspid is not None) # should be saved first
            queryargs.append(self.unmangled.rspid)

        queryargs.append(self.rspid)
        txn.execute(
            """
            UPDATE responses SET %s WHERE id=?;
            """ % ','.join(setnames),
            tuple(queryargs)
        )
        assert(self.rspid is not None)

    def _insert(self, txn):
        # If we don't have an rspid, we're creating a new one
        colnames = ["full_response"]
        colvals = [self.full_response]
        if self.unmangled is not None:
            colnames.append('unmangled_id')
            assert(self.unmangled.rspid is not None) # should be saved first
            colvals.append(self.unmangled.rspid)

        txn.execute(
            """
            INSERT INTO responses (%s) VALUES (%s);
            """ % (','.join(colnames), ','.join(['?']*len(colvals))),
            tuple(colvals)
        )
        # Check before stringifying: str(None) would give the id 'None'
        new_rowid = txn.lastrowid
        assert(new_rowid is not None)
        self.rspid = str(new_rowid)

    @defer.inlineCallbacks
    def delete(self):
        # Removes the response row from the module-level pool (if the
        # response has an id) and clears the id
        if self.rspid is not None:
            yield dbpool.runQuery(
                """
                DELETE FROM responses WHERE id=?;
                """,
                (self.rspid,)
            )
        self.rspid = None

    @staticmethod
    def is_ws_upgrade(rsp):
        # True when rsp is a 101 response switching to the websocket protocol
        if rsp.response_code != 101:
            return False
        if 'Upgrade' not in rsp.headers:
            return False
        if 'Connection' not in rsp.headers:
            return False
        if rsp.headers['Upgrade'].lower() != 'websocket':
            return False
        # Connection is a comma separated token list, so "keep-alive, Upgrade"
        # is just as valid as a bare "Upgrade"
        connection_tokens = [token.strip().lower()
                             for token in rsp.headers['Connection'].split(',')]
        return 'upgrade' in connection_tokens

    @staticmethod
    @defer.inlineCallbacks
    def load_response(respid, cust_dbpool=None, cust_cache=None):
        """
        Load a response from its response id. Returns a deferred. I don't suggest you use this.

        :param respid: Id of the response row
        :param cust_dbpool: Connection pool to use instead of the module-level pool
        :param cust_cache: Passed along when loading the unmangled response
        :raises PappyException: If there is no response with that id
        :rtype: twisted.internet.defer.Deferred
        """
        if cust_dbpool:
            use_dbpool = cust_dbpool
        else:
            use_dbpool = dbpool
        assert(use_dbpool)

        rows = yield use_dbpool.runQuery(
            """
            SELECT full_response, id, unmangled_id
            FROM responses
            WHERE id=?;
            """,
            (respid,)
        )
        if len(rows) != 1:
            raise PappyException("Response with request id %s does not exist" % respid)
        full_response = rows[0][0]
        resp = Response(full_response)
        resp.rspid = str(rows[0][1])
        if rows[0][2]:
            unmangled_response = yield Response.load_response(int(rows[0][2]),
                                                              cust_dbpool=cust_dbpool,
                                                              cust_cache=cust_cache)
            resp.unmangled = unmangled_response
        defer.returnValue(resp)
8 years ago
class HTTPProtocolProxy(ProtocolProxy):
# States of the client side of the proxied connection
CLIENT_STATE_CLOSED = 1
CLIENT_STATE_HTTP_READY = 2
# NOTE(review): the spelling of this name looks like a typo for
# CLIENT_STATE_WEBSOCKET_READY; it is kept because the name is part of the
# class interface - confirm whether anything references it.
CLINET_STATE_WEBSDOCKET_READY = 3
CLIENT_STATE_WEBSOCKET = 4
# States of the server side; process_request only starts a new request while
# in SERVER_STATE_HTTP_READY
SERVER_STATE_HTTP_READY = 1
SERVER_STATE_PROCESSING_HTTP_REQUEST = 2
SERVER_STATE_HTTP_AWAITING_RESPONSE = 3
SERVER_STATE_HTTP_AWAITING_PROXY_CONNECT = 4
SERVER_STATE_WEBSOCKET = 5
# Empty, uncacheable response sent to the client when a macro drops a request
# or a response
BLANK_RESPONSE = ('HTTP/1.1 200 OK\r\n'
'Connection: close\r\n'
'Cache-control: no-cache\r\n'
'Pragma: no-cache\r\n'
'Cache-control: no-store\r\n'
'Content-Length: 0\r\n'
'X-Frame-Options: DENY\r\n\r\n')
def __init__(self, *args, **kwargs):
    # Recognised keyword arguments:
    #   int_macros    - intercepting macros to use; their presence also marks
    #                   the macros as custom
    #   save_requests - whether requests get saved (defaults to True)
    #   addr          - (host, port, is_ssl) to force every request to
    # Positional arguments are accepted but not used.
    self.request_queue = Queue.Queue()
    self.request = Request()
    self.response = Response()
    self.stream_responses = True
    self.next_rsp_def = None
    self.ws_proxy = None

    self.custom_macros = 'int_macros' in kwargs
    self.mangle_macros = kwargs['int_macros'] if self.custom_macros else []

    self.save_requests = kwargs['save_requests'] if 'save_requests' in kwargs else True

    self.force_addr = 'addr' in kwargs
    self.connect_addr = kwargs['addr'] if self.force_addr else None

    self.using_upstream_http_proxy = False
    self.was_streaming_responses = self.stream_responses
    self.upstream_http_connected = False

    self.client_state = HTTPProtocolProxy.CLIENT_STATE_CLOSED
    self.server_state = HTTPProtocolProxy.SERVER_STATE_HTTP_READY

    ProtocolProxy.__init__(self)
def _set_client_state(self, state):
# Updates the state and sets other metadata states
# (like is_websocket)
self.client_state = state
self.log("Changing client state to %s" % self.client_state)
def _set_server_state(self, state):
# Updates the state and sets other metadata states
# (like is_websocket)
self.server_state = state
self.log("Changing server state to %s" % self.server_state)
def get_next_response(self):
    """
    Creates, remembers and returns a new deferred that fires with the next
    completed request/response pair
    """
    next_pair = defer.Deferred()
    self.next_rsp_def = next_pair
    return next_pair
def set_connection(self, host, port, use_ssl, use_socks=False):
    """
    Makes sure the server side is connected to the given endpoint. When the
    current connection already matches host, port and use_ssl nothing
    happens; otherwise the current connection is closed and a new one is
    made.
    """
    self.log("setting connection...")
    current = (self.conn_host, self.conn_port, self.conn_is_ssl)
    wanted = (host, port, use_ssl)
    if current == wanted:
        return
    self.log("Closing connection to old server")
    self.close_server_connection()
    self.connect(host, port, use_ssl, use_socks=use_socks)
def server_connection_lost(self, reason):
    # Logs why (and from where) the server connection went away, then lets
    # the base class do its own handling
    for line in ("Connection to server lost: %s" % str(reason),
                 str(reason.getTraceback())):
        self.log(line)
    ProtocolProxy.server_connection_lost(self, reason)
def client_data_received(self, data):
    # Data from the client either goes to the websocket proxy or is fed into
    # the request being parsed. A finished request is handed to
    # request_complete() and parsing restarts with a fresh Request.
    self.log("cp> %s" % short_data(data))
    if self.client_state == HTTPProtocolProxy.CLIENT_STATE_WEBSOCKET:
        self.ws_proxy.client_data_received(data)
        return

    self.request.add_data(data)
    if not self.request.complete:
        return
    finished = self.request
    self.request = Request()
    self.request_complete(finished)
def server_data_received(self, data):
    # Data from the server either goes to the websocket proxy or is fed into
    # the response being parsed (and streamed straight to the client when
    # streaming is on). A finished response is handed to response_complete()
    # and parsing restarts with a fresh Response.
    self.log("rp< %s" % short_data(data))
    if self.client_state == HTTPProtocolProxy.CLIENT_STATE_WEBSOCKET:
        self.ws_proxy.server_data_received(data)
        return

    self.response.add_data(data)
    if self.stream_responses:
        self.send_client_data(data)
    if not self.response.complete:
        return
    finished = self.response
    self.response = Response()
    self.response_complete(finished)
@defer.inlineCallbacks
def process_request(self, pop_request=True):
    """
    Takes the next queued request (or re-uses ``self.waiting_req`` when
    ``pop_request`` is false) and handles it: requests to the internal
    ``pappy`` host are answered locally, CONNECT requests set up client TLS,
    and everything else is optionally saved and mangled and then sent to the
    server or the upstream HTTP proxy.

    Does nothing unless the server state is SERVER_STATE_HTTP_READY.

    :param pop_request: Whether to take a new request off the queue
    :rtype: twisted.internet.defer.Deferred
    """
    from pappyproxy import context, macros, pappy

    if self.server_state != HTTPProtocolProxy.SERVER_STATE_HTTP_READY:
        self.log("Not ready to process a new HTTP request")
        return

    self._set_server_state(HTTPProtocolProxy.SERVER_STATE_PROCESSING_HTTP_REQUEST)

    if pop_request and self.request_queue.empty():
        self.log("Request queue is empty, not processing request")
        self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY)
        return

    if pop_request:
        self.waiting_req = self.request_queue.get()

    if self.waiting_req.host == 'pappy':
        self.log("Processing request to internal webserver")
        yield handle_webserver_request(self.waiting_req)
        self.send_response(self.waiting_req.response.full_message)
        self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY)
        self.process_request()
        return

    # Macros passed to the constructor (int_macros=...) have to survive from
    # request to request; only the list taken from the factory is reset.
    # Resetting unconditionally threw the custom macros away before they were
    # ever applied.
    if not self.custom_macros:
        self.mangle_macros = []
    self.stream_responses = True

    if self.waiting_req.verb.upper() == 'CONNECT':
        self.send_response('HTTP/1.1 200 Connection established\r\n\r\n')
        self.start_client_maybe_tls(cert_host=self.waiting_req.host)
        self.connect_addr = (self.waiting_req.host, self.waiting_req.port, self.waiting_req.is_ssl)
        self.log("Client TLS setup, ready for next request")
        self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY)
        self.process_request()
        return

    ## Process/store the request
    sendreq = self.waiting_req
    upstream_http_addr = get_http_proxy_addr()
    if self.connect_addr is not None:
        sendreq.host = self.connect_addr[0]
        sendreq.port = self.connect_addr[1]
        sendreq.is_ssl = self.connect_addr[2]

    if context.in_scope(sendreq) or self.custom_macros:
        if not self.custom_macros:
            self.mangle_macros = self.client_protocol.factory.get_macro_list()

        sendreq.time_start = datetime.datetime.utcnow()
        # Responses can only be streamed when nothing will mangle them
        if self.mangle_macros:
            self.stream_responses = False
        else:
            self.stream_responses = True

        if self.stream_responses and not self.mangle_macros:
            # Nothing depends on the save finishing, so don't wait for it
            if self.save_requests:
                sendreq.async_deep_save()
        else:
            if self.save_requests:
                yield sendreq.async_deep_save()

            (mangreq, mangled) = yield macros.mangle_request(sendreq, self.mangle_macros)
            if mangreq is None:
                self.log("Request dropped. Closing connections.")
                sendreq.tags.add('dropped')
                sendreq.response = None
                self.dropped_request = True
                self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY)
                self.send_response(self.BLANK_RESPONSE)
                defer.returnValue(None)
            elif mangled:
                sendreq = mangreq

                self.log("Request in scope, saving")
                sendreq.time_start = datetime.datetime.utcnow()
                assert sendreq.complete
                if self.save_requests:
                    yield sendreq.async_deep_save()
                self.waiting_req = sendreq
    else:
        self.log("Request out of scope, passing along unmangled")

    self.log("Making new connection to %s:%d is_ssl=%s" % (self.waiting_req.host, self.waiting_req.port, self.waiting_req.is_ssl))
    if upstream_http_addr is not None:
        self.log("Using upstream http proxy")
        self.log(str(self.waiting_req))
        self.set_connection(upstream_http_addr[0], upstream_http_addr[1], False, use_socks=True)
        self.using_upstream_http_proxy = True
    else:
        self.log("Not using upstream http proxy")
        self.set_connection(sendreq.host, sendreq.port, sendreq.is_ssl, use_socks=True)
        self.using_upstream_http_proxy = False

    if self.using_upstream_http_proxy and sendreq.is_ssl and not self.upstream_http_connected:
        # Ask the upstream proxy for a tunnel first; the real request is sent
        # once response_complete() sees the proxy's answer
        self.was_streaming_responses = self.stream_responses
        self.stream_responses = False
        r = sendreq.connect_request
        apply_http_proxy_creds(r)
        self.send_server_data(r.full_message)
        self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_PROXY_CONNECT)
        return
    elif self.using_upstream_http_proxy and not sendreq.is_ssl:
        # We're already connected to the proxy
        r = sendreq.copy()
        r.path_type = PATH_ABSOLUTE
        apply_http_proxy_creds(r)
        self.send_server_data(r.full_message)
        self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_RESPONSE)
        return
    else:
        self.send_server_data(sendreq.full_message)
        self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_RESPONSE)
        return
def request_complete(self, req):
    """
    Take a complete request, log its arrival, put it on the request
    queue and then call process_request.

    :param req: The request object to enqueue
    """
    self.log("Request complete, adding to queue")
    pending = self.request_queue
    pending.put(req)
    self.process_request()
@defer.inlineCallbacks
def response_complete(self, rsp):
# Handle a complete response `rsp` received on the server side.
# Runs under inlineCallbacks: each yielded async_deep_save() and
# macros.mangle_response() call finishes before the next line runs.
# NOTE(review): indentation is missing from this copy of the file, so the
# nesting of the branches below cannot be read off the text -- check a
# properly indented copy before changing any control flow here.
from pappyproxy import context, macros, http
# Case 1: the server state says we were waiting on the upstream HTTP
# proxy's answer to a CONNECT request, so `rsp` is that answer rather
# than a response to the client's own request.
if self.server_state == HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_PROXY_CONNECT:
self.log("Got upstream connection established response")
# Restore the streaming setting that was stashed in was_streaming_responses.
self.stream_responses = self.was_streaming_responses
# 407 from the proxy: report bad credentials and close the server connection.
if rsp.response_code == 407:
print "Incorrect credentials for HTTP proxy. Please check your username and password."
self.close_server_connection()
return
self.upstream_http_connected = True
self.start_server_tls()
self.log("Remote proxy TLS setup, ready for next request")
self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY)
self.process_request(pop_request=False) # waiting_req is already the request we want to send
return
# Case 2: `rsp` answers the request held in self.waiting_req.
assert self.server_state == HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_RESPONSE
# NOTE(review): sendreq below is the same object as self.waiting_req, so
# the value stored here is replaced by `rsp` two lines down. self.response
# is not assigned anywhere in the visible code -- confirm this line is needed.
self.waiting_req.response = self.response
sendreq = self.waiting_req
sendreq.response = rsp
self.waiting_req = None
# Timestamping, saving and mangling only happen when the request is in
# scope or custom macros are set.
if context.in_scope(sendreq) or self.custom_macros:
self.log("Response in scope, saving")
sendreq.time_end = datetime.datetime.utcnow()
assert sendreq.complete
# Streaming with no mangle macros: the save is started but not yielded,
# so this method does not wait for it. Otherwise the save is yielded.
if self.stream_responses and not self.mangle_macros:
if self.save_requests:
sendreq.async_deep_save()
else:
if self.save_requests:
yield sendreq.async_deep_save()
if self.mangle_macros:
if sendreq.response:
mangled = yield macros.mangle_response(sendreq, self.mangle_macros)
# A None result: mark the server side ready, answer the client with
# BLANK_RESPONSE and finish without running the rest of the method.
if mangled is None:
self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY)
self.send_response(self.BLANK_RESPONSE)
defer.returnValue(None)
# A truthy result triggers one more save when saving is enabled
# (presumably to store the mangled version -- confirm).
if mangled and self.save_requests:
yield sendreq.async_deep_save()
else:
self.log("Out of scope, not touching response")
self.log("Checking if we need to write response to transport")
# Fire the pending next_rsp_def deferred, if there is one, with the request.
if self.next_rsp_def is not None:
self.next_rsp_def.callback(sendreq)
# When not streaming, the full response message is written to the client
# here; when streaming, only end_response() is called.
if not self.stream_responses:
self.log("Wasn't streaming, writing response")
self.send_response(sendreq.response.full_message)
else:
self.log("Already streamed response, not writing to transport, ending response")
self.end_response()
# A websocket upgrade response moves both sides into the WEBSOCKET state
# and passes the client and server transports to a new WebSocketProxy,
# whose messages are routed to ws_message_complete.
if Response.is_ws_upgrade(rsp):
self.log("Upgrading to websocket connection")
self.server_state = HTTPProtocolProxy.SERVER_STATE_WEBSOCKET
self.client_state = HTTPProtocolProxy.CLIENT_STATE_WEBSOCKET
self.ws_proxy = WebSocketProxy(self.client_transport, self.server_transport, rsp, sendreq, self)
self.ws_proxy.message_callback = self.ws_message_complete
self.request.is_websocket = True
self.log("Upgraded to websocket")
else:
# Plain HTTP: mark the server side ready and process the next request.
self.log("Processing of response complete, ready for next request")
self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY)
self.process_request()
def ws_message_complete(self, wsobj):
    """
    Record a websocket message on self.request. Messages from the client
    and from the server both arrive here; the object carries a direction
    attribute that tells which way it went.

    :param wsobj: The websocket message object to append
    """
    history = self.request.websocket_messages
    history.append(wsobj)
# Probably just going to wait for the connection to close
# def ws_close(self):
# self.log("Websocket connection closed. Processing next request.")
# self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY)
# self.process_request()
def send_response(self, data):
    """
    Write data to the client with send_client_data, then call
    end_response to signal that the response is finished.

    :param data: The response data to send to the client
    """
    self.send_client_data(data)
    self.end_response()
def end_response(self):
    """
    Mark the current response as completely sent by handing control
    back to process_request, which deals with the next request.
    """
    self.process_request()
def submit_websocket_frame(self, data):
    """
    Always fails: this method raises a PappyException saying that no
    websocket stream is active, whatever data it is given.

    :param data: Ignored
    :raises PappyException: on every call
    """
    reason = "Websocket stream is not active"
    raise PappyException(reason)
def abort_connection(self):
    """
    No-op: performs no work and returns None.
    """
    return None
def client_connection_made(self, protocol):
# Sets the client state to CLIENT_STATE_HTTP_READY, then delegates to
# ProtocolProxy.client_connection_made with the same protocol argument.
# NOTE(review): presumably invoked once the client connection is
# established (going by the name) -- confirm against the caller.
self._set_client_state(HTTPProtocolProxy.CLIENT_STATE_HTTP_READY)
ProtocolProxy.client_connection_made(self, protocol)