import StringIO import base64 import bs4 import crochet import datetime import gzip import json import pygments import re import time import urlparse import zlib import weakref import Queue from .util import PappyException, printable_data, short_data, PappyStringTransport, sha1, print_traceback from .requestcache import RequestCache from .colors import Colors, host_color, path_formatter from .proxy import ProtocolProxy, make_proxied_connection, get_http_proxy_addr, start_maybe_tls from pygments.formatters import TerminalFormatter from pygments.lexers import get_lexer_for_mimetype, HttpLexer from twisted.internet import defer from autobahn.twisted.websocket import WebSocketServerFactory, WebSocketClientFactory, WebSocketServerProtocol, WebSocketClientProtocol from autobahn.websocket.protocol import WebSocketProtocol from autobahn.websocket.compress import PERMESSAGE_COMPRESSION_EXTENSION ENCODE_NONE = 0 ENCODE_DEFLATE = 1 ENCODE_GZIP = 2 PATH_RELATIVE = 0 PATH_ABSOLUTE = 1 PATH_HOST = 2 dbpool = None web_server = None def init(pool): """ Initialize the http module. :param pool: The ConnectionPool to use to store the request/response objects :type pool: SQLite ConnectionPool """ from .site import PappyWebServer global dbpool global web_server if dbpool is None: dbpool = pool web_server = PappyWebServer() assert(dbpool) def destruct(): assert(dbpool) dbpool.close() def _decode_encoded(data, encoding): if encoding == ENCODE_NONE: return data if encoding == ENCODE_DEFLATE: dec_data = zlib.decompress(data, -15) else: dec_data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data)) dec_data = dec_data.read() return dec_data def _strip_leading_newlines(string): while (len(string) > 1 and string[0:2] == '\r\n') or \ (len(string) > 0 and string[0] == '\n'): if len(string) > 1 and string[0:2] == '\r\n': string = string[2:] elif len(string) > 0 and string[0] == '\n': string = string[1:] return string def _consume_line(instr): # returns (line, rest) l = [] pos = 0 while pos < len(instr): if instr[pos] == '\n': if l and l[-1] == '\r': l = l[:-1] return (''.join(l), instr[pos+1:]) l.append(instr[pos]) pos += 1 return (instr, '') ################### ## Functions to use def get_request(url='', url_params={}): """ get_request(url='', url_params={}) Create a request object that makes a GET request to the given url with the given url params. """ r = Request() r.start_line = 'GET / HTTP/1.1' r.url = url r.headers['Host'] = r.host if url_params: r.url_params.from_dict(url_params) return r def post_request(url, post_params={}, url_params={}): """ post_request(url, post_params={}, url_params={}) Create a request object that makes a POST request to the given url with the given post and url params. """ r = Request() r.start_line = 'POST / HTTP/1.1' r.url = url r.headers['Host'] = r.host if url_params: r.url_params.from_dict(url_params) if post_params: r.post_params.from_dict(post_params) return r def repeatable_parse_qs(s): pairs = s.split('&') ret_dict = RepeatableDict() for pair in pairs: if '=' in pair: t = tuple(pair.split('=', 1)) ret_dict.append(t[0], t[1]) else: ret_dict.append(pair, None) return ret_dict @crochet.wait_for(timeout=180.0) @defer.inlineCallbacks def request_by_id(reqid): req = yield Request.load_request(str(reqid)) defer.returnValue(req) @defer.inlineCallbacks def async_submit_requests(reqs, mangle=False, save=False, save_in_mem=False, unique_paths=False, unique_path_and_args=False, use_cookie_jar=False, tag=None): """ async_submit_requests(reqs, mangle=False) :param mangle: Whether to pass the requests through intercepting macros :type mangle: Bool :rtype: DeferredList Copies and submits a list of requests at the same time asynchronously. Responses/unmangled versions will be attached to the request objects in the list. Prints progress to stdout. Returns list of submitted requests """ import sys from pappyproxy.plugin import add_to_history if unique_paths and unique_path_and_args: raise PappyException("Cannot use both unique_paths and unique_paths_and_argts") if save and save_in_mem: raise PappyException("Cannot use both save and save_in_mem") to_submit = [r.copy() for r in reqs] if unique_paths or unique_path_and_args: endpoints = set() new_reqs = [] for r in reqs: if unique_path_and_args: s = r.url else: s = r.path if not s in endpoints: new_reqs.append(r) endpoints.add(s) to_submit = new_reqs print 'Submitting %d request(s)' % len(to_submit) sys.stdout.flush() dones = 0 errors = 0 list_deferred = defer.Deferred() deferreds = [] for r in to_submit: if tag is not None: r.tags.add(tag) d = r.async_submit(mangle=mangle) deferreds.append(d) # Really not the best way to do this. If one request hangs forever the whole thing will # just hang in the middle for d in deferreds: try: yield d dones += 1 except Exception as e: errors += 1 print e finished = dones+errors if finished % 30 == 0 or finished == len(to_submit): if errors > 0: print '{0}/{1} complete with {3} errors ({2:.2f}%)'.format(finished, len(to_submit), (float(finished)/len(to_submit))*100, errors) else: print '{0}/{1} complete ({2:.2f}%)'.format(finished, len(to_submit), (float(finished)/len(to_submit))*100) sys.stdout.flush() if finished == len(to_submit): list_deferred.callback(None) if save: for r in to_submit: yield r.async_deep_save() elif save_in_mem: for r in to_submit: add_to_history(r) @crochet.wait_for(timeout=180.0) @defer.inlineCallbacks def submit_requests(*args, **kwargs): ret = yield async_submit_requests(*args, **kwargs) defer.returnValue(ret) def apply_http_proxy_creds(req): from pappyproxy import pappy if not pappy.session.config.http_proxy: return if 'username' in pappy.session.config.http_proxy and \ 'password' in pappy.session.config.http_proxy: req.proxy_creds = (pappy.session.config.http_proxy['username'], pappy.session.config.http_proxy['password']) @defer.inlineCallbacks def handle_webserver_request(req): global web_server yield web_server.handle_request(req) ########## ## Classes class RepeatableDict: """ A dict that retains the order of items inserted and keeps track of duplicate values. Can optionally treat keys as case insensitive. Custom made for the proxy, so it has strange features """ def __init__(self, from_pairs=None, case_insensitive=False): # If efficiency becomes a problem, add a dict that keeps a list by key # and use that for getting data. But until then, this stays. self._pairs = [] self._keys = set() self._modify_callback = None self.case_insensitive = case_insensitive if from_pairs: for k, v in from_pairs: self.append(k, v) def _ef_key(self, key): # "effective key", returns key.lower() if we're case insensitive, # otherwise it returns the same key if self.case_insensitive: return key.lower() return key def _mod_callback(self): # Calls the modify callback if we have one if self._modify_callback: self._modify_callback() def __contains__(self, val): return self._ef_key(val) in self._keys def __getitem__(self, key): for p in reversed(self._pairs): if self._ef_key(p[0]) == self._ef_key(key): return p[1] raise KeyError def __setitem__(self, key, val): # Replaces first instance of `key` and deletes the rest self.set_val(key, val) def __delitem__(self, key): self._remove_key(key) self._pairs = [p for p in self._pairs if self._ef_key(p[0]) != self._ef_key(key)] self._mod_callback() def __nonzero__(self): if self._pairs: return True else: return False def _add_key(self, key): self._keys.add(self._ef_key(key)) def _remove_key(self, key): self._keys.remove(self._ef_key(key)) def all_pairs(self): """ A list of all the key/value pairs stored in the dictionary """ return self._pairs[:] def append(self, key, val, do_callback=True): """ append(key, val) Append a pair to the end of the dictionary. Will add a duplicate if the key already exists. """ # Add a duplicate entry for key self._add_key(key) self._pairs.append((key, val)) if do_callback: self._mod_callback() def set_val(self, key, val, do_callback=True): """ set_val(key, val) Set a value in the dictionary. Will replace the first instance of the key with the value. If multiple values of the keys are already in the dictionary, the duplicates of the key will be removed and the first instance of the key will be replaced with the value. If the dictionary is case insensitive, it will maintain the original capitalization. This is the same behavior as assigning a value via ``d[key] = val``. If the key is not present, it will be added to the end of the dict. """ new_pairs = [] added = False self._add_key(key) for p in self._pairs: if self._ef_key(p[0]) == self._ef_key(key): if not added: # only add the first instance new_pairs.append((key, val)) added = True else: new_pairs.append(p) if not added: new_pairs.append((key, val)) self._pairs = new_pairs if do_callback: self._mod_callback() def update(self, key, val, do_callback=True): # If key is already in the dict, replace that value with the new value if key in self: for k, v in self.all_pairs(): if self._ef_key(k) == self._ef_key(key): self.set_val(k, val, do_callback=do_callback) break else: self.set_val(key, val, do_callback=do_callback) def clear(self, do_callback=True): """ clear() Remove all key/value pairs from the dictionary """ self._pairs = [] if do_callback: self._mod_callback() def all_vals(self, key): """ all_vals(key) Return all the values associated with a given key """ return [p[1] for p in self._pairs if self._ef_key(p[0]) == self._ef_key(key)] def add_pairs(self, pairs, do_callback=True): """ add_pairs(pairs) Add a list of pairs to the dictionary. :param pairs: The list of key/value pairs to add :type pairs: List of tuples of length 2 """ for pair in pairs: self._add_key(pair[0]) self._pairs += pairs if do_callback: self._mod_callback() def from_dict(self, d, do_callback=True): """ from_dict(d) Set the RepeatableDict to contain the same items as a normal dictionary. :param d: The dictionary to use :type d: dict """ self._pairs = list(d.items()) if do_callback: self._mod_callback() def sort(self): """ sort() Sort the dictionary by the key. Requires that all keys can be compared to each other """ # Sorts pairs by key alphabetaclly self._pairs = sorted(self._pairs, key=lambda x: x[0]) def set_modify_callback(self, callback): # Add a function to be called whenever an element is added, changed, or # deleted. Set to None to remove self._modify_callback = callback class LengthData: def __init__(self, length=None): self.body = '' self.complete = False self.length = length or 0 if self.length == 0 or self.length == -1: self.complete = True def add_data(self, data): if self.complete and self.length != -1: raise PappyException("Data already complete!") remaining_length = self.length-len(self.body) if self.length == -1: self.body += data complete = True else: if len(data) >= remaining_length: self.body += data[:remaining_length] assert(len(self.body) == self.length) self.complete = True else: self.body += data class ChunkedData: def __init__(self): self.body = '' self._pos = 0 self._state = 0 # 0=reading length, 1=reading data, 2=going over known string self._len_str = '' self._chunk_remaining = 0 self._known_str = '' self._known_str_pos = 0 self._next_state = 0 self._body = [] self.complete = False self.unchunked_data = [] def add_data(self, data): for c in data: self._body.append(c) self.scan_forward() def scan_forward(self): # Don't add more data if we're already done if self.complete: return while self._pos < len(self._body): curchar = self._body[self._pos] if self._state == 0: if curchar.lower() in '0123456789abcdef': # Read the next char of the length self._len_str += curchar # Move to the next char self._pos += 1 elif curchar == '\r': # Save how much chunk to read self._chunk_remaining = int(self._len_str, 16) # If the length is 0, chunked encoding is done! if self._chunk_remaining == 0: self.complete = True self.body = ''.join(self.unchunked_data) return # There should be a newline after the \r self._known_str = '\n' self._state = 2 self._next_state = 1 # Reset the length str self._len_str = '' # Move to the next char self._pos += 1 else: raise Exception("Malformed chunked encoding!") elif self._state == 1: if self._chunk_remaining > 0: # Read next byte of data self.unchunked_data.append(curchar) self._chunk_remaining -= 1 self._pos += 1 else: # Read newline then read a new chunk self._known_str = '\r\n' self._next_state = 0 # Read len after newlines self._state = 2 # Read newlines # Don't move to the next char because we didn't do anything elif self._state == 2: # Read a char of an expected string # If the expected char doesn't match, throw an error if self._known_str[self._known_str_pos] != curchar: raise Exception("Unexpected data") # Move to the next char in the raw data and in our known string self._known_str_pos += 1 self._pos += 1 # If we've reached the end of the known string, go to the next state if self._known_str_pos == len(self._known_str): self._known_str_pos = 0 self._state = self._next_state class ResponseCookie(object): """ A cookie representing a cookie set by a response :ivar key: The key of the cookie :type key: string :ivar val: The value of the cookie :type val: string :ivar expires: The value of the "expires" attribute :type expires: string :ivar max_age: The max age of the cookie :type max_age: int :ivar domain: The domain of the cookie :type domain: string :ivar path: The path of the cookie :type path: string :ivar secure: The secure flag of the cookie :type secure: Bool :ivar http_only: The httponly flag of the cookie :type http_only: Bool """ def __init__(self, set_cookie_string=None): self.key = None self.val = None self.expires = None self.max_age = None self.domain = None self.path = None self.secure = False self.http_only = False if set_cookie_string: self._from_cookie(set_cookie_string) @property def cookie_str(self): """ Returns the full string of the cookie. ie ``foo=bar; secure; path=/`` :getter: Returns the full string of the cookie. :setter: Set the metadata from a cookie string. ie from a ``Set-Cookie`` header """ av = '%s=%s' % (self.key, self.val) to_add = [av] if self.expires: to_add.append('expires=%s'%self.expires) if self.max_age: to_add.append('Max-Age=%d'%self.max_age) if self.domain: to_add.append('Domain=%s'%self.domain) if self.path: to_add.append('Path=%s'%self.path) if self.secure: to_add.append('secure') if self.http_only: to_add.append('httponly') return '; '.join(to_add) @cookie_str.setter def cookie_str(self, val): self._from_cookie(val) def _parse_cookie_av(self, cookie_av): if '=' in cookie_av: key, val = cookie_av.split('=', 1) key = key.lstrip() if key.lower() == 'expires': self.expires = val if key.lower() == 'max-age': self.max_age = int(val) if key.lower() == 'domain': self.domain = val if key.lower() == 'path': self.path = val elif cookie_av.lstrip().lower() == 'secure': self.secure = True elif cookie_av.lstrip().lower() == 'httponly': self.http_only = True def _from_cookie(self, set_cookie_string): self.key = None self.val = None self.expires = None self.max_age = None self.domain = None self.path = None self.secure = False self.http_only = False if ';' in set_cookie_string: cookie_pair, rest = set_cookie_string.split(';', 1) if '=' in cookie_pair: self.key, self.val = cookie_pair.split('=',1) elif cookie_pair == '' or re.match('\s+', cookie_pair): self.key = '' self.val = '' else: self.key = cookie_pair self.val = '' cookie_avs = rest.split(';') for cookie_av in cookie_avs: cookie_av.lstrip() self._parse_cookie_av(cookie_av) else: self.key, self.val = set_cookie_string.split('=',1) class WebSocketMessage(object): """ A class representing a websocket message """ DIRECTION_OUTGOING = 0 DIRECTION_INCOMING = 1 TYPE_MESSAGE = 0 def __init__(self, direction=None, contents=None, message_type=None, time_sent=None, is_binary=None): self.direction = None self.contents = None self.message_type = WebSocketMessage.TYPE_MESSAGE self.time_sent = None self.unmangled = None self.parent_request = None self.is_unmangled_version = False self.is_binary = False self.msgid = '--' def __eq__(self, other): if self.contents != other.contents: return False if self.is_binary != other.is_binary: return False return True def __copy__(self): new_msg = WebSocketMessage() new_msg.direction = self.direction new_msg.contents = self.contents new_msg.message_type = self.message_type new_msg.is_binary = self.is_binary return new_msg def copy(self): return self.__copy__() def duplicate(self): m = self.copy() m.msgid = self.msgid m.time_sent = self.time_sent if self.unmangled: m.unmangled = self.unmangled.duplicate() return m @defer.inlineCallbacks def async_save(self, cust_dbpool=None): from .pappy import main_context global dbpool if cust_dbpool: use_dbpool = cust_dbpool else: use_dbpool = dbpool assert(use_dbpool) if not self.msgid: self.msgid = '--' try: # Check for intyness _ = int(self.msgid) # If we have msgid, we're updating yield use_dbpool.runInteraction(self._update) assert(self.msgid is not None) except (ValueError, TypeError): # Either no id or in-memory yield use_dbpool.runInteraction(self._insert) assert(self.msgid is not None) main_context.cache_reset() @defer.inlineCallbacks def async_deep_save(self): if self.unmangled: yield self.unmangled.async_deep_save() yield self.async_save() def _update(self, txn): setnames = ['is_binary=?', 'contents=?'] queryargs = [self.is_binary, self.contents] # Direction if self.direction == 'OUTGOING': setnames.append('direction=?') queryargs.append(0) elif self.direction == 'INCOMING': setnames.append('direction=?') queryargs.append(1) else: setnames.append('direction=?') queryargs.append(-1) # Unmangled if self.unmangled: setnames.append('unmangled_id=?') assert(self.unmangled.msgid is not None) # should be saved first queryargs.append(self.unmangled.msgid) # Message time if self.time_sent: setnames.append('time_sent=?') queryargs.append((self.time_sent-datetime.datetime(1970,1,1)).total_seconds()) # Parent request if self.parent_request: assert(self.parent_request.reqid) setnames.append('parent_request=?') queryargs.append(self.parent_request.reqid) queryargs.append(self.msgid) txn.execute( """ UPDATE websocket_messages SET %s WHERE id=?; """ % ','.join(setnames), tuple(queryargs) ) def _insert(self, txn): colnames = ['is_binary', 'contents'] colvals = [self.is_binary, self.contents] # Direction if self.direction == 'OUTGOING': colnames.append('direction') colvals.append(0) elif self.direction == 'INCOMING': colnames.append('direction') colvals.append(1) else: colnames.append('direction') colvals.append(-1) # Unmangled if self.unmangled: colnames.append('unmangled_id') assert(self.unmangled.msgid is not None) # should be saved first colvals.append(self.unmangled.msgid) # Message time if self.time_sent: colnames.append('time_sent') colvals.append((self.time_sent-datetime.datetime(1970,1,1)).total_seconds()) # Parent request if self.parent_request: assert(self.parent_request.reqid) colnames.append('parent_request') colvals.append(self.parent_request.reqid) txn.execute( """ INSERT INTO websocket_messages (%s) VALUES (%s); """ % (','.join(colnames), ','.join(['?']*len(colvals))), tuple(colvals) ) self.msgid = str(txn.lastrowid) class WebSocketProxy(object): def __init__(self, server_transport, client_transport, client_handshake, server_handshake, parent_proxy, log_id=None): self.message_callback = None self.close_callback = None self.log_id = log_id self.intercepting_macros = None self.parent_http_proxy = parent_proxy # The raw http messages from the handshake self.client_handshake = client_handshake self.server_handshake = server_handshake self.server_factory = WebSocketServerFactory() self.server_factory.protocol = WebSocketServerProtocol self.server_transport = server_transport self.client_factory = WebSocketClientFactory() self.client_factory.protocol = WebSocketClientProtocol self.client_transport = client_transport self._fake_handshake() def log(self, message, symbol='*', verbosity_level=3): from pappyproxy.proxy import log if self.log_id: log(message, id=log_id, symbol=symbol, verbosity_level=verbosity_level) else: log(message, symbol=symbol, verbosity_level=verbosity_level) def _build_server_protocol(self): self.server_protocol = self.server_factory.buildProtocol(None) self.server_protocol.makeConnection(self.server_transport) self._patch_server_protocol() def _build_client_protocol(self): """ Builds the client protocol then returns the bytes written to the transport after creation """ self.client_protocol = self.client_factory.buildProtocol(None) tr = PappyStringTransport() self.client_protocol.makeConnection(tr) tval = tr.pop_value() self.client_protocol.transport = self.client_transport self._patch_client_protocol() return tval def _patch_server_protocol(self): self.log("Patching server protocol") self._add_hook(self.server_protocol, 'onMessage', '_on_server_message') def _patch_client_protocol(self): self.log("Patching client protocol") self._add_hook(self.client_protocol, 'onMessage', '_on_client_message') def _add_hook(self, c, their_method, my_method): """ Make it so that c.their_method(*args, **kwargs) calls self.my_method(*args, **kwargs) before calling their method """ old = getattr(c, their_method) def patched(*args, **kwargs): getattr(self, my_method)(*args, **kwargs) old(*args, **kwargs) setattr(c, their_method, patched) @staticmethod def _gen_sec_websocket_accept(token): MAGIC_STRING = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" concatenated = token + MAGIC_STRING hashed = sha1(concatenated) reencoded = base64.b64encode(hashed) return reencoded def _parse_extension_header(self, hd): """ Parses the extension header and returns a list of offers """ offers = [] extensions = self.server_protocol._parseExtensionsHeader(hd) for (extension, params) in extensions: if extension in PERMESSAGE_COMPRESSION_EXTENSION: PMCE = PERMESSAGE_COMPRESSION_EXTENSION[extension] try: offer = PMCE['Offer'].parse(params) offers.append(offer) except Exception as e: raise PappyException(str(e)) else: self.log("Bad extension: %s" % extension) return offers def _set_up_ws_client(self, ws_server_factory, ws_client_factory, hs_req, hs_rsp): """ Sets up a WebSocketClientProtocolFactory from a WebSocketServerProtocolFactory """ self.log("Setting up websocket client") client_settings = {} # Common attributes skip_attrs = ('logFrames','trackTimings','logOctets') for attr in WebSocketProtocol.CONFIG_ATTRS_COMMON: if attr not in skip_attrs: client_settings[attr] = getattr(ws_server_factory, attr) def accept(response): from autobahn.websocket.compress import PerMessageDeflateResponse, \ PerMessageDeflateResponseAccept, \ PerMessageBzip2Response, \ PerMessageBzip2ResponseAccept, \ PerMessageSnappyResponse, \ PerMessageSnappyResponseAccept if isinstance(response, PerMessageDeflateResponse): return PerMessageDeflateResponseAccept(response) elif isinstance(response, PerMessageBzip2Response): return PerMessageBzip2ResponseAccept(response) elif isinstance(response, PerMessageSnappyResponse): return PerMessageSnappyResponseAccept(response) # Client specific attributes client_settings['version'] = int(hs_req.headers['Sec-WebSocket-Version']) client_settings['acceptMaskedServerFrames'] = True client_settings['maskClientFrames'] = True #client_settings['serverConnectionDropTimeout'] = None offers = self._parse_extension_header(hs_req.headers['Sec-WebSocket-Extensions']) client_settings['perMessageCompressionOffers'] = offers client_settings['perMessageCompressionAccept'] = accept # debug client_settings['openHandshakeTimeout'] = 8 self.log("Setting client options: %s" % client_settings) ws_client_factory.setProtocolOptions(**client_settings) def _fake_handshake(self): self.log("Performing fake handshake") # Set up our server listener self._build_server_protocol() old_server_transport = self.server_protocol.transport self.server_protocol.transport = PappyStringTransport() self.log("Sending fake handshake response to server protocol:") self.log("pf> %s" % printable_data(self.server_handshake.full_message)) self.server_protocol.dataReceived(self.server_handshake.full_message) tval = self.server_protocol.transport.pop_value() self.log(" %s" % printable_data(handshake_rsp.full_message)) self.client_protocol.dataReceived(handshake_rsp.full_message) # Tell it whatever the remote server told us self.log("Compressed after handshake? %s" % self.client_protocol._perMessageCompress) self.log("Fake handshake complete") def _on_server_message(self, payload, is_binary): """ Wrapper to convert arguments to onMessage into an object and call on_server_message """ # Turn into object m = WebSocketMessage() m.direction = 'OUTGOING' m.contents = payload m.time_sent = datetime.datetime.utcnow() m.is_binary = is_binary self.log("rp< (websocket, binary=%s) %s" % (m.is_binary, m.contents)) self.on_server_message(m) def _on_client_message(self, payload, is_binary): """ Wrapper function for when the client protocol gets a data frame """ # Turn into object m = WebSocketMessage() m.direction = 'INCOMING' m.contents = payload m.time_sent = datetime.datetime.utcnow() m.is_binary = is_binary self.log("cp> (websocket, binary=%s) %s" % (m.is_binary, m.contents)) self.on_client_message(m) @defer.inlineCallbacks def mangle_message(self, message): from pappyproxy import context, macros # Mangle object sendreq = self.server_handshake custom_macros = self.parent_http_proxy.custom_macros mangle_macros = self.parent_http_proxy.mangle_macros client_protocol = self.parent_http_proxy.client_protocol save_requests = self.parent_http_proxy.save_requests dropped = False sendmsg = message if context.in_scope(sendreq) or custom_macros: self.log("Passing ws message through macros") if not custom_macros: mangle_macros = client_protocol.factory.get_macro_list() if save_requests: yield sendreq.async_deep_save() (mangmsg, mangled) = yield macros.mangle_websocket_message(message, sendreq, mangle_macros) if mangmsg is None: self.log("Message dropped") dropped = True elif mangled: self.log("Message mangled") sendmsg.time_sent = None mangmsg.unmangled = sendmsg sendmsg = mangmsg else: self.log("Message not modified") mangmsg.time_sent = datetime.datetime.utcnow() if not dropped: sendreq.add_websocket_message(sendmsg) if save_requests: yield sendreq.async_deep_save() else: sendreq.add_websocket_message(sendmsg) self.log("Request out of scope, passing ws message along unmangled") # Send data frame through server object to the client defer.returnValue((sendmsg, dropped)) @defer.inlineCallbacks def on_server_message(self, message): """ Wrapper function for when the server protocol gets a data frame """ # Mangle object sendmsg, dropped = yield self.mangle_message(message) # Send data frame through client object to remote server if not dropped: self.send_server_message(sendmsg.contents, sendmsg.is_binary) @defer.inlineCallbacks def on_client_message(self, message): """ Wrapper function for when the client protocol gets a data frame """ sendmsg, dropped = yield self.mangle_message(message) # Send data frame through server object to the client if not dropped: self.send_client_message(sendmsg.contents, sendmsg.is_binary) def client_data_received(self, data): # Called when data is received from the client # Sends RAW DATA to server protocol. NOT frame data self.server_protocol.dataReceived(data) def server_data_received(self, data): # Called when data is received from the server # Sends RAW DATA to client protocol. NOT frame data self.client_protocol.dataReceived(data) def send_client_message(self, message_data, is_binary=False): # Write a message to the client over the actual transport self.server_protocol.sendMessage(message_data, is_binary) def send_server_message(self, message_data, is_binary=False): # Write a message to the remote server over the actual transport self.client_protocol.sendMessage(message_data, is_binary) def on_client_frame(self, frame_data, is_binary=False): # Call callback and have callback return mangled frame data self.server_protocol.sendFrame(frame_data, is_binary) class HTTPMessage(object): """ A base class which represents an HTTP message. It is used to implement both requests and responses :ivar complete: When loading data with :func:`~pappyproxy.http.HTTPMessage.add_line` and :func:`~pappyproxy.http.HTTPMessage.add_data`, returns whether the message is complete :vartype complete: bool :ivar headers: Headers of the message :vartype complete: RepeatableDict :ivar headers_complete: When creating the message with :func:`~pappyproxy.http.HTTPMessage.add_line` and :func:`~pappyproxy.http.HTTPMessage.add_data`, returns whether the headers are complete :ivar start_line: The start line of the message :vartype start_line: string """ reserved_meta_keys = ['full_message'] """ Internal class variable. Do not modify. """ def __init__(self, full_message=None, update_content_length=False): # Initializes instance variables too self.clear() self.metadata_unique_keys = tuple() if full_message is not None: self._from_full_message(full_message, update_content_length) def __eq__(self, other): if self.full_message != other.full_message: return False m1 = self.get_metadata() m2 = other.get_metadata() for k in self.metadata_unique_keys: if k in m1: del m1[k] if k in m2: del m2[k] if m1 != m2: return False return True def __copy__(self): if not self.complete: raise PappyException("Cannot copy incomplete http messages") retmsg = self.__class__(self.full_message) retmsg.set_metadata(self.get_metadata(include_unique=False)) return retmsg def copy(self): """ Returns a copy of the request :rtype: Request """ return self.__copy__() def deepcopy(self): """ Returns a deep copy of the message. Implemented by child. NOINDEX """ return self.__deepcopy__() def clear(self): """ Resets all internal data and clears the message """ self.complete = False self.headers = RepeatableDict(case_insensitive=True) self.headers_complete = False self.malformed = False self.start_line = '' self.reset_metadata() self._decoded = False self._encoding_type = ENCODE_NONE self._first_line = True self._data_obj = None self._end_after_headers = False self._data_buffer = '' def _from_full_message(self, full_message, update_content_length=False, meta=None): # Set defaults for metadata self.clear() # Get rid of leading CRLF. Not in spec, should remove eventually full_message = _strip_leading_newlines(full_message) if full_message == '': return self.add_data(full_message, enforce_content_length=(not update_content_length)) if meta: self.set_metadata(meta) if update_content_length: # If we have no body and didn't have a content-length header in the first # place, don't add one if 'Content-Length' in self.headers or len(self.body) > 0: self.headers['Content-Length'] = str(len(self.body)) assert(self.complete) ############################### ## Properties/attribute setters @property def headers_section(self): """ The raw text of the headers including the extra newline at the end. :getter: Returns the raw text of the headers including the extra newline at the end. :type: string """ ret = '' if self.start_line: ret = self.start_line + '\r\n' for k, v in self.headers.all_pairs(): ret = ret + "%s: %s\r\n" % (k, v) if ret: ret = ret + '\r\n' return ret @property def headers_section_pretty(self): """ Same thing as :func:`pappyproxy.http.HTTPMessage.headers_section` except that the headers are colorized for terminal printing. """ to_ret = printable_data(self.headers_section, colors=False) to_ret = pygments.highlight(to_ret, HttpLexer(), TerminalFormatter()) return to_ret @property def body(self): """ The data portion of the message :getter: Returns the data portion of the message :setter: Set the data of the response and update metadata :type: string """ if self._data_obj: return self._data_obj.body else: return '' @body.setter def body(self, val): self._data_obj = LengthData(len(val)) if len(val) > 0: self._data_obj.add_data(val) self._encoding_type = ENCODE_NONE self.complete = True self.update_from_body() @property def body_pretty(self): """ Same thing as :func:`pappy.http.HTTPMessage.body` but the output is colorized for the terminal. """ if self.body == '': return '' to_ret = printable_data(self.body, colors=False) if 'content-type' in self.headers: try: lexer = get_lexer_for_mimetype(self.headers['content-type'].split(';')[0]) to_ret = pygments.highlight(to_ret, lexer, TerminalFormatter()) except: pass return to_ret @property def full_message(self): """ The full message including the start line, headers, and body """ if self.headers_section == '': return self.body else: return (self.headers_section + self.body) @property def full_message_pretty(self): """ Same as :func:`pappyproxy.http.HTTPMessage.full_message` except the output is colorized """ if self.body: return (self.headers_section_pretty + '\r\n' + self.body_pretty) else: return self.headers_section_pretty ############### ## Data loading def add_line(self, line, enforce_content_length=True): """ Used for building a message from a Twisted protocol. Add a line (for status line and headers). Lines must be added in order and the first line must be the status line. The line should not contain the trailing carriage return/newline. I do not suggest you use this for anything. :param line: The line to add :type line: string """ assert(not self.headers_complete) if not line and self._first_line: return if not line: self.headers_complete = True if not self._data_obj: self._data_obj = LengthData(0) if self._end_after_headers: self.complete = True return if not enforce_content_length or 'content-length' not in self.headers: if isinstance(self._data_obj, LengthData): self._data_obj = LengthData(-1) self.complete = self._data_obj.complete self.headers_end() return if self._first_line: self.handle_start_line(line) self._first_line = False else: if ':' in line: key, val = line.split(':', 1) val = val.strip() else: key = line val = None if self.handle_header(key, val): self.headers.append(key, val, do_callback=False) def add_data(self, data, enforce_content_length=True): """ Used for building a message from a Twisted protocol. Add data to the message. The data must conform to the content encoding and transfer encoding given in the headers passed in to :func:`~pappyproxy.http.HTTPMessage.add_line`. Can be any fragment of the data. I do not suggest that you use this function ever. :param data: The data to add :type data: string """ assert not self.complete or not enforce_content_length self._data_buffer += data def popline(s): parts = s.split('\n', 1) if len(parts) == 1: return (s, '') line, rest = (parts[0], ''.join(parts[1:])) if line[-1] == '\r': line = line[:-1] return (line, rest) # Add headers from the data buffer if headers aren't complete if not self.headers_complete: while '\n' in self._data_buffer and not self.headers_complete: line, self._data_buffer = popline(self._data_buffer) self.add_line(line, enforce_content_length=enforce_content_length) # Add the data section if the headers are complete and we have stuff left in the buffer if self.headers_complete: if self._data_buffer: to_add = self._data_buffer self._data_buffer = '' self._data_obj.add_data(to_add) if self._data_obj.complete: self.complete = True self.body_complete() ############### ## Data parsing def handle_header(self, key, val): """ Called when a header is loaded into the message. Should not be called outside of implementation. :param key: Header key :type line: string :param key: Header value :type line: string NOINDEX """ if val is None: return True stripped = False if key.lower() == 'content-encoding': if val in ('gzip', 'x-gzip'): self._encoding_type = ENCODE_GZIP elif val in ('deflate'): self._encoding_type = ENCODE_DEFLATE # We send our requests already decoded, so we don't want a header # saying it's encoded if self._encoding_type != ENCODE_NONE: self._decoded = True stripped = True elif key.lower() == 'transfer-encoding' and val.lower() == 'chunked': self._data_obj = ChunkedData() self.complete = self._data_obj.complete self._decoded = True stripped = True elif key.lower() == 'content-length': # We use our own content length if self._data_obj and self._data_obj.complete: # We're regenerating or something so we want to base this header # off our existing body val = self._data_obj.body self._data_obj = LengthData(len(val)) if len(val) > 0: self._data_obj.add_data(val) self._encoding_type = ENCODE_NONE self.complete = True else: self._data_obj = LengthData(int(val)) return (not stripped) def handle_start_line(self, start_line): """ A handler function for the status line. NOINDEX """ self.start_line = start_line def headers_end(self): """ Called when the headers are complete. NOINDEX """ pass def body_complete(self): """ Called when the body of the message is complete NOINDEX """ try: self.body = _decode_encoded(self._data_obj.body, self._encoding_type) except IOError: # Screw handling it gracefully, this is the server's fault. print 'Error decoding request, storing raw data in body instead' self.body = self._data_obj.body def update_from_body(self): """ Called when the body of the message is modified directly. Should be used to update metadata that depends on the body of the message. NOINDEX """ if len(self.body) > 0 or 'Content-Length' in self.headers: self.headers.update('Content-Length', str(len(self.body)), do_callback=False) def update_from_headers(self): """ Called when a header is modified. Should be used to update metadata that depends on the values of headers. NOINDEX """ pass ########### ## Metadata # The metadata functions are used so that we only have to make changes in a # few similar functions which will update all copying, serialization, etc # functions at the same time. def get_metadata(self): """ Get all the metadata of the message in dictionary form. Should be implemented in child class. Should not be invoked outside of implementation! NOINDEX """ pass def set_metadata(self, data): """ Set metadata values based off of a data dictionary. Should be implemented in child class. Should not be invoked outside of implementation! :param data: Metadata to apply :type line: dict NOINDEX """ pass def reset_metadata(self): """ Reset meta values to default values. Overridden by child class. Should not be invoked outside of implementation! NOINDEX """ pass ############## ## Serializing def to_json(self): """ Return a JSON encoding of the message that can be used by :func:`~pappyproxy.http.Message.from_json` to recreate the message. The ``full_message`` portion is base64 encoded because json doesn't play nice with binary blobs. """ data = { 'full_message': base64.b64encode(self.full_message), } metadata = self.get_metadata() for k, v in metadata.iteritems(): if k in HTTPMessage.reserved_meta_keys: raise PappyException('A message with %s as a key for a metavalue cannot be encoded into JSON') data[k] = v return json.dumps(data) def from_json(self, json_string): """ Update the metadata of the message to match data from :func:`~pappyproxy.http.Message.to_json` :param json_string: The JSON data to use :type json_string: JSON data in a string """ data = json.loads(json_string) full_message = base64.b64decode(data['full_message']) for k in HTTPMessage.reserved_meta_keys: if k in data: del data[k] self._from_full_message(full_message, meta=data) # self.update_from_headers() # self.update_from_body() class Request(HTTPMessage): """ :ivar time_end: The datetime that the request ended. :vartype time_end: datetime.datetime :ivar time_start: The datetime that the request was made :vartype time_start: datetime.datetime :ivar cookies: Cookies sent with the request :vartype cookies: RepeatableDict :ivar fragment: The fragment part of the url (The part that comes after the #) :vartype fragment: String :ivar url_params: The url parameters of the request (aka the get parameters) :vartype url_params: RepeatableDict :ivar path: The path of the request :vartype path: String :ivar port: The port that the request was sent to (or will be sent to) :vartype port: Integer :ivar post_params: The post parameters of the request :vartype post_params: RepeatableDict :ivar reqid: The request id of the request :vartype reqid: String :ivar response: The associated response of this request :vartype response: Response :ivar submitted: Whether the request has been submitted :vartype submitted: Bool :ivar unmangled: If the request was mangled, the version of the request before it was mangled. :vartype unmangled: Request :ivar verb: The HTTP verb of the request (ie POST, GET) :vartype verb: String :ivar version: The HTTP version of the request (ie HTTP/1.1) :vartype version: String :ivar tags: Tags associated with the request :vartype tags: List of Strings :ivar plugin_data: Data about the request created by plugins. If you modify this, please add your own key to it for your plugin and store all your plugin's data under that key (probably as another dict). For example if you have a plugin called ``foo``, try and store all your data under ``req.plugin_data['foo']``. :vartype plugin_data: Dict :ivar path_type: An enum which describes how the path portion of the request should be represented. ``PATH_RELATIVE`` -> normal relative path, ``PATH_ABSOLUTE`` -> The absolute path (including the protocol), ``PATH_HOST`` -> Just the path and the port (Used for CONNECT requests when connecting to an upstream HTTP proxy). :vartype path_type: Enum :ivar explicit_port: A flag to indicate that the port should always be included in the URL """ cache = RequestCache(100) """ The request cache that stores requests in memory for performance """ def __init__(self, full_request=None, update_content_length=True, port=None, is_ssl=None, host=None, path_type=None, proxy_creds=None, explicit_port=False): # Resets instance variables self.clear() # Called after instance vars since some callbacks depend on # instance vars HTTPMessage.__init__(self, full_request, update_content_length) # metadata that is unique to a specific Request instance self.metadata_unique_keys = ('reqid',) # After message init so that other instance vars are initialized self._set_dict_callbacks() # Set values from init if is_ssl: self.is_ssl = True if port: self.port = port if host: self._host = host if path_type: self.path_type = path_type if explicit_port: self.explicit_port = explicit_port self.is_websocket = False self.websocket_messages = [] def __copy__(self): if not self.complete: import pdb; pdb.set_trace() raise PappyException("Cannot copy incomplete http messages") retreq = self.__class__(self.full_message) retreq.set_metadata(self.get_metadata()) retreq.time_start = self.time_start retreq.time_end = self.time_end retreq.reqid = None if self.response: retreq.response = self.response.copy() if self.unmangled: retreq.unmangled = self.unmangled.copy() retreq.set_websocket_messages([m.duplicate() for m in self.websocket_messages]) return retreq def duplicate(self): retreq = self.copy() retreq.reqid = self.reqid if self.unmangled: retreq.unmangled = self.unmangled.duplicate() if self.response: retreq.response = self.response.duplicate() return retreq def duplicate(self): retreq = self.copy() retreq.reqid = self.reqid if self.unmangled: retreq.unmangled = self.unmangled.duplicate() if self.response: retreq.response = self.response.duplicate() return retreq @property def rsptime(self): """ The response time of the request :getter: Returns the response time of the request :type: datetime.timedelta """ if self.time_start and self.time_end: return self.time_end-self.time_start else: return None @property def start_line(self): """ The status line of the request. ie `GET / HTTP/1.1` :getter: Returns the status line of the request :setter: Sets the status line of the request :type: string """ if not self.verb and not self.full_path and not self.version: return '' if self.path_type == PATH_ABSOLUTE: path = self._url_helper(always_have_path=True) elif self.path_type == PATH_HOST: path = ':'.join((self.host, str(self.port))) else: path = self.full_path return '%s %s %s' % (self.verb, path, self.version) @start_line.setter def start_line(self, val): self.handle_start_line(val) @property def status_line(self): """ Alias for `pappyproxy.http.Request.start_line`. :getter: Returns the status line of the request :setter: Sets the status line of the request :type: string """ return self.start_line @status_line.setter def status_line(self, val): self.start_line = val @property def full_path(self): """ The full path of the request including URL params and fragment. ie `/path/to/stuff?foo=bar&baz=something#somewhere` :getter: Returns the full path of the request :type: string """ path = self.path if self.url_params: path += '?' pairs = [] for pair in self.url_params.all_pairs(): if pair[1] is None: pairs.append(pair[0]) else: pairs.append('='.join(pair)) path += '&'.join(pairs) if self.fragment: path += '#' path += self.fragment return path @property def raw_headers(self): """ Alias for Request.headers_section :getter: Returns the raw text of the headers including the extra newline at the end. :type: string """ return self.headers_section @property def full_request(self): """ Alias for Request.full_message :getter: Returns the full text of the request :type: string """ return self.full_message @property def raw_data(self): """ Alias for Request.body :getter: Returns the data portion of the request :setter: Set the data of the request and update metadata :type: string """ return self.body @raw_data.setter def raw_data(self, val): self.body = val @property def connect_request(self): """ If the request uses SSL, this will be a request object that can be used with an upstream HTTP server to connect to a server using SSL """ if not self.is_ssl: return None ret = Request() ret.status_line = self.status_line ret.host = self.host ret.port = self.port ret.explicit_port = True ret.path_type = PATH_HOST authu, authp = self.proxy_creds ret.verb = 'CONNECT' if authu and authp: ret.proxy_creds = self.proxy_creds return ret @property def proxy_creds(self): """ A username/password tuple representing the username/password to authenticate to a proxy server. Sets the ``Proxy-Authorization`` header. Getter will return (None, None) if no creds exist :getter: Returns the username/password tuple used for proxy authorization :setter: Sets the username/password tuple used for proxy authorization :type: Tuple of two strings: (username, password) """ if not 'Proxy-Authorization' in self.headers: return (None, None) return Request._parse_basic_auth(self.headers['Proxy-Authorization']) @proxy_creds.setter def proxy_creds(self, creds): username, password = creds self.headers['Proxy-Authorization'] = Request._encode_basic_auth(username, password) @staticmethod def _parse_basic_auth(header): """ Parse a raw basic auth header and return (username, password) """ _, creds = header.split(' ', 1) decoded = base64.b64decode(creds) username, password = decoded.split(':', 1) return (username, password) @staticmethod def _encode_basic_auth(username, password): decoded = '%s:%s' % (username, password) encoded = base64.b64encode(decoded) header = 'Basic %s' % encoded return header def _url_helper(self, colored=False, always_have_path=False): retstr = '' if self.is_ssl: retstr += 'https://' else: if colored: retstr += Colors.RED retstr += 'http' retstr += Colors.ENDC retstr += '://' else: retstr += 'http://' if colored: retstr += host_color(self.host) retstr += self.host retstr += Colors.ENDC else: retstr += self.host if not ((self.is_ssl and self.port == 443) or \ (not self.is_ssl and self.port == 80) or \ self.explicit_port): if colored: retstr += ':' retstr += Colors.MAGENTA retstr += str(self.port) retstr += Colors.ENDC else: retstr += ':%d' % self.port if (self.path and self.path != '/') or always_have_path: if colored: retstr += path_formatter(self.path) else: retstr += self.path if self.url_params: retstr += '?' pairs = [] for p in self.url_params.all_pairs(): if p[1] is None: pairs.append(p) else: pairs.append('='.join(p)) retstr += '&'.join(pairs) if self.fragment: retstr += '#%s' % self.fragment return retstr @property def url(self): """ The full url of the request including url params, protocol, etc. ie `https://www.google.com`, `http://foo.fakewebsite.com:1234/path?a=b`. When setting the URL, the port, is_ssl, path, url params, host, etc are all automatically updated. :getter: Returns the url of the request :setter: Sets the url of the request and updates metadata :type: string """ return self._url_helper(False) @property def url_color(self): """ same as .url, except colored. Used for printing URLs to the terminal. :getter: Returns the url of the request :type: string """ return self._url_helper(True) @url.setter def url(self, val): self._handle_statusline_uri(val) @property def host(self): """ The host of the request. ie `www.google.com`. :getter: Returns the host of the request :setter: Changes the host of the request and updates the Host header :type: string """ return self._host @host.setter def host(self, val): self._host = val self.headers.update('Host', val, do_callback=False) @property def is_ssl(self): """ Whether the request is sent over SSL :getter: Returns if the request is sent over SSL :setter: Sets if the request is sent over SSL :type: Bool """ return self._is_ssl @is_ssl.setter def is_ssl(self, val): if val: self._is_ssl = True if self.port == 80: self.port = 443 else: self._is_ssl = False if self.port == 443: self.port = 80 @property def saved(self): """ If the request is saved in the data file :getter: Returns True if the request is saved in the data file :type: Bool """ if self.reqid is None: return False try: _ = int(self.reqid) return True except (ValueError, TypeError): return False @property def path_tuple(self): """ The path in tuple form starting with the host. For example, path_parts for a request to http://www.example.com/foo/bar.php would be:: ('www.example.com', 'foo', 'bar.php') :getter: Returns the path in tuple form :type: Tuple """ # the first element is blank because the path always starts with / ret = [self.host] + self.path.split('/')[1:] if ret[-1] == '': ret = ret[:-1] return tuple(ret) @property def sort_time(self): """ If the request has a submit time, returns the submit time's unix timestamp. Returns 0 otherwise """ if self.time_start: return time.mktime(self.time_start.timetuple()) else: return 0 ########### ## Metadata def get_metadata(self, include_unique=True): data = {} if self.port is not None: data['port'] = self.port data['is_ssl'] = self.is_ssl data['host'] = self.host data['reqid'] = self.reqid if self.response: data['response_id'] = self.response.rspid data['tags'] = list(self.tags) if not include_unique: for k in self.metadata_unique_keys: if k in data: del data[k] return data def set_metadata(self, data): if 'reqid' in data: self.reqid = data['reqid'] if 'is_ssl' in data: self.is_ssl = data['is_ssl'] if 'host' in data: self._host = data['host'] if 'port' in data: self.port = data['port'] if 'tags' in data: self.tags = set(data['tags']) def reset_metadata(self): self.port = 80 self.is_ssl = False self.reqid = None self._host = '' self.tags = set() def get_plugin_dict(self, name): """ Get the data dictionary for the given plugin name. """ if not name in self.plugin_data: self.plugin_data[name] = {} return self.plugin_data[name] def clear(self): HTTPMessage.clear(self) self.time_end = None self.time_start = None self.cookies = RepeatableDict() self.fragment = None self.url_params = RepeatableDict() self._is_ssl = False self.path = '' self.post_params = RepeatableDict() self.response = None self.submitted = False self.unmangled = None self.verb = '' self.version = '' self.plugin_data = {} self.reset_metadata() self.is_unmangled_version = False self.path_type = PATH_RELATIVE self.explicit_port = False ############################ ## Internal update functions def _set_dict_callbacks(self): # Add callbacks to dicts def f1(): obj = weakref.proxy(self) obj.update_from_headers() def f2(): obj = weakref.proxy(self) obj._update_from_objects() self.headers.set_modify_callback(f1) self.cookies.set_modify_callback(f2) self.post_params.set_modify_callback(f2) def update_from_body(self): # Updates metadata that's based off of data HTTPMessage.update_from_body(self) if 'content-type' in self.headers: if 'application/x-www-form-urlencoded' in self.headers['content-type']: self.post_params = repeatable_parse_qs(self.body) self._set_dict_callbacks() def _update_from_objects(self): # Updates text values that depend on objects. # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION if self.cookies: assignments = [] for ck, cv in self.cookies.all_pairs(): asn = '%s=%s' % (ck, cv) assignments.append(asn) header_val = '; '.join(assignments) self.headers.update('Cookie', header_val, do_callback=False) elif 'Cookie' in self.headers: del self.headers['Cookie'] if self.post_params: pairs = [] for k, v in self.post_params.all_pairs(): pairs.append('%s=%s' % (k, v)) self.headers['Content-Type'] = 'application/x-www-form-urlencoded' self.body = '&'.join(pairs) def update_from_headers(self): # Updates metadata that depends on header/status line values self.cookies = RepeatableDict() self._set_dict_callbacks() for k, v in self.headers.all_pairs(): self.handle_header(k, v) def _sort_websocket_messages(self): """ Sort websocket message list by their submission time """ def sort_key(mes): if not mes.time_sent: return datetime.datetime(1970,1,1) return mes.time_sent self.websocket_messages = sorted(self.websocket_messages, key=sort_key) def add_websocket_message(self, mes): # Right now just insert and sort. We'll do a better structure later mes.parent_request = self self.websocket_messages.append(mes) self._sort_websocket_messages() def set_websocket_messages(self, msgs): self.websocket_messages = msgs self._sort_websocket_messages() ############### ## Data parsing def _process_host(self, hostline): # Get address and port # Returns true if port was explicitly stated # Used only for processing host header port_given = False if ':' in hostline: self._host, self.port = hostline.split(':') self.port = int(self.port) if self.port == 443: self._is_ssl = True port_given = True else: self._host = hostline if not self.port: self.port = 80 self._host.strip() return port_given def _handle_statusline_uri(self, uri): if not re.match('(?:^.+)://', uri): uri = '//' + uri parsed_path = urlparse.urlparse(uri) netloc = parsed_path.netloc port_given = False if netloc: port_given = self._process_host(netloc) if re.match('^https://', uri) or self.port == 443: self._is_ssl = True if not port_given: self.port = 443 if re.match('^http://', uri): self._is_ssl = False if not self.port: if self.is_ssl: self.port = 443 else: self.port = 80 reqpath = parsed_path.path if parsed_path.path: self.path = parsed_path.path else: self.path = '/' if parsed_path.query: reqpath += '?' reqpath += parsed_path.query self.url_params = repeatable_parse_qs(parsed_path.query) if parsed_path.fragment: reqpath += '#' reqpath += parsed_path.fragment self.fragment = parsed_path.fragment def handle_start_line(self, start_line): #HTTPMessage.handle_start_line(self, start_line) if start_line == '': self.verb = '' self.path = '' self.version = '' return parts = start_line.split() uri = None if len(parts) == 3: self.verb, uri, self.version = parts elif len(parts) == 2: self.verb, self.version = parts else: raise Exception("Unexpected format of first line of request") # Get path using urlparse if uri is not None: self._handle_statusline_uri(uri) def handle_header(self, key, val): # We may have duplicate headers if val is None: return True keep = HTTPMessage.handle_header(self, key, val) if not keep: return False stripped = False if key.lower() == 'cookie': # We still want the raw key/val for the cookies header # because it's still a header cookie_strs = val.split('; ') # The only whitespace that matters is the space right after the # semicolon. If actual implementations mess this up, we could # probably strip whitespace around the key/value for cookie_str in cookie_strs: if '=' in cookie_str: splitted = cookie_str.split('=',1) assert(len(splitted) == 2) (cookie_key, cookie_val) = splitted else: cookie_key = cookie_str cookie_val = '' # we want to parse duplicate cookies self.cookies.append(cookie_key, cookie_val, do_callback=False) elif key.lower() == 'host': self._process_host(val) elif re.match('^proxy.*', key.lower()): stripped = True return (not stripped) def body_complete(self): HTTPMessage.body_complete(self) if 'content-type' in self.headers: if self.headers['content-type'] == 'application/x-www-form-urlencoded': self.post_params = repeatable_parse_qs(self.body) self._set_dict_callbacks() ####################### ## Data store functions def save_in_mem(self, cust_cache=None): if cust_cache: use_cache = cust_cache else: use_cache = Request.cache if not self.reqid: use_cache.add(self) @defer.inlineCallbacks def async_save(self, cust_dbpool=None, cust_cache=None): """ async_save() Save/update the request in the data file. Returns a twisted deferred which fires when the save is complete. :rtype: twisted.internet.defer.Deferred """ from .pappy import main_context if not self.complete: raise PappyException('Cannot save incomplete messages') global dbpool if cust_dbpool: use_dbpool = cust_dbpool use_cache = cust_cache else: use_dbpool = dbpool use_cache = Request.cache assert(use_dbpool) if not self.reqid: self.reqid = '--' try: # Check for intyness _ = int(self.reqid) # If we have reqid, we're updating yield use_dbpool.runInteraction(self._update) assert(self.reqid is not None) yield use_dbpool.runInteraction(self._update_tags) except (ValueError, TypeError): # Either no id or in-memory yield use_dbpool.runInteraction(self._insert) assert(self.reqid is not None) yield use_dbpool.runInteraction(self._update_tags) if use_cache: use_cache.add(self) main_context.cache_reset() @crochet.wait_for(timeout=180.0) @defer.inlineCallbacks def save(self): """ save() Save/update the request in the data file. Saves the request, its unmangled version, the response, and the unmanbled response. Cannot be called from inside an async function. """ yield self.async_deep_save() @defer.inlineCallbacks def async_deep_save(self): """ async_deep_save() Saves self, unmangled, response, unmangled response, and websocket messages. Returns a deferred which fires after everything has been saved. :rtype: twisted.internet.defer.Deferred """ if self.response: if self.response.unmangled: yield self.response.unmangled.async_save() yield self.response.async_save() if self.unmangled: yield self.unmangled.async_save() for m in self.websocket_messages: yield m.async_deep_save() yield self.async_save() def _update_tags(self, txn): # This should never be called on an unsaved or in-memory request txn.execute( """ DELETE FROM tagged WHERE reqid=?; """, (self.reqid,) ) tagids = [] tags_to_add = [] # Find ids that already exist for tag in self.tags: txn.execute( """ SELECT id, tag FROM tags WHERE tag=?; """, (tag,) ) result = txn.fetchall() if len(result) == 0: tags_to_add.append(tag) else: tagid = int(result[0][0]) tagids.append(tagid) # Add new tags for tag in tags_to_add: txn.execute( """ INSERT INTO tags (tag) VALUES (?); """, (tag,) ) tagids.append(int(txn.lastrowid)) # Tag our request for tagid in tagids: txn.execute( """ INSERT INTO tagged (reqid, tagid) VALUES (?, ?); """, (int(self.reqid), tagid) ) def _update(self, txn): # If we don't have an reqid, we're creating a new reuqest row setnames = ["full_request=?", "port=?"] queryargs = [self.full_request, self.port] if self.response: setnames.append('response_id=?') assert(self.response.rspid is not None) # should be saved first queryargs.append(self.response.rspid) if self.unmangled: setnames.append('unmangled_id=?') assert(self.unmangled.reqid is not None) # should be saved first queryargs.append(self.unmangled.reqid) if self.time_start: setnames.append('start_datetime=?') queryargs.append((self.time_start-datetime.datetime(1970,1,1)).total_seconds()) if self.time_end: setnames.append('end_datetime=?') queryargs.append((self.time_end-datetime.datetime(1970,1,1)).total_seconds()) setnames.append('is_ssl=?') if self.is_ssl: queryargs.append('1') else: queryargs.append('0') setnames.append('submitted=?') if self.submitted: queryargs.append('1') else: queryargs.append('0') setnames.append('host=?') if self.host: queryargs.append(self.host) else: queryargs.append('') setnames.append('plugin_data=?') if self.plugin_data: queryargs.append(json.dumps(self.plugin_data)) else: queryargs.append('{}') queryargs.append(self.reqid) txn.execute( """ UPDATE requests SET %s WHERE id=?; """ % ','.join(setnames), tuple(queryargs) ) def _insert(self, txn): # If we don't have an reqid, we're creating a new reuqest row colnames = ["full_request", "port"] colvals = [self.full_request, self.port] if self.response and self.response.rspid: colnames.append('response_id') colvals.append(self.response.rspid) if self.unmangled and self.unmangled.reqid: colnames.append('unmangled_id') colvals.append(self.unmangled.reqid) if self.time_start: colnames.append('start_datetime') colvals.append((self.time_start-datetime.datetime(1970,1,1)).total_seconds()) if self.time_end: colnames.append('end_datetime') colvals.append((self.time_end-datetime.datetime(1970,1,1)).total_seconds()) colnames.append('submitted') if self.submitted: colvals.append('1') else: colvals.append('0') colnames.append('is_ssl') if self.is_ssl: colvals.append('1') else: colvals.append('0') colnames.append('host') if self.host: colvals.append(self.host) else: colvals.append('') colnames.append('plugin_data') if self.plugin_data: colvals.append(json.dumps(self.plugin_data)) else: colvals.append('{}') txn.execute( """ INSERT INTO requests (%s) VALUES (%s); """ % (','.join(colnames), ','.join(['?']*len(colvals))), tuple(colvals) ) self.reqid = str(txn.lastrowid) assert txn.lastrowid is not None assert self.reqid is not None @defer.inlineCallbacks def delete(self, cust_dbpool=None, cust_cache=None): from .context import reset_context_caches global dbpool if cust_dbpool: use_dbpool = cust_dbpool use_cache = cust_cache else: use_dbpool = dbpool use_cache = Request.cache if self.reqid is None: raise PappyException("Cannot delete request with id=None") if use_cache: use_cache.evict(self.reqid) Request.cache.all_ids.remove(self.reqid) if self.reqid in Request.cache.ordered_ids: Request.cache.ordered_ids.remove(self.reqid) if self.reqid in Request.cache.req_times: del Request.cache.req_times[self.reqid] if self.reqid in Request.cache.inmem_reqs: Request.cache.inmem_reqs.remove(self.reqid) if self.reqid in Request.cache.unmangled_ids: Request.cache.unmangled_ids.remove(self.reqid) reset_context_caches() if self.reqid[0] != 'm': yield use_dbpool.runQuery( """ DELETE FROM requests WHERE id=?; """, (self.reqid,) ) yield use_dbpool.runQuery( """ DELETE FROM tagged WHERE reqid=?; """, (self.reqid,) ) self.reqid = None @defer.inlineCallbacks def deep_delete(self): """ deep_delete() Delete a request, its unmangled version, its response, and its response's unmangled version from history. Also removes the request from all contexts. Returns a Twisted deferred. :rtype: Deferred """ if self.unmangled: yield self.unmangled.delete() if self.response: if self.response.unmangled: yield self.response.unmangled.delete() yield self.response.delete() yield self.delete() @staticmethod def _gen_sql_row(tablename=None): template = "{pre}full_request, {pre}response_id, {pre}id, {pre}unmangled_id, {pre}start_datetime, {pre}end_datetime, {pre}port, {pre}is_ssl, {pre}host, {pre}plugin_data" if tablename: return template.format(pre=('%s.'%tablename)) else: return template.format(pre='') @staticmethod @defer.inlineCallbacks def _load_request_ws_messages(req): ws_messages = yield Request._async_load_ws_by_parent(req.reqid) req.set_websocket_messages(ws_messages) @staticmethod @defer.inlineCallbacks def _from_sql_row(row, cust_dbpool=None, cust_cache=None): from .http import Request global dbpool if cust_dbpool: use_dbpool = cust_dbpool else: use_dbpool = dbpool req = Request(row[0]) if row[1]: rsp = yield Response.load_response(str(row[1]), cust_dbpool=cust_dbpool, cust_cache=cust_cache) req.response = rsp if row[3]: unmangled_req = yield Request.load_request(str(row[3]), cust_dbpool=cust_dbpool, cust_cache=cust_cache) req.unmangled = unmangled_req req.unmangled.is_unmangled_version = True if row[4]: req.time_start = datetime.datetime.utcfromtimestamp(row[4]) if row[5]: req.time_end = datetime.datetime.utcfromtimestamp(row[5]) if row[6] is not None: req.port = int(row[6]) if row[7] == 1: req._is_ssl = True if row[8]: req._host = row[8] if row[9]: req.plugin_data = json.loads(row[9]) req.reqid = str(row[2]) # tags rows = yield use_dbpool.runQuery( """ SELECT tg.tag FROM tagged tgd, tags tg WHERE tgd.tagid=tg.id AND tgd.reqid=?; """, (req.reqid,) ) req.tags = set() for row in rows: req.tags.add(row[0]) yield Request._load_request_ws_messages(req) defer.returnValue(req) @staticmethod @defer.inlineCallbacks def load_requests_by_time(first, num, cust_dbpool=None, cust_cache=None): """ load_requests_by_time() Load all the requests in the data file and return them in a list. Returns a deferred which calls back with the list of requests when complete. :rtype: twisted.internet.defer.Deferred """ from .http import Request global dbpool if cust_dbpool: use_dbpool = cust_dbpool use_cache = cust_cache else: use_dbpool = dbpool use_cache = Request.cache if use_cache: starttime = use_cache.req_times[first] rows = yield use_dbpool.runQuery( """ SELECT %s FROM requests WHERE start_datetime<=? ORDER BY start_datetime desc LIMIT ?; """ % Request._gen_sql_row(), (starttime, num) ) else: rows = yield use_dbpool.runQuery( """ SELECT %s FROM requests r1, requests r2 WHERE r2.id=? AND r1.start_datetime<=r2.start_datetime ORDER BY start_datetime desc LIMIT ?; """ % Request._gen_sql_row('r1'), (first, num) ) reqs = [] for row in rows: req = yield Request._from_sql_row(row, cust_dbpool=cust_dbpool, cust_cache=cust_cache) reqs.append(req) defer.returnValue(reqs) @staticmethod @defer.inlineCallbacks def load_requests_by_tag(tag, cust_dbpool=None, cust_cache=None): """ load_requests_by_tag(tag) Load all the requests in the data file with a given tag and return them in a list. Returns a deferred which calls back with the list of requests when complete. :rtype: twisted.internet.defer.Deferred """ from .http import Request global dbpool if cust_dbpool: use_dbpool = cust_dbpool else: use_dbpool = dbpool # tags rows = yield use_dbpool.runQuery( """ SELECT tgd.reqid FROM tagged tgd, tags tg WHERE tgd.tagid=tg.id AND tg.tag=?; """, (tag,) ) reqs = [] for row in rows: req = Request.load_request(row[0], cust_dbpool=cust_dbpool, cust_cache=cust_cache) reqs.append(req) defer.returnValue(reqs) @staticmethod @defer.inlineCallbacks def load_request(to_load, allow_special=True, use_cache=True, cust_dbpool=None, cust_cache=None): """ load_request(to_load) Load a request with the given request id and return it. Returns a deferred which calls back with the request when complete. :param allow_special: Whether to allow special IDs such as ``u##`` or ``s##`` :type allow_special: bool :param use_cache: Whether to use the cache. If set to false, it will always query the data file to get the request :type use_cache: bool :rtype: twisted.internet.defer.Deferred """ global dbpool if cust_dbpool: use_dbpool = cust_dbpool cache_to_use = cust_cache else: use_dbpool = dbpool cache_to_use = Request.cache if not use_dbpool: raise PappyException('No database connection to load from') if to_load == '--': raise PappyException('Invalid request ID. Wait for it to save first.') if not allow_special: try: int(to_load) except (ValueError, TypeError): raise PappyException('Cannot load special id %s' % to_load) ret_unmangled = False rsp_unmangled = False if to_load[0] == 'u': ret_unmangled = True loadid = to_load[1:] elif to_load[0] == 's': rsp_unmangled = True loadid = to_load[1:] else: loadid = to_load def retreq(r): if ret_unmangled: if not r.unmangled: raise PappyException("Request %s was not mangled"%r.reqid) return r.unmangled.duplicate() if rsp_unmangled: if not r.response: raise PappyException("Request %s does not have a response" % r.reqid) if not r.response.unmangled: raise PappyException("Response to request %s was not mangled" % r.reqid) retreq = r.duplicate() retreq.response = retreq.response.unmangled return retreq else: return r.duplicate() # Get it through the cache if use_cache and cache_to_use: # If it's not cached, load_request will be called again and be told # not to use the cache. r = yield cache_to_use.get(loadid) defer.returnValue(retreq(r)) # Load it from the data file rows = yield use_dbpool.runQuery( """ SELECT %s FROM requests WHERE id=?; """ % Request._gen_sql_row(), (loadid,) ) if len(rows) != 1: raise PappyException("Request with id %s does not exist" % loadid) req = yield Request._from_sql_row(rows[0], cust_dbpool=cust_dbpool, cust_cache=cust_cache) assert req.reqid == loadid if cache_to_use: cache_to_use.add(req) defer.returnValue(retreq(req)) @staticmethod @defer.inlineCallbacks def _async_load_ws(msgid, cust_dbpool=None): global dbpool if cust_dbpool: use_dbpool = cust_dbpool else: use_dbpool = dbpool assert(use_dbpool) rows = yield use_dbpool.runQuery( """ SELECT id,parent_request,unmangled_id,is_binary,direction,time_sent,contents FROM websocket_messages WHERE id=?; """, (int(msgid),) ) if len(rows) == 0: raise PappyException("No websocket message with id=%s" % msgid) assert(len(rows) == 1) row = rows[0] m = WebSocketMessage() m.msgid = row[0] #if load_parent: # m.parent_request = yield Request.load_request(str(row[1])) if row[2]: m.unmangled = yield Request._async_load_ws(str(row[2])) if row[3] >= 0: m.is_binary = True else: m.is_binary = False if row[4] == 0: m.direction = 'OUTGOING' else: m.direction = 'INCOMING' if row[5] is not None: m.time_sent = datetime.datetime.utcfromtimestamp(row[5]) m.contents = row[6] defer.returnValue(m) @staticmethod @defer.inlineCallbacks def _async_load_ws_by_parent(reqid, cust_dbpool=None): global dbpool if cust_dbpool: use_dbpool = cust_dbpool else: use_dbpool = dbpool assert(use_dbpool) rows = yield use_dbpool.runQuery( """ SELECT id FROM websocket_messages WHERE parent_request=?; """, (int(reqid),) ) messages = [] # Yes we could do it in one query but we'll deal with it if it causes # performance issues for row in rows: m = yield Request._async_load_ws(row[0]) messages.append(m) defer.returnValue(messages) ###################### ## Submitting Requests @staticmethod @defer.inlineCallbacks def submit_raw_request(message, host, port, is_ssl, mangle=False, int_macros=[]): """ Submits a request to the target host and returns a request object """ from pappyproxy.plugin import active_intercepting_macros use_int_macros = int_macros if mangle: use_int_macros = active_intercepting_macros() proxy_protocol = HTTPProtocolProxy(addr=(host, port, is_ssl), int_macros=use_int_macros, save_requests=False) d = proxy_protocol.get_next_response() proxy_protocol.client_data_received(message) retreq = yield d proxy_protocol.close_connections() defer.returnValue(retreq) @defer.inlineCallbacks def async_submit(self, mangle=False): """ async_submit() Same as :func:`~pappyproxy.http.Request.submit` but generates deferreds. Submits the request using its host, port, etc. and updates its response value to the resulting response. :param mangle: Whether to pass the request through active intercepting macros. :type mangle: Bool :rtype: Twisted deferred """ from pappyproxy.plugin import active_intercepting_macros newreq = yield Request.submit_raw_request(self.full_message, self.host, self.port, self.is_ssl, mangle=mangle) self.unmangled = newreq.unmangled self.response = newreq.response self.time_start = newreq.time_start self.time_end = newreq.time_end self.set_metadata(newreq.get_metadata()) @crochet.wait_for(timeout=180.0) @defer.inlineCallbacks def submit(self, *args, **kwargs): """ submit() Submits the request using its host, port, etc. and updates its response value to the resulting response. Cannot be called in async functions. If an error is encountered while submitting the request, it is printed to the console. This is what you should use to submit your requests in macros. """ try: yield self.async_submit(*args, **kwargs) except Exception as e: print 'Submitting request to %s failed: %s' % (self.host, str(e)) class Response(HTTPMessage): """ :ivar cookies: Cookies set by the response :vartype cookies: RepeatableDict of ResponseCookie objects :ivar response_code: The response code of the response :vartype response_code: Integer :ivar response_text: The text associated with the response code (ie OK, NOT FOUND, etc) :vartype response_text: String :ivar rspid: If the response is saved in the data file, the id of the response :vartype rspid: String :ivar unmangled: If the response was mangled, the unmangled version of the response :vartype unmangled: Response :ivar version: The version part of the status line (ie HTTP/1.1) :vartype version: String """ def __init__(self, full_response=None, update_content_length=True): # Resets instance variables self.clear() # Called after instance vars since some callbacks depend on # instance vars HTTPMessage.__init__(self, full_response, update_content_length) # metadata that is unique to a specific Response instance self.metadata_unique_keys = ('rspid',) # After message init so that other instance vars are initialized self._set_dict_callbacks() def __copy__(self): if not self.complete: raise PappyException("Cannot copy incomplete http messages") retrsp = self.__class__(self.full_message) retrsp.set_metadata(self.get_metadata()) retrsp.rspid = None if self.unmangled: retrsp.unmangled = self.unmangled.copy() return retrsp def duplicate(self): retrsp = self.copy() retrsp.rspid = self.rspid if self.unmangled: retrsp.unmangled = self.unmangled.duplicate() return retrsp @property def raw_headers(self): """ Alias for Response.headers_section :getter: Returns the raw text of the headers including the extra newline at the end. :type: string """ return self.headers_section @property def start_line(self): """ The status line of the response. ie `HTTP/1.1 200 OK` :getter: Returns the status line of the response :setter: Sets the status line of the response :type: string """ if not self.version and self.response_code == 0 and not self.version: return '' if self.response_text == '': return '%s %d' % (self.version, self.response_code) else: return '%s %d %s' % (self.version, self.response_code, self.response_text) @start_line.setter def start_line(self, val): self.handle_start_line(val) @property def status_line(self): return self.start_line @status_line.setter def status_line(self, val): self.start_line = val @property def raw_data(self): """ Alias for Response.body :getter: Returns the data portion of the response :setter: Set the data of the response and update metadata :type: string """ return self.body @raw_data.setter def raw_data(self, val): self.body = val @property def full_response(self): """ The full text of the response including the headers and data. Alias for Response.full_message :getter: Returns the full text of the response :type: string """ return self.full_message @property def soup(self): """ Returns a beautifulsoup4 object for parsing the html of the response :getter: Returns a BeautifulSoup object representing the html of the response """ return bs4.BeautifulSoup(self.body, 'lxml') ########### ## Metadata def get_metadata(self, include_unique=True): data = {} data['rspid'] = self.rspid if not include_unique: for k in self.metadata_unique_keys: if k in data: del data[k] return data def set_metadata(self, data): if 'rspid' in data: self.rspid = data['rspid'] def reset_metadata(self): self.rspid = None def clear(self): HTTPMessage.clear(self) self.cookies = RepeatableDict() self.response_code = 0 self.response_text = '' self.rspid = None self.unmangled = None self.version = '' ############################ ## Internal update functions def _set_dict_callbacks(self): # Add callbacks to dicts def f1(): obj = weakref.proxy(self) obj.update_from_headers() def f2(): obj = weakref.proxy(self) obj._update_from_objects() self.headers.set_modify_callback(f1) self.cookies.set_modify_callback(f2) def update_from_body(self): HTTPMessage.update_from_body(self) def _update_from_objects(self): # Updates headers from objects # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION # Cookies new_headers = RepeatableDict() cookies_added = False for pair in self.headers.all_pairs(): if pair[0].lower() == 'set-cookie': # If we haven't added our cookies, add them all. Otherwise # strip the header (do nothing) if not cookies_added: # Add all our cookies here for k, c in self.cookies.all_pairs(): new_headers.append('Set-Cookie', c.cookie_str) cookies_added = True else: new_headers.append(pair[0], pair[1]) if not cookies_added: # Add all our cookies to the end for k, c in self.cookies.all_pairs(): new_headers.append('Set-Cookie', c.cookie_str) self.headers = new_headers self._set_dict_callbacks() def update_from_headers(self): self.cookies = RepeatableDict() self._set_dict_callbacks() for k, v in self.headers.all_pairs(): if k.lower() == 'set-cookie': # Parse the cookie cookie = ResponseCookie(v) self.cookies.append(cookie.key, cookie, do_callback=False) ############### ## Data parsing def handle_start_line(self, start_line): if start_line == '': self.response_code = 0 self.version = '' self.response_text = '' return self._first_line = False if len(start_line.split(' ')) > 2: self.version, self.response_code, self.response_text = \ start_line.split(' ', 2) else: self.version, self.response_code = start_line.split(' ', 1) self.response_text = '' self.response_code = int(self.response_code) if self.response_code == 304 or self.response_code == 204 or \ self.response_code/100 == 1: self._end_after_headers = True def handle_header(self, key, val): if val is None: return True keep = HTTPMessage.handle_header(self, key, val) if not keep: return False stripped = False if key.lower() == 'set-cookie': cookie = ResponseCookie(val) self.cookies.append(cookie.key, cookie, do_callback=False) if stripped: return False else: return True #################### ## Cookie management def add_cookie(self, cookie): """ Add a :class:`pappyproxy.http.ResponseCookie` to the response. .. warning:: This will add duplicate cookies. If you want to add a cookie you're not sure exists, use :func:`~pappyproxy.http.Response.set_cookie` """ self.cookies.append(cookie.key, cookie) def set_cookie(self, cookie): """ Set a cookie in the response. ``cookie`` must be a :class:`pappyproxy.http.ResponseCookie` """ self.cookies[cookie.key] = cookie def set_cookie_kv(self, key, val): """ Set a cookie by key and value. Will not have path, secure, etc set at all. """ cookie = ResponseCookie() cookie.key = key cookie.val = val self.cookies[cookie.key] = cookie def delete_cookie(self, key): """ Delete a cookie from the response by its key """ del self.cookies[key] ####################### ## Database interaction @defer.inlineCallbacks def async_save(self, cust_dbpool=None, cust_cache=None): """ async_save() Save/update the just request in the data file. Returns a twisted deferred which fires when the save is complete. It is suggested that you use :func: `~pappyproxy.http.Request.async_deep_save` instead to save responses. :rtype: twisted.internet.defer.Deferred """ if not self.complete: raise PappyException('Cannot save incomplete messages') global dbpool if cust_dbpool: use_dbpool = cust_dbpool else: use_dbpool = dbpool assert(use_dbpool) try: # Check for intyness _ = int(self.rspid) # If we have rspid, we're updating yield use_dbpool.runInteraction(self._update) except (ValueError, TypeError): yield use_dbpool.runInteraction(self._insert) assert(self.rspid is not None) # Right now responses without requests are unviewable # @crochet.wait_for(timeout=180.0) # @defer.inlineCallbacks # def save(self): # yield self.save() def _update(self, txn): setnames = ["full_response=?"] queryargs = [self.full_response] if self.unmangled: setnames.append('unmangled_id=?') assert(self.unmangled.rspid is not None) # should be saved first queryargs.append(self.unmangled.rspid) queryargs.append(self.rspid) txn.execute( """ UPDATE responses SET %s WHERE id=?; """ % ','.join(setnames), tuple(queryargs) ) assert(self.rspid is not None) def _insert(self, txn): # If we don't have an rspid, we're creating a new one colnames = ["full_response"] colvals = [self.full_response] if self.unmangled is not None: colnames.append('unmangled_id') assert(self.unmangled.rspid is not None) # should be saved first colvals.append(self.unmangled.rspid) txn.execute( """ INSERT INTO responses (%s) VALUES (%s); """ % (','.join(colnames), ','.join(['?']*len(colvals))), tuple(colvals) ) self.rspid = str(txn.lastrowid) assert(self.rspid is not None) @defer.inlineCallbacks def delete(self): if self.rspid is not None: yield dbpool.runQuery( """ DELETE FROM responses WHERE id=?; """, (self.rspid,) ) self.rspid = None @staticmethod def is_ws_upgrade(rsp): if rsp.response_code != 101: return False if 'Upgrade' not in rsp.headers: return False if 'Connection' not in rsp.headers: return False if rsp.headers['Upgrade'].lower() != 'websocket': return False if rsp.headers['Connection'].lower() != 'upgrade': return False return True @staticmethod @defer.inlineCallbacks def load_response(respid, cust_dbpool=None, cust_cache=None): """ Load a response from its response id. Returns a deferred. I don't suggest you use this. :rtype: twisted.internet.defer.Deferred """ global dbpool if cust_dbpool: use_dbpool = cust_dbpool else: use_dbpool = dbpool assert(use_dbpool) rows = yield use_dbpool.runQuery( """ SELECT full_response, id, unmangled_id FROM responses WHERE id=?; """, (respid,) ) if len(rows) != 1: raise PappyException("Response with request id %s does not exist" % respid) full_response = rows[0][0] resp = Response(full_response) resp.rspid = str(rows[0][1]) if rows[0][2]: unmangled_response = yield Response.load_response(int(rows[0][2]), cust_dbpool=cust_dbpool, cust_cache=cust_cache) resp.unmangled = unmangled_response defer.returnValue(resp) class HTTPProtocolProxy(ProtocolProxy): CLIENT_STATE_CLOSED = 1 CLIENT_STATE_HTTP_READY = 2 CLINET_STATE_WEBSDOCKET_READY = 3 CLIENT_STATE_WEBSOCKET = 4 SERVER_STATE_HTTP_READY = 1 SERVER_STATE_PROCESSING_HTTP_REQUEST = 2 SERVER_STATE_HTTP_AWAITING_RESPONSE = 3 SERVER_STATE_HTTP_AWAITING_PROXY_CONNECT = 4 SERVER_STATE_WEBSOCKET = 5 BLANK_RESPONSE = ('HTTP/1.1 200 OK\r\n' 'Connection: close\r\n' 'Cache-control: no-cache\r\n' 'Pragma: no-cache\r\n' 'Cache-control: no-store\r\n' 'Content-Length: 0\r\n' 'X-Frame-Options: DENY\r\n\r\n') def __init__(self, *args, **kwargs): self.request_queue = Queue.Queue() self.request = Request() self.response = Response() self.stream_responses = True self.next_rsp_def = None self.ws_proxy = None if 'int_macros' in kwargs: self.custom_macros = True self.mangle_macros = kwargs['int_macros'] else: self.custom_macros = False self.mangle_macros = [] if 'save_requests' in kwargs: self.save_requests = kwargs['save_requests'] else: self.save_requests = True if 'addr' in kwargs: self.connect_addr = kwargs['addr'] self.force_addr = True else: self.connect_addr = None self.force_addr = False self.using_upstream_http_proxy = False self.was_streaming_responses = self.stream_responses self.upstream_http_connected = False self.client_state = HTTPProtocolProxy.CLIENT_STATE_CLOSED self.server_state = HTTPProtocolProxy.SERVER_STATE_HTTP_READY ProtocolProxy.__init__(self) def _set_client_state(self, state): # Updates the state and sets other metadata states # (like is_websocket) self.client_state = state self.log("Changing client state to %s" % self.client_state) def _set_server_state(self, state): # Updates the state and sets other metadata states # (like is_websocket) self.server_state = state self.log("Changing server state to %s" % self.server_state) def get_next_response(self): """ Returns a deferred that fires with the next completed request/response pair """ self.next_rsp_def = defer.Deferred() return self.next_rsp_def def set_connection(self, host, port, use_ssl, use_socks=False): """ If not connected to the given host, port, use_ssl, disconnects the current connection and connects to the given endpoint. Otherwise does nothing """ self.log("setting connection...") if self.conn_host != host or \ self.conn_port != port or \ self.conn_is_ssl != use_ssl: self.log("Closing connection to old server") self.close_server_connection() self.connect(host, port, use_ssl, use_socks=use_socks) def server_connection_lost(self, reason): self.log("Connection to server lost: %s" % str(reason)) self.log(str(reason.getTraceback())) ProtocolProxy.server_connection_lost(self, reason) def client_data_received(self, data): self.log("cp> %s" % short_data(data)) if self.client_state != HTTPProtocolProxy.CLIENT_STATE_WEBSOCKET: self.request.add_data(data) if self.request.complete: req = self.request self.request = Request() self.request_complete(req) else: self.ws_proxy.client_data_received(data) def server_data_received(self, data): self.log("rp< %s" % short_data(data)) if self.client_state != HTTPProtocolProxy.CLIENT_STATE_WEBSOCKET: self.response.add_data(data) if self.stream_responses: self.send_client_data(data) if self.response.complete: rsp = self.response self.response = Response() self.response_complete(rsp) else: self.ws_proxy.server_data_received(data) @defer.inlineCallbacks def process_request(self, pop_request=True): from pappyproxy import context, macros, pappy if self.server_state != HTTPProtocolProxy.SERVER_STATE_HTTP_READY: self.log("Not ready to process a new HTTP request") return self._set_server_state(HTTPProtocolProxy.SERVER_STATE_PROCESSING_HTTP_REQUEST) if pop_request and self.request_queue.empty(): #defer.returnValue(None) self.log("Request queue is empty, not processing request") self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY) return if pop_request: self.waiting_req = self.request_queue.get() if self.waiting_req.host == 'pappy': self.log("Processing request to internal webserver") yield handle_webserver_request(self.waiting_req) self.send_response(self.waiting_req.response.full_message) self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY) self.process_request() return self.mangle_macros = [] self.stream_responses = True if self.waiting_req.verb.upper() == 'CONNECT': self.send_response('HTTP/1.1 200 Connection established\r\n\r\n') self.start_client_maybe_tls(cert_host=self.waiting_req.host) self.connect_addr = (self.waiting_req.host, self.waiting_req.port, self.waiting_req.is_ssl) #defer.returnValue(None) self.log("Client TLS setup, ready for next request") self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY) self.process_request() return ## Process/store the request sendreq = self.waiting_req upstream_http_addr = get_http_proxy_addr() if self.connect_addr is not None: sendreq.host = self.connect_addr[0] sendreq.port = self.connect_addr[1] sendreq.is_ssl = self.connect_addr[2] if context.in_scope(sendreq) or self.custom_macros: if not self.custom_macros: self.mangle_macros = self.client_protocol.factory.get_macro_list() sendreq.time_start = datetime.datetime.utcnow() if self.mangle_macros: self.stream_responses = False else: self.stream_responses = True if self.stream_responses and not self.mangle_macros: if self.save_requests: sendreq.async_deep_save() else: if self.save_requests: yield sendreq.async_deep_save() (mangreq, mangled) = yield macros.mangle_request(sendreq, self.mangle_macros) if mangreq is None: self.log("Request dropped. Closing connections.") sendreq.tags.add('dropped') sendreq.response = None self.dropped_request = True self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY) self.send_response(self.BLANK_RESPONSE) defer.returnValue(None) elif mangled: sendreq = mangreq self.log("Request in scope, saving") sendreq.time_start = datetime.datetime.utcnow() assert sendreq.complete if self.save_requests: yield sendreq.async_deep_save() self.waiting_req = sendreq else: self.log("Request out of scope, passing along unmangled") self.log("Making new connection to %s:%d is_ssl=%s" % (self.waiting_req.host, self.waiting_req.port, self.waiting_req.is_ssl)) if upstream_http_addr is not None: self.log("Using upstream http proxy") self.log(str(self.waiting_req)) self.set_connection(upstream_http_addr[0], upstream_http_addr[1], False, use_socks=True) self.using_upstream_http_proxy = True else: self.log("Not using upstream http proxy") self.set_connection(sendreq.host, sendreq.port, sendreq.is_ssl, use_socks=True) self.using_upstream_http_proxy = False if self.using_upstream_http_proxy and sendreq.is_ssl and not self.upstream_http_connected: self.was_streaming_responses = self.stream_responses self.stream_responses = False r = sendreq.connect_request apply_http_proxy_creds(r) self.send_server_data(r.full_message) self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_PROXY_CONNECT) return elif self.using_upstream_http_proxy and not sendreq.is_ssl: # We're already connected to the proxy r = sendreq.copy() r.path_type = PATH_ABSOLUTE apply_http_proxy_creds(r) self.send_server_data(r.full_message) self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_RESPONSE) return else: self.send_server_data(sendreq.full_message) self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_RESPONSE) return def request_complete(self, req): self.log("Request complete, adding to queue") self.request_queue.put(req) self.process_request() @defer.inlineCallbacks def response_complete(self, rsp): from pappyproxy import context, macros, http if self.server_state == HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_PROXY_CONNECT: self.log("Got upstream connection established response") self.stream_responses = self.was_streaming_responses if rsp.response_code == 407: print "Incorrect credentials for HTTP proxy. Please check your username and password." self.close_server_connection() return self.upstream_http_connected = True self.start_server_tls() self.log("Remote proxy TLS setup, ready for next request") self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY) self.process_request(pop_request=False) # waiting_req is already the request we want to send return assert self.server_state == HTTPProtocolProxy.SERVER_STATE_HTTP_AWAITING_RESPONSE self.waiting_req.response = self.response sendreq = self.waiting_req sendreq.response = rsp self.waiting_req = None if context.in_scope(sendreq) or self.custom_macros: self.log("Response in scope, saving") sendreq.time_end = datetime.datetime.utcnow() assert sendreq.complete if self.stream_responses and not self.mangle_macros: if self.save_requests: sendreq.async_deep_save() else: if self.save_requests: yield sendreq.async_deep_save() if self.mangle_macros: if sendreq.response: mangled = yield macros.mangle_response(sendreq, self.mangle_macros) if mangled is None: self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY) self.send_response(self.BLANK_RESPONSE) defer.returnValue(None) if mangled and self.save_requests: yield sendreq.async_deep_save() else: self.log("Out of scope, not touching response") self.log("Checking if we need to write response to transport") if self.next_rsp_def is not None: self.next_rsp_def.callback(sendreq) if not self.stream_responses: self.log("Wasn't streaming, writing response") self.send_response(sendreq.response.full_message) else: self.log("Already streamed response, not writing to transport, ending response") self.end_response() if Response.is_ws_upgrade(rsp): self.log("Upgrading to websocket connection") self.server_state = HTTPProtocolProxy.SERVER_STATE_WEBSOCKET self.client_state = HTTPProtocolProxy.CLIENT_STATE_WEBSOCKET self.ws_proxy = WebSocketProxy(self.client_transport, self.server_transport, rsp, sendreq, self) self.ws_proxy.message_callback = self.ws_message_complete self.request.is_websocket = True self.log("Upgraded to websocket") else: self.log("Processing of response complete, ready for next request") self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY) self.process_request() def ws_message_complete(self, wsobj): """ Handles objects from both client and server. The object itself has a direction attribute that says which direction it went. """ self.request.websocket_messages.append(wsobj) # Probably just going to wait for the connection to close # def ws_close(self): # self.log("Websocket connection closed. Processing next request.") # self._set_server_state(HTTPProtocolProxy.SERVER_STATE_HTTP_READY) # self.process_request() def send_response(self, data): """ Sends a response back to the client and processes the next request """ self.send_client_data(data) self.end_response() def end_response(self): """ Signals that all response data has been sent and the object is ready to process the next request """ self.process_request() def submit_websocket_frame(self, data): raise PappyException("Websocket stream is not active") def abort_connection(self): pass def client_connection_made(self, protocol): self._set_client_state(HTTPProtocolProxy.CLIENT_STATE_HTTP_READY) ProtocolProxy.client_connection_made(self, protocol)