|
|
|
import base64
|
|
|
|
import collections
|
|
|
|
import crochet
|
|
|
|
import datetime
|
|
|
|
import gzip
|
|
|
|
import json
|
|
|
|
import pappyproxy
|
|
|
|
import re
|
|
|
|
import StringIO
|
|
|
|
import urlparse
|
|
|
|
import zlib
|
|
|
|
from twisted.internet import defer, reactor
|
|
|
|
from pappyproxy.util import PappyException
|
|
|
|
import bs4
|
|
|
|
|
|
|
|
# Body encoding schemes understood by _decode_encoded (used when a response
# carries a Content-Encoding header).
ENCODE_NONE = 0     # body is used as-is
ENCODE_DEFLATE = 1  # raw DEFLATE stream (zlib with no header, wbits=-15)
ENCODE_GZIP = 2     # gzip-wrapped stream

# Module-level SQLite ConnectionPool. Set once via init() and shared by the
# storage helpers in this module; destruct() closes it.
dbpool = None
|
|
|
|
|
|
|
|
def init(pool):
    """
    Initialize the http module.

    :param pool: The ConnectionPool to use to store the request/response objects
    :type pool: SQLite ConnectionPool
    """
    global dbpool
    # First call wins; subsequent calls keep the existing pool.
    dbpool = pool if dbpool is None else dbpool
    assert dbpool
|
|
|
|
|
|
|
|
def destruct():
    """Close the module's connection pool. init() must have been called first."""
    assert dbpool
    dbpool.close()
|
|
|
|
|
|
|
|
def _decode_encoded(data, encoding):
    """
    Decode a message body according to its encoding constant.

    ENCODE_NONE returns the data untouched, ENCODE_DEFLATE inflates a raw
    deflate stream, and anything else is treated as gzip.
    """
    if encoding == ENCODE_NONE:
        return data
    if encoding == ENCODE_DEFLATE:
        # Raw deflate stream: negative wbits means "no zlib header"
        return zlib.decompress(data, -15)
    # Fall through: gzip-wrapped data
    return gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data)).read()
|
|
|
|
|
|
|
|
def _strip_leading_newlines(string):
|
|
|
|
while (len(string) > 1 and string[0:2] == '\r\n') or \
|
|
|
|
(len(string) > 0 and string[0] == '\n'):
|
|
|
|
if len(string) > 1 and string[0:2] == '\r\n':
|
|
|
|
string = string[2:]
|
|
|
|
elif len(string) > 0 and string[0] == '\n':
|
|
|
|
string = string[1:]
|
|
|
|
return string
|
|
|
|
|
|
|
|
def _consume_line(instr):
|
|
|
|
# returns (line, rest)
|
|
|
|
l = []
|
|
|
|
pos = 0
|
|
|
|
while pos < len(instr):
|
|
|
|
if instr[pos] == '\n':
|
|
|
|
if l and l[-1] == '\r':
|
|
|
|
l = l[:-1]
|
|
|
|
return (''.join(l), instr[pos+1:])
|
|
|
|
l.append(instr[pos])
|
|
|
|
pos += 1
|
|
|
|
return instr
|
|
|
|
|
|
|
|
###################
|
|
|
|
## Functions to use
|
|
|
|
|
|
|
|
def get_request(url='', url_params=None):
    """
    get_request(url='', url_params=None)

    Create a request object that makes a GET request to the given url with the
    given url params.

    :param url: The URL to request
    :type url: string
    :param url_params: Optional dict of URL (GET) parameters
    :type url_params: dict or None
    :rtype: Request
    """
    # BUGFIX: url_params previously defaulted to a shared mutable {}; None
    # avoids the mutable-default pitfall and behaves identically for callers.
    r = Request()
    r.status_line = 'GET / HTTP/1.1'
    r.url = url
    r.headers['Host'] = r.host
    if url_params:
        r.url_params.from_dict(url_params)
    return r
|
|
|
|
|
|
|
|
def post_request(url, post_params=None, url_params=None):
    """
    post_request(url, post_params=None, url_params=None)

    Create a request object that makes a POST request to the given url with the
    given post and url params.

    :param url: The URL to request
    :type url: string
    :param post_params: Optional dict of POST (body) parameters
    :type post_params: dict or None
    :param url_params: Optional dict of URL (GET) parameters
    :type url_params: dict or None
    :rtype: Request
    """
    # BUGFIX: both param dicts previously defaulted to shared mutable {};
    # None avoids the mutable-default pitfall with identical behavior.
    r = Request()
    r.status_line = 'POST / HTTP/1.1'
    r.url = url
    r.headers['Host'] = r.host
    if url_params:
        r.url_params.from_dict(url_params)
    if post_params:
        r.post_params.from_dict(post_params)
    return r
|
|
|
|
|
|
|
|
def repeatable_parse_qs(s):
    """
    Parse a query string into a RepeatableDict, preserving order and
    duplicate keys. A key with no '=' is stored with a value of None.
    """
    result = RepeatableDict()
    for piece in s.split('&'):
        if '=' in piece:
            key, val = piece.split('=', 1)
            result.append(key, val)
        else:
            result.append(piece, None)
    return result
|
|
|
|
|
|
|
|
##########
|
|
|
|
## Classes
|
|
|
|
|
|
|
|
class RepeatableDict:
    """
    A dict that retains the order of items inserted and keeps track of
    duplicate values. Can optionally treat keys as case insensitive.
    Custom made for the proxy, so it has strange features
    """

    def __init__(self, from_pairs=None, case_insensitive=False):
        # If efficiency becomes a problem, add a dict that keeps a list by key
        # and use that for getting data. But until then, this stays.
        self._pairs = []    # ordered list of (key, value) tuples
        self._keys = set()  # set of "effective" keys for O(1) membership
        self._modify_callback = None
        self.case_insensitive = case_insensitive

        if from_pairs:
            for k, v in from_pairs:
                self.append(k, v)

    def _ef_key(self, key):
        # "effective key", returns key.lower() if we're case insensitive,
        # otherwise it returns the same key
        if self.case_insensitive:
            return key.lower()
        return key

    def _mod_callback(self):
        # Calls the modify callback if we have one
        if self._modify_callback:
            self._modify_callback()

    def __contains__(self, val):
        return self._ef_key(val) in self._keys

    def __getitem__(self, key):
        # Returns the value of the *last* matching pair
        for p in reversed(self._pairs):
            if self._ef_key(p[0]) == self._ef_key(key):
                return p[1]
        raise KeyError

    def __setitem__(self, key, val):
        # Replaces first instance of `key` and deletes the rest
        self.set_val(key, val)

    def __delitem__(self, key):
        # BUGFIX: remove the *effective* key from the key set. The old code
        # removed the raw key, which raised KeyError for case-insensitive
        # dicts whenever the deletion key's case differed from the stored one.
        self._remove_key(key)
        self._pairs = [p for p in self._pairs if self._ef_key(p[0]) != self._ef_key(key)]
        self._mod_callback()

    def __nonzero__(self):
        if self._pairs:
            return True
        else:
            return False

    # Python 3 truthiness alias; harmless under Python 2
    __bool__ = __nonzero__

    def _add_key(self, key):
        self._keys.add(self._ef_key(key))

    def _remove_key(self, key):
        self._keys.remove(self._ef_key(key))

    def all_pairs(self):
        # Return a copy so callers can't mutate our internal list
        return self._pairs[:]

    def append(self, key, val, do_callback=True):
        # Add a duplicate entry for key
        self._add_key(key)
        self._pairs.append((key, val))
        if do_callback:
            self._mod_callback()

    def set_val(self, key, val, do_callback=True):
        # Replace the first matching pair and drop any other duplicates
        new_pairs = []
        added = False
        self._add_key(key)
        for p in self._pairs:
            if self._ef_key(p[0]) == self._ef_key(key):
                if not added:
                    # only add the first instance
                    new_pairs.append((key, val))
                    added = True
            else:
                new_pairs.append(p)
        if not added:
            new_pairs.append((key, val))
        self._pairs = new_pairs

        if do_callback:
            self._mod_callback()

    def update(self, key, val, do_callback=True):
        # If key is already in the dict, replace that value with the new value
        if key in self:
            for k, v in self.all_pairs():
                if self._ef_key(k) == self._ef_key(key):
                    self.set_val(k, val, do_callback=do_callback)
                    break
        else:
            self.set_val(key, val, do_callback=do_callback)

    def clear(self, do_callback=True):
        # BUGFIX: also reset the key set. Previously stale keys were kept,
        # so `k in d` kept returning True after clear().
        self._pairs = []
        self._keys = set()
        if do_callback:
            self._mod_callback()

    def all_vals(self, key):
        return [p[1] for p in self._pairs if self._ef_key(p[0]) == self._ef_key(key)]

    def add_pairs(self, pairs, do_callback=True):
        for pair in pairs:
            self._add_key(pair[0])
        self._pairs += pairs
        if do_callback:
            self._mod_callback()

    def from_dict(self, d):
        # BUGFIX: rebuild the key set to match the new pairs. Previously the
        # new keys were never registered, so membership tests failed.
        self._pairs = list(d.items())
        self._keys = set(self._ef_key(p[0]) for p in self._pairs)
        self._mod_callback()

    def sort(self):
        # Sorts pairs by key alphabetically
        # BUGFIX: the old code referenced an undefined local `pairs` and
        # raised NameError; sort self._pairs in place instead.
        self._pairs = sorted(self._pairs, key=lambda x: x[0])

    def set_modify_callback(self, callback):
        # Add a function to be called whenever an element is added, changed, or
        # deleted. Set to None to remove
        self._modify_callback = callback
|
|
|
|
|
|
|
|
|
|
|
|
class LengthData:
    """
    Accumulates a message body whose size is known up front (e.g. from a
    Content-Length header). ``complete`` becomes True once ``length`` bytes
    have been collected in ``raw_data``; extra bytes are discarded.
    """

    def __init__(self, length=None):
        self.raw_data = ''
        self.complete = False
        self.length = length or 0

        # A zero-length body is complete before any data arrives
        if self.length == 0:
            self.complete = True

    def add_data(self, data):
        """Append *data* to the body, truncating at the expected length."""
        if self.complete:
            raise PappyException("Data already complete!")
        needed = self.length - len(self.raw_data)
        if len(data) < needed:
            self.raw_data += data
        else:
            self.raw_data += data[:needed]
            assert(len(self.raw_data) == self.length)
            self.complete = True
|
|
|
|
|
|
|
|
class ChunkedData:
    """
    Incremental decoder for HTTP chunked transfer encoding.

    Feed raw bytes in with add_data(); once ``complete`` is True the decoded
    body is available in ``raw_data`` (and ``unchunked_data``). Implemented
    as a character-by-character state machine so data may arrive in
    arbitrarily sized pieces.
    """

    def __init__(self):
        self.raw_data = ''
        self._pos = 0
        self._state = 0 # 0=reading length, 1=reading data, 2=going over known string
        self._len_str = ''
        self._chunk_remaining = 0
        self._known_str = ''
        self._known_str_pos = 0
        self._next_state = 0
        self._raw_data = ''
        self.complete = False
        self.unchunked_data = ''

    def add_data(self, data):
        # Buffer the new bytes and advance the state machine over them
        self._raw_data += data
        self.scan_forward()

    def scan_forward(self):
        """Consume buffered bytes until they run out or the body completes."""
        # Don't add more data if we're already done
        if self.complete:
            return

        while self._pos < len(self._raw_data):
            curchar = self._raw_data[self._pos]
            if self._state == 0:
                # State 0: reading the hex chunk-length line
                if curchar.lower() in '0123456789abcdef':
                    # Read the next char of the length
                    self._len_str += curchar

                    # Move to the next char
                    self._pos += 1
                elif curchar == '\r':
                    # Save how much chunk to read
                    self._chunk_remaining = int(self._len_str, 16)

                    # If the length is 0, chunked encoding is done!
                    if self._chunk_remaining == 0:
                        self.complete = True
                        # I should probably just rename raw_data since it's what
                        # you use to look at unchunked data, but you're not
                        # supposed to look at it until after it's complete
                        # anyways
                        self._raw_data = self.unchunked_data
                        self.raw_data = self._raw_data # Expose raw_data
                        return

                    # There should be a newline after the \r
                    self._known_str = '\n'
                    self._state = 2
                    self._next_state = 1

                    # Reset the length str
                    self._len_str = ''

                    # Move to the next char
                    self._pos += 1
                else:
                    raise Exception("Malformed chunked encoding!")

            elif self._state == 1:
                # State 1: reading chunk payload bytes
                if self._chunk_remaining > 0:
                    # Read next byte of data
                    self.unchunked_data += curchar
                    self._chunk_remaining -= 1
                    self._pos += 1
                else:
                    # Read newline then read a new chunk
                    self._known_str = '\r\n'
                    self._next_state = 0 # Read len after newlines
                    self._state = 2 # Read newlines
                    # Don't move to the next char because we didn't do anything
            elif self._state == 2:
                # State 2: matching an expected literal (the CR/LF separators)
                # Read a char of an expected string

                # If the expected char doesn't match, throw an error
                if self._known_str[self._known_str_pos] != curchar:
                    raise Exception("Unexpected data")

                # Move to the next char in the raw data and in our known string
                self._known_str_pos += 1
                self._pos += 1

                # If we've reached the end of the known string, go to the next state
                if self._known_str_pos == len(self._known_str):
                    self._known_str_pos = 0
                    self._state = self._next_state
|
|
|
|
|
|
|
|
class ResponseCookie(object):
    """
    A cookie representing a cookie set by a response
    """

    def __init__(self, set_cookie_string=None):
        self.key = None
        self.val = None
        self.expires = None
        self.max_age = None
        self.domain = None
        self.path = None
        self.secure = False
        self.http_only = False

        if set_cookie_string:
            self._from_cookie(set_cookie_string)

    @property
    def cookie_str(self):
        """
        Returns the full string of the cookie. ie ``foo=bar; secure; path=/``

        :getter: Returns the full string of the cookie.
        :setter: Set the metadata from a cookie string. ie from a ``Set-Cookie`` header
        """
        av = '%s=%s' % (self.key, self.val)
        to_add = [av]
        if self.expires:
            to_add.append('expires=%s'%self.expires)
        if self.max_age:
            to_add.append('Max-Age=%d'%self.max_age)
        if self.domain:
            to_add.append('Domain=%s'%self.domain)
        if self.path:
            to_add.append('Path=%s'%self.path)
        if self.secure:
            to_add.append('secure')
        if self.http_only:
            to_add.append('httponly')
        return '; '.join(to_add)

    @cookie_str.setter
    def cookie_str(self, val):
        self._from_cookie(val)

    def _parse_cookie_av(self, cookie_av):
        # Parse a single attribute-value piece of a Set-Cookie header
        # (expires, max-age, domain, path, secure, httponly)
        if '=' in cookie_av:
            key, val = cookie_av.split('=', 1)
            key = key.lstrip()
            if key.lower() == 'expires':
                self.expires = val
            if key.lower() == 'max-age':
                self.max_age = int(val)
            if key.lower() == 'domain':
                self.domain = val
            if key.lower() == 'path':
                self.path = val
        elif cookie_av.lstrip().lower() == 'secure':
            self.secure = True
        elif cookie_av.lstrip().lower() == 'httponly':
            self.http_only = True

    def _from_cookie(self, set_cookie_string):
        """Reset all fields, then parse them from *set_cookie_string*."""
        self.key = None
        self.val = None
        self.expires = None
        self.max_age = None
        self.domain = None
        self.path = None
        self.secure = False
        self.http_only = False
        if ';' in set_cookie_string:
            cookie_pair, rest = set_cookie_string.split(';', 1)
            if '=' in cookie_pair:
                self.key, self.val = cookie_pair.split('=',1)
            elif cookie_pair == '' or re.match(r'\s+', cookie_pair):
                self.key = ''
                self.val = ''
            else:
                self.key = cookie_pair
                self.val = ''
            cookie_avs = rest.split(';')
            for cookie_av in cookie_avs:
                # BUGFIX: str.lstrip() returns a new string; the old code
                # discarded the result, making the call a no-op. Pass the
                # stripped value to the parser instead.
                self._parse_cookie_av(cookie_av.lstrip())
        else:
            # BUGFIX: a bare cookie with no '=' (e.g. "Set-Cookie: flag")
            # used to raise ValueError from the unpacked split
            if '=' in set_cookie_string:
                self.key, self.val = set_cookie_string.split('=',1)
            else:
                self.key = set_cookie_string
                self.val = ''
|
|
|
|
|
|
|
|
|
|
|
|
class Request(object):
|
|
|
|
"""
|
|
|
|
:ivar time_end: The datetime that the request ended.
|
|
|
|
:vartype time_end: datetime.datetime
|
|
|
|
:ivar time_start: The datetime that the request was made
|
|
|
|
:vartype time_start: datetime.datetime
|
|
|
|
:ivar complete: When creating the request with :func:`~pappyproxy.http.Request.add_line`
|
|
|
|
and :func:`~pappyproxy.http.Request.add_data`, returns whether
|
|
|
|
the request is complete.
|
|
|
|
:vartype complete: Bool
|
|
|
|
:ivar cookies: Cookies sent with the request
|
|
|
|
:vartype cookies: RepeatableDict
|
|
|
|
:ivar fragment: The fragment part of the url (The part that comes after the #)
|
|
|
|
:vartype fragment: String
|
|
|
|
:ivar url_params: The url parameters of the request (aka the get parameters)
|
|
|
|
:vartype url_params: RepeatableDict
|
|
|
|
:ivar headers: The headers of the request
|
|
|
|
:vartype headers: RepeatableDict
|
|
|
|
:ivar headers_complete: When creating the request with
|
|
|
|
:func:`~pappyproxy.http.Request.add_line` and
|
|
|
|
:func:`~pappyproxy.http.Request.add_data`, returns whether the headers
|
|
|
|
are complete
|
|
|
|
:vartype headers_complete: Bool
|
|
|
|
:ivar path: The path of the request
|
|
|
|
:vartype path: String
|
|
|
|
:ivar port: The port that the request was sent to (or will be sent to)
|
|
|
|
:vartype port: Integer
|
|
|
|
:ivar post_params: The post parameters of the request
|
|
|
|
:vartype post_params: RepeatableDict
|
|
|
|
:ivar reqid: The request id of the request
|
|
|
|
:vartype reqid: String
|
|
|
|
:ivar response: The associated response of this request
|
|
|
|
:vartype response: Response
|
|
|
|
:ivar submitted: Whether the request has been submitted
|
|
|
|
:vartype submitted: Bool
|
|
|
|
:ivar unmangled: If the request was mangled, the version of the request
|
|
|
|
before it was mangled.
|
|
|
|
:vartype unmangled: Request
|
|
|
|
:ivar verb: The HTTP verb of the request (ie POST, GET)
|
|
|
|
:vartype verb: String
|
|
|
|
:ivar version: The HTTP version of the request (ie HTTP/1.1)
|
|
|
|
:vartype version: String
|
|
|
|
:ivar tags: Tags associated with the request
|
|
|
|
:vartype tags: List of Strings
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
    def __init__(self, full_request=None, update_content_length=True,
                 port=None, is_ssl=None):
        """
        Constructor.

        :param full_request: Raw request text to parse this object from
        :type full_request: string or None
        :param update_content_length: If True, the Content-Length header is
            recalculated from the parsed body rather than trusted as-is
        :type update_content_length: Bool
        :param port: Explicit destination port (overrides any parsed value)
        :type port: Integer or None
        :param is_ssl: If truthy, mark the request as using SSL
        :type is_ssl: Bool or None
        """
        # Public metadata; see the class docstring for descriptions
        self.time_end = None
        self.time_start = None
        self.complete = False
        self.cookies = RepeatableDict()
        self.fragment = None
        self.url_params = RepeatableDict()
        self.headers = RepeatableDict(case_insensitive=True)
        self.headers_complete = False
        self._host = None
        self._is_ssl = False
        self.path = ''
        self.port = None
        self.post_params = RepeatableDict()
        self._raw_data = ''
        self.reqid = None
        self.response = None
        self.submitted = False
        self.unmangled = None
        self.verb = ''
        self.version = ''
        self.tags = []

        # Internal parser state consumed by add_line()/add_data()
        self._first_line = True
        self._data_length = 0
        self._partial_data = ''

        # Wire up callbacks so editing the dicts keeps derived text in sync
        self._set_dict_callbacks()

        # Set values from init
        if is_ssl:
            self.is_ssl = True
        if port:
            self.port = port

        # Get values from the raw request
        if full_request is not None:
            self._from_full_request(full_request, update_content_length)
|
|
|
|
|
|
|
|
def __copy__(self):
|
|
|
|
if not self.complete:
|
|
|
|
raise PappyException("Cannot copy incomplete requests")
|
|
|
|
newreq = Request(self.full_request)
|
|
|
|
newreq.is_ssl = self.is_ssl
|
|
|
|
newreq.port = self.port
|
|
|
|
newreq._host = self._host
|
|
|
|
newreq.time_start = self.time_start
|
|
|
|
newreq.time_end = self.time_end
|
|
|
|
if self.unmangled:
|
|
|
|
newreq.unmangled = self.unmangled.copy()
|
|
|
|
if self.response:
|
|
|
|
newreq.response = self.response.copy()
|
|
|
|
return newreq
|
|
|
|
|
|
|
|
def __eq__(self, other):
|
|
|
|
if self.full_request != other.full_request:
|
|
|
|
return False
|
|
|
|
if self.port != other.port:
|
|
|
|
return False
|
|
|
|
if self.is_ssl != other.is_ssl:
|
|
|
|
return False
|
|
|
|
if self._host != other._host:
|
|
|
|
return False
|
|
|
|
return True
|
|
|
|
|
|
|
|
    def copy(self):
        """
        Returns a copy of the request

        :rtype: Request
        """
        # Delegates to __copy__, which requires the request to be complete
        return self.__copy__()
|
|
|
|
|
|
|
|
@property
|
|
|
|
def rsptime(self):
|
|
|
|
"""
|
|
|
|
The response time of the request
|
|
|
|
|
|
|
|
:getter: Returns the response time of the request
|
|
|
|
:type: datetime.timedelta
|
|
|
|
"""
|
|
|
|
if self.time_start and self.time_end:
|
|
|
|
return self.time_end-self.time_start
|
|
|
|
else:
|
|
|
|
return None
|
|
|
|
|
|
|
|
@property
|
|
|
|
def status_line(self):
|
|
|
|
"""
|
|
|
|
The status line of the request. ie `GET / HTTP/1.1`
|
|
|
|
|
|
|
|
:getter: Returns the status line of the request
|
|
|
|
:setter: Sets the status line of the request
|
|
|
|
:type: string
|
|
|
|
"""
|
|
|
|
if not self.verb and not self.path and not self.version:
|
|
|
|
return ''
|
|
|
|
return '%s %s %s' % (self.verb, self.full_path, self.version)
|
|
|
|
|
|
|
|
    @status_line.setter
    def status_line(self, val):
        # Parsing is delegated to _handle_statusline, which updates verb,
        # version and (when a URI is present) path/host/port/ssl metadata
        self._handle_statusline(val)
|
|
|
|
|
|
|
|
@property
|
|
|
|
def full_path(self):
|
|
|
|
"""
|
|
|
|
The full path of the request including URL params and fragment.
|
|
|
|
ie `/path/to/stuff?foo=bar&baz=something#somewhere`
|
|
|
|
|
|
|
|
:getter: Returns the full path of the request
|
|
|
|
:type: string
|
|
|
|
"""
|
|
|
|
|
|
|
|
path = self.path
|
|
|
|
if self.url_params:
|
|
|
|
path += '?'
|
|
|
|
pairs = []
|
|
|
|
for pair in self.url_params.all_pairs():
|
|
|
|
if pair[1] is None:
|
|
|
|
pairs.append(pair[0])
|
|
|
|
else:
|
|
|
|
pairs.append('='.join(pair))
|
|
|
|
path += '&'.join(pairs)
|
|
|
|
if self.fragment:
|
|
|
|
path += '#'
|
|
|
|
path += self.fragment
|
|
|
|
return path
|
|
|
|
|
|
|
|
@property
|
|
|
|
def raw_headers(self):
|
|
|
|
"""
|
|
|
|
The raw text of the headers including the extra newline at the end.
|
|
|
|
|
|
|
|
:getter: Returns the raw text of the headers including the extra newline at the end.
|
|
|
|
:type: string
|
|
|
|
"""
|
|
|
|
ret = self.status_line + '\r\n'
|
|
|
|
for k, v in self.headers.all_pairs():
|
|
|
|
ret = ret + "%s: %s\r\n" % (k, v)
|
|
|
|
ret = ret + '\r\n'
|
|
|
|
return ret
|
|
|
|
|
|
|
|
@property
|
|
|
|
def full_request(self):
|
|
|
|
"""
|
|
|
|
The full text of the request including the headers and data.
|
|
|
|
|
|
|
|
:getter: Returns the full text of the request
|
|
|
|
:type: string
|
|
|
|
"""
|
|
|
|
if not self.status_line:
|
|
|
|
return ''
|
|
|
|
ret = self.raw_headers
|
|
|
|
ret = ret + self.raw_data
|
|
|
|
return ret
|
|
|
|
|
|
|
|
    @property
    def raw_data(self):
        """
        The data portion of the request

        :getter: Returns the data portion of the request
        :setter: Set the data of the request and update metadata
        :type: string
        """
        # Backed by _raw_data; use the setter so metadata stays in sync
        return self._raw_data
|
|
|
|
|
|
|
|
    @raw_data.setter
    def raw_data(self, val):
        # Assigning a body recomputes Content-Length (and form post params
        # for urlencoded bodies) and marks the request as complete
        self._raw_data = val
        self._update_from_data()
        self.complete = True
|
|
|
|
|
|
|
|
@property
|
|
|
|
def url(self):
|
|
|
|
"""
|
|
|
|
The full url of the request including url params, protocol, etc.
|
|
|
|
ie `https://www.google.com`, `http://foo.fakewebsite.com:1234/path?a=b`.
|
|
|
|
When setting the URL, the port, is_ssl, path, url params, host, etc are all
|
|
|
|
automatically updated.
|
|
|
|
|
|
|
|
:getter: Returns the url of the request
|
|
|
|
:setter: Sets the url of the request and updates metadata
|
|
|
|
:type: string
|
|
|
|
"""
|
|
|
|
if self.is_ssl:
|
|
|
|
retstr = 'https://'
|
|
|
|
else:
|
|
|
|
retstr = 'http://'
|
|
|
|
retstr += self.host
|
|
|
|
if not ((self.is_ssl and self.port == 443) or \
|
|
|
|
(not self.is_ssl and self.port == 80)):
|
|
|
|
retstr += ':%d' % self.port
|
|
|
|
if self.path and self.path != '/':
|
|
|
|
retstr += self.path
|
|
|
|
if self.url_params:
|
|
|
|
retstr += '?'
|
|
|
|
pairs = []
|
|
|
|
for p in self.url_params.all_pairs():
|
|
|
|
pairs.append('='.join(p))
|
|
|
|
retstr += '&'.join(pairs)
|
|
|
|
if self.fragment:
|
|
|
|
retstr += '#%s' % self.fragment
|
|
|
|
return retstr
|
|
|
|
|
|
|
|
    @url.setter
    def url(self, val):
        # Parsing is delegated to the status-line URI handler, which updates
        # host, port, is_ssl, path, url_params and fragment
        self._handle_statusline_uri(val)
|
|
|
|
|
|
|
|
    @property
    def host(self):
        """
        The host of the request. ie `www.google.com`.

        :getter: Returns the host of the request
        :setter: Changes the host of the request and updates the Host header
        :type: string
        """
        # Backed by _host; use the setter to keep the Host header in sync
        return self._host
|
|
|
|
|
|
|
|
    @host.setter
    def host(self, val):
        # Keep the Host header in sync with the new host value;
        # do_callback=False avoids re-triggering header parsing
        self._host = val
        self.headers.update('Host', val, do_callback=False)
|
|
|
|
|
|
|
|
    @property
    def is_ssl(self):
        """
        Whether the request is sent over SSL

        :getter: Returns if the request is sent over SSL
        :setter: Sets if the request is sent over SSL
        :type: Bool
        """
        # Backed by _is_ssl; the setter also swaps default ports
        return self._is_ssl
|
|
|
|
|
|
|
|
@is_ssl.setter
|
|
|
|
def is_ssl(self, val):
|
|
|
|
if val:
|
|
|
|
self._is_ssl = True
|
|
|
|
if self.port == 80:
|
|
|
|
self.port = 443
|
|
|
|
else:
|
|
|
|
self._is_ssl = False
|
|
|
|
if self.port == 443:
|
|
|
|
self.port = 80
|
|
|
|
|
|
|
|
@property
|
|
|
|
def saved(self):
|
|
|
|
"""
|
|
|
|
If the request is saved in the data file
|
|
|
|
|
|
|
|
:getter: Returns True if the request is saved in the data file
|
|
|
|
:type: Bool
|
|
|
|
"""
|
|
|
|
if self.reqid is None:
|
|
|
|
return False
|
|
|
|
try:
|
|
|
|
_ = int(self.reqid)
|
|
|
|
return True
|
|
|
|
except (ValueError, TypeError):
|
|
|
|
return False
|
|
|
|
|
|
|
|
@property
|
|
|
|
def path_tuple(self):
|
|
|
|
"""
|
|
|
|
The path in tuple form starting with the host. For example, path_parts for
|
|
|
|
a request to http://www.example.com/foo/bar.php would be::
|
|
|
|
|
|
|
|
('www.example.com', 'foo', 'bar.php')
|
|
|
|
|
|
|
|
:getter: Returns the path in tuple form
|
|
|
|
:type: Tuple
|
|
|
|
"""
|
|
|
|
# the first element is blank because the path always starts with /
|
|
|
|
ret = [self.host] + self.path.split('/')[1:]
|
|
|
|
if ret[-1] == '':
|
|
|
|
ret = ret[:-1]
|
|
|
|
return tuple(ret)
|
|
|
|
|
|
|
|
    def _from_full_request(self, full_request, update_content_length=False):
        """
        Parse a complete raw request string into this object.

        :param full_request: The raw request text (status line, headers, body)
        :type full_request: string
        :param update_content_length: If True, assign the body via the
            raw_data setter so Content-Length is recalculated; otherwise the
            body goes through add_data() and the declared length is trusted
        :type update_content_length: Bool
        """
        # Get rid of leading CRLF. Not in spec, should remove eventually
        # technically doesn't treat \r\n same as \n, but whatever.
        full_request = _strip_leading_newlines(full_request)
        if full_request == '':
            return

        # Feed the status line and headers through line by line
        remaining = full_request
        while remaining and not self.headers_complete:
            line, remaining = _consume_line(remaining)
            self.add_line(line)

        # Input ended before the blank line that terminates the headers
        if not self.headers_complete:
            self.add_line('')

        if not self.complete:
            if update_content_length:
                # raw_data setter recomputes Content-Length from the body
                self.raw_data = remaining
            else:
                self.add_data(remaining)
        assert(self.complete)
        self._handle_data_end()
|
|
|
|
|
|
|
|
############################
|
|
|
|
## Internal update functions
|
|
|
|
|
|
|
|
    def _set_dict_callbacks(self):
        # Add callbacks to dicts so that editing headers re-parses derived
        # metadata, and editing cookies/post params regenerates their text
        self.headers.set_modify_callback(self._update_from_text)
        self.cookies.set_modify_callback(self._update_from_objects)
        self.post_params.set_modify_callback(self._update_from_objects)
|
|
|
|
|
|
|
|
    def _update_from_data(self):
        # Updates metadata that's based off of data
        self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False)
        if 'content-type' in self.headers:
            if self.headers['content-type'] == 'application/x-www-form-urlencoded':
                # Re-parse the body as form params and re-attach callbacks,
                # since post_params was replaced with a new dict
                self.post_params = repeatable_parse_qs(self.raw_data)
                self._set_dict_callbacks()
|
|
|
|
|
|
|
|
    def _update_from_objects(self):
        # Updates text values that depend on objects.
        # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION
        if self.cookies:
            # Rebuild the Cookie header from the cookie dict
            assignments = []
            for ck, cv in self.cookies.all_pairs():
                asn = '%s=%s' % (ck, cv)
                assignments.append(asn)
            header_val = '; '.join(assignments)
            self.headers.update('Cookie', header_val, do_callback=False)
        if self.post_params:
            # Rebuild the urlencoded body from the post params
            pairs = []
            for k, v in self.post_params.all_pairs():
                pairs.append('%s=%s' % (k, v))
            self.raw_data = '&'.join(pairs)
|
|
|
|
|
|
|
|
    def _update_from_text(self):
        # Updates metadata that depends on header/status line values
        # Cookies are rebuilt from scratch by re-processing every header
        self.cookies = RepeatableDict()
        self._set_dict_callbacks()
        for k, v in self.headers.all_pairs():
            self._handle_header(k, v)
|
|
|
|
|
|
|
|
###############
|
|
|
|
## Data loading
|
|
|
|
|
|
|
|
    def add_line(self, line):
        """
        Used for building a request from a Twisted protocol.
        Add a line (for status line and headers). Lines must be added in order
        and the first line must be the status line. The line should not contain
        the trailing carriage return/newline. I do not suggest you use this for
        anything.

        :param line: The line to add
        :type line: string
        """

        if self._first_line and line == '':
            # Ignore leading newlines because fuck the spec
            return

        if self._first_line:
            self._handle_statusline(line)
            self._first_line = False
        else:
            # Either header or newline (end of headers)
            if line == '':
                self.headers_complete = True
                # No body declared -> the request is already complete
                if self._data_length == 0:
                    self.complete = True
            else:
                key, val = line.split(':', 1)
                val = val.strip()
                # _handle_header returns False for headers to strip out
                if self._handle_header(key, val):
                    self.headers.append(key, val, do_callback=False)
|
|
|
|
|
|
|
|
    def add_data(self, data):
        """
        Used for building a request from a Twisted protocol.
        Add data to the request.
        I do not suggest that you use this function ever.

        :param data: The data to add
        :type data: string
        """
        # Add data (headers must be complete)
        len_remaining = self._data_length - len(self._partial_data)
        if len(data) >= len_remaining:
            # Enough bytes to finish the declared body; extras are discarded
            self._partial_data += data[:len_remaining]
            self._raw_data = self._partial_data
            self.complete = True
            self._handle_data_end()
        else:
            self._partial_data += data
|
|
|
|
|
|
|
|
###############
|
|
|
|
## Data parsing
|
|
|
|
|
|
|
|
def _process_host(self, hostline):
|
|
|
|
# Get address and port
|
|
|
|
# Returns true if port was explicitly stated
|
|
|
|
port_given = False
|
|
|
|
if ':' in hostline:
|
|
|
|
self._host, self.port = hostline.split(':')
|
|
|
|
self.port = int(self.port)
|
|
|
|
if self.port == 443:
|
|
|
|
self._is_ssl = True
|
|
|
|
port_given = True
|
|
|
|
else:
|
|
|
|
self._host = hostline
|
|
|
|
if not self.port:
|
|
|
|
self.port = 80
|
|
|
|
self._host.strip()
|
|
|
|
return port_given
|
|
|
|
|
|
|
|
    def _handle_statusline_uri(self, uri):
        """
        Update host/port/ssl/path/url_params/fragment metadata from a request
        URI. Accepts absolute URIs (``http://host/path``) and bare paths.
        """
        # Prefix '//' so urlparse treats a scheme-less value as netloc+path
        if not re.match('(?:^.+)://', uri):
            uri = '//' + uri

        parsed_path = urlparse.urlparse(uri)
        netloc = parsed_path.netloc
        port_given = False
        if netloc:
            port_given = self._process_host(netloc)

        # The scheme (or an explicit :443) decides SSL; an explicitly given
        # port is never overridden
        if re.match('^https://', uri) or self.port == 443:
            self._is_ssl = True
            if not port_given:
                self.port = 443
        if re.match('^http://', uri):
            self._is_ssl = False

        # Fall back to the protocol default port
        if not self.port:
            if self.is_ssl:
                self.port = 443
            else:
                self.port = 80

        reqpath = parsed_path.path
        if parsed_path.path:
            self.path = parsed_path.path
        else:
            self.path = '/'
        if parsed_path.query:
            reqpath += '?'
            reqpath += parsed_path.query
            self.url_params = repeatable_parse_qs(parsed_path.query)
        if parsed_path.fragment:
            reqpath += '#'
            reqpath += parsed_path.fragment
            self.fragment = parsed_path.fragment
|
|
|
|
|
|
|
|
def _handle_statusline(self, status_line):
|
|
|
|
parts = status_line.split()
|
|
|
|
uri = None
|
|
|
|
if len(parts) == 3:
|
|
|
|
self.verb, uri, self.version = parts
|
|
|
|
elif len(parts) == 2:
|
|
|
|
self.verb, self.version = parts
|
|
|
|
else:
|
|
|
|
raise Exception("Unexpected format of first line of request")
|
|
|
|
|
|
|
|
# Get path using urlparse
|
|
|
|
if uri is not None:
|
|
|
|
self._handle_statusline_uri(uri)
|
|
|
|
|
|
|
|
    def _handle_header(self, key, val):
        """
        Update metadata derived from a single header (content length,
        cookies, host). Returns True if the header should be kept in
        self.headers, False if it should be stripped out.
        """
        # We may have duplicate headers
        stripped = False

        if key.lower() == 'content-length':
            self._data_length = int(val)
        elif key.lower() == 'cookie':
            # We still want the raw key/val for the cookies header
            # because it's still a header
            cookie_strs = val.split('; ')

            # The only whitespace that matters is the space right after the
            # semicolon. If actual implementations mess this up, we could
            # probably strip whitespace around the key/value
            for cookie_str in cookie_strs:
                if '=' in cookie_str:
                    splitted = cookie_str.split('=',1)
                    assert(len(splitted) == 2)
                    (cookie_key, cookie_val) = splitted
                else:
                    cookie_key = cookie_str
                    cookie_val = ''
                # we want to parse duplicate cookies
                self.cookies.append(cookie_key, cookie_val, do_callback=False)
        elif key.lower() == 'host':
            self._process_host(val)
        elif key.lower() == 'connection':
            #stripped = True
            pass

        return (not stripped)
|
|
|
|
|
|
|
|
def _handle_data_end(self):
|
|
|
|
if 'content-type' in self.headers:
|
|
|
|
if self.headers['content-type'] == 'application/x-www-form-urlencoded':
|
|
|
|
self.post_params = repeatable_parse_qs(self.raw_data)
|
|
|
|
self._set_dict_callbacks()
|
|
|
|
|
|
|
|
##############
|
|
|
|
## Serializing
|
|
|
|
|
|
|
|
def to_json(self):
|
|
|
|
"""
|
|
|
|
Return a JSON encoding of the request that can be used by
|
|
|
|
:func:`~pappyproxy.http.Request.from_json` to recreate the request.
|
|
|
|
The `full_request` portion is base64 encoded because json doesn't play
|
|
|
|
nice with binary blobs.
|
|
|
|
"""
|
|
|
|
# We base64 encode the full response because json doesn't paly nice with
|
|
|
|
# binary blobs
|
|
|
|
data = {
|
|
|
|
'full_request': base64.b64encode(self.full_request),
|
|
|
|
'reqid': self.reqid,
|
|
|
|
}
|
|
|
|
if self.response:
|
|
|
|
data['response_id'] = self.response.rspid
|
|
|
|
else:
|
|
|
|
data['response_id'] = None
|
|
|
|
|
|
|
|
if self.unmangled:
|
|
|
|
data['unmangled_id'] = self.unmangled.reqid
|
|
|
|
|
|
|
|
if self.time_start:
|
|
|
|
data['start'] = self.time_start.isoformat()
|
|
|
|
if self.time_end:
|
|
|
|
data['end'] = self.time_end.isoformat()
|
|
|
|
data['tags'] = self.tags
|
|
|
|
data['port'] = self.port
|
|
|
|
data['is_ssl'] = self.is_ssl
|
|
|
|
|
|
|
|
return json.dumps(data)
|
|
|
|
|
|
|
|
def from_json(self, json_string):
|
|
|
|
"""
|
|
|
|
Update the metadata of the request to match data from
|
|
|
|
:func:`~pappyproxy.http.Request.to_json`
|
|
|
|
|
|
|
|
:param json_string: The JSON data to use
|
|
|
|
:type json_string: JSON data in a string
|
|
|
|
"""
|
|
|
|
|
|
|
|
data = json.loads(json_string)
|
|
|
|
self._from_full_request(base64.b64decode(data['full_request']))
|
|
|
|
self.port = data['port']
|
|
|
|
self._is_ssl = data['is_ssl']
|
|
|
|
if 'tags' in data:
|
|
|
|
self.tags = data['tags']
|
|
|
|
else:
|
|
|
|
self.tags = []
|
|
|
|
self._update_from_text()
|
|
|
|
self._update_from_data()
|
|
|
|
if data['reqid']:
|
|
|
|
self.reqid = data['reqid']
|
|
|
|
|
|
|
|
#######################
|
|
|
|
## Data store functions
|
|
|
|
|
|
|
|
    @defer.inlineCallbacks
    def async_save(self):
        """
        async_save()
        Save/update the request in the data file. Returns a twisted deferred which
        fires when the save is complete.

        :rtype: twisted.internet.defer.Deferred
        """
        assert(dbpool)
        try:
            # Check for intyness; a non-numeric or missing reqid means the
            # request has never been written to the data file
            _ = int(self.reqid)

            # If we have reqid, we're updating
            yield dbpool.runInteraction(self._update)
            assert(self.reqid is not None)
            yield dbpool.runInteraction(self._update_tags)
            pappyproxy.context.add_request(self)
        except (ValueError, TypeError):
            # Either no id or in-memory
            # NOTE(review): this except also catches ValueError/TypeError
            # raised by the update path above, which would then fall through
            # to an insert — confirm that is intended.
            yield dbpool.runInteraction(self._insert)
            assert(self.reqid is not None)
            yield dbpool.runInteraction(self._update_tags)
            pappyproxy.context.add_request(self)
|
|
|
|
|
|
|
|
    @crochet.wait_for(timeout=180.0)
    @defer.inlineCallbacks
    def save(self):
        """
        save()
        Save/update the request in the data file.
        Saves the request, its unmangled version, the response, and the unmangled response.
        Cannot be called from inside an async function.
        """
        # crochet blocks the calling (non-reactor) thread until the deep
        # save's deferred fires, or times out after 180s
        yield self.async_deep_save()
|
|
|
|
|
|
|
|
    @defer.inlineCallbacks
    def async_deep_save(self):
        """
        async_deep_save()
        Saves self, unmangled, response, and unmangled response. Returns a deferred
        which fires after everything has been saved.

        :rtype: twisted.internet.defer.Deferred
        """
        # Dependencies are saved first so their row ids exist by the time
        # this request's row references them (see the asserts in
        # _insert/_update)
        if self.response:
            if self.response.unmangled:
                yield self.response.unmangled.async_save()
            yield self.response.async_save()
        if self.unmangled:
            yield self.unmangled.async_save()
        yield self.async_save()
|
|
|
|
|
|
|
|
def _update_tags(self, txn):
|
|
|
|
# This should never be called on an unsaved or in-memory request
|
|
|
|
txn.execute(
|
|
|
|
"""
|
|
|
|
DELETE FROM tagged WHERE reqid=?;
|
|
|
|
""",
|
|
|
|
(self.reqid,)
|
|
|
|
)
|
|
|
|
|
|
|
|
tagids = []
|
|
|
|
tags_to_add = []
|
|
|
|
# Find ids that already exist
|
|
|
|
for tag in self.tags:
|
|
|
|
txn.execute(
|
|
|
|
"""
|
|
|
|
SELECT id, tag FROM tags WHERE tag=?;
|
|
|
|
""",
|
|
|
|
(tag,)
|
|
|
|
)
|
|
|
|
result = txn.fetchall()
|
|
|
|
if len(result) == 0:
|
|
|
|
tags_to_add.append(tag)
|
|
|
|
else:
|
|
|
|
tagid = int(result[0][0])
|
|
|
|
tagids.append(tagid)
|
|
|
|
|
|
|
|
# Add new tags
|
|
|
|
for tag in tags_to_add:
|
|
|
|
txn.execute(
|
|
|
|
"""
|
|
|
|
INSERT INTO tags (tag) VALUES (?);
|
|
|
|
""",
|
|
|
|
(tag,)
|
|
|
|
)
|
|
|
|
tagids.append(int(txn.lastrowid))
|
|
|
|
|
|
|
|
# Tag our request
|
|
|
|
for tagid in tagids:
|
|
|
|
txn.execute(
|
|
|
|
"""
|
|
|
|
INSERT INTO tagged (reqid, tagid) VALUES (?, ?);
|
|
|
|
""",
|
|
|
|
(int(self.reqid), tagid)
|
|
|
|
)
|
|
|
|
|
|
|
|
def _update(self, txn):
|
|
|
|
# If we don't have an reqid, we're creating a new reuqest row
|
|
|
|
setnames = ["full_request=?", "port=?"]
|
|
|
|
queryargs = [self.full_request, self.port]
|
|
|
|
if self.response:
|
|
|
|
setnames.append('response_id=?')
|
|
|
|
assert(self.response.rspid is not None) # should be saved first
|
|
|
|
queryargs.append(self.response.rspid)
|
|
|
|
if self.unmangled:
|
|
|
|
setnames.append('unmangled_id=?')
|
|
|
|
assert(self.unmangled.reqid is not None) # should be saved first
|
|
|
|
queryargs.append(self.unmangled.reqid)
|
|
|
|
if self.time_start:
|
|
|
|
setnames.append('start_datetime=?')
|
|
|
|
queryargs.append(self.time_start.isoformat())
|
|
|
|
if self.time_end:
|
|
|
|
setnames.append('end_datetime=?')
|
|
|
|
queryargs.append(self.time_end.isoformat())
|
|
|
|
|
|
|
|
setnames.append('is_ssl=?')
|
|
|
|
if self.is_ssl:
|
|
|
|
queryargs.append('1')
|
|
|
|
else:
|
|
|
|
queryargs.append('0')
|
|
|
|
|
|
|
|
setnames.append('submitted=?')
|
|
|
|
if self.submitted:
|
|
|
|
queryargs.append('1')
|
|
|
|
else:
|
|
|
|
queryargs.append('0')
|
|
|
|
|
|
|
|
queryargs.append(self.reqid)
|
|
|
|
txn.execute(
|
|
|
|
"""
|
|
|
|
UPDATE requests SET %s WHERE id=?;
|
|
|
|
""" % ','.join(setnames),
|
|
|
|
tuple(queryargs)
|
|
|
|
)
|
|
|
|
|
|
|
|
def _insert(self, txn):
|
|
|
|
# If we don't have an reqid, we're creating a new reuqest row
|
|
|
|
colnames = ["full_request", "port"]
|
|
|
|
colvals = [self.full_request, self.port]
|
|
|
|
if self.response:
|
|
|
|
colnames.append('response_id')
|
|
|
|
assert(self.response.rspid is not None) # should be saved first
|
|
|
|
colvals.append(self.response.rspid)
|
|
|
|
if self.unmangled:
|
|
|
|
colnames.append('unmangled_id')
|
|
|
|
assert(self.unmangled.reqid is not None) # should be saved first
|
|
|
|
colvals.append(self.unmangled.reqid)
|
|
|
|
if self.time_start:
|
|
|
|
colnames.append('start_datetime')
|
|
|
|
colvals.append(self.time_start.isoformat())
|
|
|
|
if self.time_end:
|
|
|
|
colnames.append('end_datetime')
|
|
|
|
colvals.append(self.time_end.isoformat())
|
|
|
|
colnames.append('submitted')
|
|
|
|
if self.submitted:
|
|
|
|
colvals.append('1')
|
|
|
|
else:
|
|
|
|
colvals.append('0')
|
|
|
|
|
|
|
|
colnames.append('is_ssl')
|
|
|
|
if self.is_ssl:
|
|
|
|
colvals.append('1')
|
|
|
|
else:
|
|
|
|
colvals.append('0')
|
|
|
|
|
|
|
|
txn.execute(
|
|
|
|
"""
|
|
|
|
INSERT INTO requests (%s) VALUES (%s);
|
|
|
|
""" % (','.join(colnames), ','.join(['?']*len(colvals))),
|
|
|
|
tuple(colvals)
|
|
|
|
)
|
|
|
|
self.reqid = str(txn.lastrowid)
|
|
|
|
assert txn.lastrowid is not None
|
|
|
|
assert self.reqid is not None
|
|
|
|
|
|
|
|
    @defer.inlineCallbacks
    def delete(self):
        """
        Delete this request's row and its tag associations from the data
        file, then clear reqid. Returns a deferred that fires when the
        deletion is complete.

        :rtype: twisted.internet.defer.Deferred
        """
        assert(self.reqid is not None)
        yield dbpool.runQuery(
            """
            DELETE FROM requests WHERE id=?;
            """,
            (self.reqid,)
        )
        # Also remove tag links so the tagged table doesn't keep dangling ids
        yield dbpool.runQuery(
            """
            DELETE FROM tagged WHERE reqid=?;
            """,
            (self.reqid,)
        )
        self.reqid = None
|
|
|
|
|
|
|
|
    @defer.inlineCallbacks
    def deep_delete(self):
        """
        Delete this request along with its unmangled version, its response,
        and the response's unmangled version. Returns a deferred that fires
        when everything has been deleted.

        :rtype: twisted.internet.defer.Deferred
        """
        if self.unmangled:
            yield self.unmangled.delete()
        if self.response:
            if self.response.unmangled:
                yield self.response.unmangled.delete()
            yield self.response.delete()
        # Delete self last so related rows are gone first
        yield self.delete()
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def _gen_sql_row(tablename=None):
|
|
|
|
template = "{pre}full_request, {pre}response_id, {pre}id, {pre}unmangled_id, {pre}start_datetime, {pre}end_datetime, {pre}port, {pre}is_ssl"
|
|
|
|
if tablename:
|
|
|
|
return template.format(pre=('%s.'%tablename))
|
|
|
|
else:
|
|
|
|
return template.format(pre='')
|
|
|
|
|
|
|
|
|
|
|
|
    @staticmethod
    @defer.inlineCallbacks
    def _from_sql_row(row):
        """
        Build a Request from a DB row whose columns are in the order produced
        by :func:`_gen_sql_row`. Also loads the response, the unmangled
        request, and the tags. Returns a deferred firing with the Request.

        :param row: sequence of column values (full_request, response_id,
            id, unmangled_id, start_datetime, end_datetime, port, is_ssl)
        :rtype: twisted.internet.defer.Deferred
        """
        # row[0]: full request text
        req = Request(row[0])
        # row[1]: id of the associated response, if any
        if row[1]:
            rsp = yield Response.load_response(str(row[1]))
            req.response = rsp
        # row[3]: id of the unmangled version of this request, if any
        if row[3]:
            unmangled_req = yield Request.load_request(str(row[3]))
            req.unmangled = unmangled_req
        # row[4]/row[5]: ISO-8601 start/end timestamps
        if row[4]:
            req.time_start = datetime.datetime.strptime(row[4], "%Y-%m-%dT%H:%M:%S.%f")
        if row[5]:
            req.time_end = datetime.datetime.strptime(row[5], "%Y-%m-%dT%H:%M:%S.%f")
        if row[6] is not None:
            req.port = int(row[6])
        # row[7]: is_ssl stored as 0/1
        if row[7] == 1:
            req._is_ssl = True
        # row[2]: this request's id; kept as a string on the object
        req.reqid = str(row[2])

        # tags
        rows = yield dbpool.runQuery(
            """
            SELECT tg.tag
            FROM tagged tgd, tags tg
            WHERE tgd.tagid=tg.id AND tgd.reqid=?;
            """,
            (req.reqid,)
        )
        req.tags = []
        for row in rows:
            req.tags.append(row[0])
        defer.returnValue(req)
|
|
|
|
|
|
|
|
    @staticmethod
    @defer.inlineCallbacks
    def load_all_requests():
        """
        load_all_requests()
        Load all the requests in the data file and return them in a list.
        Returns a deferred which calls back with the list of requests when complete.

        :rtype: twisted.internet.defer.Deferred
        """
        # Unsaved in-memory requests are included alongside the DB rows
        reqs = []
        reqs += list(pappyproxy.context.in_memory_requests)
        rows = yield dbpool.runQuery(
            """
            SELECT %s
            FROM requests;
            """ % Request._gen_sql_row(),
            )
        for row in rows:
            req = yield Request._from_sql_row(row)
            reqs.append(req)
        defer.returnValue(reqs)
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
@defer.inlineCallbacks
|
|
|
|
def load_requests_by_tag(tag):
|
|
|
|
"""
|
|
|
|
load_requests_by_tag(tag)
|
|
|
|
Load all the requests in the data file with a given tag and return them in a list.
|
|
|
|
Returns a deferred which calls back with the list of requests when complete.
|
|
|
|
|
|
|
|
:rtype: twisted.internet.defer.Deferred
|
|
|
|
"""
|
|
|
|
# tags
|
|
|
|
rows = yield dbpool.runQuery(
|
|
|
|
"""
|
|
|
|
SELECT tgd.reqid
|
|
|
|
FROM tagged tgd, tags tg
|
|
|
|
WHERE tgd.tagid=tg.id AND tg.tag=?;
|
|
|
|
""",
|
|
|
|
(tag,)
|
|
|
|
)
|
|
|
|
reqs = []
|
|
|
|
for row in rows:
|
|
|
|
req = Request.load_request(row[0])
|
|
|
|
reqs.append(req)
|
|
|
|
defer.returnValue(reqs)
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
@defer.inlineCallbacks
|
|
|
|
def load_request(to_load, allow_special=True):
|
|
|
|
"""
|
|
|
|
load_request(to_load)
|
|
|
|
Load a request with the given request id and return it.
|
|
|
|
Returns a deferred which calls back with the request when complete.
|
|
|
|
|
|
|
|
:rtype: twisted.internet.defer.Deferred
|
|
|
|
"""
|
|
|
|
|
|
|
|
assert(dbpool)
|
|
|
|
|
|
|
|
if not allow_special:
|
|
|
|
try:
|
|
|
|
int(to_load)
|
|
|
|
except (ValueError, TypeError):
|
|
|
|
raise PappyException('Cannot load special id %s' % to_load)
|
|
|
|
|
|
|
|
ret_unmangled = False
|
|
|
|
rsp_unmangled = False
|
|
|
|
if to_load[0] == 'u':
|
|
|
|
ret_unmangled = True
|
|
|
|
loadid = to_load[1:]
|
|
|
|
elif to_load[0] == 's':
|
|
|
|
rsp_unmangled = True
|
|
|
|
loadid = to_load[1:]
|
|
|
|
else:
|
|
|
|
loadid = to_load
|
|
|
|
|
|
|
|
def retreq(r):
|
|
|
|
if ret_unmangled:
|
|
|
|
if not r.unmangled:
|
|
|
|
raise PappyException("Request %s was not mangled"%r.reqid)
|
|
|
|
return r.unmangled
|
|
|
|
if rsp_unmangled:
|
|
|
|
if not r.response:
|
|
|
|
raise PappyException("Request %s does not have a response" % r.reqid)
|
|
|
|
if not r.response.unmangled:
|
|
|
|
raise PappyException("Response to request %s was not mangled" % r.reqid)
|
|
|
|
r.response = r.response.unmangled
|
|
|
|
return r
|
|
|
|
else:
|
|
|
|
return r
|
|
|
|
|
|
|
|
for r in pappyproxy.context.in_memory_requests:
|
|
|
|
if r.reqid == to_load:
|
|
|
|
defer.returnValue(retreq(r))
|
|
|
|
for r in pappyproxy.context.all_reqs:
|
|
|
|
if r.reqid == to_load:
|
|
|
|
defer.returnValue(retreq(r))
|
|
|
|
for r in pappyproxy.context.active_requests:
|
|
|
|
if r.reqid == to_load:
|
|
|
|
defer.returnValue(retreq(r))
|
|
|
|
if to_load[0] == 'm':
|
|
|
|
# An in-memory request should have been loaded in the previous loop
|
|
|
|
raise PappyException('In-memory request %s not found' % to_load)
|
|
|
|
rows = yield dbpool.runQuery(
|
|
|
|
"""
|
|
|
|
SELECT %s
|
|
|
|
FROM requests
|
|
|
|
WHERE id=?;
|
|
|
|
""" % Request._gen_sql_row(),
|
|
|
|
(loadid,)
|
|
|
|
)
|
|
|
|
if len(rows) != 1:
|
|
|
|
raise PappyException("Request with id %s does not exist" % loadid)
|
|
|
|
req = yield Request._from_sql_row(rows[0])
|
|
|
|
req.reqid = to_load
|
|
|
|
|
|
|
|
defer.returnValue(retreq(req))
|
|
|
|
|
|
|
|
    @staticmethod
    @defer.inlineCallbacks
    def load_from_filters(filters):
        """
        Load all requests (DB and in-memory) that match the given context
        filters. The query selects only rows that no other row points at via
        unmangled_id, i.e. the latest (mangled) version of each request.

        :param filters: filters understood by pappyproxy.context.filter_reqs
        :rtype: twisted.internet.defer.Deferred
        """
        # Not efficient in any way
        # But it stays this way until we hit performance issues
        assert(dbpool)
        rows = yield dbpool.runQuery(
            """
            SELECT %s FROM requests r1
            LEFT JOIN requests r2 ON r1.id=r2.unmangled_id
            WHERE r2.id is NULL;
            """ % Request._gen_sql_row('r1'),
            )
        reqs = []
        for row in rows:
            req = yield Request._from_sql_row(row)
            reqs.append(req)
        reqs += list(pappyproxy.context.in_memory_requests)
        # filter_reqs returns (matching, non-matching); keep the matches
        (reqs, _) = pappyproxy.context.filter_reqs(reqs, filters)

        defer.returnValue(reqs)
|
|
|
|
|
|
|
|
######################
|
|
|
|
## Submitting Requests
|
|
|
|
|
|
|
|
    @staticmethod
    @defer.inlineCallbacks
    def submit_new(host, port, is_ssl, full_request):
        """
        submit_new(host, port, is_ssl, full_request)
        Submits a request with the given parameters and returns a request object
        with the response.

        :param host: The host to submit to
        :type host: string
        :param port: The port to submit to
        :type port: Integer
        :type is_ssl: Whether to use SSL
        :param full_request: The request data to send
        :type full_request: string
        :rtype: Twisted deferred that calls back with a Request
        """
        new_obj = Request(full_request)
        # save_all=False: a manually submitted request is not automatically
        # saved by the proxy client
        factory = pappyproxy.proxy.ProxyClientFactory(new_obj, save_all=False)
        factory.connection_id = pappyproxy.proxy.get_next_connection_id()
        if is_ssl:
            reactor.connectSSL(host, port, factory, pappyproxy.proxy.ClientTLSContext())
        else:
            reactor.connectTCP(host, port, factory)
        # data_defer fires once the full response has been received
        new_req = yield factory.data_defer
        defer.returnValue(new_req)
|
|
|
|
|
|
|
|
    @defer.inlineCallbacks
    def async_submit(self):
        """
        async_submit()
        Same as :func:`~pappyproxy.http.Request.submit` but generates deferreds.
        Submits the request using its host, port, etc. and updates its response value
        to the resulting response.

        :rtype: Twisted deferred
        """
        new_req = yield Request.submit_new(self.host, self.port, self.is_ssl,
                                           self.full_request)
        # Copy the submission results back onto this request object
        self.response = new_req.response
        self.time_start = new_req.time_start
        self.time_end = new_req.time_end
|
|
|
|
|
|
|
|
    @crochet.wait_for(timeout=180.0)
    @defer.inlineCallbacks
    def submit(self):
        """
        submit()
        Submits the request using its host, port, etc. and updates its response value
        to the resulting response.
        Cannot be called in async functions.
        This is what you should use to submit your requests in macros.
        """
        # crochet blocks until the submission's deferred fires (180s timeout)
        new_req = yield Request.submit_new(self.host, self.port, self.is_ssl,
                                           self.full_request)
        # Copy the submission results back onto this request object
        self.response = new_req.response
        self.time_start = new_req.time_start
        self.time_end = new_req.time_end
|
|
|
|
|
|
|
|
|
|
|
|
class Response(object):
|
|
|
|
"""
|
|
|
|
:ivar complete: When creating the response with :func:`~pappyproxy.http.Response.add_line`
|
|
|
|
and :func:`~pappyproxy.http.Response.add_data`, returns whether
|
|
|
|
the request is complete.
|
|
|
|
:vartype complete: Bool
|
|
|
|
:ivar cookies: Cookies set by the response
|
|
|
|
:vartype cookies: RepeatableDict of ResponseCookie objects
|
|
|
|
:ivar headers: The headers of the response
|
|
|
|
:vartype headers: RepeatableDict
|
|
|
|
:ivar headers_complete: When creating the response with
|
|
|
|
:func:`~pappyproxy.http.Response.add_line` and
|
|
|
|
:func:`~pappyproxy.http.Response.add_data`, returns whether the headers
|
|
|
|
are complete
|
|
|
|
:vartype headers_complete: Bool
|
|
|
|
:ivar response_code: The response code of the response
|
|
|
|
:vartype response_code: Integer
|
|
|
|
:ivar response_text: The text associated with the response code (ie OK, NOT FOUND, etc)
|
|
|
|
:vartype response_text: String
|
|
|
|
:ivar rspid: If the response is saved in the data file, the id of the response
|
|
|
|
:vartype rspid: String
|
|
|
|
:ivar unmangled: If the response was mangled, the unmangled version of the response
|
|
|
|
:vartype unmangled: Response
|
|
|
|
:ivar version: The version part of the status line (ie HTTP/1.1)
|
|
|
|
:vartype version: String
|
|
|
|
"""
|
|
|
|
|
|
|
|
    def __init__(self, full_response=None, update_content_length=False):
        """
        :param full_response: Full text of the response to parse immediately
        :type full_response: string
        :param update_content_length: If True, recompute Content-Length from
            the actual body instead of trusting the parsed header
        :type update_content_length: Bool
        """
        # Public parse results
        self.complete = False
        self.cookies = RepeatableDict()
        self.headers = RepeatableDict(case_insensitive=True)
        self.headers_complete = False
        self._raw_data = ''
        self.response_code = 0
        self.response_text = ''
        self.rspid = None
        self.unmangled = None
        self.version = ''

        # Internal parser state
        self._encoding_type = ENCODE_NONE
        self._first_line = True
        self._data_obj = None           # body collector (LengthData/ChunkedData)
        self._end_after_headers = False # True for bodyless responses (1xx/204/304)

        self._set_dict_callbacks()

        if full_response is not None:
            self._from_full_response(full_response, update_content_length)
|
|
|
|
|
|
|
|
def __copy__(self):
|
|
|
|
if not self.complete:
|
|
|
|
raise PappyException("Cannot copy incomplete responses")
|
|
|
|
retrsp = Response(self.full_response)
|
|
|
|
if self.unmangled:
|
|
|
|
retrsp.unmangled = self.unmangled.copy()
|
|
|
|
return retrsp
|
|
|
|
|
|
|
|
    def copy(self):
        """Return a copy of this response (delegates to :func:`__copy__`)."""
        return self.__copy__()
|
|
|
|
|
|
|
|
def __eq__(self, other):
|
|
|
|
if self.full_response != other.full_response:
|
|
|
|
return False
|
|
|
|
return True
|
|
|
|
|
|
|
|
@property
|
|
|
|
def raw_headers(self):
|
|
|
|
"""
|
|
|
|
The raw text of the headers including the extra newline at the end.
|
|
|
|
|
|
|
|
:getter: Returns the raw text of the headers including the extra newline at the end.
|
|
|
|
:type: string
|
|
|
|
"""
|
|
|
|
ret = self.status_line + '\r\n'
|
|
|
|
for k, v in self.headers.all_pairs():
|
|
|
|
ret = ret + "%s: %s\r\n" % (k, v)
|
|
|
|
ret = ret + '\r\n'
|
|
|
|
return ret
|
|
|
|
|
|
|
|
@property
|
|
|
|
def status_line(self):
|
|
|
|
"""
|
|
|
|
The status line of the response. ie `HTTP/1.1 200 OK`
|
|
|
|
|
|
|
|
:getter: Returns the status line of the response
|
|
|
|
:setter: Sets the status line of the response
|
|
|
|
:type: string
|
|
|
|
"""
|
|
|
|
if not self.version and self.response_code == 0 and not self.version:
|
|
|
|
return ''
|
|
|
|
return '%s %d %s' % (self.version, self.response_code, self.response_text)
|
|
|
|
|
|
|
|
    @status_line.setter
    def status_line(self, val):
        # Delegates to the status-line parser, which sets
        # version/response_code/response_text
        self._handle_statusline(val)
|
|
|
|
|
|
|
|
    @property
    def raw_data(self):
        """
        The data portion of the response. Stored decoded (gzip/deflate and
        chunking already removed by the parser).

        :getter: Returns the data portion of the response
        :setter: Set the data of the response and update metadata
        :type: string
        """
        return self._raw_data
|
|
|
|
|
|
|
|
    @raw_data.setter
    def raw_data(self, val):
        """Replace the body, reset encoding state, and refresh Content-Length."""
        self._raw_data = val
        # Rebuild the body collector as an already-satisfied LengthData so
        # parser state stays consistent with the new body
        self._data_obj = LengthData(len(val))
        if len(val) > 0:
            self._data_obj.add_data(val)
        self._encoding_type = ENCODE_NONE
        self.complete = True
        # Recompute the Content-Length header from the new body
        self._update_from_data()
|
|
|
|
|
|
|
|
@property
|
|
|
|
def full_response(self):
|
|
|
|
"""
|
|
|
|
The full text of the response including the headers and data.
|
|
|
|
Response is automatically converted from compressed/chunked into an
|
|
|
|
uncompressed response with a Content-Length header.
|
|
|
|
|
|
|
|
:getter: Returns the full text of the response
|
|
|
|
:type: string
|
|
|
|
"""
|
|
|
|
if not self.status_line:
|
|
|
|
return ''
|
|
|
|
ret = self.raw_headers
|
|
|
|
ret = ret + self.raw_data
|
|
|
|
return ret
|
|
|
|
|
|
|
|
    @property
    def soup(self):
        """
        Returns a beautifulsoup4 object for parsing the html of the response

        :getter: Returns a BeautifulSoup object representing the html of the response
        """
        # NOTE(review): uses the 'lxml' parser, which must be installed
        # separately from bs4 — confirm it is a declared dependency
        return bs4.BeautifulSoup(self.raw_data, 'lxml')
|
|
|
|
|
|
|
|
    def _from_full_response(self, full_response, update_content_length=False):
        """
        Parse a full response string into this object: status line, headers,
        then body.

        :param full_response: the complete response text
        :param update_content_length: if True, assign the body via raw_data
            (which recomputes Content-Length) before/instead of feeding it
            through the transfer-encoding parser
        """
        # Get rid of leading CRLF. Not in spec, should remove eventually
        full_response = _strip_leading_newlines(full_response)
        if full_response == '':
            return

        # Feed header lines one at a time until the blank line is hit
        remaining = full_response
        while remaining and not self.headers_complete:
            line, remaining = _consume_line(remaining)
            self.add_line(line)

        if not self.headers_complete:
            # Input ran out mid-headers; synthesize the terminating blank line
            self.add_line('')

        if update_content_length:
            self.raw_data = remaining
        if not self.complete:
            self.add_data(remaining)
        assert(self.complete)
|
|
|
|
|
|
|
|
############################
|
|
|
|
## Internal update functions
|
|
|
|
|
|
|
|
    def _set_dict_callbacks(self):
        """Install modify callbacks so edits to headers/cookies stay in sync."""
        # Add callbacks to dicts: editing headers re-parses cookies from the
        # header text; editing cookies rebuilds the Set-Cookie headers
        self.headers.set_modify_callback(self._update_from_text)
        self.cookies.set_modify_callback(self._update_from_objects)
|
|
|
|
|
|
|
|
    def _update_from_data(self):
        """Keep the Content-Length header in sync with the stored body."""
        # do_callback=False avoids re-triggering _update_from_text
        self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False)
|
|
|
|
|
|
|
|
def _update_from_objects(self):
|
|
|
|
# Updates headers from objects
|
|
|
|
# DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION
|
|
|
|
|
|
|
|
# Cookies
|
|
|
|
new_headers = RepeatableDict()
|
|
|
|
cookies_added = False
|
|
|
|
for pair in self.headers.all_pairs():
|
|
|
|
if pair[0].lower() == 'set-cookie':
|
|
|
|
# If we haven't added our cookies, add them all. Otherwise
|
|
|
|
# strip the header (do nothing)
|
|
|
|
if not cookies_added:
|
|
|
|
# Add all our cookies here
|
|
|
|
for k, c in self.cookies.all_pairs():
|
|
|
|
new_headers.append('Set-Cookie', c.cookie_str)
|
|
|
|
cookies_added = True
|
|
|
|
else:
|
|
|
|
new_headers.append(pair[0], pair[1])
|
|
|
|
|
|
|
|
if not cookies_added:
|
|
|
|
# Add all our cookies to the end
|
|
|
|
for k, c in self.cookies.all_pairs():
|
|
|
|
new_headers.append('Set-Cookie', c.cookie_str)
|
|
|
|
|
|
|
|
self.headers = new_headers
|
|
|
|
self._set_dict_callbacks()
|
|
|
|
|
|
|
|
def _update_from_text(self):
|
|
|
|
self.cookies = RepeatableDict()
|
|
|
|
self._set_dict_callbacks()
|
|
|
|
for k, v in self.headers.all_pairs():
|
|
|
|
if k.lower() == 'set-cookie':
|
|
|
|
# Parse the cookie
|
|
|
|
cookie = ResponseCookie(v)
|
|
|
|
self.cookies.append(cookie.key, cookie, do_callback=False)
|
|
|
|
|
|
|
|
###############
|
|
|
|
## Data parsing
|
|
|
|
|
|
|
|
def _handle_statusline(self, status_line):
|
|
|
|
self._first_line = False
|
|
|
|
self.version, self.response_code, self.response_text = \
|
|
|
|
status_line.split(' ', 2)
|
|
|
|
self.response_code = int(self.response_code)
|
|
|
|
|
|
|
|
if self.response_code == 304 or self.response_code == 204 or \
|
|
|
|
self.response_code/100 == 1:
|
|
|
|
self._end_after_headers = True
|
|
|
|
|
|
|
|
def _handle_header(self, key, val):
|
|
|
|
stripped = False
|
|
|
|
if key.lower() == 'content-encoding':
|
|
|
|
if val in ('gzip', 'x-gzip'):
|
|
|
|
self._encoding_type = ENCODE_GZIP
|
|
|
|
elif val in ('deflate'):
|
|
|
|
self._encoding_type = ENCODE_DEFLATE
|
|
|
|
|
|
|
|
# We send our requests already decoded, so we don't want a header
|
|
|
|
# saying it's encoded
|
|
|
|
if self._encoding_type != ENCODE_NONE:
|
|
|
|
stripped = True
|
|
|
|
elif key.lower() == 'transfer-encoding' and val.lower() == 'chunked':
|
|
|
|
self._data_obj = ChunkedData()
|
|
|
|
self.complete = self._data_obj.complete
|
|
|
|
stripped = True
|
|
|
|
elif key.lower() == 'content-length':
|
|
|
|
# We use our own content length
|
|
|
|
self._data_obj = LengthData(int(val))
|
|
|
|
elif key.lower() == 'set-cookie':
|
|
|
|
cookie = ResponseCookie(val)
|
|
|
|
self.cookies.append(cookie.key, cookie, do_callback=False)
|
|
|
|
|
|
|
|
if stripped:
|
|
|
|
return False
|
|
|
|
else:
|
|
|
|
self.headers.append(key, val, do_callback=False)
|
|
|
|
return True
|
|
|
|
|
|
|
|
###############
|
|
|
|
## Data loading
|
|
|
|
|
|
|
|
    def add_line(self, line):
        """
        Used for building a response from a Twisted protocol.
        Add a line (for status line and headers). Lines must be added in order
        and the first line must be the status line. The line should not contain
        the trailing carriage return/newline. I do not suggest you use this for
        anything.

        :param line: The line to add
        :type line: string
        """
        assert(not self.headers_complete)
        # Ignore blank lines that arrive before the status line
        if not line and self._first_line:
            return
        if not line:
            # Blank line terminates the header block
            self.headers_complete = True

            if self._end_after_headers:
                # 1xx/204/304: no body follows the headers
                self.complete = True
                return

            if not self._data_obj:
                # No Content-Length or chunked encoding seen: zero-length body
                self._data_obj = LengthData(0)
            self.complete = self._data_obj.complete
            return

        if self._first_line:
            self._handle_statusline(line)
            self._first_line = False
        else:
            # Header line: split on the first colon only
            key, val = line.split(':', 1)
            val = val.strip()
            self._handle_header(key, val)
|
|
|
|
|
|
|
|
    def add_data(self, data):
        """
        Used for building a response from a Twisted protocol.
        Add data to the response. The data must conform to the content encoding
        and transfer encoding given in the headers passed in to
        :func:`~pappyproxy.http.Response.add_line`. Can be any fragment of the data.
        I do not suggest that you use this function ever.

        :param data: The data to add
        :type data: string
        """
        # Headers must already be parsed so the body collector exists
        assert(self._data_obj)
        assert(not self._data_obj.complete)
        assert not self.complete
        self._data_obj.add_data(data)
        if self._data_obj.complete:
            # Body finished: decode gzip/deflate and refresh Content-Length
            self._raw_data = _decode_encoded(self._data_obj.raw_data,
                                             self._encoding_type)
            self.complete = True
            self._update_from_data()
|
|
|
|
|
|
|
|
####################
|
|
|
|
## Cookie management
|
|
|
|
|
|
|
|
    def add_cookie(self, cookie):
        """
        Add a :class:`pappyproxy.http.ResponseCookie` to the response.

        .. warning::
            This will add duplicate cookies. If you want to add a cookie you're not sure exists,
            use :func:`~pappyproxy.http.Response.set_cookie`
        """
        # append (not assignment) deliberately allows duplicate keys
        self.cookies.append(cookie.key, cookie)
|
|
|
|
|
|
|
|
    def set_cookie(self, cookie):
        """
        Set a cookie in the response. ``cookie`` must be a :class:`pappyproxy.http.ResponseCookie`
        """
        # Assignment replaces any existing cookie stored under the same key
        self.cookies[cookie.key] = cookie
|
|
|
|
|
|
|
|
def set_cookie_kv(self, key, val):
|
|
|
|
"""
|
|
|
|
Set a cookie by key and value. Will not have path, secure, etc set at all.
|
|
|
|
"""
|
|
|
|
cookie = ResponseCookie()
|
|
|
|
cookie.key = key
|
|
|
|
cookie.val = val
|
|
|
|
self.cookies[cookie.key] = cookie
|
|
|
|
|
|
|
|
    def delete_cookie(self, key):
        """
        Delete a cookie from the response by its key
        """
        del self.cookies[key]
|
|
|
|
|
|
|
|
##############
|
|
|
|
## Serializing
|
|
|
|
|
|
|
|
def to_json(self):
|
|
|
|
"""
|
|
|
|
Return a JSON encoding of the response that can be used by
|
|
|
|
:func:`~pappyproxy.http.Response.from_json` to recreate the response.
|
|
|
|
The ``full_response`` portion is base64 encoded because json doesn't play
|
|
|
|
nice with binary blobs.
|
|
|
|
"""
|
|
|
|
data = {
|
|
|
|
'rspid': self.rspid,
|
|
|
|
'full_response': base64.b64encode(self.full_response),
|
|
|
|
}
|
|
|
|
if self.unmangled:
|
|
|
|
data['unmangled_id'] = self.unmangled.rspid
|
|
|
|
|
|
|
|
return json.dumps(data)
|
|
|
|
|
|
|
|
|
|
|
|
def from_json(self, json_string):
|
|
|
|
"""
|
|
|
|
Update the metadata of the response to match data from
|
|
|
|
:func:`~pappyproxy.http.Response.to_json`
|
|
|
|
|
|
|
|
:param json_string: The JSON data to use
|
|
|
|
:type json_string: JSON data in a string
|
|
|
|
"""
|
|
|
|
data = json.loads(json_string)
|
|
|
|
self._from_full_response(base64.b64decode(data['full_response']))
|
|
|
|
self._update_from_text()
|
|
|
|
self._update_from_data()
|
|
|
|
if data['rspid']:
|
|
|
|
self.rspid = str(data['rspid'])
|
|
|
|
|
|
|
|
#######################
|
|
|
|
## Database interaction
|
|
|
|
|
|
|
|
    @defer.inlineCallbacks
    def async_save(self):
        """
        async_save()
        Save/update the just request in the data file. Returns a twisted deferred which
        fires when the save is complete. It is suggested that you use
        :func: `~pappyproxy.http.Request.async_deep_save` instead to save responses.

        :rtype: twisted.internet.defer.Deferred
        """
        assert(dbpool)
        try:
            # Check for intyness; a non-numeric or missing rspid means the
            # response has never been written to the data file
            _ = int(self.rspid)

            # If we have rspid, we're updating
            yield dbpool.runInteraction(self._update)
        except (ValueError, TypeError):
            # No numeric id yet; insert a new row (which sets rspid)
            yield dbpool.runInteraction(self._insert)
        assert(self.rspid is not None)
|
|
|
|
|
|
|
|
# Right now responses without requests are unviewable
|
|
|
|
# @crochet.wait_for(timeout=180.0)
|
|
|
|
# @defer.inlineCallbacks
|
|
|
|
# def save(self):
|
|
|
|
# yield self.save()
|
|
|
|
|
|
|
|
def _update(self, txn):
|
|
|
|
setnames = ["full_response=?"]
|
|
|
|
queryargs = [self.full_response]
|
|
|
|
if self.unmangled:
|
|
|
|
setnames.append('unmangled_id=?')
|
|
|
|
assert(self.unmangled.rspid is not None) # should be saved first
|
|
|
|
queryargs.append(self.unmangled.rspid)
|
|
|
|
|
|
|
|
queryargs.append(self.rspid)
|
|
|
|
txn.execute(
|
|
|
|
"""
|
|
|
|
UPDATE responses SET %s WHERE id=?;
|
|
|
|
""" % ','.join(setnames),
|
|
|
|
tuple(queryargs)
|
|
|
|
)
|
|
|
|
assert(self.rspid is not None)
|
|
|
|
|
|
|
|
def _insert(self, txn):
|
|
|
|
# If we don't have an rspid, we're creating a new one
|
|
|
|
colnames = ["full_response"]
|
|
|
|
colvals = [self.full_response]
|
|
|
|
if self.unmangled is not None:
|
|
|
|
colnames.append('unmangled_id')
|
|
|
|
assert(self.unmangled.rspid is not None) # should be saved first
|
|
|
|
colvals.append(self.unmangled.rspid)
|
|
|
|
|
|
|
|
txn.execute(
|
|
|
|
"""
|
|
|
|
INSERT INTO responses (%s) VALUES (%s);
|
|
|
|
""" % (','.join(colnames), ','.join(['?']*len(colvals))),
|
|
|
|
tuple(colvals)
|
|
|
|
)
|
|
|
|
self.rspid = txn.lastrowid
|
|
|
|
assert(self.rspid is not None)
|
|
|
|
|
|
|
|
@defer.inlineCallbacks
|
|
|
|
def delete(self):
|
|
|
|
assert(self.rspid is not None)
|
|
|
|
row = yield dbpool.runQuery(
|
|
|
|
"""
|
|
|
|
DELETE FROM responses WHERE id=?;
|
|
|
|
""",
|
|
|
|
(self.rspid,)
|
|
|
|
)
|
|
|
|
self.rspid = None
|
|
|
|
|
|
|
|
    @staticmethod
    @defer.inlineCallbacks
    def load_response(respid):
        """
        Load a response from its response id. Returns a deferred. I don't suggest you use this.

        :param respid: the id of the response row to load
        :rtype: twisted.internet.defer.Deferred
        """
        assert(dbpool)
        rows = yield dbpool.runQuery(
            """
            SELECT full_response, id, unmangled_id
            FROM responses
            WHERE id=?;
            """,
            (respid,)
        )
        if len(rows) != 1:
            raise PappyException("Response with request id %s does not exist" % respid)
        full_response = rows[0][0]
        resp = Response(full_response)
        # rspid is kept as a string on the object
        resp.rspid = str(rows[0][1])
        if rows[0][2]:
            # Recursively load the unmangled version of this response
            unmangled_response = yield Response.load_response(int(rows[0][2]))
            resp.unmangled = unmangled_response
        defer.returnValue(resp)
|
|
|
|
|