import base64
import collections
import crochet
import datetime
import gzip
import json
import pappyproxy
import re
import StringIO
import urlparse
import zlib
from twisted.internet import defer, reactor
from pappyproxy.util import PappyException
import bs4

ENCODE_NONE = 0
ENCODE_DEFLATE = 1
ENCODE_GZIP = 2

dbpool = None

def init(pool):
    """
    Initialize the http module.

    :param pool: The ConnectionPool to use to store the request/response objects
    :type pool: SQLite ConnectionPool
    """
    global dbpool
    if dbpool is None:
        dbpool = pool
    assert(dbpool)

def destruct():
    assert(dbpool)
    dbpool.close()

def _decode_encoded(data, encoding):
    if encoding == ENCODE_NONE:
        return data

    if encoding == ENCODE_DEFLATE:
        dec_data = zlib.decompress(data, -15)
    else:
        dec_data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data))
        dec_data = dec_data.read()
    return dec_data
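
# A quick illustration of _decode_encoded (sketch, not part of the original
# module; 'buf' and 'gz' are just local variables):
#
#     buf = StringIO.StringIO()
#     gz = gzip.GzipFile(fileobj=buf, mode='wb')
#     gz.write('hello world')
#     gz.close()
#     _decode_encoded(buf.getvalue(), ENCODE_GZIP)  # -> 'hello world'
#     _decode_encoded(zlib.compress('hi')[2:-4], ENCODE_DEFLATE)  # raw deflate -> 'hi'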

def _strip_leading_newlines(string):
    while (len(string) > 1 and string[0:2] == '\r\n') or \
          (len(string) > 0 and string[0] == '\n'):
        if len(string) > 1 and string[0:2] == '\r\n':
            string = string[2:]
        elif len(string) > 0 and string[0] == '\n':
            string = string[1:]
    return string

def _consume_line(instr):
    # returns (line, rest)
    l = []
    pos = 0
    while pos < len(instr):
        if instr[pos] == '\n':
            if l and l[-1] == '\r':
                l = l[:-1]
            return (''.join(l), instr[pos+1:])
        l.append(instr[pos])
        pos += 1
    # No newline found; the whole string is one line with nothing left over
    return (''.join(l), '')

###################
## Functions to use

def get_request(url='', url_params={}):
    """
    get_request(url='', url_params={})

    Create a request object that makes a GET request to the given url with the
    given url params.
    """
    r = Request()
    r.status_line = 'GET / HTTP/1.1'
    r.url = url
    r.headers['Host'] = r.host
    if url_params:
        r.url_params.from_dict(url_params)
    return r

def post_request(url, post_params={}, url_params={}):
    """
    post_request(url, post_params={}, url_params={})

    Create a request object that makes a POST request to the given url with the
    given post and url params.
    """
    r = Request()
    r.status_line = 'POST / HTTP/1.1'
    r.url = url
    r.headers['Host'] = r.host
    if url_params:
        r.url_params.from_dict(url_params)
    if post_params:
        r.post_params.from_dict(post_params)
    return r
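
# Usage sketch for the helpers above (the URLs are made up):
#
#     r = get_request('http://www.example.com/path', url_params={'a': 'b'})
#     r.full_request  # 'GET /path?a=b HTTP/1.1\r\nHost: www.example.com\r\n...'
#
#     r = post_request('https://www.example.com/login',
#                      post_params={'user': 'foo', 'pass': 'bar'})
#     r.submit()      # sends the request and fills in r.response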

def repeatable_parse_qs(s):
    pairs = s.split('&')
    ret_dict = RepeatableDict()
    for pair in pairs:
        if '=' in pair:
            t = tuple(pair.split('=', 1))
            ret_dict.append(t[0], t[1])
        else:
            ret_dict.append(pair, None)
    return ret_dict
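
# For example (sketch):
#
#     d = repeatable_parse_qs('a=1&a=2&flag')
#     d['a']           # '2' (lookups return the last value)
#     d.all_vals('a')  # ['1', '2'] (duplicates are kept)
#     d.all_pairs()    # [('a', '1'), ('a', '2'), ('flag', None)]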

##########
## Classes

class RepeatableDict:
    """
    A dict that retains the order of items inserted and keeps track of
    duplicate values. Can optionally treat keys as case insensitive.
    Custom made for the proxy, so it has strange features
    """

    def __init__(self, from_pairs=None, case_insensitive=False):
        # If efficiency becomes a problem, add a dict that keeps a list by key
        # and use that for getting data. But until then, this stays.
        self._pairs = []
        self._keys = set()
        self._modify_callback = None
        self.case_insensitive = case_insensitive

        if from_pairs:
            for k, v in from_pairs:
                self.append(k, v)

    def _ef_key(self, key):
        # "effective key", returns key.lower() if we're case insensitive,
        # otherwise it returns the same key
        if self.case_insensitive:
            return key.lower()
        return key

    def _mod_callback(self):
        # Calls the modify callback if we have one
        if self._modify_callback:
            self._modify_callback()

    def __contains__(self, val):
        return self._ef_key(val) in self._keys

    def __getitem__(self, key):
        for p in reversed(self._pairs):
            if self._ef_key(p[0]) == self._ef_key(key):
                return p[1]
        raise KeyError

    def __setitem__(self, key, val):
        # Replaces first instance of `key` and deletes the rest
        self.set_val(key, val)

    def __delitem__(self, key):
        # Use _remove_key so case-insensitive dicts remove the effective key
        self._remove_key(key)
        self._pairs = [p for p in self._pairs if self._ef_key(p[0]) != self._ef_key(key)]
        self._mod_callback()

    def __nonzero__(self):
        if self._pairs:
            return True
        else:
            return False

    def _add_key(self, key):
        self._keys.add(self._ef_key(key))

    def _remove_key(self, key):
        self._keys.remove(self._ef_key(key))

    def all_pairs(self):
        return self._pairs[:]

    def append(self, key, val, do_callback=True):
        # Add a duplicate entry for key
        self._add_key(key)
        self._pairs.append((key, val))
        if do_callback:
            self._mod_callback()

    def set_val(self, key, val, do_callback=True):
        new_pairs = []
        added = False
        self._add_key(key)
        for p in self._pairs:
            if self._ef_key(p[0]) == self._ef_key(key):
                if not added:
                    # only add the first instance
                    new_pairs.append((key, val))
                    added = True
            else:
                new_pairs.append(p)
        if not added:
            new_pairs.append((key, val))
        self._pairs = new_pairs

        if do_callback:
            self._mod_callback()

    def update(self, key, val, do_callback=True):
        # If key is already in the dict, replace that value with the new value
        if key in self:
            for k, v in self.all_pairs():
                if self._ef_key(k) == self._ef_key(key):
                    self.set_val(k, val, do_callback=do_callback)
                    break
        else:
            self.set_val(key, val, do_callback=do_callback)

    def clear(self, do_callback=True):
        self._pairs = []
        if do_callback:
            self._mod_callback()

    def all_vals(self, key):
        return [p[1] for p in self._pairs if self._ef_key(p[0]) == self._ef_key(key)]

    def add_pairs(self, pairs, do_callback=True):
        for pair in pairs:
            self._add_key(pair[0])
        self._pairs += pairs
        if do_callback:
            self._mod_callback()

    def from_dict(self, d):
        self._pairs = list(d.items())
        self._mod_callback()

    def sort(self):
        # Sorts pairs by key alphabetically
        self._pairs = sorted(self._pairs, key=lambda x: x[0])

    def set_modify_callback(self, callback):
        # Add a function to be called whenever an element is added, changed, or
        # deleted. Set to None to remove
        self._modify_callback = callback
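
# Behavior sketch for RepeatableDict (illustrative only):
#
#     d = RepeatableDict(case_insensitive=True)
#     d.append('Set-Cookie', 'a=1')
#     d.append('Set-Cookie', 'b=2')  # duplicate keys are kept
#     d['set-cookie']                # 'b=2' (case-insensitive, last match wins)
#     d.all_pairs()                  # [('Set-Cookie', 'a=1'), ('Set-Cookie', 'b=2')]
#     d['Set-Cookie'] = 'c=3'        # replaces the first pair, drops the rest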


class LengthData:
    def __init__(self, length=None):
        self.raw_data = ''
        self.complete = False
        self.length = length or 0

        if self.length == 0:
            self.complete = True

    def add_data(self, data):
        if self.complete:
            raise PappyException("Data already complete!")
        remaining_length = self.length-len(self.raw_data)
        if len(data) >= remaining_length:
            self.raw_data += data[:remaining_length]
            assert(len(self.raw_data) == self.length)
            self.complete = True
        else:
            self.raw_data += data

class ChunkedData:

    def __init__(self):
        self.raw_data = ''
        self._pos = 0
        self._state = 0 # 0=reading length, 1=reading data, 2=going over known string
        self._len_str = ''
        self._chunk_remaining = 0
        self._known_str = ''
        self._known_str_pos = 0
        self._next_state = 0
        self._raw_data = ''
        self.complete = False
        self.unchunked_data = ''

    def add_data(self, data):
        self._raw_data += data
        self.scan_forward()

    def scan_forward(self):
        # Don't add more data if we're already done
        if self.complete:
            return

        while self._pos < len(self._raw_data):
            curchar = self._raw_data[self._pos]
            if self._state == 0:
                if curchar.lower() in '0123456789abcdef':
                    # Read the next char of the length
                    self._len_str += curchar

                    # Move to the next char
                    self._pos += 1
                elif curchar == '\r':
                    # Save how much chunk to read
                    self._chunk_remaining = int(self._len_str, 16)

                    # If the length is 0, chunked encoding is done!
                    if self._chunk_remaining == 0:
                        self.complete = True
                        # I should probably just rename raw_data since it's what
                        # you use to look at unchunked data, but you're not
                        # supposed to look at it until after it's complete
                        # anyways
                        self._raw_data = self.unchunked_data
                        self.raw_data = self._raw_data # Expose raw_data
                        return

                    # There should be a newline after the \r
                    self._known_str = '\n'
                    self._state = 2
                    self._next_state = 1

                    # Reset the length str
                    self._len_str = ''

                    # Move to the next char
                    self._pos += 1
                else:
                    raise Exception("Malformed chunked encoding!")

            elif self._state == 1:
                if self._chunk_remaining > 0:
                    # Read next byte of data
                    self.unchunked_data += curchar
                    self._chunk_remaining -= 1
                    self._pos += 1
                else:
                    # Read newline then read a new chunk
                    self._known_str = '\r\n'
                    self._next_state = 0 # Read len after newlines
                    self._state = 2 # Read newlines
                    # Don't move to the next char because we didn't do anything
            elif self._state == 2:
                # Read a char of an expected string

                # If the expected char doesn't match, throw an error
                if self._known_str[self._known_str_pos] != curchar:
                    raise Exception("Unexpected data")

                # Move to the next char in the raw data and in our known string
                self._known_str_pos += 1
                self._pos += 1

                # If we've reached the end of the known string, go to the next state
                if self._known_str_pos == len(self._known_str):
                    self._known_str_pos = 0
                    self._state = self._next_state
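
# Sketch of feeding a chunked body through ChunkedData (illustrative):
#
#     c = ChunkedData()
#     c.add_data('5\r\nhello\r\n')
#     c.add_data('0\r\n\r\n')  # a zero-length chunk terminates the body
#     c.complete               # True
#     c.raw_data               # 'hello'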

class ResponseCookie(object):
    """
    A cookie representing a cookie set by a response
    """

    def __init__(self, set_cookie_string=None):
        self.key = None
        self.val = None
        self.expires = None
        self.max_age = None
        self.domain = None
        self.path = None
        self.secure = False
        self.http_only = False

        if set_cookie_string:
            self._from_cookie(set_cookie_string)

    @property
    def cookie_str(self):
        """
        Returns the full string of the cookie. ie ``foo=bar; secure; path=/``

        :getter: Returns the full string of the cookie.
        :setter: Set the metadata from a cookie string. ie from a ``Set-Cookie`` header
        """
        av = '%s=%s' % (self.key, self.val)
        to_add = [av]
        if self.expires:
            to_add.append('expires=%s'%self.expires)
        if self.max_age:
            to_add.append('Max-Age=%d'%self.max_age)
        if self.domain:
            to_add.append('Domain=%s'%self.domain)
        if self.path:
            to_add.append('Path=%s'%self.path)
        if self.secure:
            to_add.append('secure')
        if self.http_only:
            to_add.append('httponly')
        return '; '.join(to_add)

    @cookie_str.setter
    def cookie_str(self, val):
        self._from_cookie(val)

    def _parse_cookie_av(self, cookie_av):
        if '=' in cookie_av:
            key, val = cookie_av.split('=', 1)
            key = key.lstrip()
            if key.lower() == 'expires':
                self.expires = val
            if key.lower() == 'max-age':
                self.max_age = int(val)
            if key.lower() == 'domain':
                self.domain = val
            if key.lower() == 'path':
                self.path = val
        elif cookie_av.lstrip().lower() == 'secure':
            self.secure = True
        elif cookie_av.lstrip().lower() == 'httponly':
            self.http_only = True

    def _from_cookie(self, set_cookie_string):
        self.key = None
        self.val = None
        self.expires = None
        self.max_age = None
        self.domain = None
        self.path = None
        self.secure = False
        self.http_only = False
        if ';' in set_cookie_string:
            cookie_pair, rest = set_cookie_string.split(';', 1)
            if '=' in cookie_pair:
                self.key, self.val = cookie_pair.split('=',1)
            elif cookie_pair == '' or re.match('\s+', cookie_pair):
                self.key = ''
                self.val = ''
            else:
                self.key = cookie_pair
                self.val = ''
            cookie_avs = rest.split(';')
            for cookie_av in cookie_avs:
                # lstrip returns a new string; keep the stripped version
                cookie_av = cookie_av.lstrip()
                self._parse_cookie_av(cookie_av)
        else:
            self.key, self.val = set_cookie_string.split('=',1)
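
# Parsing and regenerating a Set-Cookie value (sketch):
#
#     c = ResponseCookie('session=abc123; Path=/; secure; httponly')
#     c.key, c.val      # 'session', 'abc123'
#     c.path, c.secure  # '/', True
#     c.cookie_str      # 'session=abc123; Path=/; secure; httponly'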


class Request(object):
    """
    :ivar time_end: The datetime that the request ended.
    :vartype time_end: datetime.datetime
    :ivar time_start: The datetime that the request was made
    :vartype time_start: datetime.datetime
    :ivar complete: When creating the request with :func:`~pappyproxy.http.Request.add_line`
                    and :func:`~pappyproxy.http.Request.add_data`, returns whether
                    the request is complete.
    :vartype complete: Bool
    :ivar cookies: Cookies sent with the request
    :vartype cookies: RepeatableDict
    :ivar fragment: The fragment part of the url (The part that comes after the #)
    :vartype fragment: String
    :ivar url_params: The url parameters of the request (aka the get parameters)
    :vartype url_params: RepeatableDict
    :ivar headers: The headers of the request
    :vartype headers: RepeatableDict
    :ivar headers_complete: When creating the request with
                            :func:`~pappyproxy.http.Request.add_line` and
                            :func:`~pappyproxy.http.Request.add_data`, returns whether
                            the headers are complete
    :vartype headers_complete: Bool
    :ivar path: The path of the request
    :vartype path: String
    :ivar port: The port that the request was sent to (or will be sent to)
    :vartype port: Integer
    :ivar post_params: The post parameters of the request
    :vartype post_params: RepeatableDict
    :ivar reqid: The request id of the request
    :vartype reqid: String
    :ivar response: The associated response of this request
    :vartype response: Response
    :ivar submitted: Whether the request has been submitted
    :vartype submitted: Bool
    :ivar unmangled: If the request was mangled, the version of the request
                     before it was mangled.
    :vartype unmangled: Request
    :ivar verb: The HTTP verb of the request (ie POST, GET)
    :vartype verb: String
    :ivar version: The HTTP version of the request (ie HTTP/1.1)
    :vartype version: String
    :ivar tags: Tags associated with the request
    :vartype tags: List of Strings
    """


    def __init__(self, full_request=None, update_content_length=True,
                 port=None, is_ssl=None):
        self.time_end = None
        self.time_start = None
        self.complete = False
        self.cookies = RepeatableDict()
        self.fragment = None
        self.url_params = RepeatableDict()
        self.headers = RepeatableDict(case_insensitive=True)
        self.headers_complete = False
        self._host = None
        self._is_ssl = False
        self.path = ''
        self.port = None
        self.post_params = RepeatableDict()
        self._raw_data = ''
        self.reqid = None
        self.response = None
        self.submitted = False
        self.unmangled = None
        self.verb = ''
        self.version = ''
        self.tags = []

        self._first_line = True
        self._data_length = 0
        self._partial_data = ''

        self._set_dict_callbacks()

        # Set values from init
        if is_ssl:
            self.is_ssl = True
        if port:
            self.port = port

        # Get values from the raw request
        if full_request is not None:
            self._from_full_request(full_request, update_content_length)

    def __copy__(self):
        if not self.complete:
            raise PappyException("Cannot copy incomplete requests")
        newreq = Request(self.full_request)
        newreq.is_ssl = self.is_ssl
        newreq.port = self.port
        newreq._host = self._host
        newreq.time_start = self.time_start
        newreq.time_end = self.time_end
        if self.unmangled:
            newreq.unmangled = self.unmangled.copy()
        if self.response:
            newreq.response = self.response.copy()
        return newreq

    def __eq__(self, other):
        if self.full_request != other.full_request:
            return False
        if self.port != other.port:
            return False
        if self.is_ssl != other.is_ssl:
            return False
        if self._host != other._host:
            return False
        return True

    def copy(self):
        """
        Returns a copy of the request

        :rtype: Request
        """
        return self.__copy__()

    @property
    def rsptime(self):
        """
        The response time of the request

        :getter: Returns the response time of the request
        :type: datetime.timedelta
        """
        if self.time_start and self.time_end:
            return self.time_end-self.time_start
        else:
            return None

    @property
    def status_line(self):
        """
        The status line of the request. ie `GET / HTTP/1.1`

        :getter: Returns the status line of the request
        :setter: Sets the status line of the request
        :type: string
        """
        if not self.verb and not self.path and not self.version:
            return ''
        return '%s %s %s' % (self.verb, self.full_path, self.version)

    @status_line.setter
    def status_line(self, val):
        self._handle_statusline(val)

    @property
    def full_path(self):
        """
        The full path of the request including URL params and fragment.
        ie `/path/to/stuff?foo=bar&baz=something#somewhere`

        :getter: Returns the full path of the request
        :type: string
        """

        path = self.path
        if self.url_params:
            path += '?'
            pairs = []
            for pair in self.url_params.all_pairs():
                if pair[1] is None:
                    pairs.append(pair[0])
                else:
                    pairs.append('='.join(pair))
            path += '&'.join(pairs)
        if self.fragment:
            path += '#'
            path += self.fragment
        return path

    @property
    def raw_headers(self):
        """
        The raw text of the headers including the extra newline at the end.

        :getter: Returns the raw text of the headers including the extra newline at the end.
        :type: string
        """
        ret = self.status_line + '\r\n'
        for k, v in self.headers.all_pairs():
            ret = ret + "%s: %s\r\n" % (k, v)
        ret = ret + '\r\n'
        return ret

    @property
    def full_request(self):
        """
        The full text of the request including the headers and data.

        :getter: Returns the full text of the request
        :type: string
        """
        if not self.status_line:
            return ''
        ret = self.raw_headers
        ret = ret + self.raw_data
        return ret

    @property
    def raw_data(self):
        """
        The data portion of the request

        :getter: Returns the data portion of the request
        :setter: Set the data of the request and update metadata
        :type: string
        """
        return self._raw_data

    @raw_data.setter
    def raw_data(self, val):
        self._raw_data = val
        self._update_from_data()
        self.complete = True

    @property
    def url(self):
        """
        The full url of the request including url params, protocol, etc.
        ie `https://www.google.com`, `http://foo.fakewebsite.com:1234/path?a=b`.
        When setting the URL, the port, is_ssl, path, url params, host, etc are all
        automatically updated.

        :getter: Returns the url of the request
        :setter: Sets the url of the request and updates metadata
        :type: string
        """
        if self.is_ssl:
            retstr = 'https://'
        else:
            retstr = 'http://'
        retstr += self.host
        if not ((self.is_ssl and self.port == 443) or \
                (not self.is_ssl and self.port == 80)):
            retstr += ':%d' % self.port
        if self.path and self.path != '/':
            retstr += self.path
        if self.url_params:
            retstr += '?'
            pairs = []
            for p in self.url_params.all_pairs():
                # Match full_path: a param parsed without an '=' has a None value
                if p[1] is None:
                    pairs.append(p[0])
                else:
                    pairs.append('='.join(p))
            retstr += '&'.join(pairs)
        if self.fragment:
            retstr += '#%s' % self.fragment
        return retstr

    @url.setter
    def url(self, val):
        self._handle_statusline_uri(val)
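
    # Setting the url updates the derived fields (sketch):
    #
    #     r = Request()
    #     r.status_line = 'GET / HTTP/1.1'
    #     r.url = 'https://www.example.com:8443/foo?bar=baz'
    #     r.host, r.port, r.is_ssl     # 'www.example.com', 8443, True
    #     r.path, r.url_params['bar']  # '/foo', 'baz'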

    @property
    def host(self):
        """
        The host of the request. ie `www.google.com`.

        :getter: Returns the host of the request
        :setter: Changes the host of the request and updates the Host header
        :type: string
        """
        return self._host

    @host.setter
    def host(self, val):
        self._host = val
        self.headers.update('Host', val, do_callback=False)

    @property
    def is_ssl(self):
        """
        Whether the request is sent over SSL

        :getter: Returns if the request is sent over SSL
        :setter: Sets if the request is sent over SSL
        :type: Bool
        """
        return self._is_ssl

    @is_ssl.setter
    def is_ssl(self, val):
        if val:
            self._is_ssl = True
            if self.port == 80:
                self.port = 443
        else:
            self._is_ssl = False
            if self.port == 443:
                self.port = 80

    @property
    def saved(self):
        """
        If the request is saved in the data file

        :getter: Returns True if the request is saved in the data file
        :type: Bool
        """
        if self.reqid is None:
            return False
        try:
            _ = int(self.reqid)
            return True
        except (ValueError, TypeError):
            return False

    @property
    def path_tuple(self):
        """
        The path in tuple form starting with the host. For example, path_tuple for
        a request to http://www.example.com/foo/bar.php would be::

            ('www.example.com', 'foo', 'bar.php')

        :getter: Returns the path in tuple form
        :type: Tuple
        """
        # the first element is blank because the path always starts with /
        ret = [self.host] + self.path.split('/')[1:]
        if ret[-1] == '':
            ret = ret[:-1]
        return tuple(ret)

    def _from_full_request(self, full_request, update_content_length=False):
        # Get rid of leading CRLF. Not in spec, should remove eventually
        # technically doesn't treat \r\n same as \n, but whatever.
        full_request = _strip_leading_newlines(full_request)
        if full_request == '':
            return

        remaining = full_request
        while remaining and not self.headers_complete:
            line, remaining = _consume_line(remaining)
            self.add_line(line)

        if not self.headers_complete:
            self.add_line('')

        if not self.complete:
            if update_content_length:
                self.raw_data = remaining
            else:
                self.add_data(remaining)
        assert(self.complete)
        self._handle_data_end()

    ############################
    ## Internal update functions

    def _set_dict_callbacks(self):
        # Add callbacks to dicts
        self.headers.set_modify_callback(self._update_from_text)
        self.cookies.set_modify_callback(self._update_from_objects)
        self.post_params.set_modify_callback(self._update_from_objects)

    def _update_from_data(self):
        # Updates metadata that's based off of data
        self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False)
        if 'content-type' in self.headers:
            if self.headers['content-type'] == 'application/x-www-form-urlencoded':
                self.post_params = repeatable_parse_qs(self.raw_data)
                self._set_dict_callbacks()

    def _update_from_objects(self):
        # Updates text values that depend on objects.
        # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION
        if self.cookies:
            assignments = []
            for ck, cv in self.cookies.all_pairs():
                asn = '%s=%s' % (ck, cv)
                assignments.append(asn)
            header_val = '; '.join(assignments)
            self.headers.update('Cookie', header_val, do_callback=False)
        if self.post_params:
            pairs = []
            for k, v in self.post_params.all_pairs():
                pairs.append('%s=%s' % (k, v))
            self.raw_data = '&'.join(pairs)

    def _update_from_text(self):
        # Updates metadata that depends on header/status line values
        self.cookies = RepeatableDict()
        self._set_dict_callbacks()
        for k, v in self.headers.all_pairs():
            self._handle_header(k, v)

    ###############
    ## Data loading

    def add_line(self, line):
        """
        Used for building a request from a Twisted protocol.
        Add a line (for status line and headers). Lines must be added in order
        and the first line must be the status line. The line should not contain
        the trailing carriage return/newline. I do not suggest you use this for
        anything.

        :param line: The line to add
        :type line: string
        """

        if self._first_line and line == '':
            # Ignore leading newlines because fuck the spec
            return

        if self._first_line:
            self._handle_statusline(line)
            self._first_line = False
        else:
            # Either header or newline (end of headers)
            if line == '':
                self.headers_complete = True
                if self._data_length == 0:
                    self.complete = True
            else:
                key, val = line.split(':', 1)
                val = val.strip()
                if self._handle_header(key, val):
                    self.headers.append(key, val, do_callback=False)

    def add_data(self, data):
        """
        Used for building a request from a Twisted protocol.
        Add data to the request.
        I do not suggest that you use this function ever.

        :param data: The data to add
        :type data: string
        """
        # Add data (headers must be complete)
        len_remaining = self._data_length - len(self._partial_data)
        if len(data) >= len_remaining:
            self._partial_data += data[:len_remaining]
            self._raw_data = self._partial_data
            self.complete = True
            self._handle_data_end()
        else:
            self._partial_data += data

    ###############
    ## Data parsing

    def _process_host(self, hostline):
        # Get address and port
        # Returns true if port was explicitly stated
        port_given = False
        if ':' in hostline:
            self._host, self.port = hostline.split(':')
            self.port = int(self.port)
            if self.port == 443:
                self._is_ssl = True
            port_given = True
        else:
            self._host = hostline
            if not self.port:
                self.port = 80
        # strip() returns a new string; keep the stripped host
        self._host = self._host.strip()
        return port_given

    def _handle_statusline_uri(self, uri):
        if not re.match('(?:^.+)://', uri):
            uri = '//' + uri

        parsed_path = urlparse.urlparse(uri)
        netloc = parsed_path.netloc
        port_given = False
        if netloc:
            port_given = self._process_host(netloc)

        if re.match('^https://', uri) or self.port == 443:
            self._is_ssl = True
            if not port_given:
                self.port = 443
        if re.match('^http://', uri):
            self._is_ssl = False

        if not self.port:
            if self.is_ssl:
                self.port = 443
            else:
                self.port = 80

        reqpath = parsed_path.path
        if parsed_path.path:
            self.path = parsed_path.path
        else:
            self.path = '/'
        if parsed_path.query:
            reqpath += '?'
            reqpath += parsed_path.query
            self.url_params = repeatable_parse_qs(parsed_path.query)
        if parsed_path.fragment:
            reqpath += '#'
            reqpath += parsed_path.fragment
            self.fragment = parsed_path.fragment

    def _handle_statusline(self, status_line):
        parts = status_line.split()
        uri = None
        if len(parts) == 3:
            self.verb, uri, self.version = parts
        elif len(parts) == 2:
            self.verb, self.version = parts
        else:
            raise Exception("Unexpected format of first line of request")

        # Get path using urlparse
        if uri is not None:
            self._handle_statusline_uri(uri)

    def _handle_header(self, key, val):
        # We may have duplicate headers
        stripped = False

        if key.lower() == 'content-length':
            self._data_length = int(val)
        elif key.lower() == 'cookie':
            # We still want the raw key/val for the cookies header
            # because it's still a header
            cookie_strs = val.split('; ')

            # The only whitespace that matters is the space right after the
            # semicolon. If actual implementations mess this up, we could
            # probably strip whitespace around the key/value
            for cookie_str in cookie_strs:
                if '=' in cookie_str:
                    splitted = cookie_str.split('=',1)
                    assert(len(splitted) == 2)
                    (cookie_key, cookie_val) = splitted
                else:
                    cookie_key = cookie_str
                    cookie_val = ''
                # we want to parse duplicate cookies
                self.cookies.append(cookie_key, cookie_val, do_callback=False)
        elif key.lower() == 'host':
            self._process_host(val)
        elif key.lower() == 'connection':
            #stripped = True
            pass

        return (not stripped)

    def _handle_data_end(self):
        if 'content-type' in self.headers:
            if self.headers['content-type'] == 'application/x-www-form-urlencoded':
                self.post_params = repeatable_parse_qs(self.raw_data)
                self._set_dict_callbacks()

    ##############
    ## Serializing

    def to_json(self):
        """
        Return a JSON encoding of the request that can be used by
        :func:`~pappyproxy.http.Request.from_json` to recreate the request.
        The `full_request` portion is base64 encoded because json doesn't play
        nice with binary blobs.
        """
        # We base64 encode the full request because json doesn't play nice
        # with binary blobs
        data = {
            'full_request': base64.b64encode(self.full_request),
            'reqid': self.reqid,
        }
        if self.response:
            data['response_id'] = self.response.rspid
        else:
            data['response_id'] = None

        if self.unmangled:
            data['unmangled_id'] = self.unmangled.reqid

        if self.time_start:
            data['start'] = self.time_start.isoformat()
        if self.time_end:
            data['end'] = self.time_end.isoformat()
        data['tags'] = self.tags
        data['port'] = self.port
        data['is_ssl'] = self.is_ssl

        return json.dumps(data)

    def from_json(self, json_string):
        """
        Update the metadata of the request to match data from
        :func:`~pappyproxy.http.Request.to_json`

        :param json_string: The JSON data to use
        :type json_string: JSON data in a string
        """

        data = json.loads(json_string)
        self._from_full_request(base64.b64decode(data['full_request']))
        self.port = data['port']
        self._is_ssl = data['is_ssl']
        if 'tags' in data:
            self.tags = data['tags']
        else:
            self.tags = []
        self._update_from_text()
        self._update_from_data()
        if data['reqid']:
            self.reqid = data['reqid']
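
    # Round-trip sketch: only the request text and metadata travel through
    # JSON; the response and unmangled request are referenced by id:
    #
    #     req = get_request('http://www.example.com/')
    #     req2 = Request()
    #     req2.from_json(req.to_json())
    #     req2.url == req.url  # True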

    #######################
    ## Data store functions

    @defer.inlineCallbacks
    def async_save(self):
        """
        async_save()
        Save/update the request in the data file. Returns a twisted deferred which
        fires when the save is complete.

        :rtype: twisted.internet.defer.Deferred
        """

        assert(dbpool)
        try:
            # Check that reqid is a numeric (ie saved) id
            _ = int(self.reqid)

            # If we have reqid, we're updating
            yield dbpool.runInteraction(self._update)
            assert(self.reqid is not None)
            yield dbpool.runInteraction(self._update_tags)
            pappyproxy.context.add_request(self)
        except (ValueError, TypeError):
            # Either no id or in-memory
            yield dbpool.runInteraction(self._insert)
            assert(self.reqid is not None)
            yield dbpool.runInteraction(self._update_tags)
            pappyproxy.context.add_request(self)

    @crochet.wait_for(timeout=180.0)
    @defer.inlineCallbacks
    def save(self):
        """
        save()
        Save/update the request in the data file.
        Saves the request, its unmangled version, the response, and the unmangled response.
        Cannot be called from inside an async function.
        """

        yield self.async_deep_save()

    @defer.inlineCallbacks
    def async_deep_save(self):
        """
        async_deep_save()
        Saves self, unmangled, response, and unmangled response. Returns a deferred
        which fires after everything has been saved.

        :rtype: twisted.internet.defer.Deferred
        """

        if self.response:
            if self.response.unmangled:
                yield self.response.unmangled.async_save()
            yield self.response.async_save()
        if self.unmangled:
            yield self.unmangled.async_save()
        yield self.async_save()
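
    # In a macro, a submitted request would typically be persisted like this
    # (sketch):
    #
    #     req = post_request('http://www.example.com/', post_params={'a': 'b'})
    #     req.submit()  # blocking submit (outside async code)
    #     req.save()    # saves req, its response, and any unmangled versions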

    def _update_tags(self, txn):
        # This should never be called on an unsaved or in-memory request
        txn.execute(
            """
            DELETE FROM tagged WHERE reqid=?;
            """,
            (self.reqid,)
        )

        tagids = []
        tags_to_add = []
        # Find ids that already exist
        for tag in self.tags:
            txn.execute(
                """
                SELECT id, tag FROM tags WHERE tag=?;
                """,
                (tag,)
            )
            result = txn.fetchall()
            if len(result) == 0:
                tags_to_add.append(tag)
            else:
                tagid = int(result[0][0])
                tagids.append(tagid)

        # Add new tags
        for tag in tags_to_add:
            txn.execute(
                """
                INSERT INTO tags (tag) VALUES (?);
                """,
                (tag,)
            )
            tagids.append(int(txn.lastrowid))

        # Tag our request
        for tagid in tagids:
            txn.execute(
                """
                INSERT INTO tagged (reqid, tagid) VALUES (?, ?);
                """,
                (int(self.reqid), tagid)
            )

    def _update(self, txn):
        # Update an existing request row in place; reqid must already be set
        setnames = ["full_request=?", "port=?"]
        queryargs = [self.full_request, self.port]
        if self.response:
            setnames.append('response_id=?')
            assert(self.response.rspid is not None) # should be saved first
            queryargs.append(self.response.rspid)
        if self.unmangled:
            setnames.append('unmangled_id=?')
            assert(self.unmangled.reqid is not None) # should be saved first
            queryargs.append(self.unmangled.reqid)
        if self.time_start:
            setnames.append('start_datetime=?')
            queryargs.append(self.time_start.isoformat())
        if self.time_end:
            setnames.append('end_datetime=?')
            queryargs.append(self.time_end.isoformat())

        setnames.append('is_ssl=?')
        if self.is_ssl:
            queryargs.append('1')
        else:
            queryargs.append('0')

        setnames.append('submitted=?')
        if self.submitted:
            queryargs.append('1')
        else:
            queryargs.append('0')

        queryargs.append(self.reqid)
        txn.execute(
            """
            UPDATE requests SET %s WHERE id=?;
            """ % ','.join(setnames),
            tuple(queryargs)
        )

    def _insert(self, txn):
        # If we don't have a reqid, we're creating a new request row
        colnames = ["full_request", "port"]
        colvals = [self.full_request, self.port]
        if self.response:
            colnames.append('response_id')
            assert(self.response.rspid is not None) # should be saved first
            colvals.append(self.response.rspid)
        if self.unmangled:
            colnames.append('unmangled_id')
            assert(self.unmangled.reqid is not None) # should be saved first
            colvals.append(self.unmangled.reqid)
        if self.time_start:
            colnames.append('start_datetime')
            colvals.append(self.time_start.isoformat())
        if self.time_end:
            colnames.append('end_datetime')
            colvals.append(self.time_end.isoformat())
        colnames.append('submitted')
        if self.submitted:
            colvals.append('1')
        else:
            colvals.append('0')

        colnames.append('is_ssl')
        if self.is_ssl:
            colvals.append('1')
        else:
            colvals.append('0')

        txn.execute(
            """
            INSERT INTO requests (%s) VALUES (%s);
            """ % (','.join(colnames), ','.join(['?']*len(colvals))),
            tuple(colvals)
        )
        self.reqid = str(txn.lastrowid)
        assert txn.lastrowid is not None
        assert self.reqid is not None

    @defer.inlineCallbacks
    def delete(self):
        assert(self.reqid is not None)
        yield dbpool.runQuery(
            """
            DELETE FROM requests WHERE id=?;
            """,
            (self.reqid,)
        )
        yield dbpool.runQuery(
            """
            DELETE FROM tagged WHERE reqid=?;
            """,
            (self.reqid,)
        )
        self.reqid = None

    @defer.inlineCallbacks
    def deep_delete(self):
        if self.unmangled:
            yield self.unmangled.delete()
        if self.response:
            if self.response.unmangled:
                yield self.response.unmangled.delete()
            yield self.response.delete()
        yield self.delete()

    @staticmethod
    def _gen_sql_row(tablename=None):
        template = "{pre}full_request, {pre}response_id, {pre}id, {pre}unmangled_id, {pre}start_datetime, {pre}end_datetime, {pre}port, {pre}is_ssl"
        if tablename:
            return template.format(pre=('%s.'%tablename))
        else:
            return template.format(pre='')


    @staticmethod
    @defer.inlineCallbacks
    def _from_sql_row(row):
        req = Request(row[0])
        if row[1]:
            rsp = yield Response.load_response(str(row[1]))
            req.response = rsp
        if row[3]:
            unmangled_req = yield Request.load_request(str(row[3]))
            req.unmangled = unmangled_req
        if row[4]:
            req.time_start = datetime.datetime.strptime(row[4], "%Y-%m-%dT%H:%M:%S.%f")
        if row[5]:
            req.time_end = datetime.datetime.strptime(row[5], "%Y-%m-%dT%H:%M:%S.%f")
        if row[6] is not None:
            req.port = int(row[6])
        if row[7] == 1:
            req._is_ssl = True
        req.reqid = str(row[2])

        # tags
        rows = yield dbpool.runQuery(
            """
            SELECT tg.tag
            FROM tagged tgd, tags tg
            WHERE tgd.tagid=tg.id AND tgd.reqid=?;
            """,
            (req.reqid,)
        )
        req.tags = []
        for row in rows:
            req.tags.append(row[0])
        defer.returnValue(req)

    @staticmethod
    @defer.inlineCallbacks
    def load_all_requests():
        """
        load_all_requests()
        Load all the requests in the data file and return them in a list.
        Returns a deferred which calls back with the list of requests when complete.

        :rtype: twisted.internet.defer.Deferred
        """

        reqs = []
        reqs += list(pappyproxy.context.in_memory_requests)
        rows = yield dbpool.runQuery(
            """
            SELECT %s
            FROM requests;
            """ % Request._gen_sql_row(),
        )
        for row in rows:
            req = yield Request._from_sql_row(row)
            reqs.append(req)
        defer.returnValue(reqs)

    @staticmethod
    @defer.inlineCallbacks
    def load_requests_by_tag(tag):
        """
        load_requests_by_tag(tag)
        Load all the requests in the data file with a given tag and return them in a list.
        Returns a deferred which calls back with the list of requests when complete.

        :rtype: twisted.internet.defer.Deferred
        """
        # tags
        rows = yield dbpool.runQuery(
            """
            SELECT tgd.reqid
            FROM tagged tgd, tags tg
            WHERE tgd.tagid=tg.id AND tg.tag=?;
            """,
            (tag,)
        )
        reqs = []
        for row in rows:
            # load_request returns a deferred, so it must be yielded
            req = yield Request.load_request(row[0])
            reqs.append(req)
        defer.returnValue(reqs)

    @staticmethod
    @defer.inlineCallbacks
    def load_request(to_load, allow_special=True):
        """
        load_request(to_load)
        Load a request with the given request id and return it.
        Returns a deferred which calls back with the request when complete.

        :rtype: twisted.internet.defer.Deferred
        """

        assert(dbpool)

        if not allow_special:
            try:
                int(to_load)
            except (ValueError, TypeError):
                raise PappyException('Cannot load special id %s' % to_load)

        ret_unmangled = False
        rsp_unmangled = False
        if to_load[0] == 'u':
            ret_unmangled = True
            loadid = to_load[1:]
        elif to_load[0] == 's':
            rsp_unmangled = True
            loadid = to_load[1:]
        else:
            loadid = to_load

        def retreq(r):
            if ret_unmangled:
                if not r.unmangled:
                    raise PappyException("Request %s was not mangled"%r.reqid)
                return r.unmangled
            if rsp_unmangled:
                if not r.response:
                    raise PappyException("Request %s does not have a response" % r.reqid)
                if not r.response.unmangled:
                    raise PappyException("Response to request %s was not mangled" % r.reqid)
                r.response = r.response.unmangled
                return r
            else:
                return r

        for r in pappyproxy.context.in_memory_requests:
            if r.reqid == to_load:
                defer.returnValue(retreq(r))
        for r in pappyproxy.context.all_reqs:
            if r.reqid == to_load:
                defer.returnValue(retreq(r))
        for r in pappyproxy.context.active_requests:
            if r.reqid == to_load:
                defer.returnValue(retreq(r))
        if to_load[0] == 'm':
            # An in-memory request should have been loaded in the previous loop
            raise PappyException('In-memory request %s not found' % to_load)
        rows = yield dbpool.runQuery(
            """
            SELECT %s
            FROM requests
            WHERE id=?;
            """ % Request._gen_sql_row(),
            (loadid,)
        )
        if len(rows) != 1:
            raise PappyException("Request with id %s does not exist" % loadid)
        req = yield Request._from_sql_row(rows[0])
        req.reqid = to_load

        defer.returnValue(retreq(req))

    @staticmethod
    @defer.inlineCallbacks
    def load_from_filters(filters):
        # Not efficient in any way
        # But it stays this way until we hit performance issues
        assert(dbpool)
        rows = yield dbpool.runQuery(
            """
            SELECT %s FROM requests r1
            LEFT JOIN requests r2 ON r1.id=r2.unmangled_id
            WHERE r2.id is NULL;
            """ % Request._gen_sql_row('r1'),
        )
        reqs = []
        for row in rows:
            req = yield Request._from_sql_row(row)
            reqs.append(req)
        reqs += list(pappyproxy.context.in_memory_requests)
        (reqs, _) = pappyproxy.context.filter_reqs(reqs, filters)

        defer.returnValue(reqs)

    ######################
    ## Submitting Requests

    @staticmethod
    @defer.inlineCallbacks
    def submit_new(host, port, is_ssl, full_request):
        """
        submit_new(host, port, is_ssl, full_request)
        Submits a request with the given parameters and returns a request object
        with the response.

        :param host: The host to submit to
        :type host: string
        :param port: The port to submit to
        :type port: Integer
        :param is_ssl: Whether to use SSL
        :type is_ssl: Bool
        :param full_request: The request data to send
        :type full_request: string
        :rtype: Twisted deferred that calls back with a Request
        """

        new_obj = Request(full_request)
        factory = pappyproxy.proxy.ProxyClientFactory(new_obj, save_all=False)
        factory.connection_id = pappyproxy.proxy.get_next_connection_id()
        if is_ssl:
            reactor.connectSSL(host, port, factory, pappyproxy.proxy.ClientTLSContext())
        else:
            reactor.connectTCP(host, port, factory)
        new_req = yield factory.data_defer
        defer.returnValue(new_req)

    @defer.inlineCallbacks
    def async_submit(self):
        """
        async_submit()
        Same as :func:`~pappyproxy.http.Request.submit` but generates deferreds.
        Submits the request using its host, port, etc. and updates its response value
        to the resulting response.

        :rtype: Twisted deferred
        """
        new_req = yield Request.submit_new(self.host, self.port, self.is_ssl,
                                           self.full_request)
        self.response = new_req.response
        self.time_start = new_req.time_start
        self.time_end = new_req.time_end

    @crochet.wait_for(timeout=180.0)
    @defer.inlineCallbacks
    def submit(self):
        """
        submit()
        Submits the request using its host, port, etc. and updates its response value
        to the resulting response.
        Cannot be called in async functions.
        This is what you should use to submit your requests in macros.
        """
        new_req = yield Request.submit_new(self.host, self.port, self.is_ssl,
                                           self.full_request)
        self.response = new_req.response
        self.time_start = new_req.time_start
        self.time_end = new_req.time_end
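
    # Typical macro usage (sketch):
    #
    #     req = get_request('http://www.example.com/')
    #     req.submit()
    #     print req.response.response_code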


class Response(object):
    """
    :ivar complete: When creating the response with :func:`~pappyproxy.http.Response.add_line`
                    and :func:`~pappyproxy.http.Response.add_data`, returns whether
                    the request is complete.
    :vartype complete: Bool
    :ivar cookies: Cookies set by the response
    :vartype cookies: RepeatableDict of ResponseCookie objects
    :ivar headers: The headers of the response
    :vartype headers: RepeatableDict
    :ivar headers_complete: When creating the response with
                            :func:`~pappyproxy.http.Response.add_line` and
                            :func:`~pappyproxy.http.Response.add_data`, returns whether
                            the headers are complete
    :vartype headers_complete: Bool
    :ivar response_code: The response code of the response
    :vartype response_code: Integer
    :ivar response_text: The text associated with the response code (ie OK, NOT FOUND, etc)
    :vartype response_text: String
    :ivar rspid: If the response is saved in the data file, the id of the response
    :vartype rspid: String
    :ivar unmangled: If the response was mangled, the unmangled version of the response
    :vartype unmangled: Response
    :ivar version: The version part of the status line (ie HTTP/1.1)
    :vartype version: String
    """

    def __init__(self, full_response=None, update_content_length=False):
        self.complete = False
        self.cookies = RepeatableDict()
        self.headers = RepeatableDict(case_insensitive=True)
        self.headers_complete = False
        self._raw_data = ''
        self.response_code = 0
        self.response_text = ''
        self.rspid = None
        self.unmangled = None
        self.version = ''

        self._encoding_type = ENCODE_NONE
        self._first_line = True
        self._data_obj = None
        self._end_after_headers = False

        self._set_dict_callbacks()

        if full_response is not None:
            self._from_full_response(full_response, update_content_length)

    def __copy__(self):
        if not self.complete:
            raise PappyException("Cannot copy incomplete responses")
        retrsp = Response(self.full_response)
        if self.unmangled:
            retrsp.unmangled = self.unmangled.copy()
        return retrsp

    def copy(self):
        return self.__copy__()

    def __eq__(self, other):
        if self.full_response != other.full_response:
            return False
        return True

    @property
    def raw_headers(self):
        """
        The raw text of the headers including the extra newline at the end.

        :getter: Returns the raw text of the headers including the extra newline at the end.
        :type: string
        """
        ret = self.status_line + '\r\n'
        for k, v in self.headers.all_pairs():
            ret = ret + "%s: %s\r\n" % (k, v)
        ret = ret + '\r\n'
        return ret

    @property
    def status_line(self):
        """
        The status line of the response. ie `HTTP/1.1 200 OK`

        :getter: Returns the status line of the response
        :setter: Sets the status line of the response
        :type: string
        """
        if not self.version and self.response_code == 0 and not self.response_text:
            return ''
        return '%s %d %s' % (self.version, self.response_code, self.response_text)

    @status_line.setter
    def status_line(self, val):
        self._handle_statusline(val)

    @property
    def raw_data(self):
        """
        The data portion of the response

        :getter: Returns the data portion of the response
        :setter: Set the data of the response and update metadata
        :type: string
        """
        return self._raw_data

    @raw_data.setter
    def raw_data(self, val):
        self._raw_data = val
        self._data_obj = LengthData(len(val))
        if len(val) > 0:
            self._data_obj.add_data(val)
        self._encoding_type = ENCODE_NONE
        self.complete = True
        self._update_from_data()

    @property
    def full_response(self):
        """
        The full text of the response including the headers and data.
        Response is automatically converted from compressed/chunked into an
        uncompressed response with a Content-Length header.

        :getter: Returns the full text of the response
        :type: string
        """
        if not self.status_line:
            return ''
        ret = self.raw_headers
        ret = ret + self.raw_data
        return ret

    @property
    def soup(self):
        """
        Returns a beautifulsoup4 object for parsing the html of the response

        :getter: Returns a BeautifulSoup object representing the html of the response
        """
        return bs4.BeautifulSoup(self.raw_data, 'lxml')
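
    # For example (sketch; 'rsp' is any complete Response), pulling the page
    # title out of a response:
    #
    #     if rsp.soup.title:
    #         print rsp.soup.title.string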

    def _from_full_response(self, full_response, update_content_length=False):
        # Get rid of leading CRLF. Not in spec, should remove eventually
        full_response = _strip_leading_newlines(full_response)
        if full_response == '':
            return

        remaining = full_response
        while remaining and not self.headers_complete:
            line, remaining = _consume_line(remaining)
            self.add_line(line)

        if not self.headers_complete:
            self.add_line('')

        if update_content_length:
            self.raw_data = remaining
        if not self.complete:
            self.add_data(remaining)
        assert(self.complete)

    ############################
    ## Internal update functions

    def _set_dict_callbacks(self):
        # Add callbacks to dicts
        self.headers.set_modify_callback(self._update_from_text)
        self.cookies.set_modify_callback(self._update_from_objects)

    def _update_from_data(self):
        self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False)

    def _update_from_objects(self):
        # Updates headers from objects
        # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION

        # Cookies
        new_headers = RepeatableDict()
        cookies_added = False
        for pair in self.headers.all_pairs():
            if pair[0].lower() == 'set-cookie':
                # If we haven't added our cookies, add them all. Otherwise
                # strip the header (do nothing)
                if not cookies_added:
                    # Add all our cookies here
                    for k, c in self.cookies.all_pairs():
                        new_headers.append('Set-Cookie', c.cookie_str)
                    cookies_added = True
            else:
                new_headers.append(pair[0], pair[1])

        if not cookies_added:
            # Add all our cookies to the end
            for k, c in self.cookies.all_pairs():
                new_headers.append('Set-Cookie', c.cookie_str)

        self.headers = new_headers
        self._set_dict_callbacks()

    def _update_from_text(self):
        self.cookies = RepeatableDict()
        self._set_dict_callbacks()
        for k, v in self.headers.all_pairs():
            if k.lower() == 'set-cookie':
                # Parse the cookie
                cookie = ResponseCookie(v)
                self.cookies.append(cookie.key, cookie, do_callback=False)

    ###############
    ## Data parsing

    def _handle_statusline(self, status_line):
        self._first_line = False
        self.version, self.response_code, self.response_text = \
            status_line.split(' ', 2)
        self.response_code = int(self.response_code)

        if self.response_code == 304 or self.response_code == 204 or \
           self.response_code/100 == 1:
            self._end_after_headers = True

    def _handle_header(self, key, val):
        stripped = False
        if key.lower() == 'content-encoding':
            if val in ('gzip', 'x-gzip'):
                self._encoding_type = ENCODE_GZIP
            # ('deflate',) must be a tuple; ('deflate') is just a string
            elif val in ('deflate',):
                self._encoding_type = ENCODE_DEFLATE

            # We send our requests already decoded, so we don't want a header
            # saying it's encoded
            if self._encoding_type != ENCODE_NONE:
                stripped = True
        elif key.lower() == 'transfer-encoding' and val.lower() == 'chunked':
            self._data_obj = ChunkedData()
            self.complete = self._data_obj.complete
            stripped = True
        elif key.lower() == 'content-length':
            # We use our own content length
            self._data_obj = LengthData(int(val))
        elif key.lower() == 'set-cookie':
            cookie = ResponseCookie(val)
            self.cookies.append(cookie.key, cookie, do_callback=False)

        if stripped:
            return False
        else:
            self.headers.append(key, val, do_callback=False)
            return True

    ###############
    ## Data loading

    def add_line(self, line):
        """
        Used for building a response from a Twisted protocol.
        Add a line (for status line and headers). Lines must be added in order
        and the first line must be the status line. The line should not contain
        the trailing carriage return/newline. I do not suggest you use this for
        anything.

        :param line: The line to add
        :type line: string
        """
        assert(not self.headers_complete)
        if not line and self._first_line:
            return
        if not line:
            self.headers_complete = True

            if self._end_after_headers:
                self.complete = True
                return

            if not self._data_obj:
                self._data_obj = LengthData(0)
            self.complete = self._data_obj.complete
            return

        if self._first_line:
            self._handle_statusline(line)
            self._first_line = False
        else:
            key, val = line.split(':', 1)
            val = val.strip()
            self._handle_header(key, val)

    def add_data(self, data):
        """
        Used for building a response from a Twisted protocol.
        Add data to the response. The data must conform to the content encoding
        and transfer encoding given in the headers passed in to
        :func:`~pappyproxy.http.Response.add_line`. Can be any fragment of the data.
        I do not suggest that you use this function ever.

        :param data: The data to add
        :type data: string
        """
        assert(self._data_obj)
        assert(not self._data_obj.complete)
        assert not self.complete
        self._data_obj.add_data(data)
        if self._data_obj.complete:
            self._raw_data = _decode_encoded(self._data_obj.raw_data,
                                             self._encoding_type)
            self.complete = True
            self._update_from_data()

    ####################
    ## Cookie management

    def add_cookie(self, cookie):
        """
        Add a :class:`pappyproxy.http.ResponseCookie` to the response.

        .. warning::
            This will add duplicate cookies. If you want to add a cookie you're not sure exists,
            use :func:`~pappyproxy.http.Response.set_cookie`
        """
        self.cookies.append(cookie.key, cookie)

    def set_cookie(self, cookie):
        """
        Set a cookie in the response. ``cookie`` must be a :class:`pappyproxy.http.ResponseCookie`
        """
        self.cookies[cookie.key] = cookie

    def set_cookie_kv(self, key, val):
        """
        Set a cookie by key and value. Will not have path, secure, etc set at all.
        """
        cookie = ResponseCookie()
        cookie.key = key
        cookie.val = val
        self.cookies[cookie.key] = cookie

    def delete_cookie(self, key):
        """
        Delete a cookie from the response by its key
        """
        del self.cookies[key]
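
    # Cookie management sketch ('rsp' is any Response):
    #
    #     rsp.set_cookie_kv('session', 'abc123')  # add or replace by key
    #     rsp.cookies['session'].val              # 'abc123'
    #     rsp.delete_cookie('session')            # remove it again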

    ##############
    ## Serializing

    def to_json(self):
        """
        Return a JSON encoding of the response that can be used by
        :func:`~pappyproxy.http.Response.from_json` to recreate the response.
        The ``full_response`` portion is base64 encoded because json doesn't play
        nice with binary blobs.
        """
        data = {
            'rspid': self.rspid,
            'full_response': base64.b64encode(self.full_response),
        }
        if self.unmangled:
            data['unmangled_id'] = self.unmangled.rspid

        return json.dumps(data)


    def from_json(self, json_string):
        """
        Update the metadata of the response to match data from
        :func:`~pappyproxy.http.Response.to_json`

        :param json_string: The JSON data to use
        :type json_string: JSON data in a string
        """
        data = json.loads(json_string)
        self._from_full_response(base64.b64decode(data['full_response']))
        self._update_from_text()
        self._update_from_data()
        if data['rspid']:
            self.rspid = str(data['rspid'])

    #######################
    ## Database interaction

    @defer.inlineCallbacks
    def async_save(self):
        """
        async_save()
        Save/update just the response in the data file. Returns a twisted deferred which
        fires when the save is complete. It is suggested that you use
        :func:`~pappyproxy.http.Request.async_deep_save` instead to save responses.

        :rtype: twisted.internet.defer.Deferred
        """
        assert(dbpool)
        try:
            # Check that rspid is a numeric (ie saved) id
            _ = int(self.rspid)

            # If we have rspid, we're updating
            yield dbpool.runInteraction(self._update)
        except (ValueError, TypeError):
            yield dbpool.runInteraction(self._insert)
        assert(self.rspid is not None)

    # Right now responses without requests are unviewable
    # @crochet.wait_for(timeout=180.0)
    # @defer.inlineCallbacks
    # def save(self):
    #     yield self.save()

    def _update(self, txn):
        setnames = ["full_response=?"]
        queryargs = [self.full_response]
        if self.unmangled:
            setnames.append('unmangled_id=?')
            assert(self.unmangled.rspid is not None) # should be saved first
            queryargs.append(self.unmangled.rspid)

        queryargs.append(self.rspid)
        txn.execute(
            """
            UPDATE responses SET %s WHERE id=?;
            """ % ','.join(setnames),
            tuple(queryargs)
        )
        assert(self.rspid is not None)

    def _insert(self, txn):
        # If we don't have an rspid, we're creating a new one
        colnames = ["full_response"]
        colvals = [self.full_response]
        if self.unmangled is not None:
            colnames.append('unmangled_id')
            assert(self.unmangled.rspid is not None) # should be saved first
            colvals.append(self.unmangled.rspid)

        txn.execute(
            """
            INSERT INTO responses (%s) VALUES (%s);
            """ % (','.join(colnames), ','.join(['?']*len(colvals))),
            tuple(colvals)
        )
        self.rspid = txn.lastrowid
        assert(self.rspid is not None)

    @defer.inlineCallbacks
    def delete(self):
        assert(self.rspid is not None)
        row = yield dbpool.runQuery(
            """
            DELETE FROM responses WHERE id=?;
            """,
            (self.rspid,)
        )
        self.rspid = None

    @staticmethod
    @defer.inlineCallbacks
    def load_response(respid):
        """
        Load a response from its response id. Returns a deferred. I don't suggest you use this.

        :rtype: twisted.internet.defer.Deferred
        """
        assert(dbpool)
        rows = yield dbpool.runQuery(
            """
            SELECT full_response, id, unmangled_id
            FROM responses
            WHERE id=?;
            """,
            (respid,)
        )
        if len(rows) != 1:
            raise PappyException("Response with request id %s does not exist" % respid)
        full_response = rows[0][0]
        resp = Response(full_response)
        resp.rspid = str(rows[0][1])
        if rows[0][2]:
            unmangled_response = yield Response.load_response(int(rows[0][2]))
            resp.unmangled = unmangled_response
        defer.returnValue(resp)