You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
1130 lines
37 KiB
1130 lines
37 KiB
9 years ago
|
import base64
|
||
|
import collections
|
||
|
import console
|
||
|
import context
|
||
|
import crochet
|
||
|
import datetime
|
||
|
import gzip
|
||
|
import json
|
||
|
import proxy
|
||
|
import re
|
||
|
import StringIO
|
||
|
import urlparse
|
||
|
import zlib
|
||
|
from twisted.internet import defer, reactor
|
||
|
from util import PappyException
|
||
|
|
||
|
# Body content-encoding identifiers, used by decode_encoded() and by
# Response.handle_header() when deciding how to decompress a body.
ENCODE_NONE = 0
ENCODE_DEFLATE = 1
ENCODE_GZIP = 2

# Module-wide twisted adbapi connection pool; installed by init() and used
# by the Request/Response save/load/delete helpers.
dbpool = None
|
||
|
|
||
|
class DataAlreadyComplete(Exception):
    """Raised when data is added to a body object that is already complete."""
    pass
|
||
|
|
||
|
def init(pool):
    # Install the module-wide connection pool. The first call wins; later
    # calls leave the already-installed pool untouched.
    global dbpool
    dbpool = pool if dbpool is None else dbpool
    assert(dbpool)
|
||
|
|
||
|
def destruct():
    # Shut down the module-wide pool; init() must have been called first.
    assert(dbpool)
    dbpool.close()
|
||
|
|
||
|
def decode_encoded(data, encoding):
    """Decompress a message body according to its content encoding.

    data -- the raw (possibly compressed) body string
    encoding -- one of ENCODE_NONE, ENCODE_DEFLATE, ENCODE_GZIP

    Previously any encoding value other than NONE/DEFLATE fell through to
    the gzip branch; unknown values now raise ValueError instead.
    """
    if encoding == ENCODE_NONE:
        return data
    if encoding == ENCODE_DEFLATE:
        return zlib.decompress(data)
    if encoding == ENCODE_GZIP:
        return gzip.GzipFile('', 'rb', 9, StringIO.StringIO(data)).read()
    raise ValueError("Unknown content encoding: %r" % encoding)
|
||
|
|
||
|
def repeatable_parse_qs(s):
    # Parse a query string into a RepeatableDict, preserving insertion
    # order and duplicate keys. A pair without '=' gets a value of None.
    ret_dict = RepeatableDict()
    for pair in s.split('&'):
        if '=' in pair:
            key, val = pair.split('=', 1)
            ret_dict.append(key, val)
        else:
            ret_dict.append(pair, None)
    return ret_dict
|
||
|
|
||
|
class RepeatableDict:
    """
    A dict that retains the order of items inserted and keeps track of
    duplicate values. Can optionally treat keys as case insensitive.
    Custom made for the proxy, so it has strange features.

    Fixes over the previous revision:
      * sort() no longer references an undefined local (NameError)
      * __delitem__ removes the effective (case-folded) key, so deletion
        works when case_insensitive=True
      * clear() also empties the key set, so membership tests reset
      * the dict is iterable, yielding its (key, value) pairs
    """

    def __init__(self, from_pairs=None, case_insensitive=False):
        # If efficiency becomes a problem, add a dict that keeps a list by key
        # and use that for getting data. But until then, this stays.
        self._pairs = []   # ordered list of (key, value); duplicates allowed
        self._keys = set() # effective keys, for O(1) membership tests
        self._modify_callback = None
        self.case_insensitive = case_insensitive

        if from_pairs:
            for k, v in from_pairs:
                self.append(k, v)

    def _ef_key(self, key):
        # "effective key": key.lower() if we're case insensitive,
        # otherwise the key itself
        if self.case_insensitive:
            return key.lower()
        return key

    def _mod_callback(self):
        # Calls the modify callback if we have one
        if self._modify_callback:
            self._modify_callback()

    def __contains__(self, val):
        return self._ef_key(val) in self._keys

    def __getitem__(self, key):
        # Return the value of the *last* matching pair
        for p in reversed(self._pairs):
            if self._ef_key(p[0]) == self._ef_key(key):
                return p[1]
        raise KeyError(key)

    def __setitem__(self, key, val):
        # Replaces first instance of `key` and deletes the rest
        self.set_val(key, val)

    def __delitem__(self, key):
        # Remove every pair matching the key. Use the effective key when
        # updating the key set (previously the raw key was removed, which
        # raised KeyError for case-insensitive dicts).
        self._remove_key(key)
        ef = self._ef_key(key)
        self._pairs = [p for p in self._pairs if self._ef_key(p[0]) != ef]
        self._mod_callback()

    def __iter__(self):
        # Iterate over (key, value) pairs in insertion order
        return iter(self._pairs)

    def __nonzero__(self):
        return bool(self._pairs)

    # Python 3 truthiness protocol
    __bool__ = __nonzero__

    def _add_key(self, key):
        self._keys.add(self._ef_key(key))

    def _remove_key(self, key):
        self._keys.remove(self._ef_key(key))

    def all_pairs(self):
        # Return a shallow copy so callers can't mutate our list
        return self._pairs[:]

    def append(self, key, val, do_callback=True):
        # Add a (possibly duplicate) entry for key
        self._add_key(key)
        self._pairs.append((key, val))
        if do_callback:
            self._mod_callback()

    def set_val(self, key, val, do_callback=True):
        # Replace the first matching pair and drop any other duplicates;
        # append the pair if the key wasn't present at all.
        new_pairs = []
        added = False
        self._add_key(key)
        for p in self._pairs:
            if self._ef_key(p[0]) == self._ef_key(key):
                if not added:
                    # only add the first instance
                    new_pairs.append((key, val))
                    added = True
            else:
                new_pairs.append(p)
        if not added:
            new_pairs.append((key, val))
        self._pairs = new_pairs

        if do_callback:
            self._mod_callback()

    def update(self, key, val, do_callback=True):
        # If key is already in the dict, replace that value with the new
        # value (preserving the stored key's capitalization); otherwise add it.
        if key in self:
            for k, v in self.all_pairs():
                if self._ef_key(k) == self._ef_key(key):
                    self.set_val(k, val, do_callback=do_callback)
                    break
        else:
            self.set_val(key, val, do_callback=do_callback)

    def clear(self, do_callback=True):
        # Drop every pair *and* reset the key set (previously the key set
        # was left populated, so `k in d` stayed True after clear()).
        self._pairs = []
        self._keys = set()
        if do_callback:
            self._mod_callback()

    def all_vals(self, key):
        # All values stored under key, oldest first
        return [p[1] for p in self._pairs if self._ef_key(p[0]) == self._ef_key(key)]

    def add_pairs(self, pairs, do_callback=True):
        # Bulk-append a list of (key, value) tuples
        for pair in pairs:
            self._add_key(pair[0])
        self._pairs += pairs
        if do_callback:
            self._mod_callback()

    def sort(self):
        # Sort pairs by key alphabetically. The previous revision read an
        # undefined local (`pairs`) and raised NameError.
        self._pairs = sorted(self._pairs, key=lambda x: x[0])

    def set_modify_callback(self, callback):
        # Add a function to be called whenever an element is added, changed, or
        # deleted. Set to None to remove
        self._modify_callback = callback
|
||
|
|
||
|
|
||
|
class LengthData:
    """Accumulates a message body whose size is fixed by Content-Length."""

    def __init__(self, length=None):
        # raw_data holds what has been received so far; `complete` flips
        # to True once exactly `length` bytes have been stored. A missing
        # or zero length means there is no body to wait for.
        self.raw_data = ''
        self.length = length or 0
        self.complete = (self.length == 0)

    def add_data(self, data):
        """Append body data, truncating anything past the expected length.

        Raises DataAlreadyComplete if the body was already full.
        """
        if self.complete:
            raise DataAlreadyComplete()
        needed = self.length - len(self.raw_data)
        if len(data) < needed:
            self.raw_data += data
        else:
            # Take only what we still need; the rest belongs to whatever
            # follows this body on the wire.
            self.raw_data += data[:needed]
            assert(len(self.raw_data) == self.length)
            self.complete = True
|
||
|
|
||
|
class ChunkedData:
    """Incremental decoder for HTTP chunked transfer encoding.

    Feed raw wire bytes in with add_data(); once `complete` is True the
    de-chunked body is available as `raw_data`. Implemented as a small
    state machine over the accumulated input:
      state 0 - reading the hex chunk-length line
      state 1 - reading `_chunk_remaining` bytes of chunk payload
      state 2 - consuming an expected literal (`_known_str`), then moving
                to `_next_state`
    """

    def __init__(self):
        self.raw_data = ''
        self._pos = 0
        self._state = 0 # 0=reading length, 1=reading data, 2=going over known string
        self._len_str = ''
        self._chunk_remaining = 0
        self._known_str = ''
        self._known_str_pos = 0
        self._next_state = 0
        self._raw_data = ''
        self.complete = False
        self.unchunked_data = ''

    def add_data(self, data):
        """Append raw wire data and advance the decoder."""
        self._raw_data += data
        self.scan_forward()

    def scan_forward(self):
        """Consume as much buffered input as possible, one char at a time."""
        # Don't add more data if we're already done
        if self.complete:
            return

        while self._pos < len(self._raw_data):
            curchar = self._raw_data[self._pos]
            if self._state == 0:
                if curchar.lower() in '0123456789abcdef':
                    # Read the next char of the length
                    self._len_str += curchar

                    # Move to the next char
                    self._pos += 1
                elif curchar == '\r':
                    # Save how much chunk to read
                    self._chunk_remaining = int(self._len_str, 16)

                    # If the length is 0, chunked encoding is done!
                    if self._chunk_remaining == 0:
                        self.complete = True
                        # I should probably just rename raw_data since it's what
                        # you use to look at unchunked data, but you're not
                        # supposed to look at it until after it's complete
                        # anyways
                        self._raw_data = self.unchunked_data
                        self.raw_data = self._raw_data # Expose raw_data
                        return

                    # There should be a newline after the \r
                    self._known_str = '\n'
                    self._state = 2
                    self._next_state = 1

                    # Reset the length str
                    self._len_str = ''

                    # Move to the next char
                    self._pos += 1
                else:
                    raise Exception("Malformed chunked encoding!")

            elif self._state == 1:
                if self._chunk_remaining > 0:
                    # Read next byte of data
                    self.unchunked_data += curchar
                    self._chunk_remaining -= 1
                    self._pos += 1
                else:
                    # Read newline then read a new chunk
                    self._known_str = '\r\n'
                    self._next_state = 0 # Read len after newlines
                    self._state = 2 # Read newlines
                    # Don't move to the next char because we didn't do anything
            elif self._state == 2:
                # Read a char of an expected string

                # If the expected char doesn't match, throw an error
                if self._known_str[self._known_str_pos] != curchar:
                    raise Exception("Unexpected data")

                # Move to the next char in the raw data and in our known string
                self._known_str_pos += 1
                self._pos += 1

                # If we've reached the end of the known string, go to the next state
                if self._known_str_pos == len(self._known_str):
                    self._known_str_pos = 0
                    self._state = self._next_state
|
||
|
|
||
|
class ResponseCookie(object):
    """A single cookie from a Set-Cookie response header.

    Parses the key/value pair plus the standard attribute-value fields
    (expires, Max-Age, Domain, Path, secure, HttpOnly) and serializes
    back into header form via the cookie_av property.
    """

    def __init__(self, set_cookie_string=None):
        # set_cookie_string -- optional raw Set-Cookie header value to parse
        self.key = None
        self.val = None
        self.expires = None
        self.max_age = None
        self.domain = None
        self.path = None
        self.secure = False
        self.http_only = False

        if set_cookie_string:
            self.from_cookie(set_cookie_string)

    @property
    def cookie_av(self):
        """Serialize back into the value of a Set-Cookie header."""
        to_add = ['%s=%s' % (self.key, self.val)]
        if self.expires:
            to_add.append('expires=%s' % self.expires)
        if self.max_age:
            to_add.append('Max-Age=%d' % self.max_age)
        if self.domain:
            to_add.append('Domain=%s' % self.domain)
        if self.path:
            to_add.append('Path=%s' % self.path)
        if self.secure:
            to_add.append('secure')
        if self.http_only:
            to_add.append('httponly')
        return '; '.join(to_add)

    def parse_cookie_av(self, cookie_av):
        """Parse a single attribute-value string (e.g. 'Max-Age=3600')."""
        if '=' in cookie_av:
            key, val = cookie_av.split('=', 1)
            key = key.lstrip()
            if key.lower() == 'expires':
                self.expires = val
            if key.lower() == 'max-age':
                self.max_age = int(val)
            if key.lower() == 'domain':
                self.domain = val
            if key.lower() == 'path':
                self.path = val
        elif cookie_av.lstrip().lower() == 'secure':
            self.secure = True
        elif cookie_av.lstrip().lower() == 'httponly':
            self.http_only = True

    def from_cookie(self, set_cookie_string):
        """Populate this cookie's fields from a raw Set-Cookie value."""
        if ';' in set_cookie_string:
            cookie_pair, rest = set_cookie_string.split(';', 1)
            self.key, self.val = cookie_pair.split('=', 1)
            # parse_cookie_av strips leading whitespace itself. (The
            # previous revision also called cookie_av.lstrip() here and
            # discarded the result -- dead code, now removed.)
            for cookie_av in rest.split(';'):
                self.parse_cookie_av(cookie_av)
        else:
            self.key, self.val = set_cookie_string.split('=', 1)
|
||
|
|
||
|
|
||
|
class Request(object):
    """A single proxied HTTP request.

    Parses raw request text into structured, editable fields (verb, path,
    GET/POST params, headers, cookies) which serialize back out through
    full_request. Also persists itself via the module's adbapi pool and
    submits itself over the network through the proxy module.

    Fixes over the previous revision:
      * update_from_objects iterates post_params via all_pairs() (the
        dict itself was not reliably iterable)
      * _process_host actually strips whitespace from the host (the
        result of self.host.strip() was discarded)
      * delete() carries the @defer.inlineCallbacks decorator it needs;
        previously the generator was never run, so no row was deleted
    """

    def __init__(self, full_request=None, update_content_length=False):
        # full_request -- optional raw request text to parse immediately
        # update_content_length -- if True, Content-Length is rewritten
        #   to match the parsed body
        self.time_end = None
        self.time_start = None
        self.complete = False
        self.cookies = RepeatableDict()
        self.fragment = None
        self.get_params = RepeatableDict()
        self.header_len = 0
        self.headers = RepeatableDict(case_insensitive=True)
        self.headers_complete = False
        self.host = None
        self.is_ssl = False
        self.path = ''
        self.port = None
        self.post_params = RepeatableDict()
        self._raw_data = ''
        self.reqid = None
        self.response = None
        self.submitted = False
        self.unmangled = None
        self.verb = ''
        self.version = ''

        # Parser state
        self._first_line = True
        self._data_length = 0
        self._partial_data = ''

        self.set_dict_callbacks()

        # Get values from the raw request
        if full_request is not None:
            self.from_full_request(full_request, update_content_length)

    @property
    def rsptime(self):
        """Round-trip time of the response, or None if not timed."""
        if self.time_start and self.time_end:
            return self.time_end - self.time_start
        else:
            return None

    @property
    def status_line(self):
        """The request line, rebuilt from verb, path, params and fragment."""
        path = self.path
        if self.get_params:
            path += '?'
            pairs = []
            for pair in self.get_params.all_pairs():
                if pair[1] is None:
                    # Valueless param: render the bare key
                    pairs.append(pair[0])
                else:
                    pairs.append('='.join(pair))
            path += '&'.join(pairs)
        if self.fragment:
            path += '#'
            path += self.fragment
        return '%s %s %s' % (self.verb, path, self.version)

    @status_line.setter
    def status_line(self, val):
        self.handle_statusline(val)

    @property
    def raw_headers(self):
        """Status line plus headers, terminated by a blank line."""
        ret = self.status_line + '\r\n'
        for k, v in self.headers.all_pairs():
            ret = ret + "%s: %s\r\n" % (k, v)
        ret = ret + '\r\n'
        return ret

    @property
    def full_request(self):
        """The complete request text: headers followed by the body."""
        return self.raw_headers + self.raw_data

    @property
    def raw_data(self):
        return self._raw_data

    @raw_data.setter
    def raw_data(self, val):
        # Setting the body re-derives body-dependent metadata (including
        # Content-Length) and marks the request complete.
        self._raw_data = val
        self.update_from_data()
        self.complete = True

    def set_dict_callbacks(self):
        # Wire up modify callbacks so edits to the dicts keep the other
        # representations in sync
        self.headers.set_modify_callback(self.update_from_text)
        self.cookies.set_modify_callback(self.update_from_objects)
        self.post_params.set_modify_callback(self.update_from_data)

    def from_full_request(self, full_request, update_content_length=False):
        """Parse a complete raw request string into this object."""
        # Get rid of leading CRLF. Not in spec, should remove eventually
        while full_request[0:2] == '\r\n':
            full_request = full_request[2:]

        # We do redundant splits, but whatever
        lines = full_request.splitlines()
        for line in lines:
            if self.headers_complete:
                break
            self.add_line(line)

        if not self.headers_complete:
            self.add_line('')

        if not self.complete:
            data = full_request[self.header_len:]
            if update_content_length:
                # Assigning raw_data rewrites Content-Length to match
                self.raw_data = data
            else:
                self.add_data(data)
        assert(self.complete)

    def update_from_data(self):
        # Updates metadata that's based off of the body data
        self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False)
        if 'content-type' in self.headers:
            if self.headers['content-type'] == 'application/x-www-form-urlencoded':
                self.post_params = repeatable_parse_qs(self.raw_data)
                self.set_dict_callbacks()

    def update_from_objects(self):
        # Updates text values that depend on objects.
        # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION
        if self.cookies:
            assignments = []
            for ck, cv in self.cookies.all_pairs():
                assignments.append('%s=%s' % (ck, cv))
            self.headers.update('Cookie', '; '.join(assignments), do_callback=False)
        if self.post_params:
            pairs = []
            # Iterate explicit pairs; iterating the dict object directly
            # raised an error in the previous revision
            for k, v in self.post_params.all_pairs():
                pairs.append('%s=%s' % (k, v))
            self.raw_data = '&'.join(pairs)

    def update_from_text(self):
        # Updates metadata that depends on header/status line values
        self.cookies = RepeatableDict()
        self.set_dict_callbacks()
        for k, v in self.headers.all_pairs():
            self.handle_header(k, v)

    def add_data(self, data):
        """Add body data; headers must already be complete."""
        len_remaining = self._data_length - len(self._partial_data)
        if len(data) >= len_remaining:
            # Truncate at Content-Length; anything beyond it isn't ours
            self._partial_data += data[:len_remaining]
            self._raw_data = self._partial_data
            self.complete = True
            self.handle_data_end()
        else:
            self._partial_data += data

    def _process_host(self, hostline, overwrite=False):
        # Only overwrite the port if told to, since it may have been set
        # during a CONNECT request and we don't want to clobber that.
        # Get address and port
        if ':' in hostline:
            self.host, self.port = hostline.split(':')
            self.port = int(self.port)
            if self.port == 443:
                self.is_ssl = True
        else:
            self.host = hostline
            if not self.port or overwrite: # could be changed by connect request
                self.port = 80
        # Previously the result of strip() was discarded
        self.host = self.host.strip()

    def add_line(self, line):
        """Feed one line of the status-line/header section."""
        # Modifies the first line if it is in full-url form
        if self._first_line and line == '':
            # Tolerate leading blank lines even though the spec forbids them
            return

        if self._first_line:
            self.handle_statusline(line)
            self._first_line = False
        else:
            # Either header or newline (end of headers)
            if line == '':
                self.headers_complete = True
                if self._data_length == 0:
                    self.complete = True
            else:
                key, val = line.split(':', 1)
                val = val.strip()
                if self.handle_header(key, val):
                    self.headers.append(key, val, do_callback=False)
        # +2 accounts for the CRLF splitlines() removed
        self.header_len += len(line)+2

    def handle_statusline(self, status_line):
        """Parse the request line into verb, host/port/path info, version."""
        parts = status_line.split()
        uri = None
        if len(parts) == 3:
            self.verb, uri, self.version = parts
        elif len(parts) == 2:
            self.verb, self.version = parts
        else:
            raise Exception("Unexpected format of first line of request")

        # Get path using urlparse
        if uri is not None:
            # urlparse needs a '//' prefix to treat a bare host as a netloc
            if not re.match('(?:^.+)://', uri):
                uri = '//' + uri
            parsed_path = urlparse.urlparse(uri)
            self._process_host(parsed_path.netloc)

            # Check for https
            if re.match('^https://', uri) or self.port == 443:
                self.is_ssl = True
                self.port = 443

            self.path = parsed_path.path
            if parsed_path.query:
                self.get_params = repeatable_parse_qs(parsed_path.query)
            if parsed_path.fragment:
                self.fragment = parsed_path.fragment

    def handle_header(self, key, val):
        """React to one header; returns False if it should be stripped."""
        # We may have duplicate headers
        stripped = False

        if key.lower() == 'content-length':
            self._data_length = int(val)
        elif key.lower() == 'cookie':
            # We still want the raw key/val for the cookies header
            # because it's still a header
            cookie_strs = val.split('; ')

            # The only whitespace that matters is the space right after the
            # semicolon. If actual implementations mess this up, we could
            # probably strip whitespace around the key/value
            for cookie_str in cookie_strs:
                splitted = cookie_str.split('=', 1)
                assert(len(splitted) == 2)
                (cookie_key, cookie_val) = splitted
                # we want to parse duplicate cookies
                self.cookies.append(cookie_key, cookie_val, do_callback=False)
        elif key.lower() == 'host':
            self._process_host(val)
        elif key.lower() == 'connection':
            # Stripping of Connection is intentionally disabled
            pass

        return (not stripped)

    def handle_data_end(self):
        # Parse form-encoded bodies into post_params once the body is done
        if 'content-type' in self.headers:
            if self.headers['content-type'] == 'application/x-www-form-urlencoded':
                self.post_params = repeatable_parse_qs(self.raw_data)
                self.set_dict_callbacks()

    @defer.inlineCallbacks
    def save(self):
        """Insert or update this request's database row."""
        assert(dbpool)
        if self.reqid:
            # If we have reqid, we're updating
            yield dbpool.runInteraction(self._update)
        else:
            yield dbpool.runInteraction(self._insert)
        assert(self.reqid is not None)

    @defer.inlineCallbacks
    def deep_save(self):
        """Saves self, unmangled, response, and unmangled response"""
        if self.response:
            if self.response.unmangled:
                yield self.response.unmangled.save()
            yield self.response.save()
        if self.unmangled:
            yield self.unmangled.save()
        yield self.save()

    def _update(self, txn):
        # Build the SET clause from whichever optional fields are present
        setnames = ["full_request=?"]
        queryargs = [self.full_request]
        if self.response:
            setnames.append('response_id=?')
            assert(self.response.rspid is not None) # should be saved first
            queryargs.append(self.response.rspid)
        if self.unmangled:
            setnames.append('unmangled_id=?')
            assert(self.unmangled.reqid is not None) # should be saved first
            queryargs.append(self.unmangled.reqid)
        if self.time_start:
            setnames.append('start_datetime=?')
            queryargs.append(self.time_start.isoformat())
        if self.time_end:
            setnames.append('end_datetime=?')
            queryargs.append(self.time_end.isoformat())

        setnames.append('submitted=?')
        queryargs.append('1' if self.submitted else '0')

        queryargs.append(self.reqid)
        txn.execute(
            """
            UPDATE requests SET %s WHERE id=?;
            """ % ','.join(setnames),
            tuple(queryargs)
        )

    def _insert(self, txn):
        # Insert a new row and adopt the generated rowid as our reqid
        colnames = ["full_request"]
        colvals = [self.full_request]
        if self.response:
            colnames.append('response_id')
            assert(self.response.rspid is not None) # should be saved first
            colvals.append(self.response.rspid)
        if self.unmangled:
            colnames.append('unmangled_id')
            assert(self.unmangled.reqid is not None) # should be saved first
            colvals.append(self.unmangled.reqid)
        if self.time_start:
            colnames.append('start_datetime')
            colvals.append(self.time_start.isoformat())
        if self.time_end:
            colnames.append('end_datetime')
            colvals.append(self.time_end.isoformat())
        colnames.append('submitted')
        colvals.append('1' if self.submitted else '0')

        txn.execute(
            """
            INSERT INTO requests (%s) VALUES (%s);
            """ % (','.join(colnames), ','.join(['?']*len(colvals))),
            tuple(colvals)
        )
        self.reqid = txn.lastrowid
        assert txn.lastrowid is not None
        assert self.reqid is not None

    def to_json(self):
        """Serialize to JSON. The raw request is base64 encoded because
        json doesn't play nice with binary blobs."""
        data = {
            'full_request': base64.b64encode(self.full_request),
            'reqid': self.reqid,
        }
        if self.response:
            data['response_id'] = self.response.rspid
        else:
            data['response_id'] = None

        if self.unmangled:
            data['unmangled_id'] = self.unmangled.reqid

        if self.time_start:
            data['start'] = self.time_start.isoformat()
        if self.time_end:
            data['end'] = self.time_end.isoformat()

        return json.dumps(data)

    def from_json(self, json_string):
        """Load state from a JSON string produced by to_json()."""
        data = json.loads(json_string)
        self.from_full_request(base64.b64decode(data['full_request']))
        self.update_from_text()
        self.update_from_data()
        if data['reqid']:
            self.reqid = int(data['reqid'])

    @defer.inlineCallbacks
    def delete(self):
        # The previous revision used yield without @defer.inlineCallbacks,
        # so calling delete() produced an un-run generator and the row was
        # never actually deleted.
        assert(self.reqid is not None)
        yield dbpool.runQuery(
            """
            DELETE FROM requests WHERE id=?;
            """,
            (self.reqid,)
        )

    def duplicate(self):
        """Return a fresh Request parsed from this one's raw text."""
        return Request(self.full_request)

    @staticmethod
    @defer.inlineCallbacks
    def submit(host, port, is_ssl, full_request):
        """Send a raw request over the network via the proxy machinery.
        Returns (through a Deferred) the submitted Request."""
        new_obj = Request(full_request)
        factory = proxy.ProxyClientFactory(new_obj)
        factory.connection_id = proxy.get_next_connection_id()
        if is_ssl:
            reactor.connectSSL(host, port, factory, proxy.ClientTLSContext())
        else:
            reactor.connectTCP(host, port, factory)
        new_req = yield factory.data_defer
        defer.returnValue(new_req)

    def submit_self(self):
        """Submit this request to its own host/port."""
        return Request.submit(self.host, self.port, self.is_ssl,
                              self.full_request)

    @staticmethod
    @defer.inlineCallbacks
    def load_request(reqid):
        """Load a request (plus its response and unmangled version) by id."""
        assert(dbpool)
        rows = yield dbpool.runQuery(
            """
            SELECT full_request, response_id, id, unmangled_id, start_datetime, end_datetime
            FROM requests
            WHERE id=?;
            """,
            (reqid,)
        )
        if len(rows) != 1:
            raise PappyException("Request with id %d does not exist" % reqid)
        full_request = rows[0][0]
        req = Request(full_request)
        if rows[0][1]:
            rsp = yield Response.load_response(int(rows[0][1]))
            req.response = rsp
        if rows[0][3]:
            unmangled_req = yield Request.load_request(int(rows[0][3]))
            req.unmangled = unmangled_req
        if rows[0][4]:
            req.time_start = datetime.datetime.strptime(rows[0][4], "%Y-%m-%dT%H:%M:%S.%f")
        if rows[0][5]:
            req.time_end = datetime.datetime.strptime(rows[0][5], "%Y-%m-%dT%H:%M:%S.%f")
        req.reqid = int(rows[0][2])
        defer.returnValue(req)

    @staticmethod
    @defer.inlineCallbacks
    def load_from_filters(filters):
        """Load every top-level request and filter the list in memory.
        Not efficient in any way, but it stays this way until we hit
        performance issues."""
        assert(dbpool)
        rows = yield dbpool.runQuery(
            """
            SELECT r1.id FROM requests r1
                LEFT JOIN requests r2 ON r1.id=r2.unmangled_id
            WHERE r2.id is NULL;
            """,
        )
        reqs = []
        for r in rows:
            newreq = yield Request.load_request(int(r[0]))
            reqs.append(newreq)

        reqs = context.filter_reqs(reqs, filters)

        defer.returnValue(reqs)
|
||
|
|
||
|
|
||
|
|
||
|
class Response(object):
    """A single HTTP response.

    Parses the status line, headers and body (handling chunked transfer
    encoding and gzip/deflate content encodings) into editable fields,
    serializes back out through full_response, and persists itself via
    the module's adbapi pool.

    Fixes over the previous revision:
      * deflate detection used `val in ('deflate')` -- a substring test
        against a plain string -- instead of tuple membership
      * delete() carries the @defer.inlineCallbacks decorator it needs;
        previously the generator was never run, so no row was deleted
    """

    def __init__(self, full_response=None, update_content_length=False):
        # full_response -- optional raw response text to parse immediately
        # update_content_length -- if True, Content-Length is rewritten
        #   to match the parsed body
        self.complete = False
        self.cookies = RepeatableDict()
        self.header_len = 0
        self.headers = RepeatableDict(case_insensitive=True)
        self.headers_complete = False
        self.host = None
        self._raw_data = ''
        self.response_code = 0
        self.response_text = ''
        self.rspid = None
        self._status_line = ''
        self.unmangled = None
        self.version = ''

        # Parser state
        self._encoding_type = ENCODE_NONE
        self._first_line = True
        self._data_obj = None
        self._end_after_headers = False

        self.set_dict_callbacks()

        if full_response is not None:
            self.from_full_response(full_response, update_content_length)

    @property
    def raw_headers(self):
        """Status line plus headers, terminated by a blank line."""
        ret = self.status_line + '\r\n'
        for k, v in self.headers.all_pairs():
            ret = ret + "%s: %s\r\n" % (k, v)
        ret = ret + '\r\n'
        return ret

    @property
    def status_line(self):
        return self._status_line

    @status_line.setter
    def status_line(self, val):
        self._status_line = val
        self.handle_statusline(val)

    @property
    def raw_data(self):
        return self._raw_data

    @raw_data.setter
    def raw_data(self, val):
        # Setting the body directly resets it to a plain (unencoded)
        # length-delimited body and refreshes Content-Length.
        self._raw_data = val
        self._data_obj = LengthData(len(val))
        self._data_obj.add_data(val)
        self._encoding_type = ENCODE_NONE
        self.complete = True
        self.update_from_data()

    @property
    def full_response(self):
        """The complete response text: headers followed by the body."""
        return self.raw_headers + self.raw_data

    def set_dict_callbacks(self):
        # Wire up modify callbacks so edits to the dicts keep the other
        # representations in sync
        self.headers.set_modify_callback(self.update_from_text)
        self.cookies.set_modify_callback(self.update_from_objects)

    def from_full_response(self, full_response, update_content_length=False):
        """Parse a complete raw response string into this object."""
        # Get rid of leading CRLF. Not in spec, should remove eventually
        while full_response[0:2] == '\r\n':
            full_response = full_response[2:]

        # We do redundant splits, but whatever
        lines = full_response.splitlines()
        for line in lines:
            if self.headers_complete:
                break
            self.add_line(line)
        if not self.headers_complete:
            self.add_line('')

        if not self.complete:
            data = full_response[self.header_len:]
            if update_content_length:
                # Assigning raw_data rewrites Content-Length to match
                self.raw_data = data
            else:
                self.add_data(data)
        assert(self.complete)

    def add_line(self, line):
        """Feed one line of the status-line/header section."""
        assert(not self.headers_complete)
        # +2 accounts for the CRLF splitlines() removed
        self.header_len += len(line)+2
        if not line and self._first_line:
            # Tolerate leading blank lines
            return
        if not line:
            self.headers_complete = True

            if self._end_after_headers:
                # 1xx/204/304: no body follows by definition
                self.complete = True
                return

            if not self._data_obj:
                # No Content-Length or chunked encoding seen: empty body
                self._data_obj = LengthData(0)
            self.complete = self._data_obj.complete
            return

        if self._first_line:
            self.handle_statusline(line)
            self._first_line = False
        else:
            key, val = line.split(':', 1)
            val = val.strip()
            self.handle_header(key, val)

    def handle_statusline(self, status_line):
        """Parse e.g. 'HTTP/1.1 200 OK' into version/code/text."""
        self._first_line = False
        self._status_line = status_line
        self.version, self.response_code, self.response_text = \
                                                status_line.split(' ', 2)
        self.response_code = int(self.response_code)

        # 304, 204 and 1xx responses never carry a body
        if self.response_code == 304 or self.response_code == 204 or \
            self.response_code/100 == 1:
            self._end_after_headers = True

    def handle_header(self, key, val):
        """React to one header; returns False if it was stripped."""
        stripped = False
        if key.lower() == 'content-encoding':
            if val in ('gzip', 'x-gzip'):
                self._encoding_type = ENCODE_GZIP
            elif val in ('deflate',):
                # Note the tuple: the previous revision wrote
                # `val in ('deflate')`, a substring test against a string
                self._encoding_type = ENCODE_DEFLATE

            # We send our requests already decoded, so we don't want a header
            # saying it's encoded
            if self._encoding_type != ENCODE_NONE:
                stripped = True
        elif key.lower() == 'transfer-encoding' and val.lower() == 'chunked':
            self._data_obj = ChunkedData()
            self.complete = self._data_obj.complete
            stripped = True
        elif key.lower() == 'content-length':
            # We use our own content length
            self._data_obj = LengthData(int(val))
        elif key.lower() == 'set-cookie':
            cookie = ResponseCookie(val)
            self.cookies.append(cookie.key, cookie, do_callback=False)
        elif key.lower() == 'host':
            self.host = val

        if stripped:
            return False
        else:
            self.headers.append(key, val, do_callback=False)
            return True

    def update_from_data(self):
        # Keep Content-Length in sync with the body
        self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False)

    def update_from_objects(self):
        # Updates headers from objects
        # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION

        # Replace the first run of Set-Cookie headers with our cookie
        # objects, strip any later ones, and append if there were none.
        new_headers = RepeatableDict()
        cookies_added = False
        for pair in self.headers.all_pairs():
            if pair[0].lower() == 'set-cookie':
                if not cookies_added:
                    # Add all our cookies here
                    for k, c in self.cookies.all_pairs():
                        new_headers.append('Set-Cookie', c.cookie_av)
                    cookies_added = True
            else:
                new_headers.append(pair[0], pair[1])

        if not cookies_added:
            # Add all our cookies to the end
            for k, c in self.cookies.all_pairs():
                new_headers.append('Set-Cookie', c.cookie_av)

        self.headers = new_headers
        self.set_dict_callbacks()

    def update_from_text(self):
        # Re-parse cookie objects out of the Set-Cookie headers
        self.cookies = RepeatableDict()
        self.set_dict_callbacks()
        for k, v in self.headers.all_pairs():
            if k.lower() == 'set-cookie':
                # Parse the cookie
                cookie = ResponseCookie(v)
                self.cookies.append(cookie.key, cookie, do_callback=False)

    def add_data(self, data):
        """Feed body data into the length/chunked decoder."""
        assert(self._data_obj)
        assert(not self._data_obj.complete)
        assert not self.complete
        self._data_obj.add_data(data)
        if self._data_obj.complete:
            # Store the body decompressed; the encoding header was stripped
            self._raw_data = decode_encoded(self._data_obj.raw_data,
                                            self._encoding_type)
            self.complete = True
            self.update_from_data()

    def add_cookie(self, cookie):
        """Attach a ResponseCookie without touching the header text."""
        self.cookies.append(cookie.key, cookie, do_callback=False)

    def to_json(self):
        """Serialize to JSON. The raw response is base64 encoded because
        json doesn't play nice with binary blobs."""
        data = {
            'rspid': self.rspid,
            'full_response': base64.b64encode(self.full_response),
        }
        if self.unmangled:
            data['unmangled_id'] = self.unmangled.rspid

        return json.dumps(data)

    def from_json(self, json_string):
        """Load state from a JSON string produced by to_json()."""
        data = json.loads(json_string)
        self.from_full_response(base64.b64decode(data['full_response']))
        self.update_from_text()
        self.update_from_data()
        if data['rspid']:
            self.rspid = int(data['rspid'])

    @defer.inlineCallbacks
    def save(self):
        """Insert or update this response's database row."""
        assert(dbpool)
        if self.rspid:
            # If we have rspid, we're updating
            yield dbpool.runInteraction(self._update)
        else:
            yield dbpool.runInteraction(self._insert)
        assert(self.rspid is not None)

    def _update(self, txn):
        # Build the SET clause from whichever optional fields are present
        setnames = ["full_response=?"]
        queryargs = [self.full_response]
        if self.unmangled:
            setnames.append('unmangled_id=?')
            assert(self.unmangled.rspid is not None) # should be saved first
            queryargs.append(self.unmangled.rspid)

        queryargs.append(self.rspid)
        txn.execute(
            """
            UPDATE responses SET %s WHERE id=?;
            """ % ','.join(setnames),
            tuple(queryargs)
        )
        assert(self.rspid is not None)

    def _insert(self, txn):
        # Insert a new row and adopt the generated rowid as our rspid
        colnames = ["full_response"]
        colvals = [self.full_response]
        if self.unmangled is not None:
            colnames.append('unmangled_id')
            assert(self.unmangled.rspid is not None) # should be saved first
            colvals.append(self.unmangled.rspid)

        txn.execute(
            """
            INSERT INTO responses (%s) VALUES (%s);
            """ % (','.join(colnames), ','.join(['?']*len(colvals))),
            tuple(colvals)
        )
        self.rspid = txn.lastrowid
        assert(self.rspid is not None)

    @defer.inlineCallbacks
    def delete(self):
        # The previous revision used yield without @defer.inlineCallbacks,
        # so calling delete() produced an un-run generator and the row was
        # never actually deleted.
        assert(self.rspid is not None)
        yield dbpool.runQuery(
            """
            DELETE FROM responses WHERE id=?;
            """,
            (self.rspid,)
        )

    @staticmethod
    @defer.inlineCallbacks
    def load_response(respid):
        """Load a response (plus its unmangled version) by id."""
        assert(dbpool)
        rows = yield dbpool.runQuery(
            """
            SELECT full_response, id, unmangled_id
            FROM responses
            WHERE id=?;
            """,
            (respid,)
        )
        if len(rows) != 1:
            raise PappyException("Response with request id %d does not exist" % respid)
        full_response = rows[0][0]
        resp = Response(full_response)
        resp.rspid = int(rows[0][1])
        if rows[0][2]:
            unmangled_response = yield Response.load_response(int(rows[0][2]))
            resp.unmangled = unmangled_response
        defer.returnValue(resp)
|
||
|
|
||
|
|