Version 0.2.2

This commit is contained in:
Rob Glew 2016-01-26 16:23:40 -06:00
parent d8dfcd3e24
commit 9a14a5541a
15 changed files with 519 additions and 114 deletions

113
pappyproxy/colors.py Normal file
View file

@ -0,0 +1,113 @@
import re
import itertools
# Matches a terminal ANSI escape sequence (e.g. "\x1b[31m"). Compiled once
# at import time instead of on every clen() call.
_ANSI_ESCAPE_RE = re.compile(r'\x1b[^m]*m')

def clen(s):
    """Return the visible (printable) length of ``s``, ignoring any ANSI
    color/style escape sequences embedded in it."""
    return len(_ANSI_ESCAPE_RE.sub('', s))
class Colors:
    """ANSI SGR escape codes used to colorize terminal output.

    Values are raw escape strings; concatenate them with text and finish
    with ``ENDC`` to reset the terminal back to its default style.
    """
    # Legacy aliases kept for readability at call sites (these overlap with
    # the light-color values below, e.g. HEADER == LMAGENTA).
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Effects
    ENDC = '\033[0m'        # reset all styling
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Colors (standard foreground, SGR 30-37)
    BLACK = '\033[30m'
    RED = '\033[31m'
    GREEN = '\033[32m'
    YELLOW = '\033[33m'
    BLUE = '\033[34m'
    MAGENTA = '\033[35m'
    CYAN = '\033[36m'
    WHITE = '\033[37m'

    # BG Colors (standard background, SGR 40-47)
    BGBLACK = '\033[40m'
    BGRED = '\033[41m'
    BGGREEN = '\033[42m'
    BGYELLOW = '\033[43m'
    BGBLUE = '\033[44m'
    BGMAGENTA = '\033[45m'
    BGCYAN = '\033[46m'
    BGWHITE = '\033[47m'

    # Light Colors (bright foreground, SGR 90-97)
    LBLACK = '\033[90m'
    LRED = '\033[91m'
    LGREEN = '\033[92m'
    LYELLOW = '\033[93m'
    LBLUE = '\033[94m'
    LMAGENTA = '\033[95m'
    LCYAN = '\033[96m'
    LWHITE = '\033[97m'
class Styles:
    """Semantic style constants for console output, built from ``Colors``.

    Call sites reference the meaning (e.g. ``VERB_GET``) rather than a raw
    color, so the palette can be retuned in one place.
    """
    ################
    # Request tables
    TABLE_HEADER = Colors.BOLD+Colors.UNDERLINE
    VERB_GET = Colors.CYAN
    VERB_POST = Colors.YELLOW
    VERB_OTHER = Colors.BLUE
    STATUS_200 = Colors.CYAN
    STATUS_300 = Colors.MAGENTA
    STATUS_400 = Colors.YELLOW
    STATUS_500 = Colors.RED
    # path_formatter alternates through these per path segment
    PATH_COLORS = [Colors.CYAN, Colors.BLUE]
    # key/value listings (e.g. extended request info)
    KV_KEY = Colors.GREEN
    KV_VAL = Colors.ENDC
def verb_color(verb):
    """Return the ANSI style for an HTTP verb.

    GET and POST get dedicated colors; anything else — including an empty
    or ``None`` verb — falls back to the generic verb color.
    """
    # NOTE: ``verb == 'GET'`` already implies ``verb`` is truthy, so the
    # original ``verb and verb == ...`` guards were redundant.
    if verb == 'GET':
        return Styles.VERB_GET
    if verb == 'POST':
        return Styles.VERB_POST
    return Styles.VERB_OTHER
def scode_color(scode):
    """Map an HTTP status-code string to its display color.

    The color is chosen by the leading digit (2xx/3xx/4xx/5xx); anything
    empty or outside those families gets the terminal reset code.
    """
    family_colors = {
        '2': Styles.STATUS_200,
        '3': Styles.STATUS_300,
        '4': Styles.STATUS_400,
        '5': Styles.STATUS_500,
    }
    if not scode:
        return Colors.ENDC
    return family_colors.get(scode[0], Colors.ENDC)
def path_formatter(path, width=-1):
    """Colorize a URL path, alternating colors for each '/'-separated
    segment.

    If ``width`` is given (anything other than -1) and the path is longer,
    the path is truncated to ``width`` characters ending in '...'.
    """
    must_truncate = len(path) > width and width != -1
    if must_truncate:
        # Trim to width, then swap the last 3 chars for an ellipsis so the
        # result is still exactly ``width`` characters of visible text.
        path = path[:width][:-3] + '...'
    palette = itertools.cycle(Styles.PATH_COLORS)
    colored_segments = [color + segment + Colors.ENDC
                        for segment, color in zip(path.split('/'), palette)]
    return '/'.join(colored_segments)
def host_color(host):
    """Deterministically pick a color for ``host`` so each unique host is
    rendered in a consistent color (collisions possible — palette is small).

    An empty/None host falls back to plain red.
    """
    if not host:
        return Colors.RED
    hostcols = [Colors.RED,
                Colors.GREEN,
                Colors.YELLOW,
                Colors.BLUE,
                Colors.MAGENTA,
                Colors.CYAN,
                Colors.LRED,
                Colors.LGREEN,
                Colors.LYELLOW,
                Colors.LBLUE,
                Colors.LMAGENTA,
                Colors.LCYAN]
    # Fix: the original used ``% (len(hostcols) - 1)``, which made the last
    # palette entry (LCYAN) unreachable. Modulo the full length uses every
    # color and is still always in range.
    return hostcols[hash(host) % len(hostcols)]

View file

@ -7,8 +7,10 @@ import cmd2
import re
import string
import sys
import itertools
from .util import PappyException
from .colors import Styles, Colors, verb_color, scode_color, path_formatter, host_color
from twisted.internet import defer
###################
@ -84,9 +86,13 @@ def print_table(coldata, rows):
maxwidth = 0
colwidth = 0
for row in rows:
printstr = str(row[i])
if len(printstr) > colwidth:
colwidth = len(printstr)
printdata = row[i]
if isinstance(printdata, dict):
collen = len(str(printdata['data']))
else:
collen = len(str(printdata))
if collen > colwidth:
colwidth = collen
if maxwidth > 0 and colwidth > maxwidth:
widths.append(maxwidth)
else:
@ -94,16 +100,45 @@ def print_table(coldata, rows):
# Print rows
padding = 2
is_heading = not empty_headers
for row in rows:
if is_heading:
sys.stdout.write(Styles.TABLE_HEADER)
for (col, width) in zip(row, widths):
printstr = str(col)
if isinstance(col, dict):
printstr = str(col['data'])
if 'color' in col:
colors = col['color']
formatter = None
elif 'formatter' in col:
colors = None
formatter = col['formatter']
else:
colors = None
formatter = None
else:
printstr = str(col)
colors = None
formatter = None
if len(printstr) > width:
for i in range(len(printstr)-4, len(printstr)-1):
printstr=printstr[:width]
printstr=printstr[:-3]+'...'
sys.stdout.write(printstr)
trunc_printstr=printstr[:width]
trunc_printstr=trunc_printstr[:-3]+'...'
else:
trunc_printstr=printstr
if colors is not None:
sys.stdout.write(colors)
sys.stdout.write(trunc_printstr)
sys.stdout.write(Colors.ENDC)
elif formatter is not None:
toprint = formatter(printstr, width)
sys.stdout.write(toprint)
else:
sys.stdout.write(trunc_printstr)
sys.stdout.write(' '*(width-len(printstr)))
sys.stdout.write(' '*padding)
if is_heading:
sys.stdout.write(Colors.ENDC)
is_heading = False
sys.stdout.write('\n')
sys.stdout.flush()
@ -112,23 +147,11 @@ def print_requests(requests):
Takes in a list of requests and prints a table with data on each of the
requests. It's the same table that's used by ``ls``.
"""
# Print a table with info on all the requests in the list
cols = [
{'name':'ID'},
{'name':'Verb'},
{'name': 'Host'},
{'name':'Path', 'width':40},
{'name':'S-Code'},
{'name':'Req Len'},
{'name':'Rsp Len'},
{'name':'Time'},
{'name':'Mngl'},
]
rows = []
for req in requests:
rows.append(get_req_data_row(req))
print_table(cols, rows)
def print_request_rows(request_rows):
"""
Takes in a list of request rows generated from :func:`pappyproxy.console.get_req_data_row`
@ -142,13 +165,23 @@ def print_request_rows(request_rows):
{'name':'Verb'},
{'name': 'Host'},
{'name':'Path', 'width':40},
{'name':'S-Code'},
{'name':'S-Code', 'width':16},
{'name':'Req Len'},
{'name':'Rsp Len'},
{'name':'Time'},
{'name':'Mngl'},
]
print_table(cols, request_rows)
print_rows = []
for row in request_rows:
(reqid, verb, host, path, scode, qlen, slen, time, mngl) = row
verb = {'data':verb, 'color':verb_color(verb)}
scode = {'data':scode, 'color':scode_color(scode)}
host = {'data':host, 'color':host_color(host)}
path = {'data':path, 'formatter':path_formatter}
print_rows.append((reqid, verb, host, path, scode, qlen, slen, time, mngl))
print_table(cols, print_rows)
def get_req_data_row(request):
"""

View file

@ -73,7 +73,7 @@ def _consume_line(instr):
return (''.join(l), instr[pos+1:])
l.append(instr[pos])
pos += 1
return instr
return (instr, '')
###################
## Functions to use
@ -537,18 +537,8 @@ class HTTPMessage(object):
reserved_meta_keys = ['full_message']
def __init__(self, full_message=None, update_content_length=False):
self.complete = False
self.headers = RepeatableDict(case_insensitive=True)
self.headers_complete = False
self.malformed = False
self.start_line = ''
self.reset_metadata()
self._decoded = False
self._encoding_type = ENCODE_NONE
self._first_line = True
self._data_obj = None
self._end_after_headers = False
# Initializes instance variables too
self.clear()
if full_message is not None:
self._from_full_message(full_message, update_content_length)
@ -579,19 +569,44 @@ class HTTPMessage(object):
"""
return self.__copy__()
def clear(self):
"""
Resets all internal data and clears the message
"""
self.complete = False
self.headers = RepeatableDict(case_insensitive=True)
self.headers_complete = False
self.malformed = False
self.start_line = ''
self.reset_metadata()
self._decoded = False
self._encoding_type = ENCODE_NONE
self._first_line = True
self._data_obj = None
self._end_after_headers = False
def _from_full_message(self, full_message, update_content_length=False, meta=None):
# Set defaults for metadata
self.reset_metadata()
self.clear()
# Get rid of leading CRLF. Not in spec, should remove eventually
full_message = _strip_leading_newlines(full_message)
if full_message == '':
return
remaining = full_message
while remaining and not self.headers_complete:
line, remaining = _consume_line(remaining)
self.add_line(line)
lines = full_message.splitlines(True)
header_len = 0
for line in lines:
if line[-2] == '\r':
l = line[:-2]
else:
l = line[:-1]
self.add_line(l)
header_len += len(line)
if self.headers_complete:
break
remaining = full_message[header_len:]
if not self.headers_complete:
self.add_line('')
@ -940,24 +955,8 @@ class Request(HTTPMessage):
def __init__(self, full_request=None, update_content_length=True,
port=None, is_ssl=None, host=None):
self.time_end = None
self.time_start = None
self.cookies = RepeatableDict()
self.fragment = None
self.url_params = RepeatableDict()
self._host = None
self._is_ssl = False
self.path = ''
self.port = None
self.post_params = RepeatableDict()
self.reqid = None
self.response = None
self.submitted = False
self.unmangled = None
self.verb = ''
self.version = ''
self.tags = []
self.plugin_data = {}
# Resets instance variables
self.clear()
# Called after instance vars since some callbacks depend on
# instance vars
@ -1232,10 +1231,32 @@ class Request(HTTPMessage):
self.tags = []
def get_plugin_dict(self, name):
"""
Get the data dictionary for the given plugin name.
"""
if not name in self.plugin_data:
self.plugin_data[name] = {}
return self.plugin_data[name]
def clear(self):
HTTPMessage.clear(self)
self.time_end = None
self.time_start = None
self.cookies = RepeatableDict()
self.fragment = None
self.url_params = RepeatableDict()
self._is_ssl = False
self.path = ''
self.post_params = RepeatableDict()
self.response = None
self.submitted = False
self.unmangled = None
self.verb = ''
self.version = ''
self.plugin_data = {}
self.reset_metadata()
self.is_unmangled_version = False
############################
## Internal update functions
@ -1262,8 +1283,6 @@ class Request(HTTPMessage):
def _update_from_objects(self):
# Updates text values that depend on objects.
# DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION
print 'FOOOOO'
print self.post_params.all_pairs()
if self.cookies:
assignments = []
for ck, cv in self.cookies.all_pairs():
@ -1684,6 +1703,7 @@ class Request(HTTPMessage):
if row[3]:
unmangled_req = yield Request.load_request(str(row[3]))
req.unmangled = unmangled_req
req.unmangled.is_unmangled_version = True
if row[4]:
req.time_start = datetime.datetime.fromtimestamp(row[4])
if row[5]:
@ -1825,7 +1845,7 @@ class Request(HTTPMessage):
# If it's not cached, load_request will be called again and be told
# not to use the cache.
r = yield Request.cache.get(loadid)
defer.returnValue(r)
defer.returnValue(retreq(r))
# Load it from the data file
rows = yield dbpool.runQuery(
@ -1923,14 +1943,8 @@ class Response(HTTPMessage):
"""
def __init__(self, full_response=None, update_content_length=True):
self.complete = False
self.cookies = RepeatableDict()
self.response_code = 0
self.response_text = ''
self.rspid = None
self.unmangled = None
self.version = ''
self._saving = False
# Resets instance variables
self.clear()
# Called after instance vars since some callbacks depend on
# instance vars
@ -2023,6 +2037,15 @@ class Response(HTTPMessage):
def reset_metadata(self):
self.rspid = None
def clear(self):
HTTPMessage.clear(self)
self.cookies = RepeatableDict()
self.response_code = 0
self.response_text = ''
self.rspid = None
self.unmangled = None
self.version = ''
############################
## Internal update functions

View file

@ -30,6 +30,14 @@ all_contexts = [main_context]
plugin_loader = None
cons = None
try:
from guppy import hpy
heapstats = hpy()
heapstats.setref()
except ImportError:
heapstats = None
def parse_args():
# parses sys.argv and returns a settings dictionary

155
pappyproxy/plugins/debug.py Normal file
View file

@ -0,0 +1,155 @@
import gc
import shlex
import code
import crochet
import os
import resource
import random
import datetime
from pappyproxy.http import Request, post_request
from pappyproxy.util import PappyException
from pappyproxy.requestcache import RequestCache
from pappyproxy.console import print_requests
from pappyproxy.pappy import heapstats, cons
from twisted.internet import defer
def cache_info(line):
c = Request.cache
print 'Cache has %d/%d slots filled' % (len(c._cached_reqs), c._cache_size)
print 'Hit score: {0:.2f} ({1}/{2})'.format(c.hit_ratio, c.hits, c.hits+c.misses)
print ''
if line != 'q':
rl = [v for k, v in Request.cache._cached_reqs.iteritems()]
rs = sorted(rl, key=lambda r: Request.cache._last_used[r.reqid], reverse=True)
print_requests(rs)
def memory_info(line):
try:
import psutil
except ImportError:
raise PappyException('This command requires the psutil package')
proc = psutil.Process(os.getpid())
mem = proc.memory_info().rss
megabyte = (float(mem)/1024)/1024
print 'Memory usage: {0:.2f} Mb ({1} bytes)'.format(megabyte, mem)
def heap_info(line):
if heapstats is None:
raise PappyException('Command requires the guppy library')
size = heapstats.heap().size
print 'Heap usage: {0:.2f} Mb'.format(size/(1024.0*1024.0))
print heapstats.heap()
def limit_info(line):
rsrc = resource.RLIMIT_AS
soft, hard = resource.getrlimit(rsrc)
print 'Soft limit starts as:', soft
print 'Hard limit starts as:', hard
if line:
limit_mb = int(line)
limit_kb = int(line)*1024
print 'Setting limit to %s Mb' % limit_mb
resource.setrlimit(rsrc, (limit_kb, hard)) #limit to one kilobyte
soft, hard = resource.getrlimit(rsrc)
print 'Soft limit is now:', soft
print 'Hard limit is now:', hard
def graph_randobj(line):
try:
import objgraph
except ImportError:
raise PappyException('This command requires the objgraph library')
args = shlex.split(line)
if len(args) > 1:
fname = args[1]
else:
fname = 'chain.png'
print 'Getting random %s object...' % args[0]
obj = random.choice(objgraph.by_type(args[0]))
print 'Creating chain...'
chain = objgraph.find_backref_chain(obj, objgraph.is_proper_module)
print 'Saving chain...'
objgraph.show_chain(chain, filename=fname)
def heapdo(line):
    # Drop into an interactive Python shell with a guppy heap snapshot
    # available. ``h`` looks unused, but it is deliberately bound so the
    # snapshot is reachable inside the shell via locals().
    if heapstats is None:
        raise PappyException('Command requires the guppy library')
    h = heapstats.heap()
    code.interact(local=locals())
def collect(line):
    # Force an immediate full garbage-collection pass.
    gc.collect()
@crochet.wait_for(timeout=None)
@defer.inlineCallbacks
def loadblock(line):
    # Synchronously load a block of requests into the cache.
    # assumes args are "<start_reqid> <count>" — TODO confirm against
    # RequestCache.load's signature.
    args = shlex.split(line)
    yield Request.cache.load(args[0], int(args[1]))
@crochet.wait_for(timeout=None)
@defer.inlineCallbacks
def big_fucking_data_file(line):
    # Stress-test helper: generate 1000 POST requests with 1 Mb bodies and
    # deep-save each to the data file, printing progress every 20 requests.
    print "Generating some giant fucking requests"
    for i in range(1000):
        if i % 20 == 0:
            print 'Generated %d' % i
        r = post_request('https://www.google.com')
        r.body = 'A'*(1024*1024)
        yield r.async_deep_save()
def time_cmd(line):
print 'Timing `%s`...' % line
start = datetime.datetime.now()
cons.onecmd(line.strip())
end = datetime.datetime.now()
total_time = (end-start).total_seconds()
print '`{0}` took {1:.3f} seconds'.format(line, total_time)
def cache_data(line):
args = shlex.split(line)
reqid = args[0]
cached = reqid in Request.cache._cached_reqs
if reqid in Request.cache._last_used:
last_used = Request.cache._last_used[reqid]
else:
last_used = 'NOT IN _last_used'
in_all = reqid in Request.cache.all_ids
in_unmangled = reqid in Request.cache.unmangled_ids
try:
ordered_ids_pos = Request.cache.ordered_ids.index(reqid)
except ValueError:
ordered_ids_pos = 'Not in ordered_ids'
in_inmem = reqid in Request.cache.inmem_reqs
print ''
print 'Cache data about request %s ----------' % reqid
print 'Cahced: %s' % cached
print 'Last used: %s' % last_used
print 'In all_ids: %s' % in_all
print 'In unmangled: %s' % in_unmangled
print 'Ordered id pos: %s' % ordered_ids_pos
print 'Is inmem: %s' % in_inmem
print ''
def check_cache(line):
    # Sanity-check the request cache's internal id bookkeeping; raises an
    # AssertionError if any cached request has no reqid.
    Request.cache.assert_ids()
def load_cmds(cmd):
    # Register the debug commands with the console. Each entry maps a
    # command name to a (handler, autocompleter) tuple; None means no
    # autocompletion for that command.
    cmd.set_cmds({
        'cacheinfo': (cache_info, None),
        'heapinfo': (heap_info, None),
        'memlimit': (limit_info, None),
        'heapdo': (heapdo, None),
        'gccollect': (collect, None),
        'graphobj': (graph_randobj, None),
        'meminfo': (memory_info, None),
        'bigdata': (big_fucking_data_file, None),
        'checkcache': (check_cache, None),
        'loadblock': (loadblock, None),
        'time': (time_cmd, None),
        'cachedata': (cache_data, None),
    })
    # No short aliases for the debug commands.
    cmd.add_aliases([
    ])

View file

@ -8,6 +8,7 @@ from pappyproxy.util import PappyException
from pappyproxy.http import Request
from twisted.internet import defer
from pappyproxy.plugin import main_context_ids
from pappyproxy.colors import Colors, Styles, verb_color, scode_color, path_formatter, host_color
###################
## Helper functions
@ -21,8 +22,7 @@ def view_full_message(request, headers_only=False):
def print_request_extended(request):
# Prints extended info for the request
title = "Request Info (reqid=%s)" % request.reqid
print title
print '-'*len(title)
print Styles.TABLE_HEADER + title + Colors.ENDC
reqlen = len(request.body)
reqlen = '%d bytes' % reqlen
rsplen = 'No response'
@ -34,6 +34,7 @@ def print_request_extended(request):
if request.response:
response_code = str(request.response.response_code) + \
' ' + request.response.response_text
response_code = scode_color(response_code) + response_code + Colors.ENDC
rsplen = len(request.response.body)
rsplen = '%d bytes' % rsplen
@ -59,24 +60,31 @@ def print_request_extended(request):
time_made_str = request.time_start.strftime('%a, %b %d, %Y, %I:%M:%S %p')
else:
time_made_str = '--'
verb = verb_color(request.verb) + request.verb + Colors.ENDC
host = host_color(request.host) + request.host + Colors.ENDC
print 'Made on %s' % time_made_str
print 'ID: %s' % request.reqid
print 'Verb: %s' % request.verb
print 'Host: %s' % request.host
print 'Path: %s' % request.full_path
print 'Status Code: %s' % response_code
print 'Request Length: %s' % reqlen
print 'Response Length: %s' % rsplen
print_pairs = []
print_pairs.append(('Made on', time_made_str))
print_pairs.append(('ID', request.reqid))
print_pairs.append(('Verb', verb))
print_pairs.append(('Host', host))
print_pairs.append(('Path', path_formatter(request.full_path)))
print_pairs.append(('Status Code', response_code))
print_pairs.append(('Request Length', reqlen))
print_pairs.append(('Response Length', rsplen))
if request.response and request.response.unmangled:
print 'Unmangled Response Length: %s bytes' % len(request.response.unmangled.full_response)
print 'Time: %s' % time_str
print 'Port: %s' % request.port
print 'SSL: %s' % is_ssl
print 'Mangled: %s' % mangle_str
print 'Tags: %s' % (', '.join(request.tags))
print_pairs.append(('Unmangled Response Length', len(request.response.unmangled.full_response)))
print_pairs.append(('Time', time_str))
print_pairs.append(('Port', request.port))
print_pairs.append(('SSL', is_ssl))
print_pairs.append(('Mangled', mangle_str))
print_pairs.append(('Tags', ', '.join(request.tags)))
if request.plugin_data:
print 'Plugin Data: %s' % (request.plugin_data)
print_pairs.append(('Plugin Data', request.plugin_data))
for k, v in print_pairs:
print Styles.KV_KEY+str(k)+': '+Styles.KV_VAL+str(v)
def print_tree(tree):
# Prints a tree. Takes in a sorted list of path tuples

View file

@ -128,7 +128,7 @@ class ProxyClient(LineReceiver):
if self.factory.save_all:
# It isn't the actual time, but this should work in case
# we do an 'ls' before it gets a real time saved
sendreq.time_start = datetime.datetime.now()
self.request.time_start = datetime.datetime.now()
if self.factory.stream_response and not to_mangle:
self.request.async_deep_save()
else:
@ -157,6 +157,7 @@ class ProxyClient(LineReceiver):
if sendreq != self.request:
sendreq.unmangled = self.request
if self.factory.save_all:
sendreq.time_start = datetime.datetime.now()
yield sendreq.async_deep_save()
else:
self.log("Request out of scope, passing along unmangled")

View file

@ -46,13 +46,6 @@ class RequestCache(object):
RequestCache._next_in_mem_id += 1
return i
def _update_meta(self):
# Can probably do better to prevent unmangled IDs from being added, but whatever
over = self._cached_reqs.items()[:]
for k, v in over:
if v.unmangled:
RequestCache.unmangled_ids.add(v.unmangled.reqid)
@staticmethod
@defer.inlineCallbacks
def load_ids():
@ -87,16 +80,11 @@ class RequestCache(object):
self._evict_single()
self._cache_size = size
def assert_ids(self):
for k, v in self._cached_reqs.iteritems():
assert v.reqid is not None
@defer.inlineCallbacks
def get(self, reqid):
"""
Get a request by id
"""
self.assert_ids()
if self.check(reqid):
self._update_last_used(reqid)
self.hits += 1
@ -112,25 +100,26 @@ class RequestCache(object):
"""
Returns True if the id is cached, false otherwise
"""
self.assert_ids()
return reqid in self._cached_reqs
def add(self, req):
"""
Add a request to the cache
"""
self.assert_ids()
if not req.reqid:
req.reqid = RequestCache.get_memid()
if req.reqid[0] == 'm':
self.inmem_reqs.add(req)
if req.is_unmangled_version:
self.unmangled_ids.add(req.reqid)
if req.unmangled:
self.unmangled_ids.add(req.unmangled.reqid)
self._cached_reqs[req.reqid] = req
self._update_last_used(req.reqid)
RequestCache.req_times[req.reqid] = req.sort_time
if req.reqid not in RequestCache.all_ids:
RequestCache.ordered_ids.insert(req.reqid)
RequestCache.all_ids.add(req.reqid)
self._update_meta()
if len(self._cached_reqs) > self._cache_size and self._cache_size != -1:
self._evict_single()

View file

@ -0,0 +1,23 @@
from twisted.internet import defer
"""
Schema v7
Creates an index for requests on start time in the data file. This will make
iterating through history a bit faster.
"""
update_queries = [
"""
CREATE INDEX ind_start_time ON requests(start_datetime);
""",
"""
UPDATE schema_meta SET version=7;
"""
]
@defer.inlineCallbacks
def update(dbpool):
    # Apply each migration query sequentially against the data file.
    for query in update_queries:
        yield dbpool.runQuery(query)

View file

@ -733,13 +733,13 @@ def test_request_to_json():
r.response = rsp
expected_reqdata = {u'full_message': unicode(base64.b64encode(r.full_request)),
u'response_id': str(rsp.rspid),
u'port': 80,
u'is_ssl': False,
u'tags': ['foo', 'bar'],
u'reqid': str(r.reqid),
u'host': '',
expected_reqdata = {'full_message': unicode(base64.b64encode(r.full_request)),
'response_id': str(rsp.rspid),
'port': 80,
'is_ssl': False,
'tags': ['foo', 'bar'],
'reqid': str(r.reqid),
'host': '',
}
assert json.loads(r.to_json()) == expected_reqdata