Version 0.2.2

This commit is contained in:
Rob Glew 2016-01-26 16:23:40 -06:00
parent d8dfcd3e24
commit 9a14a5541a
15 changed files with 519 additions and 114 deletions

113
pappyproxy/colors.py Normal file
View file

@ -0,0 +1,113 @@
import re
import itertools
# Matches a terminal ANSI escape sequence (e.g. "\x1b[31m"). Compiled once
# at import time instead of on every clen() call.
_ANSI_ESCAPE_RE = re.compile(r'\x1b[^m]*m')

def clen(s):
    """Return the visible (printable) length of ``s``, ignoring any ANSI
    color/style escape sequences embedded in it."""
    return len(_ANSI_ESCAPE_RE.sub('', s))
class Colors:
    """ANSI SGR escape codes used to colorize terminal output.

    Values are raw escape strings; concatenate them with text and finish
    with ``ENDC`` to reset the terminal back to its default style.
    """
    # Legacy aliases kept for readability at call sites (these overlap with
    # the light-color values below, e.g. HEADER == LMAGENTA).
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Effects
    ENDC = '\033[0m'        # reset all styling
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Colors (standard foreground, SGR 30-37)
    BLACK = '\033[30m'
    RED = '\033[31m'
    GREEN = '\033[32m'
    YELLOW = '\033[33m'
    BLUE = '\033[34m'
    MAGENTA = '\033[35m'
    CYAN = '\033[36m'
    WHITE = '\033[37m'

    # BG Colors (standard background, SGR 40-47)
    BGBLACK = '\033[40m'
    BGRED = '\033[41m'
    BGGREEN = '\033[42m'
    BGYELLOW = '\033[43m'
    BGBLUE = '\033[44m'
    BGMAGENTA = '\033[45m'
    BGCYAN = '\033[46m'
    BGWHITE = '\033[47m'

    # Light Colors (bright foreground, SGR 90-97)
    LBLACK = '\033[90m'
    LRED = '\033[91m'
    LGREEN = '\033[92m'
    LYELLOW = '\033[93m'
    LBLUE = '\033[94m'
    LMAGENTA = '\033[95m'
    LCYAN = '\033[96m'
    LWHITE = '\033[97m'
class Styles:
    """Semantic style constants for console output, built from ``Colors``.

    Call sites reference the meaning (e.g. ``VERB_GET``) rather than a raw
    color, so the palette can be retuned in one place.
    """
    ################
    # Request tables
    TABLE_HEADER = Colors.BOLD+Colors.UNDERLINE
    VERB_GET = Colors.CYAN
    VERB_POST = Colors.YELLOW
    VERB_OTHER = Colors.BLUE
    STATUS_200 = Colors.CYAN
    STATUS_300 = Colors.MAGENTA
    STATUS_400 = Colors.YELLOW
    STATUS_500 = Colors.RED
    # path_formatter alternates through these per path segment
    PATH_COLORS = [Colors.CYAN, Colors.BLUE]
    # key/value listings (e.g. extended request info)
    KV_KEY = Colors.GREEN
    KV_VAL = Colors.ENDC
def verb_color(verb):
    """Return the ANSI style for an HTTP verb.

    GET and POST get dedicated colors; anything else — including an empty
    or ``None`` verb — falls back to the generic verb color.
    """
    # NOTE: ``verb == 'GET'`` already implies ``verb`` is truthy, so the
    # original ``verb and verb == ...`` guards were redundant.
    if verb == 'GET':
        return Styles.VERB_GET
    if verb == 'POST':
        return Styles.VERB_POST
    return Styles.VERB_OTHER
def scode_color(scode):
    """Map an HTTP status-code string to its display color.

    The color is chosen by the leading digit (2xx/3xx/4xx/5xx); anything
    empty or outside those families gets the terminal reset code.
    """
    family_colors = {
        '2': Styles.STATUS_200,
        '3': Styles.STATUS_300,
        '4': Styles.STATUS_400,
        '5': Styles.STATUS_500,
    }
    if not scode:
        return Colors.ENDC
    return family_colors.get(scode[0], Colors.ENDC)
def path_formatter(path, width=-1):
    """Colorize a URL path, alternating colors for each '/'-separated
    segment.

    If ``width`` is given (anything other than -1) and the path is longer,
    the path is truncated to ``width`` characters ending in '...'.
    """
    must_truncate = len(path) > width and width != -1
    if must_truncate:
        # Trim to width, then swap the last 3 chars for an ellipsis so the
        # result is still exactly ``width`` characters of visible text.
        path = path[:width][:-3] + '...'
    palette = itertools.cycle(Styles.PATH_COLORS)
    colored_segments = [color + segment + Colors.ENDC
                        for segment, color in zip(path.split('/'), palette)]
    return '/'.join(colored_segments)
def host_color(host):
    """Deterministically pick a color for ``host`` so each unique host is
    rendered in a consistent color (collisions possible — palette is small).

    An empty/None host falls back to plain red.
    """
    if not host:
        return Colors.RED
    hostcols = [Colors.RED,
                Colors.GREEN,
                Colors.YELLOW,
                Colors.BLUE,
                Colors.MAGENTA,
                Colors.CYAN,
                Colors.LRED,
                Colors.LGREEN,
                Colors.LYELLOW,
                Colors.LBLUE,
                Colors.LMAGENTA,
                Colors.LCYAN]
    # Fix: the original used ``% (len(hostcols) - 1)``, which made the last
    # palette entry (LCYAN) unreachable. Modulo the full length uses every
    # color and is still always in range.
    return hostcols[hash(host) % len(hostcols)]

View file

@ -7,8 +7,10 @@ import cmd2
import re
import string
import sys
import itertools
from .util import PappyException
from .colors import Styles, Colors, verb_color, scode_color, path_formatter, host_color
from twisted.internet import defer
###################
@ -84,9 +86,13 @@ def print_table(coldata, rows):
maxwidth = 0
colwidth = 0
for row in rows:
printstr = str(row[i])
if len(printstr) > colwidth:
colwidth = len(printstr)
printdata = row[i]
if isinstance(printdata, dict):
collen = len(str(printdata['data']))
else:
collen = len(str(printdata))
if collen > colwidth:
colwidth = collen
if maxwidth > 0 and colwidth > maxwidth:
widths.append(maxwidth)
else:
@ -94,16 +100,45 @@ def print_table(coldata, rows):
# Print rows
padding = 2
is_heading = not empty_headers
for row in rows:
if is_heading:
sys.stdout.write(Styles.TABLE_HEADER)
for (col, width) in zip(row, widths):
printstr = str(col)
if isinstance(col, dict):
printstr = str(col['data'])
if 'color' in col:
colors = col['color']
formatter = None
elif 'formatter' in col:
colors = None
formatter = col['formatter']
else:
colors = None
formatter = None
else:
printstr = str(col)
colors = None
formatter = None
if len(printstr) > width:
for i in range(len(printstr)-4, len(printstr)-1):
printstr=printstr[:width]
printstr=printstr[:-3]+'...'
sys.stdout.write(printstr)
trunc_printstr=printstr[:width]
trunc_printstr=trunc_printstr[:-3]+'...'
else:
trunc_printstr=printstr
if colors is not None:
sys.stdout.write(colors)
sys.stdout.write(trunc_printstr)
sys.stdout.write(Colors.ENDC)
elif formatter is not None:
toprint = formatter(printstr, width)
sys.stdout.write(toprint)
else:
sys.stdout.write(trunc_printstr)
sys.stdout.write(' '*(width-len(printstr)))
sys.stdout.write(' '*padding)
if is_heading:
sys.stdout.write(Colors.ENDC)
is_heading = False
sys.stdout.write('\n')
sys.stdout.flush()
@ -112,23 +147,11 @@ def print_requests(requests):
Takes in a list of requests and prints a table with data on each of the
requests. It's the same table that's used by ``ls``.
"""
# Print a table with info on all the requests in the list
cols = [
{'name':'ID'},
{'name':'Verb'},
{'name': 'Host'},
{'name':'Path', 'width':40},
{'name':'S-Code'},
{'name':'Req Len'},
{'name':'Rsp Len'},
{'name':'Time'},
{'name':'Mngl'},
]
rows = []
for req in requests:
rows.append(get_req_data_row(req))
print_table(cols, rows)
def print_request_rows(request_rows):
"""
Takes in a list of request rows generated from :func:`pappyproxy.console.get_req_data_row`
@ -142,13 +165,23 @@ def print_request_rows(request_rows):
{'name':'Verb'},
{'name': 'Host'},
{'name':'Path', 'width':40},
{'name':'S-Code'},
{'name':'S-Code', 'width':16},
{'name':'Req Len'},
{'name':'Rsp Len'},
{'name':'Time'},
{'name':'Mngl'},
]
print_table(cols, request_rows)
print_rows = []
for row in request_rows:
(reqid, verb, host, path, scode, qlen, slen, time, mngl) = row
verb = {'data':verb, 'color':verb_color(verb)}
scode = {'data':scode, 'color':scode_color(scode)}
host = {'data':host, 'color':host_color(host)}
path = {'data':path, 'formatter':path_formatter}
print_rows.append((reqid, verb, host, path, scode, qlen, slen, time, mngl))
print_table(cols, print_rows)
def get_req_data_row(request):
"""

View file

@ -73,7 +73,7 @@ def _consume_line(instr):
return (''.join(l), instr[pos+1:])
l.append(instr[pos])
pos += 1
return instr
return (instr, '')
###################
## Functions to use
@ -537,18 +537,8 @@ class HTTPMessage(object):
reserved_meta_keys = ['full_message']
def __init__(self, full_message=None, update_content_length=False):
self.complete = False
self.headers = RepeatableDict(case_insensitive=True)
self.headers_complete = False
self.malformed = False
self.start_line = ''
self.reset_metadata()
self._decoded = False
self._encoding_type = ENCODE_NONE
self._first_line = True
self._data_obj = None
self._end_after_headers = False
# Initializes instance variables too
self.clear()
if full_message is not None:
self._from_full_message(full_message, update_content_length)
@ -579,19 +569,44 @@ class HTTPMessage(object):
"""
return self.__copy__()
def clear(self):
"""
Resets all internal data and clears the message
"""
self.complete = False
self.headers = RepeatableDict(case_insensitive=True)
self.headers_complete = False
self.malformed = False
self.start_line = ''
self.reset_metadata()
self._decoded = False
self._encoding_type = ENCODE_NONE
self._first_line = True
self._data_obj = None
self._end_after_headers = False
def _from_full_message(self, full_message, update_content_length=False, meta=None):
# Set defaults for metadata
self.reset_metadata()
self.clear()
# Get rid of leading CRLF. Not in spec, should remove eventually
full_message = _strip_leading_newlines(full_message)
if full_message == '':
return
remaining = full_message
while remaining and not self.headers_complete:
line, remaining = _consume_line(remaining)
self.add_line(line)
lines = full_message.splitlines(True)
header_len = 0
for line in lines:
if line[-2] == '\r':
l = line[:-2]
else:
l = line[:-1]
self.add_line(l)
header_len += len(line)
if self.headers_complete:
break
remaining = full_message[header_len:]
if not self.headers_complete:
self.add_line('')
@ -940,24 +955,8 @@ class Request(HTTPMessage):
def __init__(self, full_request=None, update_content_length=True,
port=None, is_ssl=None, host=None):
self.time_end = None
self.time_start = None
self.cookies = RepeatableDict()
self.fragment = None
self.url_params = RepeatableDict()
self._host = None
self._is_ssl = False
self.path = ''
self.port = None
self.post_params = RepeatableDict()
self.reqid = None
self.response = None
self.submitted = False
self.unmangled = None
self.verb = ''
self.version = ''
self.tags = []
self.plugin_data = {}
# Resets instance variables
self.clear()
# Called after instance vars since some callbacks depend on
# instance vars
@ -1232,10 +1231,32 @@ class Request(HTTPMessage):
self.tags = []
def get_plugin_dict(self, name):
"""
Get the data dictionary for the given plugin name.
"""
if not name in self.plugin_data:
self.plugin_data[name] = {}
return self.plugin_data[name]
def clear(self):
HTTPMessage.clear(self)
self.time_end = None
self.time_start = None
self.cookies = RepeatableDict()
self.fragment = None
self.url_params = RepeatableDict()
self._is_ssl = False
self.path = ''
self.post_params = RepeatableDict()
self.response = None
self.submitted = False
self.unmangled = None
self.verb = ''
self.version = ''
self.plugin_data = {}
self.reset_metadata()
self.is_unmangled_version = False
############################
## Internal update functions
@ -1262,8 +1283,6 @@ class Request(HTTPMessage):
def _update_from_objects(self):
# Updates text values that depend on objects.
# DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION
print 'FOOOOO'
print self.post_params.all_pairs()
if self.cookies:
assignments = []
for ck, cv in self.cookies.all_pairs():
@ -1684,6 +1703,7 @@ class Request(HTTPMessage):
if row[3]:
unmangled_req = yield Request.load_request(str(row[3]))
req.unmangled = unmangled_req
req.unmangled.is_unmangled_version = True
if row[4]:
req.time_start = datetime.datetime.fromtimestamp(row[4])
if row[5]:
@ -1825,7 +1845,7 @@ class Request(HTTPMessage):
# If it's not cached, load_request will be called again and be told
# not to use the cache.
r = yield Request.cache.get(loadid)
defer.returnValue(r)
defer.returnValue(retreq(r))
# Load it from the data file
rows = yield dbpool.runQuery(
@ -1923,14 +1943,8 @@ class Response(HTTPMessage):
"""
def __init__(self, full_response=None, update_content_length=True):
self.complete = False
self.cookies = RepeatableDict()
self.response_code = 0
self.response_text = ''
self.rspid = None
self.unmangled = None
self.version = ''
self._saving = False
# Resets instance variables
self.clear()
# Called after instance vars since some callbacks depend on
# instance vars
@ -2023,6 +2037,15 @@ class Response(HTTPMessage):
def reset_metadata(self):
self.rspid = None
def clear(self):
HTTPMessage.clear(self)
self.cookies = RepeatableDict()
self.response_code = 0
self.response_text = ''
self.rspid = None
self.unmangled = None
self.version = ''
############################
## Internal update functions

View file

@ -30,6 +30,14 @@ all_contexts = [main_context]
plugin_loader = None
cons = None
try:
from guppy import hpy
heapstats = hpy()
heapstats.setref()
except ImportError:
heapstats = None
def parse_args():
# parses sys.argv and returns a settings dictionary

155
pappyproxy/plugins/debug.py Normal file
View file

@ -0,0 +1,155 @@
import gc
import shlex
import code
import crochet
import os
import resource
import random
import datetime
from pappyproxy.http import Request, post_request
from pappyproxy.util import PappyException
from pappyproxy.requestcache import RequestCache
from pappyproxy.console import print_requests
from pappyproxy.pappy import heapstats, cons
from twisted.internet import defer
def cache_info(line):
c = Request.cache
print 'Cache has %d/%d slots filled' % (len(c._cached_reqs), c._cache_size)
print 'Hit score: {0:.2f} ({1}/{2})'.format(c.hit_ratio, c.hits, c.hits+c.misses)
print ''
if line != 'q':
rl = [v for k, v in Request.cache._cached_reqs.iteritems()]
rs = sorted(rl, key=lambda r: Request.cache._last_used[r.reqid], reverse=True)
print_requests(rs)
def memory_info(line):
try:
import psutil
except ImportError:
raise PappyException('This command requires the psutil package')
proc = psutil.Process(os.getpid())
mem = proc.memory_info().rss
megabyte = (float(mem)/1024)/1024
print 'Memory usage: {0:.2f} Mb ({1} bytes)'.format(megabyte, mem)
def heap_info(line):
if heapstats is None:
raise PappyException('Command requires the guppy library')
size = heapstats.heap().size
print 'Heap usage: {0:.2f} Mb'.format(size/(1024.0*1024.0))
print heapstats.heap()
def limit_info(line):
rsrc = resource.RLIMIT_AS
soft, hard = resource.getrlimit(rsrc)
print 'Soft limit starts as:', soft
print 'Hard limit starts as:', hard
if line:
limit_mb = int(line)
limit_kb = int(line)*1024
print 'Setting limit to %s Mb' % limit_mb
resource.setrlimit(rsrc, (limit_kb, hard)) #limit to one kilobyte
soft, hard = resource.getrlimit(rsrc)
print 'Soft limit is now:', soft
print 'Hard limit is now:', hard
def graph_randobj(line):
try:
import objgraph
except ImportError:
raise PappyException('This command requires the objgraph library')
args = shlex.split(line)
if len(args) > 1:
fname = args[1]
else:
fname = 'chain.png'
print 'Getting random %s object...' % args[0]
obj = random.choice(objgraph.by_type(args[0]))
print 'Creating chain...'
chain = objgraph.find_backref_chain(obj, objgraph.is_proper_module)
print 'Saving chain...'
objgraph.show_chain(chain, filename=fname)
def heapdo(line):
    # Drop into an interactive Python shell with a guppy heap snapshot
    # available. ``h`` looks unused, but it is deliberately bound so the
    # snapshot is reachable inside the shell via locals().
    if heapstats is None:
        raise PappyException('Command requires the guppy library')
    h = heapstats.heap()
    code.interact(local=locals())
def collect(line):
    # Force an immediate full garbage-collection pass.
    gc.collect()
@crochet.wait_for(timeout=None)
@defer.inlineCallbacks
def loadblock(line):
    # Synchronously load a block of requests into the cache.
    # assumes args are "<start_reqid> <count>" — TODO confirm against
    # RequestCache.load's signature.
    args = shlex.split(line)
    yield Request.cache.load(args[0], int(args[1]))
@crochet.wait_for(timeout=None)
@defer.inlineCallbacks
def big_fucking_data_file(line):
    # Stress-test helper: generate 1000 POST requests with 1 Mb bodies and
    # deep-save each to the data file, printing progress every 20 requests.
    print "Generating some giant fucking requests"
    for i in range(1000):
        if i % 20 == 0:
            print 'Generated %d' % i
        r = post_request('https://www.google.com')
        r.body = 'A'*(1024*1024)
        yield r.async_deep_save()
def time_cmd(line):
print 'Timing `%s`...' % line
start = datetime.datetime.now()
cons.onecmd(line.strip())
end = datetime.datetime.now()
total_time = (end-start).total_seconds()
print '`{0}` took {1:.3f} seconds'.format(line, total_time)
def cache_data(line):
args = shlex.split(line)
reqid = args[0]
cached = reqid in Request.cache._cached_reqs
if reqid in Request.cache._last_used:
last_used = Request.cache._last_used[reqid]
else:
last_used = 'NOT IN _last_used'
in_all = reqid in Request.cache.all_ids
in_unmangled = reqid in Request.cache.unmangled_ids
try:
ordered_ids_pos = Request.cache.ordered_ids.index(reqid)
except ValueError:
ordered_ids_pos = 'Not in ordered_ids'
in_inmem = reqid in Request.cache.inmem_reqs
print ''
print 'Cache data about request %s ----------' % reqid
print 'Cahced: %s' % cached
print 'Last used: %s' % last_used
print 'In all_ids: %s' % in_all
print 'In unmangled: %s' % in_unmangled
print 'Ordered id pos: %s' % ordered_ids_pos
print 'Is inmem: %s' % in_inmem
print ''
def check_cache(line):
    # Sanity-check the request cache's internal id bookkeeping; raises an
    # AssertionError if any cached request has no reqid.
    Request.cache.assert_ids()
def load_cmds(cmd):
    # Register the debug commands with the console. Each entry maps a
    # command name to a (handler, autocompleter) tuple; None means no
    # autocompletion for that command.
    cmd.set_cmds({
        'cacheinfo': (cache_info, None),
        'heapinfo': (heap_info, None),
        'memlimit': (limit_info, None),
        'heapdo': (heapdo, None),
        'gccollect': (collect, None),
        'graphobj': (graph_randobj, None),
        'meminfo': (memory_info, None),
        'bigdata': (big_fucking_data_file, None),
        'checkcache': (check_cache, None),
        'loadblock': (loadblock, None),
        'time': (time_cmd, None),
        'cachedata': (cache_data, None),
    })
    # No short aliases for the debug commands.
    cmd.add_aliases([
    ])

View file

@ -8,6 +8,7 @@ from pappyproxy.util import PappyException
from pappyproxy.http import Request
from twisted.internet import defer
from pappyproxy.plugin import main_context_ids
from pappyproxy.colors import Colors, Styles, verb_color, scode_color, path_formatter, host_color
###################
## Helper functions
@ -21,8 +22,7 @@ def view_full_message(request, headers_only=False):
def print_request_extended(request):
# Prints extended info for the request
title = "Request Info (reqid=%s)" % request.reqid
print title
print '-'*len(title)
print Styles.TABLE_HEADER + title + Colors.ENDC
reqlen = len(request.body)
reqlen = '%d bytes' % reqlen
rsplen = 'No response'
@ -34,6 +34,7 @@ def print_request_extended(request):
if request.response:
response_code = str(request.response.response_code) + \
' ' + request.response.response_text
response_code = scode_color(response_code) + response_code + Colors.ENDC
rsplen = len(request.response.body)
rsplen = '%d bytes' % rsplen
@ -59,24 +60,31 @@ def print_request_extended(request):
time_made_str = request.time_start.strftime('%a, %b %d, %Y, %I:%M:%S %p')
else:
time_made_str = '--'
verb = verb_color(request.verb) + request.verb + Colors.ENDC
host = host_color(request.host) + request.host + Colors.ENDC
print 'Made on %s' % time_made_str
print 'ID: %s' % request.reqid
print 'Verb: %s' % request.verb
print 'Host: %s' % request.host
print 'Path: %s' % request.full_path
print 'Status Code: %s' % response_code
print 'Request Length: %s' % reqlen
print 'Response Length: %s' % rsplen
print_pairs = []
print_pairs.append(('Made on', time_made_str))
print_pairs.append(('ID', request.reqid))
print_pairs.append(('Verb', verb))
print_pairs.append(('Host', host))
print_pairs.append(('Path', path_formatter(request.full_path)))
print_pairs.append(('Status Code', response_code))
print_pairs.append(('Request Length', reqlen))
print_pairs.append(('Response Length', rsplen))
if request.response and request.response.unmangled:
print 'Unmangled Response Length: %s bytes' % len(request.response.unmangled.full_response)
print 'Time: %s' % time_str
print 'Port: %s' % request.port
print 'SSL: %s' % is_ssl
print 'Mangled: %s' % mangle_str
print 'Tags: %s' % (', '.join(request.tags))
print_pairs.append(('Unmangled Response Length', len(request.response.unmangled.full_response)))
print_pairs.append(('Time', time_str))
print_pairs.append(('Port', request.port))
print_pairs.append(('SSL', is_ssl))
print_pairs.append(('Mangled', mangle_str))
print_pairs.append(('Tags', ', '.join(request.tags)))
if request.plugin_data:
print 'Plugin Data: %s' % (request.plugin_data)
print_pairs.append(('Plugin Data', request.plugin_data))
for k, v in print_pairs:
print Styles.KV_KEY+str(k)+': '+Styles.KV_VAL+str(v)
def print_tree(tree):
# Prints a tree. Takes in a sorted list of path tuples

View file

@ -128,7 +128,7 @@ class ProxyClient(LineReceiver):
if self.factory.save_all:
# It isn't the actual time, but this should work in case
# we do an 'ls' before it gets a real time saved
sendreq.time_start = datetime.datetime.now()
self.request.time_start = datetime.datetime.now()
if self.factory.stream_response and not to_mangle:
self.request.async_deep_save()
else:
@ -157,6 +157,7 @@ class ProxyClient(LineReceiver):
if sendreq != self.request:
sendreq.unmangled = self.request
if self.factory.save_all:
sendreq.time_start = datetime.datetime.now()
yield sendreq.async_deep_save()
else:
self.log("Request out of scope, passing along unmangled")

View file

@ -46,13 +46,6 @@ class RequestCache(object):
RequestCache._next_in_mem_id += 1
return i
def _update_meta(self):
# Can probably do better to prevent unmangled IDs from being added, but whatever
over = self._cached_reqs.items()[:]
for k, v in over:
if v.unmangled:
RequestCache.unmangled_ids.add(v.unmangled.reqid)
@staticmethod
@defer.inlineCallbacks
def load_ids():
@ -87,16 +80,11 @@ class RequestCache(object):
self._evict_single()
self._cache_size = size
def assert_ids(self):
for k, v in self._cached_reqs.iteritems():
assert v.reqid is not None
@defer.inlineCallbacks
def get(self, reqid):
"""
Get a request by id
"""
self.assert_ids()
if self.check(reqid):
self._update_last_used(reqid)
self.hits += 1
@ -112,25 +100,26 @@ class RequestCache(object):
"""
Returns True if the id is cached, false otherwise
"""
self.assert_ids()
return reqid in self._cached_reqs
def add(self, req):
"""
Add a request to the cache
"""
self.assert_ids()
if not req.reqid:
req.reqid = RequestCache.get_memid()
if req.reqid[0] == 'm':
self.inmem_reqs.add(req)
if req.is_unmangled_version:
self.unmangled_ids.add(req.reqid)
if req.unmangled:
self.unmangled_ids.add(req.unmangled.reqid)
self._cached_reqs[req.reqid] = req
self._update_last_used(req.reqid)
RequestCache.req_times[req.reqid] = req.sort_time
if req.reqid not in RequestCache.all_ids:
RequestCache.ordered_ids.insert(req.reqid)
RequestCache.all_ids.add(req.reqid)
self._update_meta()
if len(self._cached_reqs) > self._cache_size and self._cache_size != -1:
self._evict_single()

View file

@ -0,0 +1,23 @@
from twisted.internet import defer
"""
Schema v7
Creates an index for requests on start time in the data file. This will make
iterating through history a bit faster.
"""
update_queries = [
"""
CREATE INDEX ind_start_time ON requests(start_datetime);
""",
"""
UPDATE schema_meta SET version=7;
"""
]
@defer.inlineCallbacks
def update(dbpool):
    # Apply each migration query sequentially against the data file.
    for query in update_queries:
        yield dbpool.runQuery(query)

View file

@ -733,13 +733,13 @@ def test_request_to_json():
r.response = rsp
expected_reqdata = {u'full_message': unicode(base64.b64encode(r.full_request)),
u'response_id': str(rsp.rspid),
u'port': 80,
u'is_ssl': False,
u'tags': ['foo', 'bar'],
u'reqid': str(r.reqid),
u'host': '',
expected_reqdata = {'full_message': unicode(base64.b64encode(r.full_request)),
'response_id': str(rsp.rspid),
'port': 80,
'is_ssl': False,
'tags': ['foo', 'bar'],
'reqid': str(r.reqid),
'host': '',
}
assert json.loads(r.to_json()) == expected_reqdata