From 26376eaaec1cb2903c820a44a9e21c9eaa59be2d Mon Sep 17 00:00:00 2001 From: Rob Glew Date: Tue, 19 Jan 2016 18:00:34 -0600 Subject: [PATCH] Version 0.2.0 --- MANIFEST.in | 2 +- Makefile | 1 + README.md | 232 ++- docs/source/conf.py | 6 +- docs/source/index.rst | 1 + docs/source/overview.rst | 387 +++-- docs/source/pappyplugins.rst | 450 ++++++ docs/source/pappyproxy.rst | 156 +- docs/source/pappyproxy.schema.rst | 16 + docs/source/tutorial.rst | 93 +- pappyproxy/comm.py | 19 +- pappyproxy/config.py | 76 +- pappyproxy/console.py | 1396 ++---------------- pappyproxy/context.py | 845 ++++++----- pappyproxy/http.py | 1180 +++++++++------ pappyproxy/iter.py | 2 +- pappyproxy/macros.py | 117 +- pappyproxy/pappy.py | 85 +- pappyproxy/plugin.py | 150 ++ pappyproxy/plugins/__init__.py | 0 pappyproxy/plugins/filter.py | 192 +++ pappyproxy/plugins/macrocmds.py | 215 +++ pappyproxy/plugins/manglecmds.py | 243 +++ pappyproxy/plugins/misc.py | 85 ++ pappyproxy/plugins/tagcmds.py | 102 ++ pappyproxy/plugins/view.py | 328 ++++ pappyproxy/plugins/vim_repeater/__init__.py | 0 pappyproxy/plugins/vim_repeater/repeater.py | 135 ++ pappyproxy/plugins/vim_repeater/repeater.vim | 17 + pappyproxy/proxy.py | 98 +- pappyproxy/schema/schema_2.py | 36 +- pappyproxy/schema/schema_4.py | 50 + pappyproxy/schema/schema_5.py | 29 + pappyproxy/schema/update.py | 51 +- pappyproxy/templates/intmacro.py | 6 + pappyproxy/templates/macro.py | 2 +- pappyproxy/tests/old_test_mangle.py | 211 +++ pappyproxy/tests/test_context.py | 98 +- pappyproxy/tests/test_http.py | 155 +- pappyproxy/tests/test_proxy.py | 13 +- pappyproxy/tests/test_session.py | 6 +- pappyproxy/util.py | 21 + setup.py | 4 +- 43 files changed, 4671 insertions(+), 2640 deletions(-) create mode 100644 docs/source/pappyplugins.rst create mode 100644 pappyproxy/plugin.py create mode 100644 pappyproxy/plugins/__init__.py create mode 100644 pappyproxy/plugins/filter.py create mode 100644 pappyproxy/plugins/macrocmds.py create mode 100644 
pappyproxy/plugins/manglecmds.py create mode 100644 pappyproxy/plugins/misc.py create mode 100644 pappyproxy/plugins/tagcmds.py create mode 100644 pappyproxy/plugins/view.py create mode 100644 pappyproxy/plugins/vim_repeater/__init__.py create mode 100644 pappyproxy/plugins/vim_repeater/repeater.py create mode 100644 pappyproxy/plugins/vim_repeater/repeater.vim create mode 100644 pappyproxy/schema/schema_4.py create mode 100644 pappyproxy/schema/schema_5.py create mode 100644 pappyproxy/tests/old_test_mangle.py diff --git a/MANIFEST.in b/MANIFEST.in index 7ad55ca..587792f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,4 +4,4 @@ recursive-include pappyproxy *.json recursive-include pappyproxy *.py recursive-include pappyproxy *.vim recursive-include pappyproxy *.txt -include docs/source/overview.rst +include docs/source/overview.rst \ No newline at end of file diff --git a/Makefile b/Makefile index 3539b48..1c9ecfd 100644 --- a/Makefile +++ b/Makefile @@ -2,5 +2,6 @@ docs: pandoc --from=markdown --to=rst --output=docs/source/overview.rst README.md cd docs; make html + cd docs/build/html; touch .nojekyll .PHONY: docs diff --git a/README.md b/README.md index ff8007b..d20b82c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The Pappy Proxy Introduction ------------ -The Pappy (**P**roxy **A**ttack **P**roxy **P**rox**Y**) Proxy is an intercepting proxy for performing web application security testing. Its features are often similar, or straight up rippoffs from [Burp Suite](https://portswigger.net/burp/). However, Burp Suite is neither open source nor a command line tool, thus making a proxy like Pappy inevitable. The project is still in its early stages, so there are bugs and only the bare minimum features, but it should be able to do some cool stuff soon (I'm already using it for real work). +The Pappy (**P**roxy **A**ttack **P**roxy **P**rox**Y**) Proxy is an intercepting proxy for performing web application security testing. 
Its features are often similar, or straight up rippoffs from [Burp Suite](https://portswigger.net/burp/). However, Burp Suite is neither open source nor a command line tool, thus making a proxy like Pappy inevitable. The project is still in its early stages, so there are bugs and only the bare minimum features, but it can already do some cool stuff. Contributing ------------ @@ -12,6 +12,8 @@ Contributing If you're brave and want to try and contribute code, please let me know. Right now the codebase is kind of rough and I have refactored it a few times already, but I would be more than happy to find a stable part of the codebase that you can contribute to. +Another option is to try writing a plugin. It might be a bit easier than contributing code and plugins are extremely easy to integrate as a core feature. So you can also contribute by writing a plugin and letting me know about it. You can find out more by looking at [the official plugin docs](https://roglew.github.io/pappy-proxy/pappyplugins.html). + How to Use It ============= @@ -26,7 +28,7 @@ $ pip install . Quickstart ---------- -Pappy projects take up an entire directory. Any generated scripts, exported responses, etc. will be placed in the current directory so it's good to give your project a directory of its own. To start a project, do something like: +Pappy projects take up an entire directory. Any generated scripts, exported responses, plugin data, etc. will be placed in the current directory so it's good to give your project a directory of its own. 
To start a project, do something like: ``` $ mkdir test_project @@ -34,7 +36,7 @@ $ cd test_project $ pappy Copying default config to directory Proxy is listening on port 8000 -itsPappyTime> exit +pappy> exit $ ls data.db project_config.json $ @@ -51,7 +53,7 @@ Example: $ pappy -l Temporary datafile is /tmp/tmpw4mGv2 Proxy is listening on port 8000 -itsPappyTime> quit +pappy> quit Deleting temporary datafile $ ``` @@ -173,6 +175,7 @@ The context is a set of filters that define which requests are considered "activ |:--------|:------------|:---| | `f ` | filter, fl, f |Add a filter that limits which requests are included in the current context. See the Filter String section for how to create a filter string | | `fc` | filter_clear, fc | Clears the filters and resets the context to contain all requests and responses. Ignores scope | +| `fu` | filter_up, fu | Removes the most recently applied filter | | `fls` | filter_list, fls | Print the filters that make up the current context | | `filter_prune` | filter_prune | Delete all the requests that aren't in the current context from the data file | @@ -259,6 +262,14 @@ Matches both A and B but not C | gt | gt | A gt B if A > B (A and B must be a number) | | lt | lt | A lt B if A < B (A and B must be a number) | +### Special form filters +A few filters don't conform to the field, comparer, value format. You can still negate these. + +| Format | Aliases | Description | +|:--|:--|:--| +| before | before, bf, b4 | Filters out any request that is not before the given request. Filters out any request without a time. | +| after | after, af | Filters out any request that is not before the given request. Filters out any request without a time. | + Scope ----- Scope is a set of rules to define whether Pappy should mess with a request. You define the scope by setting the context to what you want the scope to be and running `scope_save`. 
The scope is saved in the data file and is automatically restored when using the same project directory. @@ -346,7 +357,7 @@ $ ls -l In this case we have a `blank`, `hackthensa`, `testgen`, and `test` macro. A macro script is any python script that defines a `run_macro(args)` function and a `MACRO_NAME` variable. For example, a simple macro would be: ``` ---- macro_print.py +### macro_print.py MACRO_NAME = 'Print Macro' @@ -362,19 +373,19 @@ You can place this macro in your project directory then load and run it from Pap ``` $ pappy Proxy is listening on port 8000 -itsPappyTime> lma +pappy> lma Loaded "" Loaded "" Loaded "" Loaded "" Loaded "" -itsPappyTime> rma print +pappy> rma print Hello, Pappy! -itsPappyTime> rma print NSA +pappy> rma print NSA Hello, NSA! -itsPappyTime> rma print Idiot Slayer +pappy> rma print Idiot Slayer Hello, Idiot! -itsPappyTime> rma print "Idiot Slayer" +pappy> rma print "Idiot Slayer" Hello, Idiot Slayer! ``` @@ -387,16 +398,16 @@ You can also generate macros that have Pappy `Request` objects created with the ``` $ pappy Proxy is listening on port 8000 -itsPappyTime> ls +pappy> ls ID Verb Host Path S-Code Req Len Rsp Len Time Mngl 5 GET vitaly.sexy /esr1.jpg 200 OK 0 17653 -- -- 4 GET vitaly.sexy /netscape.gif 200 OK 0 1135 -- -- 3 GET vitaly.sexy /construction.gif 200 OK 0 28366 -- -- 2 GET vitaly.sexy /vitaly2.jpg 200 OK 0 2034003 -- -- 1 GET vitaly.sexy / 200 OK 0 1201 -- -- -itsPappyTime> gma sexy 1 +pappy> gma sexy 1 Wrote script to macro_sexy.py -itsPappyTime> quit +pappy> quit $ cat macro_sexy.py from pappyproxy.http import Request, get_request, post_request @@ -430,7 +441,7 @@ def run_macro(args): pass ``` -If you enter in a value for `SHORT_NAME`, you can use it as a shortcut to run that macro. So if in a macro you set `SHORT_NAME='tm'` you can run it by running `itsPappyTime> rma tm`. +If you enter in a value for `SHORT_NAME`, you can use it as a shortcut to run that macro. 
So if in a macro you set `SHORT_NAME='tm'` you can run it by running `pappy> rma tm`. | Command | Aliases | Description | |:--------|:--------|:------------| @@ -441,9 +452,9 @@ If you enter in a value for `SHORT_NAME`, you can use it as a shortcut to run th ### Request Objects -The main method of interacting with the proxy is through `Request` objects. You can submit a request with `req.sumbit()` and save it to the data file with `req.save()`. The objects also have attributes which can be used to modify the request in a high-level way. Unfortunately, I haven't gotten around to writing full docs on the API and it's still changing every once in a while so I apologize if I pull the carpet out from underneath you. +The main method of interacting with the proxy is through `Request` objects. You can submit a request with `req.sumbit()` and save it to the data file with `req.save()`. The objects also have attributes which can be used to modify the request in a high-level way. You can see the [full documentation](https://roglew.github.io/pappy-proxy/pappyproxy.html#module-pappyproxy.http) for more details on using these objects. -Dict-like objects are represented with a custom class called a `RepeatableDict`. I haven't gotten around to writing docs on it yet, so just interact with it like a dict and don't be surprised if it's missing some methods you would expect a dict to have. +Dict-like objects are represented with a custom class called a `RepeatableDict`. Again, look at the docs for details. For the most part, you can interact with it like a normal dictionary, but don't be surprised if it's missing some methods you would expect. Here is a quick list of attributes that you can use with `Request` objects: @@ -500,12 +511,30 @@ Like I said, these interfaces are prone to change and will probably crash when y ### Useful Functions -There are also a few functions which could be useful for creating requests in macros. 
+There are also a few functions which could be useful for creating requests in macros. It's worth pointing out that `request_by_id` is useful for passing request objects as arguments. For example, here is a macro that lets you resubmit a request with the Google Bot user agent: + +``` +## macro_googlebot.py + +from pappyproxy.http import Request, get_request, post_request, request_by_id +from pappyproxy.context import set_tag +from pappyproxy.iter import * + +MACRO_NAME = 'Submit as Google' +SHORT_NAME = '' + +def run_macro(args): + req = request_by_id(args[0]) + req.headers['User-Agent'] = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" + req.submit() + req.save() +``` | Function | Description | |:--|:--| | get_request(url, url_params={}) | Returns a Request object that contains a GET request to the given url with the given url params | | post_request(url, post_params={}, url_params={}) | Returns a Request object that contains a POST request to the given url with the given url and post params | +| request_by_id(reqid) | Get a request object from its id. | Intercepting Macros ------------------- @@ -556,6 +585,46 @@ def mangle_request(request): return r ``` +In addition, you can use an `init(args)` function to get arguments from the command line. If no arguments are passed, args will be an empty list. Here is an example macro that does a search and replace: + +``` +## int_replace.py + +MACRO_NAME = 'Find and Replace' +SHORT_NAME = '' +runargs = [] + +def init(args): + global runargs + runargs = args + +def mangle_request(request): + global runargs + if len(runargs) < 2: + return request + request.body = request.body.replace(runargs[0], runargs[1]) + return request + +def mangle_response(request): + global runargs + if len(runargs) < 2: + return request.response + request.response.body = request.response.body.replace(runargs[0], runargs[1]) + return request.response +``` + +You can use this macro to do any search and replace that you want. 
For example, if you wanted to replace "Google" with "Skynet", you can run the macro like this: + +``` +pappy> lma +Loaded "" +pappy> rim replace Google Skynet +"Find and Replace" started +pappy> +``` + +Now every site that you visit will be a little bit more accurate. + ### Enabling/Disabling Intercepting Macros You can use the following commands to start/stop intercepting macros @@ -563,8 +632,8 @@ You can use the following commands to start/stop intercepting macros |:--------|:--------|:------------| | `lma [dir]` | `load_macros`, `lma` | Load macros from a directory. If `dir` is not given, use the current directory (the project directory) | | `rim ` | `run_int_macro`, `rim` | Run an intercepting macro. Similarly to normal macros you can use the name, short name, or file name of the macro. | -| `sim ` | `stop_int_macro`, `sim` | Stop an intercepting macro. | -| `lim` | `list_int_macros`, `lim` | List all enabled/disabled intercepting macros | +| `sim [args]` | `stop_int_macro`, `sim` | Stop an intercepting macro. If arguments are given, they will be passed to the macro's `init(args)` function if it exists. | +| `lim` | `list_int_macros`, `lsim` | List all enabled/disabled intercepting macros | | `gima ` | `generate_int_macro`, `gima` | Generate an intercepting macro with the given name. | Logging @@ -575,8 +644,8 @@ You can watch in real-time what requests are going through the proxy. Verbosisty |:--------|:------------| | `log [verbosity]` | View the log at the given verbosity. Default verbosity is 1 which just shows connections being made/lost and some other info, verbosity 3 shows full requests/responses as they pass through and are processed by the proxy | -Additional Commands -------------------- +Additional Commands and Features +-------------------------------- This is a list of other random stuff you can do that isn't categorized under anything else. These are mostly commands that I found that I needed while doing a test and just added. 
They likely don't do a ton of error checking. | Command | Aliases | Description | @@ -584,3 +653,124 @@ This is a list of other random stuff you can do that isn't categorized under any | `dump_response [filename]` | `dump_response` | Dumps the data from the response to the given filename (useful for images, .swf, etc). If no filename is given, it uses the name given in the path. | | `export ` | `export` | Writes either the full request or response to a file in the current directory. | +### Response streaming + +If you don't have any intercepting macros running, Pappy will forward data to the browser as it gets it. However, if you're trying to mangle messages/responses, Pappy will need to download the entire message first. + +Plugins +------- +Note that this section is a very quick overview of plugins. For a full description of how to write them, please see [the official docs](https://roglew.github.io/pappy-proxy/pappyplugins.html). + +It is also possible to write plugins which are reusable across projects. Plugins are simply Python scripts located in `~/.pappy/plugins`. Plugins are able to create new console commands and maintain state throughout a Pappy session. They can access the same API as macros, but the plugin system is designed to allow you to create general purpose commands as compared to macros which are meant to be project-specific scripts. Still, it may not be a bad idea to try building a macro to do something in a quick and dirty way before writing a plugin since plugins are more complicated to write. + +A simple hello world plugin could be something like: + +``` +## hello.py +import shlex + +def hello_world(line): + if line: + args = shlex.split(line) + print 'Hello, %s!' % (', '.join(args)) + else: + print "Hello, world!" 
+ +############### +## Plugin hooks + +def load_cmds(cmd): + cmd.set_cmds({ + 'hello': (hello_world, None), + }) + cmd.add_aliases([ + ('hello', 'hlo'), + ('hello', 'ho'), + ]) +``` + +You can also create commands which support autocomplete: + +``` +import shlex + +_AUTOCOMPLETE_NAMES = ['alice', 'allie', 'sarah', 'mallory', 'slagathor'] + +def hello_world(line): + if line: + args = shlex.split(line) + print 'Hello, %s!' % (', '.join(args)) + else: + print "Hello, world!" + +def complete_hello_world(text, line, begidx, endidx): + return [n for n in _AUTOCOMPLETE_NAMES if n.startswith(text)] + +############### +## Plugin hooks + +def load_cmds(cmd): + cmd.set_cmds({ + 'hello': (hello_world, complete_hello_world), + }) + cmd.add_aliases([ + ('hello', 'hlo'), + ]) +``` + +Then when you run Pappy you can use the ``hello`` command: + +``` +$ pappy -l +Temporary datafile is /tmp/tmpBOXyJ3 +Proxy is listening on port 8000 +pappy> ho +Hello, world! +pappy> ho foo bar baz +Hello, foo, bar, baz! +pappy> ho foo bar "baz lihtyur" +Hello, foo, bar, baz lihtyur! +pappy> +``` + +### Should I Write a Plugin or a Macro? + +A lot of the time, you can get away with writing a macro. However, you may consider writing a plugin if: + +* You find yourself copying one macro to multiple projects +* You want to write a general tool that can be applied to any website +* You need to maintain state during the Pappy session + +My guess is that if you need one quick thing for a project, you're better off writing a macro first and seeing if you end up using it in future projects. Then if you find yourself needing it a lot, write a plugin for it. You may also consider keeping a `mine.py` plugin where you can write out commands that you use regularly but may not be worth creating a dedicated plugin for. + +FAQ +--- + +### I still like Burp, but Pappy looks interesting, can I use both? +Yes! 
If you don't want to go completely over to Pappy yet, you can configure Burp to use Pappy as an upstream proxy server. That way, traffic will go through both Burp and Pappy and you can use whichever you want to do your testing. + +How to have Burp forward traffic through Pappy: + +1. Open Burp +2. Go to `Options -> Connections -> Upstream Proxy Servers` +3. Click `Add` +4. Leave `Destination Host` blank, but put `127.0.0.1` in `Proxy Host` and `8000` into `Port` (assuming you're using the default listener) +5. Configure your browser to use Burp as a proxy + +### Why does my request have an id of `--`?!?! +You can't do anything with a request/response until it is decoded and saved to disk. In between the time when a request is decoded and when it's saved to disk, it will have an ID of `--`. So just wait a little bit and it will get an ID you can use. + +Changelog +--------- +The boring part of the readme + +* 0.2.0 + * Lots of refactoring + * Plugins + * Bugfixes probably + * Change prompt to make Pappy look more professional (but it will always be pappy time in your heart, I promise) + * Create changelog + * Add response streaming if no intercepting macros are active +* 0.1.1 + * Start using sane versioning system + * Did proxy things \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 64a1dbf..4d1a423 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -59,9 +59,9 @@ author = u'Rob Glew' # built documents. # # The short X.Y version. -version = u'0.0.1' +version = u'0.2.0' # The full version, including alpha/beta/rc tags. -release = u'0.0.1' +release = u'0.2.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
@@ -307,6 +307,8 @@ intersphinx_mapping = {'https://docs.python.org/': None} def maybe_skip_member(app, what, name, obj, skip, options): skip_vals = ('__doc__', '__module__', '__weakref__') + if name[0] == '_': + return True if name in skip_vals: return True if obj.__doc__ is None: diff --git a/docs/source/index.rst b/docs/source/index.rst index 96b43b9..b77b3b4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -13,6 +13,7 @@ Contents: overview tutorial + pappyplugins Indices and tables diff --git a/docs/source/overview.rst b/docs/source/overview.rst index 9aa1e44..f135ae9 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -11,7 +11,7 @@ testing. Its features are often similar, or straight up rippoffs from neither open source nor a command line tool, thus making a proxy like Pappy inevitable. The project is still in its early stages, so there are bugs and only the bare minimum features, but it should be able to do -some cool stuff soon (I'm already using it for realtm work). +some cool stuff soon (I'm already using it for real work). Contributing ------------ @@ -24,9 +24,9 @@ know so that I can [STRIKEOUT:use it to stomp them into the dust] improve my project. If you're brave and want to try and contribute code, please let me know. -Right now the codebase is a giant clusterfun which I have refactored a -few times already, but I would be more than happy to find a stable part -of the codebase that you can contribute to. +Right now the codebase is kind of rough and I have refactored it a few +times already, but I would be more than happy to find a stable part of +the codebase that you can contribute to. How to Use It ============= @@ -48,11 +48,10 @@ installed correctly by running ``pappy -l`` to start the proxy. Quickstart ---------- -Pappy projects take up an entire directory. 
While a full directory may -seem like a dumb idea compared to storing everything in a zip file, but -when it comes to generating attack strips and things, it's easier to -just keep everything in a directory so you can view/edit files with -other programs. To start a project, do something like: +Pappy projects take up an entire directory. Any generated scripts, +exported responses, etc. will be placed in the current directory so it's +good to give your project a directory of its own. To start a project, do +something like: :: @@ -61,7 +60,7 @@ other programs. To start a project, do something like: $ pappy Copying default config to directory Proxy is listening on port 8000 - itsPappyTime> exit + pappy> exit $ ls data.db project_config.json $ @@ -70,16 +69,18 @@ And that's it! The proxy will by default be running on port 8000 and bound to localhost (to keep the hackers out). You can modify the port/interface in ``config.json``. You can list all your intercepted requests with ``ls``, view a full request with ``vfq `` or view a -full response with ``vfs ``. No you can't delete them yet. I'm -working on it. +full response with ``vfs ``. Right now, the only command to +delete requests is ``filter_prune`` which deletes all the requests that +aren't in the current context (look at the sections on the +context/filter strings for more information on that). Lite Mode --------- If you don't want to dirty up a directory, you can run Pappy in "lite" mode. Pappy will use the default configuration settings and will create -a temporary datafile in ``/tmp`` to use. When you quit, the file will be -deleted. If you want to run Pappy in line mode, run Pappy with either +a temporary data file in ``/tmp`` to use. When you quit, the file will +be deleted. If you want to run Pappy in lite mode, run Pappy with either ``-l`` or ``--lite``. 
Example: @@ -89,7 +90,7 @@ Example: $ pappy -l Temporary datafile is /tmp/tmpw4mGv2 Proxy is listening on port 8000 - itsPappyTime> quit + pappy> quit Deleting temporary datafile $ @@ -99,11 +100,10 @@ Adding The CA Cert to Your Browser In order for Pappy to view data sent using HTTPS, you need to add a generated CA cert (``certificate.crt``) to your browser. Certificates are generated using the ``gencerts`` command and are by default stored -in the same directory as ``pappy.py``. This allows Pappy to act as a CA -and MITM HTTPS connections. I believe that Firefox and Chrome ignore -keychain/system certs, so you will have to install the CA cert to the -browsers instead of (or in addition to) adding the cert to your -keychain. +in ``~/.pappy/certs``. This allows Pappy to act as a CA and sign any +HTTPS certificate it wants without the browser complaining. This allows +Pappy to decrypt and modify HTTPS requests. The certificate installation +instructions are different for each browser. Firefox ~~~~~~~ @@ -156,11 +156,13 @@ proxy. The following fields can be used to configure the proxy: The following tokens will also be replaced with values: -+------------------+------------------------------------------------+ -| Token | Replaced with | -+==================+================================================+ -| ``{PAPPYDIR}`` | The directory where Pappy's files are stored | -+------------------+------------------------------------------------+ ++-----------------+-----------------------------------------------------+ +| Token | Replaced with | ++=================+=====================================================+ +| ``{DATADIR}`` | The directory where Pappy's data files are stored | ++-----------------+-----------------------------------------------------+ + +See the default ``config.json`` for examples. 
Generating Pappy's CA Cert -------------------------- @@ -168,16 +170,16 @@ Generating Pappy's CA Cert In order to intercept and modify requests to sites that use HTTPS, you have to generate and install CA certs to your browser. You can do this by running the ``gencerts`` command in Pappy. By default, certs are -stored in the same directory as Pappy's script files. However, you can -change where Pappy will look for the private key file in the config -file. In addition, you can give the ``gencerts`` command an argument to -have it put the generated certs in a different directory. +stored ``~/.pappy/certs``. This is also the default location that Pappy +will look for certificates (unless you specify otherwise in +``config.json``.) In addition, you can give the ``gencerts`` command an +argument to have it put the generated certs in a different directory. -+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Command | Description | -+========================================+================================================================================================================================================================+ -| ``gencerts [/path/to/put/certs/in]`` | Generate a CA cert that can be added to your browser to let Pappy decrypt HTTPS traffic. Also generates the private key for that cert in the same directory. 
| -+----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Command | Description | ++========================================+==================================================================================================================================================================================================================================================================================+ +| ``gencerts [/path/to/put/certs/in]`` | Generate a CA cert that can be added to your browser to let Pappy decrypt HTTPS traffic. Also generates the private key for that cert in the same directory. If no path is given, the certs will be placed in the default certificate location. Overwrites any existing certs. | ++----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Browsing Recorded Requests/Responses ------------------------------------ @@ -202,7 +204,7 @@ The following commands can be used to view requests and responses | ``vhs `` | view\_response\_headers, vhs | [V]iew [H]eaders of a Re[S]ponse. Prints just the headers of a response associated with a request. 
| +--------------------+--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -The table shown will have the following columns: +The table shown by ``ls`` will have the following columns: +-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Label | Description | @@ -230,7 +232,7 @@ Tags ---- You can apply tags to a request and use filters to view specific tags. -The following commands can be used to apply tags to requests: +The following commands can be used to apply and remove tags to requests: +---------------------------+-----------+---------------------------------------------------------------------------------------------------------------+ | Command | Aliases | Description | @@ -245,10 +247,13 @@ The following commands can be used to apply tags to requests: Request IDs ----------- -Request IDs are how you identify a request. You can see it when you run -``ls``. In addition, you can prepend an ID with prefixes to get requests -or responses associated with the request (for example its unmangled -request or response) Here are the valid prefixes: +Request IDs are how you identify a request and every command that +involves specifying a request will take one or more request IDs. You can +see it when you run ``ls``. 
In addition, you can prepend an ID with +prefixes to get requests or responses associated with the request (for +example if you modified the request or its response with the +interceptor, you can get the unmangled versions.) Here are the valid +prefixes: +----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Prefix | Description | @@ -258,9 +263,9 @@ request or response) Here are the valid prefixes: | ``s`` | If the response was mangled, prefixing the request ID ``s`` will result in the same request but its associated response will be the unmangled version. | +----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -I know it sounds kind of weird, but here are some example commands that -will hopefully make things clearer. Suppose request 1 had its request -mangled, and request 2 had its response mangled. +I know it sounds kind of unintuitive. Here are some example commands +that will hopefully make things clearer. Suppose request 1 had its +request mangled, and request 2 had its response mangled. - ``vfq 1`` Prints the mangled version of request 1 - ``vfq u1`` Prints the unmangled version of request 1 @@ -280,7 +285,7 @@ Passing Multiple Request IDs to a Command ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some arguments can take multiple IDs for an argument. To pass multiple -IDs to a command, separate the IDs with commas (no spaces!). A few +IDs to a command, separate the IDs with commas **(no spaces!)**. A few examples: - ``viq 1,2,u3`` View information about requests 1, 2, and the @@ -293,13 +298,12 @@ Context The context is a set of filters that define which requests are considered "active". 
Only requests in the current context are displayed -with ``ls``, and eventually contexts will be how Pappy will manage -requests for group operations. By default, the context includes every -single request that passes through the proxy. You can limit down the -current context by applying filters. Filters apply rules such as "the -response code must equal 500" or "the host must contain google.com". -Once you apply one or more filters, only requests/responses which pass -every active filter will be a part of the current context. +with ``ls``. By default, the context includes every single request that +passes through the proxy. You can limit down the current context by +applying filters. Filters apply rules such as "the response code must +equal 500" or "the host must contain google.com". Once you apply one or +more filters, only requests/responses which pass every active filter +will be a part of the current context. +-------------------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------+ | Command | Aliases | Description | @@ -308,24 +312,28 @@ every active filter will be a part of the current context. +-------------------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------+ | ``fc`` | filter\_clear, fc | Clears the filters and resets the context to contain all requests and responses. 
Ignores scope | +-------------------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``fu`` | filter\_up, fu | Removes the most recently applied filter | ++-------------------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------+ | ``fls`` | filter\_list, fls | Print the filters that make up the current context | +-------------------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``filter_prune`` | filter\_prune | Delete all the requests that aren't in the current context from the data file | ++-------------------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------+ Filter Strings -------------- Filter strings define a condition that a request/response pair must pass -to be part of a context. Most filter strings have the following format: +to be part of the context. Most filter strings have the following +format: :: Where ```` is some part of the request/response, ```` -is some comparison to ````. Also **if you prefix a comparer with -'n' it turns it into a negation.** For example, if you wanted a filter -that only matches requests to target.org, you could use the following -filter string: +is some comparison to ````. 
For example, if you wanted a filter +that only matches requests to ``target.org``, you could use the +following filter string: :: @@ -335,6 +343,18 @@ filter string: comparer = "is" value = "target.org" +Also **if you prefix a comparer with 'n' it turns it into a negation.** +Using the previous example, the following will match any request except +for ones where the host contains ``target.org``: + +:: + + host nis target.org + + field = "host" + comparer = "nis" + value = "target.org" + For fields that are a list of key/value pairs (headers, get params, post params, and cookies) you can use the following format: @@ -423,21 +443,34 @@ List of comparers | lt | lt | A lt B if A < B (A and B must be a number) | +--------------+------------------+-----------------------------------------------------------------+ +Special form filters +~~~~~~~~~~~~~~~~~~~~ + +A few filters don't conform to the field, comparer, value format. You +can still negate these. + ++-----------+------------------+---------------------------------------------------------------------------------------------------------+ +| Format | Aliases | Description | ++===========+==================+=========================================================================================================+ +| before | before, bf, b4 | Filters out any request that is not before the given request. Filters out any request without a time. | ++-----------+------------------+---------------------------------------------------------------------------------------------------------+ +| after | after, af | Filters out any request that is not before the given request. Filters out any request without a time. | ++-----------+------------------+---------------------------------------------------------------------------------------------------------+ + Scope ----- Scope is a set of rules to define whether Pappy should mess with a request. 
You define the scope by setting the context to what you want -the scope to be and running ``scope_save``. The scope is saved in -data.db and is automatically restored when using the same project +the scope to be and running ``scope_save``. The scope is saved in the +data file and is automatically restored when using the same project directory. Any requests which don't match all the filters in the scope will be passed straight to the browser and will not be caught by the interceptor -or recorded in the database. This is useful to make sure you don't +or recorded in the data file. This is useful to make sure you don't accidentally do something like log in to your email through the proxy -and have your plaintext username/password stored and accidentally shown -to your coworkers. +and have your plaintext username/password stored. +--------------------+---------------------------+------------------------------------------------------+ | Command | Aliases | Description | @@ -458,11 +491,13 @@ Pappy also includes some built in filters that you can apply. These are things that you may want to filter by but may be too tedius to type out. The ``fbi`` command also supports tab completion. -+-----------------+-----------------------------------------+ -| Filter | Description | -+=================+=========================================+ -| ``not_image`` | Matches anything that isn't an image. | -+-----------------+-----------------------------------------+ ++-----------------+--------------------------------------------------+ +| Filter | Description | ++=================+==================================================+ +| ``not_image`` | Matches anything that isn't an image. | ++-----------------+--------------------------------------------------+ +| ``not_jscss`` | Matches anything that isn't JavaScript or CSS. 
| ++-----------------+--------------------------------------------------+ +--------------------+-------------------------------+--------------------------------------------------+ | Command | Aliases | Description | @@ -476,12 +511,12 @@ Interceptor This feature is like Burp's proxy with "Intercept Mode" turned on, except it's not turned on unless you explicitly turn it on. When the proxy gets a request while in intercept mode, it lets you edit it before -it forwards it to the server. In addition, it can stop responses from -the server and let you edit them before they get forwarded to the -browser. When you run the command, you can pass ``request`` and/or -``response`` as arguments to say whether you would like to intercept -requests and/or responses. Only in-scope requests/responses will be -intercepted (see Scope section). +forwarding it to the server. In addition, it can stop responses from the +server and let you edit them before they get forwarded to the browser. +When you run the command, you can pass ``req`` and/or ``rsp`` as +arguments to say whether you would like to intercept requests and/or +responses. Only in-scope requests/responses will be intercepted (see +Scope section). The interceptor will use your EDITOR variable to decide which editor to edit the request/response with. If no editor variable is set, it will @@ -512,6 +547,8 @@ To forward a request, edit it, save the file, then quit. Be totally useless: > ic +To drop a request, delete everything, save and quit. + Repeater -------- @@ -520,12 +557,11 @@ and Pappy will open vim in a split window with your request on the left and the original response on the right. You can make changes to the request and then run ":RepeaterSubmitBuffer" to submit the modified request. The response will be displayed on the right. This command is -bound to ``f`` by default, but you can rebind it in your vimrc -(I think, dunno if vim will complain if it's undefined). 
This command -will submit whatever buffer your cursor is in, so make sure it's in the -request buffer. - -To drop a request, delete everything, save and quit (``ggdG:wq``). +bound to ``f`` by default, but you can bind it to something else +too in your vimrc (I think, dunno if vim will complain if the function +undefined which it will be for regular files). This command will submit +whatever buffer your cursor is in, so make sure it's in the request +buffer. When you're done with repeater, run ":qa!" to avoid having to save changes to nonexistent files. @@ -588,19 +624,19 @@ quotes around it. $ pappy Proxy is listening on port 8000 - itsPappyTime> lma + pappy> lma Loaded "" Loaded "" Loaded "" Loaded "" Loaded "" - itsPappyTime> rma print + pappy> rma print Hello, Pappy! - itsPappyTime> rma print NSA + pappy> rma print NSA Hello, NSA! - itsPappyTime> rma print Idiot Slayer + pappy> rma print Idiot Slayer Hello, Idiot! - itsPappyTime> rma print "Idiot Slayer" + pappy> rma print "Idiot Slayer" Hello, Idiot Slayer! You'll need to run ``lma`` every time you make a change to the macro in @@ -617,16 +653,16 @@ with the same information as requests you've already made. For example: $ pappy Proxy is listening on port 8000 - itsPappyTime> ls + pappy> ls ID Verb Host Path S-Code Req Len Rsp Len Time Mngl 5 GET vitaly.sexy /esr1.jpg 200 OK 0 17653 -- -- 4 GET vitaly.sexy /netscape.gif 200 OK 0 1135 -- -- 3 GET vitaly.sexy /construction.gif 200 OK 0 28366 -- -- 2 GET vitaly.sexy /vitaly2.jpg 200 OK 0 2034003 -- -- 1 GET vitaly.sexy / 200 OK 0 1201 -- -- - itsPappyTime> gma sexy 1 + pappy> gma sexy 1 Wrote script to macro_sexy.py - itsPappyTime> quit + pappy> quit $ cat macro_sexy.py from pappyproxy.http import Request, get_request, post_request @@ -658,11 +694,10 @@ with the same information as requests you've already made. 
For example: # req.save() # save the request to the data file # or copy req0 into a loop and use string substitution to automate requests pass - $ If you enter in a value for ``SHORT_NAME``, you can use it as a shortcut to run that macro. So if in a macro you set ``SHORT_NAME='tm'`` you can -run it by running ``itsPappyTime> rma tm``. +run it by running ``pappy> rma tm``. +--------------------------+-------------------------------+-------------------------------------------------------------------------------------------------------------------------------------+ | Command | Aliases | Description | @@ -692,8 +727,8 @@ Dict-like objects are represented with a custom class called a so just interact with it like a dict and don't be surprised if it's missing some methods you would expect a dict to have. -Here is a quick (non-comprehensive) list of attributes that you can use -with ``Request`` objects: +Here is a quick list of attributes that you can use with ``Request`` +objects: +-----------------+-------------+------------------+-----------------------------------------------------------------------------------------------------------------+ | Attribute | Settable? | Data Type | Description | @@ -755,8 +790,8 @@ Request methods: | save() | Save the request, its unmangled version, its associated response, and the unmangled version of the response to the database | +------------+-------------------------------------------------------------------------------------------------------------------------------+ -And here is a quick (non-comprehensive) list of attributes that you can -use with ``Response`` objects: +And here is a quick list of attributes that you can use with +``Response`` objects: +------------------+-------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Attribute | Settable? 
| Data Type | Description | @@ -791,8 +826,27 @@ can fix it. Useful Functions ~~~~~~~~~~~~~~~~ -There are also a few functions which could be useful for making -requests. +There are also a few functions which could be useful for creating +requests in macros. It's worth pointing out that ``request_by_id`` is +useful for passing request objects as arguments. For example, here is a +macro that lets you resubmit a request with the Google Bot user agent: + +:: + + ## macro_googlebot.py + + from pappyproxy.http import Request, get_request, post_request, request_by_id + from pappyproxy.context import set_tag + from pappyproxy.iter import * + + MACRO_NAME = 'Submit as Google' + SHORT_NAME = '' + + def run_macro(args): + req = request_by_id(args[0]) + req.headers['User-Agent'] = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" + req.submit() + req.save() +-------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+ | Function | Description | @@ -801,6 +855,8 @@ requests. +-------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+ | post\_request(url, post\_params={}, url\_params={}) | Returns a Request object that contains a POST request to the given url with the given url and post params | +-------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+ +| request\_by\_id(reqid) | Get a request object from its id. | ++-------------------------------------------------------+-------------------------------------------------------------------------------------------------------------+ Intercepting Macros ------------------- @@ -824,9 +880,10 @@ Note, that due to twisted funkyness, *you cannot save requests from intercepting macros*. 
Technically you **can**, but to do that you'll have to define ``async_mangle_request`` (or response) instead of ``mangle_request`` (or response) then use ``Request.async_deep_save`` -which returns a deferred, then return a deferred from +which generates a deferred, then generate a deferred from ``async_mangle_requests`` (inline callbacks work too). If you've never -used twisted before, please don't try. Twisted is hard. +used twisted before, please don't try. Twisted is hard. Plus the mangled +request will be saved before it is submitted anyways. Confusing? Here are some example intercepting macros: @@ -862,9 +919,7 @@ Confusing? Here are some example intercepting macros: ## int_adminplz.py - from pappyproxy.http import ResponseCookie from base64 import base64encode as b64e - import string MACRO_NAME = 'Admin Session' @@ -873,40 +928,68 @@ Confusing? Here are some example intercepting macros: r.headers['Authorization'] = 'Basic %s' % b64e('Admin:Password123') return r +In addition, you can use an ``init(args)`` function to get arguments +from the command line. If no arguments are passed, args will be an empty +list. Here is an example macro that does a search and replace: + +:: + + ## int_replace.py + + MACRO_NAME = 'Find and Replace' + SHORT_NAME = '' + runargs = [] + + def init(args): + global runargs + runargs = args + + def mangle_request(request): + global runargs + if len(runargs) < 2: + return request + request.body = request.body.replace(runargs[0], runargs[1]) + return request + + def mangle_response(request): + global runargs + if len(runargs) < 2: + return request.response + request.response.body = request.response.body.replace(runargs[0], runargs[1]) + return request.response + +You can use this macro to do any search and replace that you want. 
For +example, if you wanted to replace "Google" with "Skynet", you can run +the macro like this: + +:: + + pappy> lma + Loaded "" + pappy> rim replace Google Skynet + "Find and Replace" started + pappy> + +Now every site that you visit will be a little bit more accurate. + Enabling/Disabling Intercepting Macros ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can use the following commands to start/stop intercepting macros -+------------------------+------------------------------------+----------------------------------------------------------------------------------------------------------------------+ -| Command | Aliases | Description | -+========================+====================================+======================================================================================================================+ -| ``lma [dir]`` | ``load_macros``, ``lma`` | Load macros from a directory. If ``dir`` is not given, use the current directory (the project directory) | -+------------------------+------------------------------------+----------------------------------------------------------------------------------------------------------------------+ -| ``rim `` | ``run_int_macro``, ``rim`` | Run an intercepting macro. Similarly to normal macros you can use the name, short name, or file name of the macro. | -+------------------------+------------------------------------+----------------------------------------------------------------------------------------------------------------------+ -| ``sim `` | ``stop_int_macro``, ``sim`` | Stop an intercepting macro. 
| -+------------------------+------------------------------------+----------------------------------------------------------------------------------------------------------------------+ -| ``lim`` | ``list_int_macros``, ``lim`` | List all enabled/disabled intercepting macros | -+------------------------+------------------------------------+----------------------------------------------------------------------------------------------------------------------+ -| ``gima `` | ``generate_int_macro``, ``gima`` | Generate an intercepting macro with the given name. | -+------------------------+------------------------------------+----------------------------------------------------------------------------------------------------------------------+ - -Additional Commands -------------------- - -This is a list of other random stuff you can do that isn't categorized -under anything else. These are mostly commands that I found that I -needed while doing a test and just added. They likely don't do a ton of -error checking and are likely not super full-featured. - -+----------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Command | Aliases | Description | -+========================================+=====================+=======================================================================================================================================================+ -| ``dump_response [filename]`` | ``dump_response`` | Dumps the data from the response to the given filename (useful for images, .swf, etc). If no filename is given, it uses the name given in the path. 
| -+----------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ``export `` | ``export`` | Writes either the full request or response to a file in the current directory. | -+----------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ ++-------------------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+ +| Command | Aliases | Description | ++===============================+====================================+================================================================================================================================+ +| ``lma [dir]`` | ``load_macros``, ``lma`` | Load macros from a directory. If ``dir`` is not given, use the current directory (the project directory) | ++-------------------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+ +| ``rim `` | ``run_int_macro``, ``rim`` | Run an intercepting macro. Similarly to normal macros you can use the name, short name, or file name of the macro. | ++-------------------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+ +| ``sim [args]`` | ``stop_int_macro``, ``sim`` | Stop an intercepting macro. If arguments are given, they will be passed to the macro's ``init(args)`` function if it exists. 
| ++-------------------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+ +| ``lim`` | ``list_int_macros``, ``lsim`` | List all enabled/disabled intercepting macros | ++-------------------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+ +| ``gima `` | ``generate_int_macro``, ``gima`` | Generate an intercepting macro with the given name. | ++-------------------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------------------+ Logging ------- @@ -925,3 +1008,53 @@ every start though!) +=======================+===============================================================================================================================================================================================================================+ | ``log [verbosity]`` | View the log at the given verbosity. Default verbosity is 1 which just shows connections being made/lost and some other info, verbosity 3 shows full requests/responses as they pass through and are processed by the proxy | +-----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Additional Commands and Features +-------------------------------- + +This is a list of other random stuff you can do that isn't categorized +under anything else. These are mostly commands that I found that I +needed while doing a test and just added. They likely don't do a ton of +error checking. 
+ ++----------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Command | Aliases | Description | ++========================================+=====================+=======================================================================================================================================================+ +| ``dump_response [filename]`` | ``dump_response`` | Dumps the data from the response to the given filename (useful for images, .swf, etc). If no filename is given, it uses the name given in the path. | ++----------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``export `` | ``export`` | Writes either the full request or response to a file in the current directory. | ++----------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Response streaming +~~~~~~~~~~~~~~~~~~ + +If you don't have any intercepting macros running, Pappy will forward +data to the browser as it gets it. However, if you're trying to mangle +messages/responses, Pappy will need to download the entire message +first. + +FAQ +--- + +Why does my request have an id of ``--``?!?! +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can't do anything with a request/response until it is decoded and +saved to disk. In between the time when a request is decoded and when +it's saved to disk, it will have an ID of ``--``. So just wait a little +bit and it will get an ID you can use. 
+ +Changelog +--------- + +The boring part of the readme + +- 0.1.2 +- Refactor almost every part of proxy +- Basic framework for plugins +- Bugfixes probably +- Create changelog +- 0.1.1 +- Start using sane versioning system +- No idea what I added + diff --git a/docs/source/pappyplugins.rst b/docs/source/pappyplugins.rst new file mode 100644 index 0000000..182cc9d --- /dev/null +++ b/docs/source/pappyplugins.rst @@ -0,0 +1,450 @@ +Writing Plugins for the Pappy Proxy +*********************************** + +.. contents:: Table of Contents + :local: + +Introduction +============ + +Are macros not powerful enough? Want to make something reusable? Want to add console commands?! Then you might want to write yourself a plugin. Some quick highlights about plugins: + +* Python scripts stored in ``~/.pappy/plugins`` +* Can add console commands +* For actions which aren't specific to one project +* Harder to write than macros + +Since macros can also use the plugin API, plugins aren't any more powerful than macros (besides adding console commands). However, if you find yourself copying a useful macro to more than one project, it may be worth it to just bind it to some commands, put the script in one place, and stop worrying about copying it around. Plus then you can put it on GitHub for some sweet sweet nerd cred. + +Should I Write a Plugin or a Macro? +----------------------------------- +A lot of the time, you can get away with writing a macro. However, you may consider writing a plugin if: + +* You find yourself copying one macro to multiple projects +* You want to write a general tool that can be applied to any website +* You need to maintain state during the Pappy session + +My guess is that if you need one quick thing for a project, you're better off writing a macro first and seeing if you end up using it in future projects. Then if you find yourself needing it a lot, write a plugin for it. 
You may also consider keeping a ``mine.py`` plugin where you can write out commands that you use regularly but may not be worth creating a dedicated plugin for. + +Plugins Get Merged +------------------ +If you write a useful plugin, as long as it isn't uber niche, I'll try and merge it into the core project. + +Creating a Plugin +================= +Whenever you make a macro, you'll have to bind some functions to some console commands. To do this, you'll have to define a ``load_cmds`` function in your plugin. This function should take one argument. When the plugin is loaded, this function will be called and the console object will be passed to this function. You can then use ``set_cmds`` and ``add_aliases`` to bind functions to console commands. + +Writing a Hello World Plugin +---------------------------- +It's probably easiest to explain how to write a plugin by writing one. Here is a simple plugin that defines a ``hello`` command and gives an alias ``hlo`` (we'll go over all the parts in a second):: + + ## hello.py + + def hello_world(line): + print "Hello, world!" + + ############### + ## Plugin hooks + + def load_cmds(cmd): + cmd.set_cmds({ + 'hello': (hello_world, None), + }) + cmd.add_aliases([ + ('hello', 'hlo'), + ]) + +Save this as ``~/.pappy/plugins/hello.py`` and run Pappy. You should have a new ``hello`` command that prints your message:: + + $ cp hello.py ~/.pappy/plugins/ + $ pappy -l + Temporary datafile is /tmp/tmp1Myw6q + Proxy is listening on port 8000 + pappy> hello + Hello, world! + pappy> hlo + Hello, world! + pappy> + +Awesome! So let's go over the code. 
Here are the important parts of the code: + +* We define a function that we want to call +* We define ``load_cmds(cmd)`` to be called when our plugin is loaded to bind our function to a command +* We use ``cmd.set_cmds`` to set all our commands +* We use ``cmd.add_aliases`` to add aliases for commands + +Now let's go over it in detail + +Passing Arguments to Your Function +---------------------------------- + +Each command gets bound to one function which takes one argument. That argument is all the text that was entered after the name of the command in the console. For example if we run ``hello foo bar``, in our function line would be "foo bar". **I suggest using shlex.split(line) to parse multiple arguments**. So let's update our script to take some arguments:: + + ## hello.py + import shlex + + def hello_world(line): + if line: + args = shlex.split(line) + print 'Hello, %s!' % (', '.join(args)) + else: + print "Hello, world!" + + ############### + ## Plugin hooks + + def load_cmds(cmd): + cmd.set_cmds({ + 'hello': (hello_world, None), + }) + cmd.add_aliases([ + ('hello', 'hlo'), + ]) + +Save your changes and restart pappy to reload the plugin:: + + $ pappy -l + Temporary datafile is /tmp/tmpBOXyJ3 + Proxy is listening on port 8000 + pappy> hello + Hello, world! + pappy> hello foo bar baz + Hello, foo, bar, baz! + pappy> hello foo bar "baz lihtyur" + Hello, foo, bar, baz lihtyur! + pappy> + +Adding More Aliases +------------------- + +So now let's add some more aliases to our command. If we want to add a new alias, we just add another tuple to the list passed into ``cmd.add_aliases``. The first element is the real name of the command (what you set with ``set_cmds``) and the second value is the alias you want to type. So let's make it so we can just type ``ho`` to say hello:: + + ## hello.py + import shlex + + def hello_world(line): + if line: + args = shlex.split(line) + print 'Hello, %s!' % (', '.join(args)) + else: + print "Hello, world!" 
+ + ############### + ## Plugin hooks + + def load_cmds(cmd): + cmd.set_cmds({ + 'hello': (hello_world, None), + }) + cmd.add_aliases([ + ('hello', 'hlo'), + ('hello', 'ho'), + ]) + +.. note:: + + You must use the actual name of the command that you used in ``set_cmds``. You can't "chain" alieases. As a result, in our example we couldn't add the alias ``('hlo', 'ho')`` to add ``ho`` as our alias. + +Then reload the plugin:: + + $ pappy -l + Temporary datafile is /tmp/tmpBOXyJ3 + Proxy is listening on port 8000 + pappy> ho + Hello, world! + pappy> ho foo bar baz + Hello, foo, bar, baz! + pappy> ho foo bar "baz lihtyur" + Hello, foo, bar, baz lihtyur! + pappy> + +Adding Another Command +---------------------- +So now let's add a ``robe_and_wizard_hat`` command. To do this, we will define another function, then add another entry in the dict that is passed to ``set_cmds``. The second value in the tuple is the autocomplete function, but we'll get to that later. For now, just put in ``None`` to say we don't have one. We will also add a ``wh`` alias to it:: + + $ pappy -l + Temporary datafile is /tmp/tmpyl2cEZ + Proxy is listening on port 8000 + pappy> wh + I put on my robe and wizard hat + pappy> + +Adding Autocompletion +--------------------- +You can also define a function to handle autocompletion for your command. In order to do this, you define a function that takes 4 arguments: ``text``, ``line``, ``begidx``, and ``endidx``. From the `Cmd docs `_, this is what the arguments mean: + + ``text`` is the string prefix we are attempting to match: all returned matches must begin with it. ``line`` is the current input line with leading whitespace removed, ``begidx`` and ``endidx`` are the beginning and ending indexes of the prefix text, which could be used to provide different completion depending upon which position the argument is in. 
Let's let the user autocomplete some names in our plugin::
+ + def put_on_rope_and_wizard_hat(line): + if line: + print '%s puts on their robe and wizard hat' % line + else: + print 'I put on my robe and wizard hat' + + def complete_hello_world(text, line, begidx, endidx): + return [n for n in _AUTOCOMPLETE_NAMES if n.startswith(text)] + + ############### + ## Plugin hooks + + def load_cmds(cmd): + cmd.set_cmds({ + 'hello': (hello_world, complete_hello_world), + 'wizard_hat': (put_on_rope_and_wizard_hat, None), + }) + cmd.add_aliases([ + ('hello', 'hlo'), + ('wizard_hat', 'wh'), + ]) +Using defer.inlineCallbacks With a Command +------------------------------------------ + +.. note:: + If you are using inlineCallbacks, you can't use any functions which are blocking versions of async functions. For example, you cannot use :func:`pappyproxy.http.Request.save` and must instead use :func:`pappyproxy.http.Request.async_deep_save`. + +.. note:: + This tutorial won't tell you how to use inlineCallbacks in general. Type "twisted inline callbacks" into google to figure out what they are. This is mainly just a reminder to use the ``crochet`` wrapper for console commands and warning you that some functions may return deferreds that you may have to deal with. + +Since you're writing a plugin, you'll probably be using functions which return a deferred. And to keep things readable, you'll want to use the ``defer.inlineCallbacks`` function wrapper. Unfortunately, you can't bind async functions to commands. Luckily, there's a library called `crochet `_ which lets you add another wrapper to the function that lets it be used like a blocking function. Rather than talking about it, let's write a plugin to call :func:`pappyproxy.console.load_reqlist` to print out some requests' hosts. Let's start by pretending it's a normal function:: + + import shlex + from pappyproxy.console import load_reqlist + + def print_hosts(line): + args = shlex.split(line) + reqs = load_reqlist(args[0]) # It's supposed to return a list of requests, right? 
+ for r in reqs: + print 'The host for request %s is: %s' % (r.reqid, r.host) + + ############### + ## Plugin hooks + + def load_cmds(cmd): + cmd.set_cmds({ + 'print_hosts': (print_hosts, None), + }) + cmd.add_aliases([ + ]) + +And we run it:: + + pappy> print_hosts 1 + Traceback (most recent call last): + File "/usr/local/lib/python2.7/dist-packages/cmd2.py", line 788, in onecmd_plus_hooks + stop = self.onecmd(statement) + File "/usr/local/lib/python2.7/dist-packages/cmd2.py", line 871, in onecmd + stop = func(statement) + File "/home/supahacker/pappy/pappyproxy/console.py", line 15, in catch + func(*args, **kwargs) + File "/home/supahacker/.pappy/plugins/hosts.py", line 7, in print_hosts + for r in reqs: + TypeError: iteration over non-sequence + iteration over non-sequence + pappy> + +Iteration over a non-sequence? what? Well, :func:`pappyproxy.console.load_reqlist` doesn't actually return a list of requests. It returns a deferred which returns a list of requests. I'm not going into the details (look up some stuff on using inline callbacks with Twisted if you want more info), but the way to fix it is to slap an ``inlineCallbacks`` wrapper on the function and ``yield`` the result of the function. Now it looks like this:: + + import shlex + from pappyproxy.console import load_reqlist + from twisted.internet import defer + + @defer.inlineCallbacks + def print_hosts(line): + args = shlex.split(line) + reqs = yield load_reqlist(args[0]) + for r in reqs: + print 'The host for request %s is: %s' % (r.reqid, r.host) + + ############### + ## Plugin hooks + + def load_cmds(cmd): + cmd.set_cmds({ + 'print_hosts': (print_hosts, None), + }) + cmd.add_aliases([ + ]) + +However, the console assumes that any functions it calls will be blocking. 
As a result, we need to add the ``crochet.wait_for`` wrapper:: + + import shlex + import crochet + from pappyproxy.console import load_reqlist + from twisted.internet import defer + + @crochet.wait_for(timeout=None) + @defer.inlineCallbacks + def print_hosts(line): + args = shlex.split(line) + reqs = yield load_reqlist(args[0]) + for r in reqs: + print 'The host for request %s is: %s' % (r.reqid, r.host) + + ############### + ## Plugin hooks + + def load_cmds(cmd): + cmd.set_cmds({ + 'print_hosts': (print_hosts, None), + }) + cmd.add_aliases([ + ]) + +And now we're good! If you run it without the crochet wrapper, it may still work. However, since the console assumes any functions it calls will be blocking, not having the wrapper could lead to weird errors. + +Plugin API +========== +There are also some useful functions that you can use to interact with the request history and the context. It's somewhat limited for now, but for now you can at least look through history and create/send new requests. Hopefully the API will expand as people find themselves wanting to do new things. That means **if you're writing a plugin, let me know and I'll add any APIs you need**. For now at least, plugins will let you maintain state over the course of the session and let you define commands. + +The best way to learn what you can do is to go through the :ref:`pappyproxy-package` and look at all the available functions. + +API Functions +------------- +See :mod:`pappyproxy.plugin` for docs on all the functions you can use. You can also use any of the functions provided for writing macros (and vice-versa). + +Storing Data on Disk +-------------------- +Unfortunately, you're on your own if you want to store plugin specific stuff on disk. It's also important that you store any data that is specific to a project in the same directory as the data file. 
This is to make sure that if you encrypt your project folder, you can be sure that no sensitive data about the test can be found anywhere else. The only time you should store anything outside of the current directory is to store global plugin settings, and even then it would probably be better to parse options from ``config.config_dict``. Pappy doesn't even store data outside of the project directory except for its CA certificates. + +However, if your plugin is a special snowflake that needs to store unencrypted, global settings, you should create a directory for your plugin in ``{config.DATA_DIR}/plugindata`` and put your files there. But again, avoid this if you can. + +.. note:: + Any project-specific data (ie anything that contains info about requests) should be stored in the project directory unless you have a really really good reason. This is because it must be possible to secure any sensitive data by encrypting the project folder and storing data outside of the directory will add complications. + +.. warning:: + Do not modify the data file schema. There is a good chance the schema will break in future versions of Pappy. + +Storing Custom Request Metadata +------------------------------- +:class:`pappyproxy.http.Request` objects have a ``plugin_data`` attribute. It is a dictionary that is intended to be used by plugins to give the request custom metadata. If you want to store metadata about a request, it is suggested that you add a key to this dictionary and store any metadata you want under that key. You can use :func:`pappyproxy.http.Request.get_plugin_dict` to get a dictionary for a specific name. It will create an entry for that name if it doesn't exist. I also suggest defining a function plugin-wide for getting the plugin's data dict from a specific request. Since dictionaries are always passed by reference, any modifications you make to the returned dict will be applied to the request as well. + +.. 
note:: + You will need to save the request using something like :func:`pappyproxy.http.Request.save` or :func:`pappyproxy.http.Request.async_deep_save` in order to store the changes in the data file. + +Here is an example plugin for storing the user-agent (if it exists) in the ``plugin_data`` dict of a request under the key ``agent``:: + + import crochet + import shlex + from twisted.internet import defer + + from pappyproxy.console import load_reqlist + from pappyproxy.plugin import main_context + from pappyproxy.util import PappyException + + DATA_KEY = 'agent' + + def get_data(r): + return r.get_plugin_dict(DATA_KEY) + + @crochet.wait_for(timeout=None) + @defer.inlineCallbacks + def update_agent_metadata(line): + for r in main_context().active_requests: + if 'user-agent' in r.headers: + get_data(r)['agent'] = r.headers['user-agent'] + yield r.async_deep_save() + + @crochet.wait_for(timeout=None) + @defer.inlineCallbacks + def view_agent(line): + args = shlex.split(line) + reqs = yield load_reqlist(args[0]) + for r in reqs: + if 'agent' in get_data(r): + print 'The user agent for %s is "%s"' % (r.reqid, get_data(r)['agent']) + else: + print 'Request %s has no user agent data' % r.reqid + + ############### + ## Plugin hooks + + def load_cmds(cmd): + cmd.set_cmds({ + 'agent_update': (update_agent_metadata, None), + 'view_agent': (view_agent, None), + }) + cmd.add_aliases([ + ]) + +Useful Functions +---------------- +* Load a request by id: :func:`pappyproxy.http.Request.load_request` +* Create a filter from a filter string: :func:`pappyproxy.context.Filter.from_filter_string` + +Built In Plugins As Examples +============================ + +Built In Plugins +---------------- +All the commands in Pappy are implemented as plugins. I have done what I could to avoid using internal functions as much as I could, but there are still some instances where I had to implement an internal function in order to get the functions I needed. 
However, you can still look them over to see how things are structured and see some examples of semi-complicated plugins. + +Interceptor and Repeater +------------------------ +Pappy's interceptor and repeater are fully implemented as a plugin. It defines an intercepting macro that handles saving then editing messages and commands that read those files and edit them. It relies on Twisted to switch between the macro handling the request and the command modifying it, so if you want to make something similar, you'll have to learn how to use deferreds. diff --git a/docs/source/pappyproxy.rst b/docs/source/pappyproxy.rst index 8a166a4..0ee98c2 100644 --- a/docs/source/pappyproxy.rst +++ b/docs/source/pappyproxy.rst @@ -1,45 +1,43 @@ pappyproxy package ================== -.. - Subpackages - ----------- +Subpackages +----------- - .. toctree:: +.. toctree:: - pappyproxy.schema - pappyproxy.templates - pappyproxy.tests - pappyproxy.vim_repeater + pappyproxy.plugins + pappyproxy.schema + pappyproxy.templates + pappyproxy.tests + pappyproxy.vim_repeater - Submodules - ---------- +Submodules +---------- -.. - pappyproxy.comm module - ---------------------- +pappyproxy.comm module +---------------------- - .. automodule:: pappyproxy.comm - :members: - :noindex: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.comm + :members: + :undoc-members: + :show-inheritance: - pappyproxy.config module - ------------------------ +pappyproxy.config module +------------------------ - .. automodule:: pappyproxy.config - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.config + :members: + :undoc-members: + :show-inheritance: - pappyproxy.console module - ------------------------- +pappyproxy.console module +------------------------- - .. automodule:: pappyproxy.console - :members: - :undoc-members: - :show-inheritance: +.. 
automodule:: pappyproxy.console + :members: + :undoc-members: + :show-inheritance: pappyproxy.context module ------------------------- @@ -65,69 +63,67 @@ pappyproxy.iter module :undoc-members: :show-inheritance: -.. - pappyproxy.macros module - ------------------------ +pappyproxy.macros module +------------------------ - .. automodule:: pappyproxy.macros - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.macros + :members: + :undoc-members: + :show-inheritance: - pappyproxy.mangle module - ------------------------ +pappyproxy.pappy module +----------------------- - .. automodule:: pappyproxy.mangle - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.pappy + :members: + :undoc-members: + :show-inheritance: - pappyproxy.pappy module - ----------------------- +pappyproxy.plugin module +------------------------ - .. automodule:: pappyproxy.pappy - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.plugin + :members: + :undoc-members: + :show-inheritance: - pappyproxy.proxy module - ----------------------- +pappyproxy.proxy module +----------------------- - .. automodule:: pappyproxy.proxy - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.proxy + :members: + :undoc-members: + :show-inheritance: - pappyproxy.repeater module - -------------------------- +pappyproxy.repeater module +-------------------------- - .. automodule:: pappyproxy.repeater - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.repeater + :members: + :undoc-members: + :show-inheritance: - pappyproxy.session module - ------------------------- +pappyproxy.session module +------------------------- - .. automodule:: pappyproxy.session - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.session + :members: + :undoc-members: + :show-inheritance: - pappyproxy.util module - ---------------------- +pappyproxy.util module +---------------------- - .. 
automodule:: pappyproxy.util - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy.util + :members: + :undoc-members: + :show-inheritance: -.. - Module contents - --------------- +Module contents +--------------- - .. automodule:: pappyproxy - :members: - :undoc-members: - :show-inheritance: +.. automodule:: pappyproxy + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pappyproxy.schema.rst b/docs/source/pappyproxy.schema.rst index 771c9c9..0af2798 100644 --- a/docs/source/pappyproxy.schema.rst +++ b/docs/source/pappyproxy.schema.rst @@ -28,6 +28,22 @@ pappyproxy.schema.schema_3 module :undoc-members: :show-inheritance: +pappyproxy.schema.schema_4 module +--------------------------------- + +.. automodule:: pappyproxy.schema.schema_4 + :members: + :undoc-members: + :show-inheritance: + +pappyproxy.schema.schema_5 module +--------------------------------- + +.. automodule:: pappyproxy.schema.schema_5 + :members: + :undoc-members: + :show-inheritance: + pappyproxy.schema.update module ------------------------------- diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 9603afb..941f395 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -1,13 +1,9 @@ The Pappy Proxy Tutorial ************************ -Table of Contents -================= - -.. toctree:: - - tutorial - +.. contents:: Table of Contents + :local: + Getting Set Up ============== @@ -40,9 +36,9 @@ That was easy! Make a project directory anywhere for Natas and fire up Pappy.:: $ cd natas Copying default config to ./config.json Proxy is listening on port 8000 - itsPappyTime> + pappy> -If you look at what's in the file, you'll notice that there's a ``data.db`` file and a ``config.json`` file. +If you look at what's in the directory, you'll notice that there's a ``data.db`` file and a ``config.json`` file. 
* ``data.db`` is a SQLite file that stores all the (in-scope) requests that pass through the proxy * ``config.json`` stores settings for the proxy @@ -55,13 +51,13 @@ In order to intercept HTTPS requests, you'll need to add a CA cert to your brows To generate certificates, you'll use the ``gencerts`` command. This will generate certificates in Pappy's directory. By default, all projects will use the certs in this directory, so you should only have to generate/install the certificates once.:: - itsPappyTime> gencerts + pappy> gencerts This will overwrite any existing certs in /home/anonymouse/pappy/pappyproxy/certs. Are you sure? (y/N) y Generating certs to /home/anonymouse/pappy/pappyproxy/certs Generating private key... Done! Generating client cert... Done! - itsPappyTime> + pappy> The directory that the certs get put in may be different for you. Next, you'll need to add the generated ``certificate.crt`` file to your browser. This is different for each browser. @@ -107,7 +103,7 @@ Start up Pappy in Lite mode by running ``pappy -l``, enable the proxy in your br /pappynatas/ $ pappy -l Temporary datafile is /tmp/tmp5AQBrH Proxy is listening on port 8000 - itsPappyTime> ls + pappy> ls ID Verb Host Path S-Code Req Len Rsp Len Time Mngl 8 GET vitaly.sexy /favicon.ico 404 Not Found 0 114 0.21 -- 7 GET vitaly.sexy /favicon.ico 404 Not Found 0 114 0.22 -- @@ -117,7 +113,7 @@ Start up Pappy in Lite mode by running ``pappy -l``, enable the proxy in your br 3 GET vitaly.sexy /vitaly2.jpg 200 OK 0 2034003 1.34 -- 2 GET vitaly.sexy / 200 OK 0 1201 0.21 -- 1 GET vitaly.sexy / 301 Moved Permanently 0 178 0.27 -- - itsPappyTime> quit + pappy> quit Deleting temporary datafile Make sure that the request you made appears on the list. When you quit, the temporary data file will be deleted, so no cleanup will be required! 
@@ -129,8 +125,8 @@ Setting the Scope ----------------- The first thing we'll do is set up Pappy so that it only intercepts requests going to ``*.natas.labs.overthewire.org``:: - itsPappyTime> filter host containsr "natas\.labs\.overthewire\.org$" - itsPappyTime> scope_save + pappy> filter host containsr "natas\.labs\.overthewire\.org$" + pappy> scope_save What these commands do: @@ -151,7 +147,7 @@ First, go to ``_ and log in with the d Natas 1 ------- -Haha! This is the same as natas0, but they got tricky and shut off right-clicking. There's still ways to view the source in the browser, but we'll use Pappy here. The two commands we'll learn here are ``ls``, ``vfq``, and ``vfs``. +Haha! This is the same as natas0, but they got tricky and shut off right-clicking. There's still ways to view the source in the browser, but we'll use Pappy here. The commands we'll learn here are ``ls``, ``vfq``, and ``vfs``. * ``ls`` lists the most current requests that are in the current context. You'll be using this a lot to get the IDs of requests you want to do things with. * ``vfq `` prints the full request of a request you specify @@ -159,7 +155,7 @@ Haha! 
This is the same as natas0, but they got tricky and shut off right-clickin So to solve natas1, we'll want to view the full response to our request to the page:: - itsPappyTime> ls + pappy> ls ID Verb Host Path S-Code Req Len Rsp Len Time Mngl 16 GET natas1.natas.labs.overthewire.org /favicon.ico 404 Not Found 0 307 0.27 -- 15 GET natas1.natas.labs.overthewire.org /favicon.ico 404 Not Found 0 307 0.27 -- @@ -177,7 +173,7 @@ So to solve natas1, we'll want to view the full response to our request to the p 3 GET natas.labs.overthewire.org /css/level.css 200 OK 0 1332 0.48 -- 2 GET natas0.natas.labs.overthewire.org / 200 OK 0 918 0.26 -- 1 GET natas0.natas.labs.overthewire.org / 401 Unauthorized 0 479 0.26 -- - itsPappyTime> vfs 14 + pappy> vfs 14 HTTP/1.1 200 OK Date: Fri, 18 Dec 2015 19:47:21 GMT @@ -197,7 +193,7 @@ So to solve natas1, we'll want to view the full response to our request to the p ... snip ... - itsPappyTime> + pappy> Yay! @@ -205,7 +201,7 @@ Natas 2 ------- When you visit this page, you get a message saying "There is nothing on this page". That is probably a blatant lie. Let's see what was in that response.:: - itsPappyTime> ls + pappy> ls ID Verb Host Path S-Code Req Len Rsp Len Time Mngl 30 GET natas2.natas.labs.overthewire.org /favicon.ico 404 Not Found 0 307 0.27 -- 29 GET natas2.natas.labs.overthewire.org /favicon.ico 404 Not Found 0 307 0.27 -- @@ -213,7 +209,7 @@ When you visit this page, you get a message saying "There is nothing on this pag 27 GET natas2.natas.labs.overthewire.org / 200 OK 0 872 0.27 -- 26 GET natas2.natas.labs.overthewire.org / 401 Unauthorized 0 479 0.27 -- ... snip ... - itsPappyTime> vfs 27 + pappy> vfs 27 HTTP/1.1 200 OK ... snip ... @@ -225,7 +221,7 @@ When you visit this page, you get a message saying "There is nothing on this pag - itsPappyTime> + pappy> So the only suspicious thing is ````. 
I'll let you figure out the rest ;) @@ -282,7 +278,7 @@ While we can't find all the passwords with one filter, if we remember how we got For natas0 and natas1, the responses had a phrase like "the password is abc123". So we can filter out anything that doesn't have the word "password" in it.:: - itsPappyTime> ls + pappy> ls ID Verb Host Path S-Code Req Len Rsp Len Time Mngl 52 GET natas4.natas.labs.overthewire.org /favicon.ico 404 Not Found 0 307 0.26 -- 51 GET natas4.natas.labs.overthewire.org /favicon.ico 404 Not Found 0 307 0.27 -- @@ -309,8 +305,8 @@ For natas0 and natas1, the responses had a phrase like "the password is abc123". 30 GET natas2.natas.labs.overthewire.org /favicon.ico 404 Not Found 0 307 0.27 -- 29 GET natas2.natas.labs.overthewire.org /favicon.ico 404 Not Found 0 307 0.27 -- 28 GET natas2.natas.labs.overthewire.org /files/pixel.png 200 OK 0 303 0.27 -- - itsPappyTime> f body ct password - itsPappyTime> ls + pappy> f body ct password + pappy> ls ID Verb Host Path S-Code Req Len Rsp Len Time Mngl 49 GET natas4.natas.labs.overthewire.org / 401 Unauthorized 0 479 0.26 -- 38 GET natas3.natas.labs.overthewire.org / 401 Unauthorized 0 479 0.28 -- @@ -325,7 +321,7 @@ For natas0 and natas1, the responses had a phrase like "the password is abc123". 6 GET natas.labs.overthewire.org /js/jquery-1.9.1.js 200 OK 0 268381 1.20 -- 2 GET natas0.natas.labs.overthewire.org / 200 OK 0 918 0.26 -- 1 GET natas0.natas.labs.overthewire.org / 401 Unauthorized 0 479 0.26 -- - itsPappyTime> + pappy> It looks like requests 2 and 14 are the ones we're looking for (we know the password is on the page and those are the requests to / that have a 200 OK response). Use ``vfs`` to look at the response and you'll get the passwords again! It looks like we also found the password from natas2 (the request to /s3cr3t/users.txt). @@ -341,7 +337,7 @@ To do this, we'll be using Pappy's interceptor. 
The interceptor lets you stop a In this case, we only want to intercept requests, so we'll run ``ic req``:: - itsPappyTime> ic req + pappy> ic req And we'll get a screen that says something like:: @@ -405,14 +401,15 @@ For this one, when you view the source you'll notice they're taking value you en Use ``:wq!`` to quit the repeater without having to save buffers .. note:: - You must know the basics of how to use vim for the repeater and have a key bound to the leader. You can find more information on the leader key ``here ``. By default is bound to ``\``. + You must know the basics of how to use vim for the repeater and have a key bound to the leader. You can find more information on the leader key `here `_. By default is bound to ``\``. -Submit a request then open that request in the repeater.:: - itsPappyTime> ls +Submit a request then open that request in the repeater:: + + pappy> ls 196 GET natas9.natas.labs.overthewire.org /index.php?needle=ball&submit=Search 200 OK 0 1686 0.27 -- 195 GET natas9.natas.labs.overthewire.org /index-source.html 200 OK 0 1952 0.27 -- ... snip ... - itsPappyTime> rp 196 + pappy> rp 196 Vim will open up in a vertical split with the request on the left and the response on the right. @@ -434,7 +431,7 @@ Commands we'll learn: So the first thing we'll do is submit a request to have a base request that we can modify. Submit a request with any username. You should get a response back saying the user doesn't exist. Now we'll generate a macro and use that request as a base for our script:: - itsPappyTime> ls + pappy> ls ID Verb Host Path S-Code Req Len Rsp Len Time Mngl 224 POST natas15.natas.labs.overthewire.org /index.php 200 OK 14 937 0.27 -- 223 POST natas15.natas.labs.overthewire.org /index.php 200 OK 12 937 0.27 -- @@ -445,9 +442,9 @@ So the first thing we'll do is submit a request to have a base request that we c 218 GET natas15.natas.labs.overthewire.org / 401 Unauthorized 0 480 0.27 -- ... snip ... 
- itsPappyTime> gma brute 224 + pappy> gma brute 224 Wrote script to macro_brute.py - itsPappyTime> + pappy> Now open up ``macro_brute.py`` in your favorite text editor. You should have a script that looks like this:: @@ -513,15 +510,15 @@ To start out simple, we'll write a macro that lets us check a username from the Then to run it:: - itsPappyTime> lma + pappy> lma Loaded "" - itsPappyTime> rma brute admin + pappy> rma brute admin admin is not a user - itsPappyTime> rma brute fooooo + pappy> rma brute fooooo fooooo is not a user - itsPappyTime> rma brute natas16 + pappy> rma brute natas16 natas16 is a user! - itsPappyTime> + pappy> Awesome! Notice how we didn't have to deal with authentication either. This is because the authentication is handled by the ``Authorization`` header which was included in the generated request. @@ -535,11 +532,11 @@ So to escape it, we use a payload like:: In this case, any username that ends in ``" OR 1=1; #`` will be considered a valid username. Let's try this out:: - itsPappyTime> rma brute "foo\" OR 1=1;" + pappy> rma brute "foo\" OR 1=1;" foo" OR 1=1; is a user! - itsPappyTime> rma brute "fooooooo\" OR 1=1;" + pappy> rma brute "fooooooo\" OR 1=1;" fooooooo" OR 1=1; is a user! - itsPappyTime> + pappy> Great! Now we can check any true/false condition we want. In this case, we want to check if a certain character is at a certain position in the ``password`` column. We do this with the ``ASCII`` and ``SUBSTRING`` functions. So something like this will check if the first character is an ``A``.:: @@ -598,9 +595,9 @@ Alright, let's update our macro to find the first character of the password.:: And when we run it...:: - itsPappyTime> lma + pappy> lma Loaded "" - itsPappyTime> rma brute + pappy> rma brute Trying a... Trying b... Trying c... @@ -610,7 +607,7 @@ And when we run it...:: Trying V... Trying W... W is the first char! - itsPappyTime> + pappy> We find the first character! Woo! Next we just have to do this for each position. 
Even through we don't know the length of the password, we will know that the password is over when none of the characters are valid. So let's update our macro:: @@ -685,9 +682,9 @@ We find the first character! Woo! Next we just have to do this for each position Then we run it:: - itsPappyTime> lma + pappy> lma Loaded "" - itsPappyTime> rma brute + pappy> rma brute a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W W is char 1! The password so far is W @@ -713,7 +710,7 @@ Then we run it:: The password so far is WaIHEacj63wnNIBROHeqi3p9t0m5nhmh a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 0 1 2 3 4 5 6 7 8 9 0 Done! The password is "WaIHEacj63wnNIBROHeqi3p9t0m5nhmh" - itsPappyTime> + pappy> Boom! There it is! diff --git a/pappyproxy/comm.py b/pappyproxy/comm.py index 9c88e26..5d220bd 100644 --- a/pappyproxy/comm.py +++ b/pappyproxy/comm.py @@ -1,10 +1,10 @@ import base64 import json -import pappyproxy from twisted.protocols.basic import LineReceiver from twisted.internet import defer from util import PappyException +from .http import Request, Response """ comm.py @@ -31,6 +31,8 @@ class CommServer(LineReceiver): } def lineReceived(self, line): + from .http import Request, Response + if line == '': return try: @@ -71,7 +73,7 @@ class CommServer(LineReceiver): def action_get_request(self, data): try: reqid = data['reqid'] - req = yield pappyproxy.http.Request.load_request(reqid) + req = yield Request.load_request(reqid) except KeyError: raise PappyException("Request with given ID does not exist") @@ -82,12 +84,12 @@ class CommServer(LineReceiver): def action_get_response(self, data): try: reqid = data['reqid'] - req = yield pappyproxy.http.Request.load_request(reqid) + req = yield Request.load_request(reqid) except KeyError: raise PappyException("Request with given ID does not exist, cannot fetch associated response.") if req.response: - rsp = yield 
pappyproxy.http.Response.load_response(req.response.rspid) + rsp = yield Response.load_response(req.response.rspid) dat = json.loads(rsp.to_json()) else: dat = {} @@ -95,13 +97,8 @@ class CommServer(LineReceiver): @defer.inlineCallbacks def action_submit_request(self, data): - try: - req = pappyproxy.http.Request(base64.b64decode(data['full_request'])) - req.port = data['port'] - req.is_ssl = data['is_ssl'] - except: - raise PappyException("Error parsing request") - yield req.async_submit() + message = base64.b64decode(data['full_message']) + req = yield Request.submit_new(data['host'], data['port'], data['is_ssl'], message) yield req.async_deep_save() retdata = {} diff --git a/pappyproxy/config.py b/pappyproxy/config.py index 14af0de..620e48a 100644 --- a/pappyproxy/config.py +++ b/pappyproxy/config.py @@ -1,20 +1,89 @@ -import imp +""" +The configuration settings for the proxy. + +.. data:: CERT_DIR + + The location of the CA certs that Pappy will use. This can be configured in the + ``config.json`` file for a project. + + :Default: ``{DATADIR}/certs`` + +.. data:: PAPPY_DIR + + The file where pappy's scripts are located. Don't write anything here, and you + probably don't need to write anything here. Use DATA_DIR instead. + + :Default: Wherever the scripts are installed + +.. data:: DATA_DIR + + The data directory. This is where files that have to be read by Pappy every time + it's run are put. For example, plugins are stored in ``{DATADIR}/plugins`` and + certs are by default stored in ``{DATADIR}/certs``. This defaults to ``~/.pappy`` + and isn't configurable right now. + + :Default: ``~/.pappy`` + +.. data:: DATAFILE + + The location of the CA certs that Pappy will use. This can be configured in the + ``config.json`` file for a project. + + :Default: ``data.db`` + +.. data:: DEBUG_DIR + + The directory to write debug output to. Don't put this outside the project folder + since it writes all the request data to this directory. 
You probably won't need + to use this. Configured in the ``config.json`` file for the project. + + :Default: None + +.. data: LISTENERS + + The list of active listeners. It is a list of tuples of the format (port, interface) + Not modifiable after startup. Configured in the ``config.json`` file for the project. + + :Default: ``[(8000, '127.0.0.1')]`` + +.. data: PLUGIN_DIRS + + List of directories that plugins are loaded from. Not modifiable. + + :Default: ``['{DATA_DIR}/plugins', '{PAPPY_DIR}/plugins']`` + +.. data: CONFIG_DICT + + The dictionary read from config.json. When writing plugins, use this to load + configuration options for your plugin. + +""" + import json import os import shutil PAPPY_DIR = os.path.dirname(os.path.realpath(__file__)) DATA_DIR = os.path.join(os.path.expanduser('~'), '.pappy') +DATA_DIR CERT_DIR = os.path.join(DATA_DIR, 'certs') + DATAFILE = 'data.db' + DEBUG_DIR = None DEBUG_TO_FILE = False DEBUG_VERBOSITY = 0 + LISTENERS = [(8000, '127.0.0.1')] + SSL_CA_FILE = 'certificate.crt' SSL_PKEY_FILE = 'private.key' +PLUGIN_DIRS = [os.path.join(DATA_DIR, 'plugins'), os.path.join(PAPPY_DIR, 'plugins')] + +CONFIG_DICT = {} + def get_default_config(): default_config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'default_user_config.json') @@ -61,6 +130,7 @@ def load_settings(proj_config): def load_from_file(fname): + global CONFIG_DICT # Make sure we have a config file if not os.path.isfile(fname): print "Copying default config to %s" % fname @@ -70,5 +140,5 @@ def load_from_file(fname): # Load local project config with open(fname, 'r') as f: - proj_config = json.load(f) - load_settings(proj_config) + CONFIG_DICT = json.load(f) + load_settings(CONFIG_DICT) diff --git a/pappyproxy/console.py b/pappyproxy/console.py index 94e84db..49d151f 100644 --- a/pappyproxy/console.py +++ b/pappyproxy/console.py @@ -1,46 +1,18 @@ +""" +Contains helpers for interacting with the console. 
Includes definition for the +class that is used to run the console. +""" + import cmd2 -import crochet -import curses -import datetime -import os -import pappyproxy -import pygments import re -import shlex import string -import subprocess import sys -import termios -import time -import urllib -from twisted.internet import defer, reactor -from pappyproxy.util import PappyException -from pappyproxy.macros import load_macros, macro_from_requests, gen_imacro -from pappyproxy.repeater import start_editor -from pygments.lexers import get_lexer_for_mimetype -from pygments.lexers import HttpLexer -from pygments.formatters import TerminalFormatter +from .util import PappyException +from twisted.internet import defer -""" -console.py - -Functions and classes involved with interacting with console input and output -""" - -# http://www.termsys.demon.co.uk/vtansi.htm#cursor -SAVE_CURSOR = '\x1b[7' -UNSAVE_CURSOR = '\x1b[8' -LINE_UP = '\x1b[1A' -LINE_ERASE = '\x1b[2K' -PRINT_LINE = '\x1b[1i' - -edit_queue = [] -loaded_macros = [] -loaded_int_macros = [] -macro_dict = {} -int_macro_dict = {} -proxy_server_factory = None +################### +## Helper functions def print_pappy_errors(func): def catch(*args, **kwargs): @@ -50,1002 +22,41 @@ def print_pappy_errors(func): print str(e) return catch -def set_proxy_server_factory(factory): - global proxy_server_factory - proxy_server_factory = factory - -class ProxyCmd(cmd2.Cmd): - - def __init__(self, *args, **kwargs): - self.alerts = [] - self.prompt = 'itsPappyTime> ' - self.debug = True - cmd2.Cmd.__init__(self, *args, **kwargs) - - def add_alert(self, alert): - self.alerts.append(alert) - - def postcmd(self, stop, line): - for l in self.alerts: - print '[!] 
', l - self.alerts = [] - return stop - - def help_view_request_headers(self): - print ("View information about request\n" - "Usage: view_request_info [u]" - "If 'u' is given as an additional argument, the unmangled version " - "of the request will be displayed.") - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_view_request_info(self, line): - args = shlex.split(line) - reqids = args[0] - - reqs = yield load_reqlist(reqids) - - for req in reqs: - print '' - print_request_extended(req) - print '' - - def help_view_request_headers(self): - print ("View the headers of the request\n" - "Usage: view_request_headers [u]" - "If 'u' is given as an additional argument, the unmangled version " - "of the request will be displayed.") - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_view_request_headers(self, line): - args = shlex.split(line) - reqid = args[0] - showid = reqid - - reqs = yield load_reqlist(reqid) - for req in reqs: - if len(reqs) > 1: - print 'Request %s:' % req.reqid - print '' - view_full_request(req, True) - if len(reqs) > 1: - print '-'*30 - - def help_view_full_request(self): - print ("View the full data of the request\n" - "Usage: view_full_request [u]\n" - "If 'u' is given as an additional argument, the unmangled version " - "of the request will be displayed.") - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_view_full_request(self, line): - args = shlex.split(line) - reqid = args[0] - showid = reqid - - reqs = yield load_reqlist(reqid) - for req in reqs: - if len(reqs) > 1: - print 'Request %s:' % req.reqid - print '' - view_full_request(req) - if len(reqs) > 1: - print '-'*30 - - def help_view_response_headers(self): - print ("View the headers of the response\n" - "Usage: view_response_headers ") - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_view_response_headers(self, line): - 
reqs = yield load_reqlist(line) - for req in reqs: - if req.response: - if len(reqs) > 1: - print '-'*15 + (' %s ' % req.reqid) + '-'*15 - view_full_response(req.response, True) - else: - print "Request %s does not have a response" % req.reqid - - def help_view_full_response(self): - print ("View the full data of the response associated with a request\n" - "Usage: view_full_response ") - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_view_full_response(self, line): - reqs = yield load_reqlist(line) - for req in reqs: - if req.response: - if len(reqs) > 1: - print '-'*15 + (' %s ' % req.reqid) + '-'*15 - view_full_response(req.response) - else: - print "Request %s does not have a response" % req.reqid - - def help_dump_response(self): - print ('Dump the data of the response to a file.\n' - 'Usage: dump_response ') - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_dump_response(self, line): - # dump the data of a response - args = shlex.split(line) - reqid = args[0] - showid = reqid - req = yield pappyproxy.http.Request.load_request(reqid) - rsp = req.response - if len(args) >= 2: - fname = args[1] - else: - fname = req.path.split('/')[-1] - - with open(fname, 'w') as f: - f.write(rsp.raw_data) - print 'Response data written to %s' % fname - - def help_list(self): - print ("List request/response pairs in the current context\n" - "Usage: list") - - @print_pappy_errors - def do_list(self, line): - args = shlex.split(line) - if len(args) > 0: - if args[0][0].lower() == 'a': - print_count = -1 - else: - try: - print_count = int(args[0]) - except: - print "Please enter a valid argument for list" - return - else: - print_count = 25 - - def key_reqtime(req): - if req.time_start is None: - return -1 - else: - return (req.time_start-datetime.datetime(1970,1,1)).total_seconds() - - to_print = list(pappyproxy.context.active_requests) - to_print = sorted(to_print, key=key_reqtime, reverse=True) 
- if print_count > 0: - to_print = to_print[:print_count] - print_requests(to_print) - - def help_site_map(self): - print ('Print the site map. Only includes requests in the current context.\n' - 'Usage: site_map') - - @print_pappy_errors - def do_site_map(self, line): - to_print = [r for r in pappyproxy.context.active_requests if not r.response or r.response.response_code != 404] - tree = get_site_map(to_print) - print_tree(tree) - - def help_filter(self): - print ("Apply a filter to the current context\n" - "Usage: filter \n" - "See README.md for information on filter strings") - - @print_pappy_errors - def do_filter(self, line): - if not line: - raise PappyException("Filter string required") - - filter_to_add = pappyproxy.context.Filter(line) - pappyproxy.context.add_filter(filter_to_add) - - def complete_builtin_filter(self, text, line, begidx, endidx): - all_names = pappyproxy.context.BuiltinFilters.list() - if not text: - ret = all_names[:] - else: - ret = [n for n in all_names if n.startswith(text)] - return ret - - @print_pappy_errors - def do_builtin_filter(self, line): - if not line: - raise PappyException("Filter name required") - - filters_to_add = pappyproxy.context.BuiltinFilters.get(line) - for f in filters_to_add: - print f.filter_string - pappyproxy.context.add_filter(f) - - def help_filter_up(self): - print ("Remove the last applied filter\n" - "Usage: filter_up") - - @print_pappy_errors - def do_filter_up(self, line): - pappyproxy.context.filter_up() - - def help_filter_clear(self): - print ("Reset the context so that it contains no filters (ignores scope)\n" - "Usage: filter_clear") - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_filter_clear(self, line): - pappyproxy.context.active_filters = [] - yield pappyproxy.context.reload_from_storage() - - def help_filter_list(self): - print ("Print the filters that make up the current context\n" - "Usage: filter_list") - - @print_pappy_errors - def 
do_filter_list(self, line): - for f in pappyproxy.context.active_filters: - print f.filter_string - - - def help_scope_save(self): - print ("Set the scope to be the current context. Saved between launches\n" - "Usage: scope_save") - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_scope_save(self, line): - pappyproxy.context.save_scope() - yield pappyproxy.context.store_scope(pappyproxy.http.dbpool) - - def help_scope_reset(self): - print ("Set the context to be the scope (view in-scope items)\n" - "Usage: scope_reset") - - @print_pappy_errors - def do_scope_reset(self, line): - pappyproxy.context.reset_to_scope() - - def help_scope_delete(self): - print ("Delete the scope so that it contains all request/response pairs\n" - "Usage: scope_delete") - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_scope_delete(self, line): - pappyproxy.context.set_scope([]) - yield pappyproxy.context.store_scope(pappyproxy.http.dbpool) - - def help_scope_list(self): - print ("Print the filters that make up the scope\n" - "Usage: scope_list") - - @print_pappy_errors - def do_scope_list(self, line): - pappyproxy.context.print_scope() - - def help_filter_prune(self): - print ('Delete all out of context requests from the data file. ' - 'CANNOT BE UNDONE!! Be careful!\n' - 'Usage: filter_prune') - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_filter_prune(self, line): - # Delete filtered items from datafile - print '' - print 'Currently active filters:' - for f in pappyproxy.context.active_filters: - print '> %s' % f.filter_string - - # We copy so that we're not removing items from a set we're iterating over - reqs = list(pappyproxy.context.inactive_requests) - act_reqs = list(pappyproxy.context.active_requests) - message = 'This will delete %d/%d requests. You can NOT undo this!! Continue?' 
% (len(reqs), (len(reqs) + len(act_reqs))) - if not confirm(message, 'n'): - defer.returnValue(None) - - for r in reqs: - yield r.deep_delete() - pappyproxy.context.remove_request(r) - print 'Deleted %d requests' % len(reqs) - defer.returnValue(None) - - def help_clrmem(self): - print ('Delete all in-memory only requests' - 'Usage: clrmem') - - def do_clrmem(self, line): - to_delete = list(pappyproxy.context.in_memory_requests) - for r in to_delete: - pappyproxy.context.remove_request(r) - - def help_repeater(self): - print ("Open a request in the repeater\n" - "Usage: repeater ") - - @print_pappy_errors - def do_repeater(self, line): - # This is not async on purpose. start_editor acts up if this is called - # with inline callbacks. As a result, check_reqid and get_unmangled - # cannot be async - args = shlex.split(line) - reqid = args[0] - - check_reqid(reqid) - start_editor(reqid) - - def help_intercept(self): - print ("Intercept requests and/or responses and edit them with before passing them along\n" - "Usage: intercept ") - - @print_pappy_errors - def do_intercept(self, line): - global edit_queue - global proxy_server_factory - args = shlex.split(line) - intercept_requests = False - intercept_responses = False - - req_names = ('req', 'request', 'requests') - rsp_names = ('rsp', 'response', 'responses') - - if any(a in req_names for a in args): - intercept_requests = True - if any(a in rsp_names for a in args): - intercept_responses = True - - if intercept_requests and intercept_responses: - intercept_str = 'Requests and responses' - elif intercept_requests: - intercept_str = 'Requests' - elif intercept_responses: - intercept_str = 'Responses' - else: - intercept_str = 'NOTHING' - - macro_file = os.path.join(pappyproxy.config.PAPPY_DIR, 'mangle.py') - mangle_macro = pappyproxy.macros.InterceptMacro(macro_file) - mangle_macro.intercept_requests = intercept_requests - mangle_macro.intercept_responses = intercept_responses - - 
pappyproxy.proxy.add_intercepting_macro('pappy_intercept', mangle_macro, - proxy_server_factory.intercepting_macros) - - ## Interceptor loop - stdscr = curses.initscr() - curses.noecho() - curses.cbreak() - - try: - editnext = False - stdscr.nodelay(True) - while True: - stdscr.addstr(0, 0, "Currently intercepting: %s" % intercept_str) - stdscr.clrtoeol() - stdscr.addstr(1, 0, "%d item(s) in queue." % len(edit_queue)) - stdscr.clrtoeol() - if editnext: - stdscr.addstr(2, 0, "Waiting for next item... Press 'q' to quit or 'b' to quit waiting") - else: - stdscr.addstr(2, 0, "Press 'n' to edit the next item or 'q' to quit interceptor.") - stdscr.clrtoeol() - - c = stdscr.getch() - if c == ord('q'): - break - elif c == ord('n'): - editnext = True - elif c == ord('b'): - editnext = False - - if editnext and edit_queue: - editnext = False - (to_edit, deferred) = edit_queue.pop(0) - editor = 'vi' - if 'EDITOR' in os.environ: - editor = os.environ['EDITOR'] - subprocess.call([editor, to_edit]) - stdscr.clear() - deferred.callback(None) - finally: - curses.nocbreak() - stdscr.keypad(0) - curses.echo() - curses.endwin() - try: - pappyproxy.proxy.remove_intercepting_macro('pappy_intercept', - proxy_server_factory.intercepting_macros) - except PappyException: - pass - # Send remaining requests along - while len(edit_queue) > 0: - (fname, deferred) = edit_queue.pop(0) - deferred.callback(None) - - def help_load_macros(self, line): - print ('Load macros from a directory. By default loads macros in the current directory.\n' - 'Usage: load_macros [dir]') - - @print_pappy_errors - def do_load_macros(self, line): - global macro_dict - global int_macro_dict - global loaded_macros - global loaded_int_macros - - if line: - load_dir = line - else: - load_dir = '.' 
- (to_load, int_to_load) = load_macros(load_dir) - if not to_load and not int_to_load: - raise PappyException('No macros to load.') - - macro_dict = {} - loaded_macros = [] - int_macro_dict = {} - loaded_int_macros = [] - - for macro in to_load: - if macro.name in macro_dict: - print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.name) - elif macro.short_name and macro.short_name in macro_dict: - print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.short_name) - elif macro.file_name in macro_dict: - print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.file_name) - else: - macro_dict[macro.name] = macro - macro_dict[macro.file_name] = macro - if macro.short_name: - macro_dict[macro.short_name] = macro - loaded_macros.append(macro) - print 'Loaded "%s"' % macro - - for macro in int_to_load: - if macro.name in int_macro_dict: - print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.name) - elif macro.short_name and macro.short_name in int_macro_dict: - print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.short_name) - elif macro.file_name in int_macro_dict: - print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.file_name) - else: - int_macro_dict[macro.name] = macro - int_macro_dict[macro.file_name] = macro - if macro.short_name: - int_macro_dict[macro.short_name] = macro - loaded_int_macros.append(macro) - print 'Loaded "%s"' % macro - - def help_run_macro(self): - print ('Run a macro\n' - 'Usage: run_macro ') - - @print_pappy_errors - def do_run_macro(self, line): - global macro_dict - global loaded_macros - args = shlex.split(line) - if not args: - raise PappyException('You must give a macro to run. 
You can give its short name, or the name in the filename.') - mname = args[0] - if mname not in macro_dict: - raise PappyException('%s not a loaded macro' % mname) - macro = macro_dict[mname] - macro.execute(args[1:]) - - def help_run_int_macro(self): - print ('Activate an intercepting macro\n' - 'Usage: run_int_macro \n' - 'Macro can be stopped with stop_int_macro') - - @print_pappy_errors - def do_run_int_macro(self, line): - global int_macro_dict - global loaded_int_macros - if not line: - raise PappyException('You must give an intercepting macro to run. You can give its short name, or the name in the filename.') - if line not in int_macro_dict: - raise PappyException('%s not a loaded intercepting macro' % line) - macro = int_macro_dict[line] - pappyproxy.proxy.add_intercepting_macro(macro.name, macro) - print '"%s" started' % macro.name - - def help_stop_int_macro(self): - print ('Stop a running intercepting macro\n' - 'Usage: stop_int_macro ') - - @print_pappy_errors - def do_stop_int_macro(self, line): - global int_macro_dict - global loaded_int_macros - if not line: - raise PappyException('You must give an intercepting macro to run. 
You can give its short name, or the name in the filename.') - if line not in int_macro_dict: - raise PappyException('%s not a loaded intercepting macro' % line) - macro = int_macro_dict[line] - pappyproxy.proxy.remove_intercepting_macro(macro.name) - print '"%s" stopped' % macro.name - - def help_list_int_macros(self): - print ('List all active/inactive intercepting macros') - - def do_list_int_macros(self, line): - global int_macro_dict - global loaded_int_macros - running = [] - not_running = [] - for macro in loaded_int_macros: - if macro.name in pappyproxy.proxy.intercepting_macros: - running.append(macro) - else: - not_running.append(macro) - - if not running and not not_running: - print 'No loaded intercepting macros' - - if running: - print 'Active intercepting macros:' - for m in running: - print ' %s' % m - - if not_running: - print 'Inactive intercepting macros:' - for m in not_running: - print ' %s' % m - - def do_help_generate_macro(self): - print ('Generate a macro script with request objects' - 'Usage: generate_macro , , ... ') - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_generate_macro(self, line): - if line == '': - raise PappyException('Macro name is required') - args = shlex.split(line) - name = args[0] - reqs = yield load_reqlist(args[1]) - script_str = macro_from_requests(reqs) - fname = 'macro_%s.py' % name - with open(fname, 'wc') as f: - f.write(script_str) - print 'Wrote script to %s' % fname - - def do_help_generate_macro(self): - print ('Generate a macro script with request objects\n' - 'Usage: generate_macro , , ... 
') - - def help_generate_int_macro(self): - print ('Generate an intercepting macro script\n' - 'Usage: generate_int_macro ') - - @print_pappy_errors - def do_generate_int_macro(self, line): - if line == '': - raise PappyException('Macro name is required') - args = shlex.split(line) - name = args[0] - script_str = gen_imacro() - fname = 'int_%s.py' % name - with open(fname, 'wc') as f: - f.write(script_str) - print 'Wrote script to %s' % fname - - def help_gencerts(self): - print ("Generate CA cert and private CA file\n" - "Usage: gencerts [/path/to/put/certs/in]") - - def help_rpy(self): - print ('Copy python object definitions of requests.\n' - 'Usage: rpy ') - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_rpy(self, line): - reqs = yield load_reqlist(line) - for req in reqs: - print pappyproxy.macros.req_obj_def(req) - - @print_pappy_errors - def do_inmem(self, line): - r = pappyproxy.http.Request() - r.status_line = 'GET /%s HTTP/1.1' % line - r.reqid = pappyproxy.context.get_memid() - pappyproxy.context.add_request(r) - - def help_tag(self): - print ('Add a tag to requests.\n' - 'Usage: tag \n' - 'You can tag as many requests as you want at the same time. 
If no' - ' ids are given, the tag will be applied to all in-context requests.') - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_tag(self, line): - args = shlex.split(line) - if len(args) == 0: - self.help_tag() - defer.returnValue(None) - tag = args[0] - - if len(args) > 1: - reqs = yield load_reqlist(args[1], False) - ids = [r.reqid for r in reqs] - print 'Tagging %s with %s' % (', '.join(ids), tag) - else: - print "Tagging all in-context requests with %s" % tag - reqs = list(pappyproxy.context.active_requests) - - for req in reqs: - if tag not in req.tags: - req.tags.append(tag) - if req.saved: - yield req.async_save() - pappyproxy.context.add_request(req) - else: - print 'Request %s already has tag %s' % (req.reqid, tag) - - def help_untag(self): - print ('Remove a tag from requests\n' - 'Usage: untag \n' - 'You can provide as many request ids as you want and the tag will' - ' be removed from all of them. If no ids are given, the tag will ' - 'be removed from all in-context requests.') - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_untag(self, line): - args = shlex.split(line) - if len(args) == 0: - self.help_untag() - defer.returnValue(None) - tag = args[0] - - ids = [] - if len(args) > 1: - reqs = yield load_reqlist(args[1], False) - ids = [r.reqid for r in reqs] - else: - print "Untagging all in-context requests with tag %s" % tag - reqs = list(pappyproxy.context.active_requests) - - for req in reqs: - if tag in req.tags: - req.tags.remove(tag) - if req.saved: - yield req.async_save() - if ids: - print 'Tag %s removed from %s' % (tag, ', '.join(ids)) - pappyproxy.context.filter_recheck() - - def help_clrtag(self): - print ('Clear all the tags from requests\n' - 'Usage: clrtag ') - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_clrtag(self, line): - args = shlex.split(line) - if len(args) == 0: - self.help_clrtag() - 
defer.returnValue(None) - reqs = yield load_reqlist(args[1], False) - - for req in reqs: - if req.tags: - req.tags = [] - print 'Tags cleared from request %s' % (req.reqid) - if req.saved: - yield req.async_save() - pappyproxy.context.filter_recheck() - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_save(self, line): - args = shlex.split(line) - if len(args) == 0: - self.help_save() - defer.returnValue(None) - reqs = yield load_reqlist(args) - for req in reqs: - if req.reqid[0] != 'm': - print '%s is already saved' % req.reqid - else: - oldid = req.reqid - try: - yield req.async_deep_save() - print '%s saved with id %s' % (oldid, req.reqid) - except PappyException as e: - print 'Unable to save %s: %s' % (oldid, e) - defer.returnValue(None) - - @print_pappy_errors - @crochet.wait_for(timeout=None) - @defer.inlineCallbacks - def do_export(self, line): - args = shlex.split(line) - if len(args) < 2: - self.help_export() - defer.returnValue(None) - - if args[0] not in ('req', 'rsp'): - raise PappyException('Request or response not specified') - - reqs = yield load_reqlist(args[1]) - for req in reqs: - try: - if args[0] == 'req': - fname = 'req_%s.txt'%req.reqid - with open(fname, 'w') as f: - f.write(req.full_request) - print 'Full request written to %s' % fname - elif args[0] == 'rsp': - fname = 'rsp_%s.txt'%req.reqid - with open(fname, 'w') as f: - f.write(req.full_response) - print 'Full response written to %s' % fname - except PappyException as e: - print 'Unable to export %s: %s' % (req.reqid, e) - - @print_pappy_errors - def do_gencerts(self, line): - dest_dir = line or pappyproxy.config.CERT_DIR - message = "This will overwrite any existing certs in %s. Are you sure?" 
% dest_dir - if not confirm(message, 'n'): - return False - print "Generating certs to %s" % dest_dir - pappyproxy.proxy.generate_ca_certs(dest_dir) - - def help_log(self): - print ("View the log\n" - "Usage: log [verbosity (default is 1)]\n" - "verbosity=1: Show connections as they're made/lost, some additional info\n" - "verbosity=3: Show full requests/responses as they are processed by the proxy") - - @print_pappy_errors - def do_log(self, line): +@defer.inlineCallbacks +def load_reqlist(line, allow_special=True): + """ + load_reqlist(line, allow_special=True) + A helper function for parsing a list of requests that are passed as an + argument. If ``allow_special`` is True, then it will parse IDs such as + ``u123`` or ``s123``. Even if allow_special is false, it will still parse + ``m##`` IDs. Will print any errors with loading any of the requests and + will return a list of all the requests which were successfully loaded. + Returns a deferred. + + :Returns: Twisted deferred + """ + from .http import Request + # Parses a comma separated list of ids and returns a list of those requests + # prints any errors + ids = re.split(',\s*', line) + reqs = [] + for reqid in ids: try: - verbosity = int(line.strip()) - except: - verbosity = 1 - pappyproxy.config.DEBUG_VERBOSITY = verbosity - raw_input() - pappyproxy.config.DEBUG_VERBOSITY = 0 - - ## Shortcut funcs - - def help_urld(self): - print "Url decode a string\nUsage: urld " - - @print_pappy_errors - def do_urld(self, line): - print urllib.unquote(line) - - def help_urle(self): - print "Url encode a string\nUsage: urle " - - @print_pappy_errors - def do_urle(self, line): - print urllib.quote_plus(line) - - @print_pappy_errors - def do_testerror(self, line): - raise PappyException("Test error") - - @print_pappy_errors - def do_EOF(self): - print "EOF" - return True - - ### ABBREVIATIONS - def help_ls(self): - self.help_list() - - @print_pappy_errors - def do_ls(self, line): - self.onecmd('list %s' % line) - - def 
help_sm(self): - self.help_list() - - @print_pappy_errors - def do_sm(self, line): - self.onecmd('site_map %s' % line) - - def help_sr(self): - self.help_scope_reset() - - @print_pappy_errors - def do_sr(self, line): - self.onecmd('scope_reset %s' % line) - - def help_sls(self): - self.help_scope_list() - - @print_pappy_errors - def do_sls(self, line): - self.onecmd('scope_list %s' % line) - - def help_viq(self): - self.help_view_request_info() - - @print_pappy_errors - def do_viq(self, line): - self.onecmd('view_request_info %s' % line) - - def help_vhq(self): - self.help_view_request_headers() - - @print_pappy_errors - def do_vhq(self, line): - self.onecmd('view_request_headers %s' % line) - - def help_vfq(self): - self.help_view_full_request() - - @print_pappy_errors - def do_vfq(self, line): - self.onecmd('view_full_request %s' % line) - - def help_vhs(self): - self.help_view_response_headers() - - @print_pappy_errors - def do_vhs(self, line): - self.onecmd('view_response_headers %s' % line) - - def help_vfs(self): - self.help_view_full_response() - - @print_pappy_errors - def do_vfs(self, line): - self.onecmd('view_full_response %s' % line) - - def help_fl(self): - self.help_filter() - - @print_pappy_errors - def do_fl(self, line): - self.onecmd('filter %s' % line) - - def help_f(self): - self.help_filter() - - @print_pappy_errors - def do_f(self, line): - self.onecmd('filter %s' % line) - - def help_fls(self): - self.help_filter_list() - - @print_pappy_errors - def do_fls(self, line): - self.onecmd('filter_list %s' % line) - - def help_fc(self): - self.help_filter_clear() - - @print_pappy_errors - def do_fc(self, line): - self.onecmd('filter_clear %s' % line) - - def help_fbi(self): - self.help_filter() - - def help_fu(self): - self.help_filter_up() - - @print_pappy_errors - def do_fu(self, line): - self.onecmd('filter_up %s' % line) - - def complete_fbi(self, *args, **kwargs): - return self.complete_builtin_filter(*args, **kwargs) - - @print_pappy_errors - 
def do_fbi(self, line): - self.onecmd('builtin_filter %s' % line) - - def help_rp(self): - self.help_repeater() - - @print_pappy_errors - def do_rp(self, line): - self.onecmd('repeater %s' % line) - - def help_ic(self): - self.help_intercept() - - @print_pappy_errors - def do_ic(self, line): - self.onecmd('intercept %s' % line) - - def help_rma(self): - self.help_run_macro() - - @print_pappy_errors - def do_rma(self, line): - self.onecmd('run_macro %s' % line) - - def help_rim(self): - self.help_run_int_macro() - - @print_pappy_errors - def do_rim(self, line): - self.onecmd('run_int_macro %s' % line) - - def help_sim(self): - self.help_stop_int_macro() - - @print_pappy_errors - def do_sim(self, line): - self.onecmd('stop_int_macro %s' % line) - - def help_lim(self): - self.help_list_int_macros() - - @print_pappy_errors - def do_lim(self, line): - self.onecmd('list_int_macros %s' % line) - - def help_lma(self): - self.help_load_macros() - - @print_pappy_errors - def do_lma(self, line): - self.onecmd('load_macros %s' % line) - - def help_gma(self, line): - self.help_generate_macro() - - @print_pappy_errors - def do_gma(self, line): - self.onecmd('generate_macro %s' % line) - - def help_gima(self, line): - self.help_generate_int_macro() - - @print_pappy_errors - def do_gima(self, line): - self.onecmd('generate_int_macro %s' % line) - - -def cmd_failure(cmd): - print "FAILURE" + req = yield Request.load_request(reqid, allow_special) + reqs.append(req) + except PappyException as e: + print e + defer.returnValue(reqs) -def edit_file(fname, front=False): - global edit_queue - # Adds the filename to the edit queue. Returns a deferred that is fired once - # the file is edited and the editor is closed - d = defer.Deferred() - if front: - edit_queue = [(fname, d)] + edit_queue - else: - edit_queue.append((fname, d)) - return d - def print_table(coldata, rows): - # Coldata: List of dicts with info on how to print the columns. 
- # name: heading to give column - # width: (optional) maximum width before truncating. 0 for unlimited - # Rows: List of tuples with the data to print + """ + Print a table. + Coldata: List of dicts with info on how to print the columns. + ``name`` is the heading to give column, + ``width (optional)`` maximum width before truncating. 0 for unlimited. + + Rows: List of tuples with the data to print + """ # Get the width of each column widths = [] @@ -1093,70 +104,11 @@ def print_table(coldata, rows): sys.stdout.write('\n') sys.stdout.flush() - -def printable_data(data): - chars = [] - for c in data: - if c in string.printable: - chars += c - else: - chars += '.' - return ''.join(chars) - -@crochet.wait_for(timeout=None) -@defer.inlineCallbacks -def get_unmangled(reqid): - # Used for the repeater command. Must not be async - req = yield pappyproxy.http.Request.load_request(reqid) - if req.unmangled: - defer.returnValue(req.unmangled.reqid) - else: - defer.returnValue(None) - -@crochet.wait_for(timeout=None) -@defer.inlineCallbacks -def check_reqid(reqid): - # Used for the repeater command. 
Must not be async - try: - yield pappyproxy.http.Request.load_request(reqid) - except: - raise PappyException('"%s" is not a valid request id' % reqid) - defer.returnValue(None) - -def view_full_request(request, headers_only=False): - if headers_only: - to_print = printable_data(request.raw_headers) - else: - to_print = printable_data(request.full_request) - to_print = pygments.highlight(to_print, HttpLexer(), TerminalFormatter()) - - print to_print - -def view_full_response(response, headers_only=False): - def check_type(response, against): - if 'Content-Type' in response.headers and against in response.headers['Content-Type']: - return True - return False - - if headers_only: - to_print = printable_data(response.raw_headers) - to_print = pygments.highlight(to_print, HttpLexer(), TerminalFormatter()) - print to_print - else: - headers = printable_data(response.raw_headers) - headers = pygments.highlight(headers, HttpLexer(), TerminalFormatter()) - print headers - to_print = printable_data(response.raw_data) - if 'Content-Type' in response.headers: - try: - lexer = get_lexer_for_mimetype(response.headers['Content-Type'].split(';')[0]) - to_print = pygments.highlight(to_print, lexer, TerminalFormatter()) - except ClassNotFound: - pass - - print to_print - def print_requests(requests): + """ + Takes in a list of requests and prints a table with data on each of the + requests. It's the same table that's used by ``ls``. + """ # Print a table with info on all the requests in the list cols = [ {'name':'ID'}, @@ -1178,7 +130,7 @@ def print_requests(requests): else: host = '??' 
path = request.full_path - reqlen = len(request.raw_data) + reqlen = len(request.body) rsplen = 'N/A' mangle_str = '--' @@ -1188,7 +140,7 @@ def print_requests(requests): if request.response: response_code = str(request.response.response_code) + \ ' ' + request.response.response_text - rsplen = len(request.response.raw_data) + rsplen = len(request.response.body) if request.response.unmangled: if mangle_str == '--': mangle_str = 's' @@ -1202,150 +154,16 @@ def print_requests(requests): time_delt = request.time_end - request.time_start time_str = "%.2f" % time_delt.total_seconds() - port = request.port - if request.is_ssl: - is_ssl = 'YES' - else: - is_ssl = 'NO' - rows.append([rid, method, host, path, response_code, reqlen, rsplen, time_str, mangle_str]) print_table(cols, rows) -def print_request_extended(request): - # Prints extended info for the request - title = "Request Info (reqid=%s)" % request.reqid - print title - print '-'*len(title) - reqlen = len(request.raw_data) - reqlen = '%d bytes' % reqlen - rsplen = 'No response' - - mangle_str = 'Nothing mangled' - if request.unmangled: - mangle_str = 'Request' - - if request.response: - response_code = str(request.response.response_code) + \ - ' ' + request.response.response_text - rsplen = len(request.response.raw_data) - rsplen = '%d bytes' % rsplen - - if request.response.unmangled: - if mangle_str == 'Nothing mangled': - mangle_str = 'Response' - else: - mangle_str += ' and Response' - else: - response_code = '' - - time_str = '--' - if request.time_start and request.time_end: - time_delt = request.time_end - request.time_start - time_str = "%.2f sec" % time_delt.total_seconds() - - port = request.port - if request.is_ssl: - is_ssl = 'YES' - else: - is_ssl = 'NO' - - if request.time_start: - time_made_str = request.time_start.strftime('%a, %b %d, %Y, %I:%M:%S %p') - else: - time_made_str = '--' - - print 'Made on %s' % time_made_str - print 'ID: %s' % request.reqid - print 'Verb: %s' % request.verb - print 
'Host: %s' % request.host - print 'Path: %s' % request.full_path - print 'Status Code: %s' % response_code - print 'Request Length: %s' % reqlen - print 'Response Length: %s' % rsplen - if request.response and request.response.unmangled: - print 'Unmangled Response Length: %s bytes' % len(request.response.unmangled.full_response) - print 'Time: %s' % time_str - print 'Port: %s' % request.port - print 'SSL: %s' % is_ssl - print 'Mangled: %s' % mangle_str - print 'Tags: %s' % (', '.join(request.tags)) - -@defer.inlineCallbacks -def load_reqlist(line, allow_special=True): - # Parses a comma separated list of ids and returns a list of those requests - # prints any errors - ids = re.split(',\s*', line) - reqs = [] - for reqid in ids: - try: - req = yield pappyproxy.http.Request.load_request(reqid, allow_special) - reqs.append(req) - except PappyException as e: - print e - defer.returnValue(reqs) - -def get_site_map(reqs): - # Takes in a list of requests and returns a tree representing the site map - paths_set = set() - for req in reqs: - paths_set.add(req.path_tuple) - paths = sorted(list(paths_set)) - return paths - -def print_tree(tree): - # Prints a tree. 
Takes in a sorted list of path tuples - _print_tree_helper(tree, 0, []) - -def _get_tree_prefix(depth, print_bars, last): - if depth == 0: - return u'' - else: - ret = u'' - pb = print_bars + [True] - for i in range(depth): - if pb[i]: - ret += u'\u2502 ' - else: - ret += u' ' - if last: - ret += u'\u2514\u2500\u2500 ' - else: - ret += u'\u251c\u2500\u2500 ' - return ret - -def _print_tree_helper(tree, depth, print_bars): - # Takes in a tree and prints it at the given depth - if tree == [] or tree == [()]: - return - while tree[0] == (): - tree = tree[1:] - if tree == [] or tree == [()]: - return - if len(tree) == 1 and len(tree[0]) == 1: - print _get_tree_prefix(depth, print_bars + [False], True) + tree[0][0] - return - - curkey = tree[0][0] - subtree = [] - for row in tree: - if row[0] != curkey: - if curkey == '': - curkey = '/' - print _get_tree_prefix(depth, print_bars, False) + curkey - if depth == 0: - _print_tree_helper(subtree, depth+1, print_bars + [False]) - else: - _print_tree_helper(subtree, depth+1, print_bars + [True]) - curkey = row[0] - subtree = [] - subtree.append(row[1:]) - if curkey == '': - curkey = '/' - print _get_tree_prefix(depth, print_bars, True) + curkey - _print_tree_helper(subtree, depth+1, print_bars + [False]) - def confirm(message, default='n'): + """ + A helper function to get confirmation from the user. It prints ``message`` + then asks the user to answer yes or no. Returns True if the user answers + yes, otherwise returns False. + """ if 'n' in default.lower(): default = False else: @@ -1366,3 +184,113 @@ def confirm(message, default='n'): else: return False +########## +## Classes + +class ProxyCmd(cmd2.Cmd): + """ + An object representing the console interface. Provides methods to add + commands and aliases to the console. 
+ """ + + def __init__(self, *args, **kwargs): + self.prompt = 'pappy> ' + self.debug = True + + self._cmds = {} + self._aliases = {} + cmd2.Cmd.__init__(self, *args, **kwargs) + + def __dir__(self): + # Hack to get cmd2 to detect that we can run a command + ret = set(dir(self.__class__)) + ret.update(self.__dict__.keys()) + ret.update(['do_'+k for k in self._cmds.keys()]) + ret.update(['help_'+k for k in self._cmds.keys()]) + ret.update(['complete_'+k for k, v in self._cmds.iteritems() if self._cmds[k][1]]) + for k, v in self._aliases.iteritems(): + ret.add('do_' + k) + ret.add('help_' + k) + if self._cmds[self._aliases[k]][1]: + ret.add('complete_'+k) + return sorted(ret) + + def __getattr__(self, attr): + def gen_helpfunc(func): + def f(): + if not func.__doc__: + to_print = 'No help exists for function' + lines = func.__doc__.splitlines() + if len(lines) > 0 and lines[0] == '': + lines = lines[1:] + if len(lines) > 0 and lines[-1] == '': + lines = lines[-1:] + to_print = '\n'.join(string.lstrip(l) for l in lines) + print to_print + return f + + if attr.startswith('do_'): + command = attr[3:] + if command in self._cmds: + return print_pappy_errors(self._cmds[command][0]) + elif command in self._aliases: + real_command = self._aliases[command] + if real_command in self._cmds: + return print_pappy_errors(self._cmds[real_command][0]) + elif attr.startswith('help_'): + command = attr[5:] + if command in self._cmds: + return gen_helpfunc(self._cmds[command][0]) + elif command in self._aliases: + real_command = self._aliases[command] + if real_command in self._cmds: + return gen_helpfunc(self._cmds[real_command][0]) + elif attr.startswith('complete_'): + command = attr[9:] + if command in self._cmds: + if self._cmds[command][1]: + return self._cmds[command][1] + elif command in self._aliases: + real_command = self._aliases[command] + if real_command in self._cmds: + if self._cmds[real_command][1]: + return self._cmds[real_command][1] + raise AttributeError(attr) + + 
def get_names(self): + # Hack to get cmd to recognize do_/etc functions as functions for things + # like autocomplete + return dir(self) + + def set_cmd(self, command, func, autocomplete_func=None): + """ + Add a command to the console. + """ + self._cmds[command] = (func, autocomplete_func) + + def set_cmds(self, cmd_dict): + """ + Set multiple commands from a dictionary. Format is: + {'command': (do_func, autocomplete_func)} + Use autocomplete_func=None for no autocomplete function + """ + for command, vals in cmd_dict.iteritems(): + do_func, ac_func = vals + self.set_cmd(command, do_func, ac_func) + + def add_alias(self, command, alias): + """ + Add an alias for a command. + ie add_alias("foo", "f") will let you run the 'foo' command with 'f' + """ + self._aliases[alias] = command + + def add_aliases(self, alias_list): + """ + Pass in a list of tuples to add them all as aliases. + ie add_aliases([('foo', 'f'), ('foo', 'fo')]) will add 'f' and 'fo' as + aliases for 'foo' + """ + for command, alias in alias_list: + self.add_alias(command, alias) + diff --git a/pappyproxy/context.py b/pappyproxy/context.py index 78ffab6..dfc3c47 100644 --- a/pappyproxy/context.py +++ b/pappyproxy/context.py @@ -1,11 +1,11 @@ -from pappyproxy import http -from twisted.internet import defer -from util import PappyException import crochet -import shlex -import datetime +import pappyproxy import re +import shlex +from . 
import http +from twisted.internet import defer +from util import PappyException """ context.py @@ -14,50 +14,146 @@ Functions and classes involved with managing the current context and filters """ scope = [] -base_filters = [] -active_filters = [] -active_requests = set() -inactive_requests = set() -all_reqs = set() -in_memory_requests = set() -next_in_mem_id = 1 - -class BuiltinFilters(object): - _filters = { - 'not_image': ( - ['path nctr "(\.png$|\.jpg$|\.gif$)"'], - 'Filter out image requests', - ), - 'not_jscss': ( - ['path nctr "(\.js$|\.css$)"'], - 'Filter out javascript and css files', - ), - } - - @staticmethod - def get(name): - if name not in BuiltinFilters._filters: - raise PappyException('%s not a bult in filter' % name) - if name in BuiltinFilters._filters: - return [Filter(f) for f in BuiltinFilters._filters[name][0]] +_BARE_COMPARERS = ('ex','nex') + +class Context(object): + """ + A class representing a set of requests that pass a set of filters + + :ivar active_filters: Filters that are currently applied to the context + :vartype active_filters: List of functions that takes one :class:`pappyproxy.http.Request` and returns either true or false. + :ivar active_requests: Requests which pass all the filters applied to the context + :type active_requests: Request + :ivar inactive_requests: Requests which do not pass all the filters applied to the context + :type inactive_requests: Request + """ + + all_reqs = set() + """ + Class variable! All requests in history. Do not directly add requests to this set. Instead, + use :func:`pappyproxy.context.Context.add_request` on some context. It will + automatically be added to this set. + """ + + in_memory_requests = set() + """ + Class variable! Requests that are only stored in memory. These are the requests with ``m##`` + style IDs. Do not directly add requests to this set. Instead, use + :func:`pappyproxy.context.Context.add_request` on some context with a request + that has not been saved. 
It will automatically be assigned a ``m##`` id and + be added to this set. + """ + + _next_in_mem_id = 1 + + def __init__(self): + self.active_filters = [] + self.active_requests = set() + self.inactive_requests = set() @staticmethod - def list(): - return [k for k, v in BuiltinFilters._filters.iteritems()] + def get_memid(): + i = 'm%d' % Context._next_in_mem_id + Context._next_in_mem_id += 1 + return i + + def filter_recheck(self): + self.inactive_requests = set() + self.active_requests = set() + for req in Context.all_reqs: + self.add_request(req) + + def add_filter(self, filt): + """ + Add a filter to the context. This will remove any requests that do not pass + the filter from the ``active_requests`` set. + + :param filt: The filter to add + :type filt: Function that takes one :class:`pappyproxy.http.Request` and returns either true or false. (or a :class:`pappyproxy.context.Filter`) + """ + self.active_filters.append(filt) + (new_active, deleted) = filter_reqs(self.active_requests, self.active_filters) + self.active_requests = set(new_active) + for r in deleted: + self.inactive_requests.add(r) + + def add_request(self, req): + """ + Adds a request to the context. If the request passes all of the context's + filters, it will be placed in the ``active_requests`` set. If it does not, + it will be placed in the ``inactive_requests`` set. Either way, it will + be added to ``all_reqs`` and if appropriate, ``in_memory_requests``. 
+ :param req: The request to add + :type req: Request + """ + # Check if we have to add it to in_memory + if not req.reqid: + req.reqid = Context.get_memid() + if req.reqid[0] == 'm': + Context.in_memory_requests.add(req) + + # Check if we have to add it to active_requests + if passes_filters(req, self.active_filters): + self.active_requests.add(req) + else: + self.inactive_requests.add(req) + + # Add it to all_reqs + Context.all_reqs.add(req) + @staticmethod - def help(name): - if name not in BuiltinFilters._filters: - raise PappyException('%s not a bult in filter' % name) - return Filter(BuiltinFilters._filters[name][1]) + def remove_request(req): + """ + Removes request from all contexts. It is suggested that you use + :func:`pappyproxy.http.Request.deep_delete` instead as this will + remove the request (and its unmangled version, response, and + unmangled response) from the data file as well. Otherwise it will + just be put back into the context when Pappy is restarted. + + :param req: The request to remove + :type req: Request + """ + if req in Context.all_reqs: + Context.all_reqs.remove(req) + if req in Context.in_memory_requests: + Context.in_memory_requests.remove(req) + + # Remove it from all other contexts + for c in pappyproxy.pappy.all_contexts: + if req in c.inactive_requests: + c.inactive_requests.remove(req) + if req in c.active_requests: + c.active_requests.remove(req) + + def filter_up(self): + """ + Removes the last filter that was applied to the context. + """ + # Deletes the last filter of the context + if self.active_filters: + self.active_filters = self.active_filters[:-1] + self.filter_recheck() + + def set_filters(self, filters): + """ + Set the list of filters for the context. + """ + self.active_filters = filters[:] + self.filter_recheck() + class FilterParseError(PappyException): pass class Filter(object): + """ + A class representing a filter. 
Its claim to fame is that you can use + :func:`pappyproxy.context.Filter.from_filter_string` to generate a + filter from a filter string. + """ def __init__(self, filter_string): - self.filter_func = self.from_filter_string(filter_string) self.filter_string = filter_string def __call__(self, *args, **kwargs): @@ -66,106 +162,69 @@ class Filter(object): def __repr__(self): return '' % self.filter_string + @defer.inlineCallbacks + def generate(self): + self.filter_func = yield self.from_filter_string(self.filter_string) + @staticmethod + @defer.inlineCallbacks def from_filter_string(filter_string): + """ + from_filter_string(filter_string) + + Create a filter from a filter string. + + :rtype: Deferred that returns a :class:`pappyproxy.context.Filter` + """ args = shlex.split(filter_string) + if len(args) == 0: + raise PappyException('Field is required') field = args[0] - relation = args[1] new_filter = None - negate = False - if relation[0] == 'n' and len(relation) > 1: - negate = True - relation = relation[1:] - - if len(args) > 2: - val1 = args[2] - elif relation not in ('ex',): - raise PappyException('%s requires a value' % relation) - else: - val1 = None - if len(args) > 3: - comp2 = args[3] - else: - comp2 = None - if len(args) > 4: - val2 = args[4] - else: - comp2 = None - - # Raises exception if invalid - comparer = get_relation(relation, val1) - + field_args = args[1:] if field in ("all",): - new_filter = gen_filter_by_all(comparer, val1, negate) + new_filter = gen_filter_by_all(field_args) elif field in ("host", "domain", "hs", "dm"): - new_filter = gen_filter_by_host(comparer, val1, negate) + new_filter = gen_filter_by_host(field_args) elif field in ("path", "pt"): - new_filter = gen_filter_by_path(comparer, val1, negate) + new_filter = gen_filter_by_path(field_args) elif field in ("body", "bd", "data", "dt"): - new_filter = gen_filter_by_body(comparer, val1, negate) + new_filter = gen_filter_by_body(field_args) elif field in ("verb", "vb"): - new_filter = 
gen_filter_by_verb(comparer, val1, negate) + new_filter = gen_filter_by_verb(field_args) elif field in ("param", "pm"): - if len(args) > 4: - comparer2 = get_relation(comp2, val2) - new_filter = gen_filter_by_params(comparer, val1, - comparer2, val2, negate) - else: - new_filter = gen_filter_by_params(comparer, val1, - negate=negate) + new_filter = gen_filter_by_params(field_args) elif field in ("header", "hd"): - if len(args) > 4: - comparer2 = get_relation(comp2, val2) - new_filter = gen_filter_by_headers(comparer, val1, - comparer2, val2, negate) - else: - new_filter = gen_filter_by_headers(comparer, val1, - negate=negate) + new_filter = gen_filter_by_headers(field_args) elif field in ("rawheaders", "rh"): - new_filter = gen_filter_by_raw_headers(comparer, val1, negate) + new_filter = gen_filter_by_raw_headers(field_args) elif field in ("sentcookie", "sck"): - if len(args) > 4: - comparer2 = get_relation(comp2, val2) - new_filter = gen_filter_by_submitted_cookies(comparer, val1, - comparer2, val2, negate) - else: - new_filter = gen_filter_by_submitted_cookies(comparer, val1, - negate=negate) + new_filter = gen_filter_by_submitted_cookies(field_args) elif field in ("setcookie", "stck"): - if len(args) > 4: - comparer2 = get_relation(comp2, val2) - new_filter = gen_filter_by_set_cookies(comparer, val1, - comparer2, val2, negate) - else: - new_filter = gen_filter_by_set_cookies(comparer, val1, - negate=negate) + new_filter = gen_filter_by_set_cookies(field_args) elif field in ("statuscode", "sc", "responsecode"): - new_filter = gen_filter_by_response_code(comparer, val1, negate) + new_filter = gen_filter_by_response_code(field_args) elif field in ("responsetime", "rt"): - pass + raise PappyException('Not implemented yet, sorry!') elif field in ("tag", "tg"): - new_filter = gen_filter_by_tag(comparer, val1, negate) + new_filter = gen_filter_by_tag(field_args) elif field in ("saved", "svd"): - new_filter = gen_filter_by_saved(comparer, val1, negate) + new_filter = 
gen_filter_by_saved(field_args) + elif field in ("before", "b4", "bf"): + new_filter = yield gen_filter_by_before(field_args) + elif field in ("after", "af"): + new_filter = yield gen_filter_by_after(field_args) else: raise FilterParseError("%s is not a valid field" % field) - if new_filter is not None: - return new_filter - else: + if new_filter is None: raise FilterParseError("Error creating filter") - -def filter_reqs(requests, filters): - to_delete = set() - # Could definitely be more efficient, but it stays like this until - # it impacts performance - for req in requests: - for filt in filters: - if not filt(req): - to_delete.add(req) - requests = [r for r in requests if r not in to_delete] - return (requests, list(to_delete)) + # dirty hack to get it to work if we don't generate any deferreds + # d = defer.Deferred() + # d.callback(None) + # yield d + defer.returnValue(new_filter) def cmp_is(a, b): return str(a) == str(b) @@ -202,364 +261,297 @@ def cmp_containsr(a, b): except re.error as e: raise PappyException('Invalid regexp: %s' % e) +def relation_from_text(s, val=''): + # Gets the relation function associated with the string + # Returns none if not found + + def negate_func(func): + def f(*args, **kwargs): + return not func(*args, **kwargs) + return f + + negate = False + if s[0] == 'n': + negate = True + s = s[1:] -def gen_filter_by_attr(comparer, val, attr, negate=False): + if s in ("is",): + retfunc = cmp_is + elif s in ("contains", "ct"): + retfunc = cmp_contains + elif s in ("containsr", "ctr"): + validate_regexp(val) + retfunc = cmp_containsr + elif s in ("exists", "ex"): + retfunc = cmp_exists + elif s in ("Leq",): + retfunc = cmp_len_eq + elif s in ("Lgt",): + retfunc = cmp_len_gt + elif s in ("Llt",): + retfunc = cmp_len_lt + elif s in ("eq",): + retfunc = cmp_eq + elif s in ("gt",): + retfunc = cmp_gt + elif s in ("lt",): + retfunc = cmp_lt + else: + raise FilterParseError("Invalid relation: %s" % s) + + if negate: + return negate_func(retfunc) 
+ else: + return retfunc + +def compval_from_args(args): """ NOINDEX - Filters by an attribute whose name is shared by the request and response - objects + returns a function that compares to a value from text. + ie compval_from_text('ct foo') will return a function that returns true + if the passed in string contains foo. """ - def f(req): - req_match = comparer(getattr(req, attr), val) - if req.response: - rsp_match = comparer(getattr(req.response, attr), val) - else: - rsp_match = False + if len(args) == 0: + raise PappyException('Invalid number of arguments') + if args[0] in _BARE_COMPARERS: + if len(args) != 1: + raise PappyException('Invalid number of arguments') + comparer = relation_from_text(args[0], None) + value = None + else: + if len(args) != 2: + raise PappyException('Invalid number of arguments') + comparer = relation_from_text(args[0], args[1]) + value = args[1] - result = req_match or rsp_match - if negate: - return not result - else: - return result + def retfunc(s): + return comparer(s, value) - return f + return retfunc -def gen_filter_by_all(comparer, val, negate=False): - def f(req): - req_match = comparer(req.full_request, val) - if req.response: - rsp_match = comparer(req.response.full_response, val) +def compval_from_args_repdict(args): + """ + NOINDEX + Similar to compval_from_args but checks a repeatable dict with up to 2 + comparers and values. 
+ """ + if len(args) == 0: + raise PappyException('Invalid number of arguments') + nextargs = args[:] + value = None + if args[0] in _BARE_COMPARERS: + comparer = relation_from_text(args[0], None) + if len(args) > 1: + nextargs = args[1:] + else: + if len(args) == 1: + raise PappyException('Invalid number of arguments') + comparer = relation_from_text(args[0], args[1]) + value = args[1] + nextargs = args[2:] + + comparer2 = None + value2 = None + if nextargs: + if nextargs[0] in _BARE_COMPARERS: + comparer2 = relation_from_text(nextargs[0], None) else: - rsp_match = False + if len(nextargs) == 1: + raise PappyException('Invalid number of arguments') + comparer2 = relation_from_text(nextargs[0], nextargs[1]) + value2 = nextargs[1] + + def retfunc(d): + for k, v in d.all_pairs(): + if comparer2 is None: + if comparer(k, value) or comparer(v, value): + return True + else: + if comparer(k, value) and comparer2(v, value2): + return True + return False - result = req_match or rsp_match - if negate: - return not result + return retfunc + +def gen_filter_by_all(args): + compval_from_args(args) # try and throw an error + def f(req): + compval = compval_from_args(args) + if args[0][0] == 'n': + return compval(req.full_message) and (not req.response or compval(req.response.full_message)) else: - return result + return compval(req.full_message) or (req.response and compval(req.response.full_message)) + return f +def gen_filter_by_host(args): + compval_from_args(args) # try and throw an error + def f(req): + compval = compval_from_args(args) + return compval(req.host) return f -def gen_filter_by_host(comparer, val, negate=False): +def gen_filter_by_body(args): + compval_from_args(args) # try and throw an error def f(req): - result = comparer(req.host, val) - if negate: - return not result + compval = compval_from_args(args) + if args[0][0] == 'n': + return compval(req.body) and (not req.response or compval(req.response.body)) else: - return result - + return compval(req.body) 
or (req.response and compval(req.response.body)) return f -def gen_filter_by_body(comparer, val, negate=False): - return gen_filter_by_attr(comparer, val, 'raw_data', negate=negate) - -def gen_filter_by_raw_headers(comparer, val, negate=False): - return gen_filter_by_attr(comparer, val, 'raw_headers', negate=negate) - -def gen_filter_by_response_code(comparer, val, negate=False): +def gen_filter_by_raw_headers(args): + compval_from_args(args) # try and throw an error def f(req): - if req.response: - result = comparer(req.response.response_code, val) - else: - result = False - if negate: - return not result + compval = compval_from_args(args) + if args[0][0] == 'n': + return compval(req.headers_section) and (not req.response or compval(req.response.headers_section)) else: - return result + return compval(req.headers_section) or (req.response and compval(req.response.headers_section)) + return f +def gen_filter_by_response_code(args): + compval_from_args(args) # try and throw an error + def f(req): + if not req.response: + return False + compval = compval_from_args(args) + return compval(req.response.response_code) return f -def gen_filter_by_path(comparer, val, negate=False): +def gen_filter_by_path(args): + compval_from_args(args) def f(req): - result = comparer(req.full_path, val) - if negate: - return not result - else: - return result - + compval = compval_from_args(args) + return compval(req.path) return f -def gen_filter_by_responsetime(comparer, val, negate=False): +def gen_filter_by_responsetime(args): + compval_from_args(args) def f(req): - result = comparer(req.rsptime, val) - if negate: - return not result - else: - return result - + compval = compval_from_args(args) + return compval(req.rsptime) return f -def gen_filter_by_verb(comparer, val, negate=False): +def gen_filter_by_verb(args): + compval_from_args(args) def f(req): - result = comparer(req.verb, val) - if negate: - return not result - else: - return result - + compval = compval_from_args(args) + 
return compval(req.verb) return f -def gen_filter_by_tag(comparer, val, negate=False): +def gen_filter_by_tag(args): + compval_from_args(args) def f(req): - result = False + compval = compval_from_args(args) for tag in req.tags: - if comparer(tag, val): - result = True - break - if negate: - return not result - else: - return result - + if compval(tag): + return True + return False return f -def gen_filter_by_saved(comparer, val, negate=False): +def gen_filter_by_saved(args): + if len(args) != 0: + raise PappyException('Invalid number of arguments') def f(req): - result = False if req.saved: - result = comparer('true', val) - else: - result = comparer('false', val) - if negate: - return not result + return True else: - return result - + return False return f +@defer.inlineCallbacks +def gen_filter_by_before(args): + if len(args) != 1: + raise PappyException('Invalid number of arguments') + r = yield http.Request.load_request(args[0]) + def f(req): + if req.time_start is None: + return False + if r.time_start is None: + return False + return req.time_start <= r.time_start + defer.returnValue(f) -def check_repeatable_dict(d, comparer1, val1, comparer2=None, val2=None, negate=False): - result = False - for k, v in d.all_pairs(): - if comparer2: - key_matches = comparer1(k, val1) - val_matches = comparer2(v, val2) - if key_matches and val_matches: - result = True - break - else: - # We check if the first value matches either - key_matches = comparer1(k, val1) - val_matches = comparer1(v, val1) - if key_matches or val_matches: - result = True - break - if negate: - return not result - else: - return result +@defer.inlineCallbacks +def gen_filter_by_after(reqid, negate=False): + if len(args) != 1: + raise PappyException('Invalid number of arguments') + r = yield http.Request.load_request(args[0]) + def f(req): + if req.time_start is None: + return False + if r.time_start is None: + return False + return req.time_start >= r.time_start + defer.returnValue(f) -def 
gen_filter_by_repeatable_dict_attr(attr, keycomparer, keyval, valcomparer=None, - valval=None, negate=False, check_req=True, - check_rsp=True): +def gen_filter_by_headers(args): + comparer = compval_from_args_repdict(args) def f(req): - matched = False - d = getattr(req, attr) - if check_req and check_repeatable_dict(d, keycomparer, keyval, valcomparer, valval): - matched = True - if check_rsp and req.response: - d = getattr(req.response, attr) - if check_repeatable_dict(d, keycomparer, keyval, valcomparer, valval): - matched = True - if negate: - return not matched + if args[0][0] == 'n': + return comparer(req.headers) and (not req.response or comparer(req.response.headers)) else: - return matched - + return comparer(req.headers) and (req.response and comparer(req.response.headers)) return f -def gen_filter_by_headers(keycomparer, keyval, valcomparer=None, valval=None, - negate=False): - return gen_filter_by_repeatable_dict_attr('headers', keycomparer, keyval, - valcomparer, valval, negate=negate) - -def gen_filter_by_submitted_cookies(keycomparer, keyval, valcomparer=None, - valval=None, negate=False): - return gen_filter_by_repeatable_dict_attr('cookies', keycomparer, keyval, - valcomparer, valval, negate=negate, - check_rsp=False) - -def gen_filter_by_set_cookies(keycomparer, keyval, valcomparer=None, - valval=None, negate=False): +def gen_filter_by_submitted_cookies(args): + comparer = compval_from_args_repdict(args) + def f(req): + return comparer(req.cookies) + return f + +def gen_filter_by_set_cookies(args): + comparer = compval_from_args_repdict(args) def f(req): if not req.response: return False - - for k, c in req.response.cookies.all_pairs(): - if keycomparer(c.key, keyval): - if not valcomparer: - return True - else: - if valcomparer(c.val, valval): - return True - - return False - + checkdict = http.RepeatableDict() + for k, v in req.response.cookies.all_pairs(): + checkdict[k] = v.cookie_str + return comparer(checkdict) return f -def 
gen_filter_by_url_params(keycomparer, keyval, valcomparer=None, valval=None, - negate=False): +def gen_filter_by_url_params(args): + comparer = compval_from_args_repdict(args) def f(req): - matched = False - for k, v in req.url_params.all_pairs(): - if keycomparer(k, keyval): - if not valcomparer: - matched = True - else: - if valcomparer(v, valval): - matched = True - if negate: - return not matched - else: - return matched - + return comparer(req.url_params) return f -def gen_filter_by_post_params(keycomparer, keyval, valcomparer=None, valval=None, - negate=False): +def gen_filter_by_post_params(args): + comparer = compval_from_args_repdict(args) def f(req): - matched = False - for k, v in req.post_params.all_pairs(): - if keycomparer(k, keyval): - if not valcomparer: - matched = True - else: - if valcomparer(v, valval): - matched = True - if negate: - return not matched - else: - return matched - - + return comparer(req.post_params) return f -def gen_filter_by_params(keycomparer, keyval, valcomparer=None, valval=None, - negate=False): +def gen_filter_by_params(args): + comparer = compval_from_args_repdict(args) def f(req): - matched = False - # purposely don't pass negate here, otherwise we get double negatives - f1 = gen_filter_by_post_params(keycomparer, keyval, valcomparer, valval) - f2 = gen_filter_by_url_params(keycomparer, keyval, valcomparer, valval) - if f1(req): - matched = True - if f2(req): - matched = True - - if negate: - return not matched - else: - return matched - + return comparer(req.url_params) or comparer(req.post_params) return f -def get_relation(s, val): - # Gets the relation function associated with the string - # Returns none if not found - if s in ("is",): - return cmp_is - elif s in ("contains", "ct"): - return cmp_contains - elif s in ("containsr", "ctr"): - validate_regexp(val) - return cmp_containsr - elif s in ("exists", "ex"): - return cmp_exists - elif s in ("Leq",): - return cmp_len_eq - elif s in ("Lgt",): - return cmp_len_gt - 
elif s in ("Llt",): - return cmp_len_lt - elif s in ("eq",): - return cmp_eq - elif s in ("gt",): - return cmp_gt - elif s in ("lt",): - return cmp_lt - - raise FilterParseError("Invalid relation: %s" % s) - @defer.inlineCallbacks def init(): yield reload_from_storage() +def filter_reqs(requests, filters): + to_delete = set() + # Could definitely be more efficient, but it stays like this until + # it impacts performance + for req in requests: + for filt in filters: + if not filt(req): + to_delete.add(req) + retreqs = [r for r in requests if r not in to_delete] + return (retreqs, list(to_delete)) + @defer.inlineCallbacks def reload_from_storage(): - global active_requests - global all_reqs - active_requests = set() - inactive_requests = set() - all_reqs = set() + Context.all_reqs = set() reqs = yield http.Request.load_all_requests() for req in reqs: - add_request(req) + Context.all_reqs.add(req) -def update_active_requests(): - global active_requests - global all_reqs - - inactive_requests = set() - active_requests = set() - for req in all_reqs: - add_request(req) - -def add_filter(filt): - global active_requests - global active_filters - active_filters.append(filt) - (active_requests, deleted) = filter_reqs(active_requests, active_filters) - for r in deleted: - inactive_requests.add(r) - -def add_request(req): - global active_requests - global active_filters - global in_memory_requests - global all_reqs - - # Check if we have to add it to in_memory - if not req.reqid: - req.reqid = get_memid() - if req.reqid[0] == 'm': - in_memory_requests.add(req) - - # Check if we have to add it to active_requests - if passes_filters(req, active_filters): - active_requests.add(req) - else: - inactive_requests.add(req) - - # Add it to all_reqs - all_reqs.add(req) - -def remove_request(req): - global in_memory_requests - global inactive_requests - global active_requests - global all_reqs - - if req in in_memory_requests: - in_memory_requests.remove(req) - if req in 
inactive_requests: - inactive_requests.remove(req) - if req in active_requests: - active_requests.remove(req) - if req in all_reqs: - all_reqs.remove(req) - -def filter_recheck(): - global active_requests - global inactive_requests - global all_reqs - active_requests = set() - inactive_requests = set() - for req in all_reqs: - if passes_filters(req, active_filters): - active_requests.add(req) - else: - inactive_requests.add(req) - def passes_filters(request, filters): for filt in filters: if not filt(request): @@ -574,16 +566,14 @@ def set_scope(filters): global scope scope = filters -def save_scope(): - global active_filters +def save_scope(context): global scope - scope = active_filters[:] + scope = context.active_filters[:] -def reset_to_scope(): +def reset_to_scope(context): global scope - global active_filters - active_filters = scope[:] - update_active_requests() + context.active_filters = scope[:] + context.filter_recheck() def print_scope(): global scope @@ -622,15 +612,10 @@ def load_scope(dbpool): new_scope = [] for row in rows: new_filter = Filter(row[1]) + yield new_filter.generate() new_scope.append(new_filter) scope = new_scope -def get_memid(): - global next_in_mem_id - i = 'm%d' % next_in_mem_id - next_in_mem_id += 1 - return i - @defer.inlineCallbacks def clear_tag(tag): # Remove a tag from every request @@ -646,7 +631,12 @@ def async_set_tag(tag, reqs): """ async_set_tag(tag, reqs) Remove the tag from every request then add the given requests to memory and - give them the tag. + give them the tag. The async version. + + :param tag: The tag to set + :type tag: String + :param reqs: The requests to assign to the tag + :type reqs: List of Requests """ yield clear_tag(tag) for req in reqs: @@ -658,6 +648,16 @@ def async_set_tag(tag, reqs): @crochet.wait_for(timeout=180.0) @defer.inlineCallbacks def set_tag(tag, reqs): + """ + set_tag(tag, reqs) + Remove the tag from every request then add the given requests to memory and + give them the tag. 
The non-async version. + + :param tag: The tag to set + :type tag: String + :param reqs: The requests to assign to the tag + :type reqs: List of Requests + """ yield async_set_tag(tag, reqs) def validate_regexp(r): @@ -666,9 +666,8 @@ def validate_regexp(r): except re.error as e: raise PappyException('Invalid regexp: %s' % e) -def filter_up(): - # Deletes the last filter of the context - global active_filters - if active_filters: - active_filters = active_filters[:-1] - filter_recheck() +def add_request_to_contexts(req): + import pappyproxy.pappy + for c in pappyproxy.pappy.all_contexts: + c.add_request(req) + diff --git a/pappyproxy/http.py b/pappyproxy/http.py index b90edac..15c2fe7 100644 --- a/pappyproxy/http.py +++ b/pappyproxy/http.py @@ -1,17 +1,19 @@ +import StringIO import base64 -import collections +import bs4 import crochet import datetime import gzip import json -import pappyproxy +import pygments import re -import StringIO import urlparse import zlib + +from .util import PappyException, printable_data +from pygments.formatters import TerminalFormatter +from pygments.lexers import get_lexer_for_mimetype, HttpLexer from twisted.internet import defer, reactor -from pappyproxy.util import PappyException -import bs4 ENCODE_NONE = 0 ENCODE_DEFLATE = 1 @@ -34,7 +36,7 @@ def init(pool): def destruct(): assert(dbpool) dbpool.close() - + def _decode_encoded(data, encoding): if encoding == ENCODE_NONE: return data @@ -79,7 +81,7 @@ def get_request(url='', url_params={}): given url params. """ r = Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' r.url = url r.headers['Host'] = r.host if url_params: @@ -94,7 +96,7 @@ def post_request(url, post_params={}, url_params={}): given post and url params. 
""" r = Request() - r.status_line = 'POST / HTTP/1.1' + r.start_line = 'POST / HTTP/1.1' r.url = url r.headers['Host'] = r.host if url_params: @@ -114,6 +116,12 @@ def repeatable_parse_qs(s): ret_dict.append(pair, None) return ret_dict +@crochet.wait_for(timeout=180.0) +@defer.inlineCallbacks +def request_by_id(reqid): + req = Request.load_request(str(reqid)) + defer.returnValue(req) + ########## ## Classes @@ -179,9 +187,16 @@ class RepeatableDict: self._keys.remove(self._ef_key(key)) def all_pairs(self): + """ + A list of all the key/value pairs stored in the dictionary + """ return self._pairs[:] def append(self, key, val, do_callback=True): + """ + append(key, val) + Append a pair to the end of the dictionary. Will add a duplicate if the key already exists. + """ # Add a duplicate entry for key self._add_key(key) self._pairs.append((key, val)) @@ -189,6 +204,16 @@ class RepeatableDict: self._mod_callback() def set_val(self, key, val, do_callback=True): + """ + set_val(key, val) + Set a value in the dictionary. Will replace the first instance of the + key with the value. If multiple values of the keys are already in the + dictionary, the duplicates of the key will be removed and the first instance + of the key will be replaced with the value. If the dictionary is case + insensitive, it will maintain the original capitalization. This is the same + behavior as assigning a value via ``d[key] = val``. If the key is not + present, it will be added to the end of the dict. 
+ """ new_pairs = [] added = False self._add_key(key) @@ -218,27 +243,55 @@ class RepeatableDict: self.set_val(key, val, do_callback=do_callback) def clear(self, do_callback=True): + """ + clear() + Remove all key/value pairs from the dictionary + """ self._pairs = [] if do_callback: self._mod_callback() def all_vals(self, key): + """ + all_vals(key) + Return all the values associated with a given key + """ return [p[1] for p in self._pairs if self._ef_key(p[0]) == self._ef_key(key)] def add_pairs(self, pairs, do_callback=True): + """ + add_pairs(pairs) + Add a list of pairs to the dictionary. + + :param pairs: The list of key/value pairs to add + :type pairs: List of tuples of length 2 + """ for pair in pairs: self._add_key(pair[0]) self._pairs += pairs if do_callback: self._mod_callback() - def from_dict(self, d): + def from_dict(self, d, do_callback=True): + """ + from_dict(d) + Set the RepeatableDict to contain the same items as a normal dictionary. + + :param d: The dictionary to use + :type d: dict + """ self._pairs = list(d.items()) - self._mod_callback() + if do_callback: + self._mod_callback() def sort(self): + """ + sort() + Sort the dictionary by the key. 
Requires that all keys can be compared + to each other + """ # Sorts pairs by key alphabetaclly - pairs = sorted(pairs, key=lambda x: x[0]) + self._pairs = sorted(self._pairs, key=lambda x: x[0]) def set_modify_callback(self, callback): # Add a function to be called whenever an element is added, changed, or @@ -248,7 +301,7 @@ class RepeatableDict: class LengthData: def __init__(self, length=None): - self.raw_data = '' + self.body = '' self.complete = False self.length = length or 0 @@ -258,18 +311,18 @@ class LengthData: def add_data(self, data): if self.complete: raise PappyException("Data already complete!") - remaining_length = self.length-len(self.raw_data) + remaining_length = self.length-len(self.body) if len(data) >= remaining_length: - self.raw_data += data[:remaining_length] - assert(len(self.raw_data) == self.length) + self.body += data[:remaining_length] + assert(len(self.body) == self.length) self.complete = True else: - self.raw_data += data + self.body += data class ChunkedData: def __init__(self): - self.raw_data = '' + self.body = '' self._pos = 0 self._state = 0 # 0=reading length, 1=reading data, 2=going over known string self._len_str = '' @@ -277,12 +330,13 @@ class ChunkedData: self._known_str = '' self._known_str_pos = 0 self._next_state = 0 - self._raw_data = '' + self._body = [] self.complete = False - self.unchunked_data = '' + self.unchunked_data = [] def add_data(self, data): - self._raw_data += data + for c in data: + self._body.append(c) self.scan_forward() def scan_forward(self): @@ -290,8 +344,8 @@ class ChunkedData: if self.complete: return - while self._pos < len(self._raw_data): - curchar = self._raw_data[self._pos] + while self._pos < len(self._body): + curchar = self._body[self._pos] if self._state == 0: if curchar.lower() in '0123456789abcdef': # Read the next char of the length @@ -306,12 +360,7 @@ class ChunkedData: # If the length is 0, chunked encoding is done! 
if self._chunk_remaining == 0: self.complete = True - # I should probably just rename raw_data since it's what - # you use to look at unchunked data, but you're not - # supposed to look at it until after it's complete - # anyways - self._raw_data = self.unchunked_data - self.raw_data = self._raw_data # Expose raw_data + self.body = ''.join(self.unchunked_data) return # There should be a newline after the \r @@ -330,7 +379,7 @@ class ChunkedData: elif self._state == 1: if self._chunk_remaining > 0: # Read next byte of data - self.unchunked_data += curchar + self.unchunked_data.append(curchar) self._chunk_remaining -= 1 self._pos += 1 else: @@ -358,6 +407,23 @@ class ChunkedData: class ResponseCookie(object): """ A cookie representing a cookie set by a response + + :ivar key: The key of the cookie + :type key: string + :ivar val: The value of the cookie + :type val: string + :ivar expires: The value of the "expires" attribute + :type expires: string + :ivar max_age: The max age of the cookie + :type max_age: int + :ivar domain: The domain of the cookie + :type domain: string + :ivar path: The path of the cookie + :type path: string + :ivar secure: The secure flag of the cookie + :type secure: Bool + :ivar http_only: The httponly flag of the cookie + :type http_only: Bool """ def __init__(self, set_cookie_string=None): @@ -444,30 +510,401 @@ class ResponseCookie(object): else: self.key, self.val = set_cookie_string.split('=',1) +class HTTPMessage(object): + """ + A base class which represents an HTTP message. 
It is used to implement + both requests and responses + + :ivar complete: When loading data with + :func:`~pappyproxy.http.HTTPMessage.add_line` and + :func:`~pappyproxy.http.HTTPMessage.add_data`, returns whether the message + is complete + :vartype complete: bool + :ivar headers: Headers of the message + :vartype complete: RepeatableDict + :ivar headers_complete: When creating the message with + :func:`~pappyproxy.http.HTTPMessage.add_line` and + :func:`~pappyproxy.http.HTTPMessage.add_data`, returns whether the headers + are complete + :ivar start_line: The start line of the message + :vartype start_line: string + """ + reserved_meta_keys = ['full_message'] + + def __init__(self, full_message=None, update_content_length=False): + self.complete = False + self.headers = RepeatableDict(case_insensitive=True) + self.headers_complete = False + self.malformed = False + self.start_line = '' + self.reset_metadata() + self._decoded = False + + self._encoding_type = ENCODE_NONE + self._first_line = True + self._data_obj = None + self._end_after_headers = False + + #self._set_dict_callbacks() + + if full_message is not None: + self._from_full_message(full_message, update_content_length) + + def __eq__(self, other): + # TODO check meta + if self.full_message != other.full_message: + return False + if self.get_metadata() != other.get_metadata(): + return False + return True + + def __copy__(self): + if not self.complete: + raise PappyException("Cannot copy incomplete http messages") + retmsg = self.__class__(self.full_message) + retmsg.set_metadata(self.get_metadata()) + return retmsg + + def __deepcopy__(self): + return self.__copy__() + + def copy(self): + """ + Returns a copy of the request + + :rtype: Request + """ + return self.__copy__() + + def _from_full_message(self, full_message, update_content_length=False, meta=None): + # Set defaults for metadata + self.reset_metadata() + # Get rid of leading CRLF. 
Not in spec, should remove eventually + full_message = _strip_leading_newlines(full_message) + if full_message == '': + return + + remaining = full_message + while remaining and not self.headers_complete: + line, remaining = _consume_line(remaining) + self.add_line(line) + + if not self.headers_complete: + self.add_line('') + + if meta: + self.set_metadata(meta) + + # We keep track of encoding here since if it's encoded, after + # we call add_data it will update content-length automatically + # and we won't have to update the content-length manually + if not self.complete: + # We do add data since just setting the body will keep the + # object from decoding chunked/compressed messages + self.add_data(remaining) + if update_content_length and (not self._decoded): + self.body = remaining + assert(self.complete) + + ############################### + ## Properties/attribute setters + + @property + def headers_section(self): + """ + The raw text of the headers including the extra newline at the end. + + :getter: Returns the raw text of the headers including the extra newline at the end. + :type: string + """ + ret = '' + if self.start_line: + ret = self.start_line + '\r\n' + for k, v in self.headers.all_pairs(): + ret = ret + "%s: %s\r\n" % (k, v) + if ret: + ret = ret + '\r\n' + return ret + + @property + def headers_section_pretty(self): + """ + Same thing as :func:`pappyproxy.http.HTTPMessage.headers_section` except + that the headers are colorized for terminal printing. 
+ """ + to_ret = printable_data(self.headers_section) + to_ret = pygments.highlight(to_ret, HttpLexer(), TerminalFormatter()) + return to_ret + + @property + def body(self): + """ + The data portion of the message + + :getter: Returns the data portion of the message + :setter: Set the data of the response and update metadata + :type: string + """ + if self._data_obj: + return self._data_obj.body + else: + return '' + + @body.setter + def body(self, val): + self._data_obj = LengthData(len(val)) + if len(val) > 0: + self._data_obj.add_data(val) + self._encoding_type = ENCODE_NONE + self.complete = True + self.update_from_body() + + @property + def body_pretty(self): + """ + Same thing as :func:`pappy.http.HTTPMessage.body` but the output is + colorized for the terminal. + """ + to_ret = printable_data(self.body) + if 'content-type' in self.headers: + try: + lexer = get_lexer_for_mimetype(self.headers['content-type'].split(';')[0]) + to_ret = pygments.highlight(to_ret, lexer, TerminalFormatter()) + except: + pass + return to_ret + + @property + def full_message(self): + """ + The full message including the start line, headers, and body + """ + if self.headers_section == '': + return self.body + else: + return (self.headers_section + self.body) + + @property + def full_message_pretty(self): + """ + Same as :func:`pappyproxy.http.HTTPMessage.full_message` except the + output is colorized + """ + return (self.headers_section_pretty + '\r\n' + self.body_pretty) + + ############### + ## Data loading + + def add_line(self, line): + """ + Used for building a message from a Twisted protocol. + Add a line (for status line and headers). Lines must be added in order + and the first line must be the status line. The line should not contain + the trailing carriage return/newline. I do not suggest you use this for + anything. 
+ + :param line: The line to add + :type line: string + """ + assert(not self.headers_complete) + if not line and self._first_line: + return + if not line: + self.headers_complete = True + + if self._end_after_headers: + self.complete = True + return -class Request(object): + if not self._data_obj: + self._data_obj = LengthData(0) + self.complete = self._data_obj.complete + self.headers_end() + return + + if self._first_line: + self.handle_start_line(line) + self._first_line = False + else: + key, val = line.split(':', 1) + val = val.strip() + if self.handle_header(key, val): + self.headers.append(key, val, do_callback=False) + + def add_data(self, data): + """ + Used for building a message from a Twisted protocol. + Add data to the message. The data must conform to the content encoding + and transfer encoding given in the headers passed in to + :func:`~pappyproxy.http.HTTPMessage.add_line`. Can be any fragment of the data. + I do not suggest that you use this function ever. + + :param data: The data to add + :type data: string + """ + assert(self._data_obj) + assert(not self._data_obj.complete) + assert not self.complete + self._data_obj.add_data(data) + if self._data_obj.complete: + self.complete = True + self.body_complete() + + ############### + ## Data parsing + + def handle_header(self, key, val): + """ + Called when a header is loaded into the message. Should not be called + outside of implementation. 
+ + :param key: Header key + :type line: string + :param key: Header value + :type line: string + """ + stripped = False + if key.lower() == 'content-encoding': + if val in ('gzip', 'x-gzip'): + self._encoding_type = ENCODE_GZIP + elif val in ('deflate'): + self._encoding_type = ENCODE_DEFLATE + + # We send our requests already decoded, so we don't want a header + # saying it's encoded + if self._encoding_type != ENCODE_NONE: + self._decoded = True + stripped = True + elif key.lower() == 'transfer-encoding' and val.lower() == 'chunked': + self._data_obj = ChunkedData() + self.complete = self._data_obj.complete + self._decoded = True + stripped = True + elif key.lower() == 'content-length': + # We use our own content length + self._data_obj = LengthData(int(val)) + + return (not stripped) + + def handle_start_line(self, start_line): + """ + A handler function for the status line. + """ + self.start_line = start_line + + def headers_end(self): + """ + Called when the headers are complete. + """ + pass + + def body_complete(self): + """ + Called when the body of the message is complete + """ + self.body = _decode_encoded(self._data_obj.body, + self._encoding_type) + + def update_from_body(self): + """ + Called when the body of the message is modified directly. Should be used + to update metadata that depends on the body of the message. + """ + if len(self.body) > 0 or 'Content-Length' in self.headers: + self.headers.update('Content-Length', str(len(self.body)), do_callback=False) + + def update_from_headers(self): + """ + Called when a header is modified. Should be used to update metadata that + depends on the values of headers. + """ + pass + + ########### + ## Metadata + + # The metadata functions are used so that we only have to make changes in a + # few similar functions which will update all copying, serialization, etc + # functions at the same time. + + def get_metadata(self): + """ + Get all the metadata of the message in dictionary form. 
+ Should be implemented in child class. + Should not be invoked outside of implementation! + """ + pass + + def set_metadata(self, data): + """ + Set metadata values based off of a data dictionary. + Should be implemented in child class. + Should not be invoked outside of implementation! + + :param data: Metadata to apply + :type line: dict + """ + pass + + def reset_metadata(self): + """ + Reset meta values to default values. Overridden by child class. + Should not be invoked outside of implementation! + """ + pass + + ############## + ## Serializing + + def to_json(self): + """ + Return a JSON encoding of the message that can be used by + :func:`~pappyproxy.http.Message.from_json` to recreate the message. + The ``full_message`` portion is base64 encoded because json doesn't play + nice with binary blobs. + """ + data = { + 'full_message': base64.b64encode(self.full_message), + } + + metadata = self.get_metadata() + for k, v in metadata.iteritems(): + if k in HTTPMessage.reserved_meta_keys: + raise PappyException('A message with %s as a key for a metavalue cannot be encoded into JSON') + data[k] = v + + return json.dumps(data) + + + def from_json(self, json_string): + """ + Update the metadata of the message to match data from + :func:`~pappyproxy.http.Message.to_json` + + :param json_string: The JSON data to use + :type json_string: JSON data in a string + """ + data = json.loads(json_string) + full_message = base64.b64decode(data['full_message']) + for k in HTTPMessage.reserved_meta_keys: + if k in data: + del data[k] + self._from_full_message(full_message, meta=data) + # self.update_from_headers() + # self.update_from_body() + +class Request(HTTPMessage): """ :ivar time_end: The datetime that the request ended. 
:vartype time_end: datetime.datetime :ivar time_start: The datetime that the request was made :vartype time_start: datetime.datetime - :ivar complete: When creating the request with :func:`~pappyproxy.http.Request.add_line` - and :func:`~pappyproxy.http.Request.add_data`, returns whether - the request is complete. - :vartype complete: Bool :ivar cookies: Cookies sent with the request :vartype cookies: RepeatableDict :ivar fragment: The fragment part of the url (The part that comes after the #) :vartype fragment: String :ivar url_params: The url parameters of the request (aka the get parameters) :vartype url_params: RepeatableDict - :ivar headers: The headers of the request - :vartype headers: RepeatableDict - :ivar headers_complete: When creating the request with - :func:`~pappyproxy.http.Request.add_line` and - :func:`~pappyproxy.http.Request.add_data`, returns whether the headers - are complete - :vartype headers_complete: Bool :ivar path: The path of the request :vartype path: String :ivar port: The port that the request was sent to (or will be sent to) @@ -489,25 +926,22 @@ class Request(object): :vartype version: String :ivar tags: Tags associated with the request :vartype tags: List of Strings + :ivar plugin_data: Data about the request created by plugins. If you modify this, please add your own key to it for your plugin and store all your plugin's data under that key (probably as another dict). For example if you have a plugin called ``foo``, try and store all your data under ``req.plugin_data['foo']``. 
+ :vartype plugin_data: Dict """ - def __init__(self, full_request=None, update_content_length=True, - port=None, is_ssl=None): + port=None, is_ssl=None, host=None): self.time_end = None self.time_start = None - self.complete = False self.cookies = RepeatableDict() self.fragment = None self.url_params = RepeatableDict() - self.headers = RepeatableDict(case_insensitive=True) - self.headers_complete = False self._host = None self._is_ssl = False self.path = '' self.port = None self.post_params = RepeatableDict() - self._raw_data = '' self.reqid = None self.response = None self.submitted = False @@ -515,11 +949,13 @@ class Request(object): self.verb = '' self.version = '' self.tags = [] + self.plugin_data = {} - self._first_line = True - self._data_length = 0 - self._partial_data = '' + # Called after instance vars since some callbacks depend on + # instance vars + HTTPMessage.__init__(self, full_request, update_content_length) + # After message init so that other instance vars are initialized self._set_dict_callbacks() # Set values from init @@ -527,45 +963,9 @@ class Request(object): self.is_ssl = True if port: self.port = port - - # Get values from the raw request - if full_request is not None: - self._from_full_request(full_request, update_content_length) + if host: + self._host = host - def __copy__(self): - if not self.complete: - raise PappyException("Cannot copy incomplete requests") - newreq = Request(self.full_request) - newreq.is_ssl = self.is_ssl - newreq.port = self.port - newreq._host = self._host - newreq.time_start = self.time_start - newreq.time_end = self.time_end - if self.unmangled: - newreq.unmangled = self.unmangled.copy() - if self.response: - newreq.response = self.response.copy() - return newreq - - def __eq__(self, other): - if self.full_request != other.full_request: - return False - if self.port != other.port: - return False - if self.is_ssl != other.is_ssl: - return False - if self._host != other._host: - return False - return True - - def 
copy(self): - """ - Returns a copy of the request - - :rtype: Request - """ - return self.__copy__() - @property def rsptime(self): """ @@ -580,7 +980,7 @@ class Request(object): return None @property - def status_line(self): + def start_line(self): """ The status line of the request. ie `GET / HTTP/1.1` @@ -588,13 +988,28 @@ class Request(object): :setter: Sets the status line of the request :type: string """ - if not self.verb and not self.path and not self.version: + if not self.verb and not self.full_path and not self.version: return '' return '%s %s %s' % (self.verb, self.full_path, self.version) + @start_line.setter + def start_line(self, val): + self.handle_start_line(val) + + @property + def status_line(self): + """ + Alias for `pappyproxy.http.Request.start_line`. + + :getter: Returns the status line of the request + :setter: Sets the status line of the request + :type: string + """ + return self.start_line + @status_line.setter def status_line(self, val): - self._handle_statusline(val) + self.start_line = val @property def full_path(self): @@ -624,47 +1039,37 @@ class Request(object): @property def raw_headers(self): """ - The raw text of the headers including the extra newline at the end. + Alias for Request.headers_section :getter: Returns the raw text of the headers including the extra newline at the end. :type: string """ - ret = self.status_line + '\r\n' - for k, v in self.headers.all_pairs(): - ret = ret + "%s: %s\r\n" % (k, v) - ret = ret + '\r\n' - return ret + return self.headers_section @property def full_request(self): """ - The full text of the request including the headers and data. 
+ Alias for Request.full_message :getter: Returns the full text of the request :type: string """ - if not self.status_line: - return '' - ret = self.raw_headers - ret = ret + self.raw_data - return ret + return self.full_message @property def raw_data(self): """ - The data portion of the request + Alias for Request.body :getter: Returns the data portion of the request :setter: Set the data of the request and update metadata :type: string """ - return self._raw_data + return self.body @raw_data.setter def raw_data(self, val): - self._raw_data = val - self._update_from_data() - self.complete = True + self.body = val @property def url(self): @@ -773,44 +1178,60 @@ class Request(object): ret = ret[:-1] return tuple(ret) - def _from_full_request(self, full_request, update_content_length=False): - # Get rid of leading CRLF. Not in spec, should remove eventually - # technically doesn't treat \r\n same as \n, but whatever. - full_request = _strip_leading_newlines(full_request) - if full_request == '': - return + ########### + ## Metadata - remaining = full_request - while remaining and not self.headers_complete: - line, remaining = _consume_line(remaining) - self.add_line(line) + def get_metadata(self): + data = {} + if self.port is not None: + data['port'] = self.port + data['is_ssl'] = self.is_ssl + data['host'] = self.host + data['reqid'] = self.reqid + if self.response: + data['response_id'] = self.response.rspid + data['tags'] = self.tags + return data - if not self.headers_complete: - self.add_line('') + def set_metadata(self, data): + if 'reqid' in data: + self.reqid = data['reqid'] + if 'is_ssl' in data: + self.is_ssl = data['is_ssl'] + if 'host' in data: + self._host = data['host'] + if 'port' in data: + self.port = data['port'] + if 'tags' in data: + self.tags = data['tags'] - if not self.complete: - if update_content_length: - self.raw_data = remaining - else: - self.add_data(remaining) - assert(self.complete) - self._handle_data_end() + def 
reset_metadata(self): + self.port = 80 + self.is_ssl = False + self.reqid = None + self._host = '' + self.tags = [] + + def get_plugin_dict(self, name): + if not name in self.plugin_data: + self.plugin_data[name] = {} + return self.plugin_data[name] ############################ ## Internal update functions def _set_dict_callbacks(self): # Add callbacks to dicts - self.headers.set_modify_callback(self._update_from_text) + self.headers.set_modify_callback(self.update_from_headers) self.cookies.set_modify_callback(self._update_from_objects) self.post_params.set_modify_callback(self._update_from_objects) - def _update_from_data(self): + def update_from_body(self): # Updates metadata that's based off of data - self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False) + HTTPMessage.update_from_body(self) if 'content-type' in self.headers: if self.headers['content-type'] == 'application/x-www-form-urlencoded': - self.post_params = repeatable_parse_qs(self.raw_data) + self.post_params = repeatable_parse_qs(self.body) self._set_dict_callbacks() def _update_from_objects(self): @@ -827,67 +1248,14 @@ class Request(object): pairs = [] for k, v in self.post_params.all_pairs(): pairs.append('%s=%s' % (k, v)) - self.raw_data = '&'.join(pairs) + self.body = '&'.join(pairs) - def _update_from_text(self): + def update_from_headers(self): # Updates metadata that depends on header/status line values self.cookies = RepeatableDict() self._set_dict_callbacks() for k, v in self.headers.all_pairs(): - self._handle_header(k, v) - - ############### - ## Data loading - - def add_line(self, line): - """ - Used for building a request from a Twisted protocol. - Add a line (for status line and headers). Lines must be added in order - and the first line must be the status line. The line should not contain - the trailing carriage return/newline. I do not suggest you use this for - anything. 
- - :param line: The line to add - :type line: string - """ - - if self._first_line and line == '': - # Ignore leading newlines because fuck the spec - return - - if self._first_line: - self._handle_statusline(line) - self._first_line = False - else: - # Either header or newline (end of headers) - if line == '': - self.headers_complete = True - if self._data_length == 0: - self.complete = True - else: - key, val = line.split(':', 1) - val = val.strip() - if self._handle_header(key, val): - self.headers.append(key, val, do_callback=False) - - def add_data(self, data): - """ - Used for building a request from a Twisted protocol. - Add data to the request. - I do not suggest that you use this function ever. - - :param data: The data to add - :type data: string - """ - # Add data (headers must be complete) - len_remaining = self._data_length - len(self._partial_data) - if len(data) >= len_remaining: - self._partial_data += data[:len_remaining] - self._raw_data = self._partial_data - self.complete = True - self._handle_data_end() - else: - self._partial_data += data + self.handle_header(k, v) ############### ## Data parsing @@ -895,6 +1263,7 @@ class Request(object): def _process_host(self, hostline): # Get address and port # Returns true if port was explicitly stated + # Used only for processing host header port_given = False if ':' in hostline: self._host, self.port = hostline.split(':') @@ -946,8 +1315,14 @@ class Request(object): reqpath += parsed_path.fragment self.fragment = parsed_path.fragment - def _handle_statusline(self, status_line): - parts = status_line.split() + def handle_start_line(self, start_line): + #HTTPMessage.handle_start_line(self, start_line) + if start_line == '': + self.verb = '' + self.path = '' + self.version = '' + return + parts = start_line.split() uri = None if len(parts) == 3: self.verb, uri, self.version = parts @@ -960,13 +1335,14 @@ class Request(object): if uri is not None: self._handle_statusline_uri(uri) - def _handle_header(self, 
key, val): + def handle_header(self, key, val): # We may have duplicate headers - stripped = False + keep = HTTPMessage.handle_header(self, key, val) + if not keep: + return False - if key.lower() == 'content-length': - self._data_length = int(val) - elif key.lower() == 'cookie': + stripped = False + if key.lower() == 'cookie': # We still want the raw key/val for the cookies header # because it's still a header cookie_strs = val.split('; ') @@ -992,68 +1368,13 @@ class Request(object): return (not stripped) - def _handle_data_end(self): + def body_complete(self): + HTTPMessage.body_complete(self) if 'content-type' in self.headers: if self.headers['content-type'] == 'application/x-www-form-urlencoded': - self.post_params = repeatable_parse_qs(self.raw_data) + self.post_params = repeatable_parse_qs(self.body) self._set_dict_callbacks() - ############## - ## Serializing - - def to_json(self): - """ - Return a JSON encoding of the request that can be used by - :func:`~pappyproxy.http.Request.from_json` to recreate the request. - The `full_request` portion is base64 encoded because json doesn't play - nice with binary blobs. 
- """ - # We base64 encode the full response because json doesn't paly nice with - # binary blobs - data = { - 'full_request': base64.b64encode(self.full_request), - 'reqid': self.reqid, - } - if self.response: - data['response_id'] = self.response.rspid - else: - data['response_id'] = None - - if self.unmangled: - data['unmangled_id'] = self.unmangled.reqid - - if self.time_start: - data['start'] = self.time_start.isoformat() - if self.time_end: - data['end'] = self.time_end.isoformat() - data['tags'] = self.tags - data['port'] = self.port - data['is_ssl'] = self.is_ssl - - return json.dumps(data) - - def from_json(self, json_string): - """ - Update the metadata of the request to match data from - :func:`~pappyproxy.http.Request.to_json` - - :param json_string: The JSON data to use - :type json_string: JSON data in a string - """ - - data = json.loads(json_string) - self._from_full_request(base64.b64decode(data['full_request'])) - self.port = data['port'] - self._is_ssl = data['is_ssl'] - if 'tags' in data: - self.tags = data['tags'] - else: - self.tags = [] - self._update_from_text() - self._update_from_data() - if data['reqid']: - self.reqid = data['reqid'] - ####################### ## Data store functions @@ -1066,8 +1387,13 @@ class Request(object): :rtype: twisted.internet.defer.Deferred """ + from .context import add_request_to_contexts, Context + from .pappy import main_context assert(dbpool) + if not self.reqid: + self.reqid = '--' + add_request_to_contexts(self) try: # Check for intyness _ = int(self.reqid) @@ -1076,13 +1402,14 @@ class Request(object): yield dbpool.runInteraction(self._update) assert(self.reqid is not None) yield dbpool.runInteraction(self._update_tags) - pappyproxy.context.add_request(self) except (ValueError, TypeError): # Either no id or in-memory yield dbpool.runInteraction(self._insert) assert(self.reqid is not None) yield dbpool.runInteraction(self._update_tags) - pappyproxy.context.add_request(self) + if self.unmangled: + 
Context.remove_request(self.unmangled) + main_context.filter_recheck() @crochet.wait_for(timeout=180.0) @defer.inlineCallbacks @@ -1190,6 +1517,18 @@ class Request(object): else: queryargs.append('0') + setnames.append('host=?') + if self.host: + queryargs.append(self.host) + else: + queryargs.append('') + + setnames.append('plugin_data=?') + if self.plugin_data: + queryargs.append(json.dumps(self.plugin_data)) + else: + queryargs.append('{}') + queryargs.append(self.reqid) txn.execute( """ @@ -1202,13 +1541,11 @@ class Request(object): # If we don't have an reqid, we're creating a new reuqest row colnames = ["full_request", "port"] colvals = [self.full_request, self.port] - if self.response: + if self.response and self.response.rspid: colnames.append('response_id') - assert(self.response.rspid is not None) # should be saved first colvals.append(self.response.rspid) - if self.unmangled: + if self.unmangled and self.unmangled.reqid: colnames.append('unmangled_id') - assert(self.unmangled.reqid is not None) # should be saved first colvals.append(self.unmangled.reqid) if self.time_start: colnames.append('start_datetime') @@ -1228,6 +1565,18 @@ class Request(object): else: colvals.append('0') + colnames.append('host') + if self.host: + colvals.append(self.host) + else: + colvals.append('') + + colnames.append('plugin_data') + if self.plugin_data: + colvals.append(json.dumps(self.plugin_data)) + else: + colvals.append('{}') + txn.execute( """ INSERT INTO requests (%s) VALUES (%s); @@ -1240,7 +1589,10 @@ class Request(object): @defer.inlineCallbacks def delete(self): + from .context import Context + assert(self.reqid is not None) + Context.remove_request(self) yield dbpool.runQuery( """ DELETE FROM requests WHERE id=?; @@ -1257,6 +1609,14 @@ class Request(object): @defer.inlineCallbacks def deep_delete(self): + """ + deep_delete() + Delete a request, its unmangled version, its response, and its response's + unmangled version from history. 
Also removes the request from all contexts. + Returns a Twisted deferred. + + :rtype: Deferred + """ if self.unmangled: yield self.unmangled.delete() if self.response: @@ -1267,7 +1627,7 @@ class Request(object): @staticmethod def _gen_sql_row(tablename=None): - template = "{pre}full_request, {pre}response_id, {pre}id, {pre}unmangled_id, {pre}start_datetime, {pre}end_datetime, {pre}port, {pre}is_ssl" + template = "{pre}full_request, {pre}response_id, {pre}id, {pre}unmangled_id, {pre}start_datetime, {pre}end_datetime, {pre}port, {pre}is_ssl, {pre}host, {pre}plugin_data" if tablename: return template.format(pre=('%s.'%tablename)) else: @@ -1277,6 +1637,8 @@ class Request(object): @staticmethod @defer.inlineCallbacks def _from_sql_row(row): + from .http import Request + req = Request(row[0]) if row[1]: rsp = yield Response.load_response(str(row[1])) @@ -1292,6 +1654,10 @@ class Request(object): req.port = int(row[6]) if row[7] == 1: req._is_ssl = True + if row[8]: + req._host = row[8] + if row[9]: + req.plugin_data = json.loads(row[9]) req.reqid = str(row[2]) # tags @@ -1318,9 +1684,11 @@ class Request(object): :rtype: twisted.internet.defer.Deferred """ - + from .context import Context + from .http import Request + reqs = [] - reqs += list(pappyproxy.context.in_memory_requests) + reqs += list(Context.in_memory_requests) rows = yield dbpool.runQuery( """ SELECT %s @@ -1342,6 +1710,7 @@ class Request(object): :rtype: twisted.internet.defer.Deferred """ + from .http import Request # tags rows = yield dbpool.runQuery( """ @@ -1367,9 +1736,13 @@ class Request(object): :rtype: twisted.internet.defer.Deferred """ + from .context import Context assert(dbpool) + if to_load == '--': + raise PappyException('Invalid request ID. 
Wait for it to save first.') + if not allow_special: try: int(to_load) @@ -1402,13 +1775,10 @@ class Request(object): else: return r - for r in pappyproxy.context.in_memory_requests: - if r.reqid == to_load: - defer.returnValue(retreq(r)) - for r in pappyproxy.context.all_reqs: + for r in Context.in_memory_requests: if r.reqid == to_load: defer.returnValue(retreq(r)) - for r in pappyproxy.context.active_requests: + for r in Context.all_reqs: if r.reqid == to_load: defer.returnValue(retreq(r)) if to_load[0] == 'm': @@ -1434,6 +1804,8 @@ class Request(object): def load_from_filters(filters): # Not efficient in any way # But it stays this way until we hit performance issues + from .context import Context, filter_reqs + assert(dbpool) rows = yield dbpool.runQuery( """ @@ -1446,8 +1818,8 @@ class Request(object): for row in rows: req = yield Request._from_sql_row(row) reqs.append(req) - reqs += list(pappyproxy.context.in_memory_requests) - (reqs, _) = pappyproxy.context.filter_reqs(reqs, filters) + reqs += list(Context.in_memory_requests) + (reqs, _) = filter_reqs(reqs, filters) defer.returnValue(reqs) @@ -1471,12 +1843,13 @@ class Request(object): :type full_request: string :rtype: Twisted deferred that calls back with a Request """ - + from .proxy import ProxyClientFactory, get_next_connection_id, ClientTLSContext + new_obj = Request(full_request) - factory = pappyproxy.proxy.ProxyClientFactory(new_obj, save_all=False) - factory.connection_id = pappyproxy.proxy.get_next_connection_id() + factory = ProxyClientFactory(new_obj, save_all=False) + factory.connection_id = get_next_connection_id() if is_ssl: - reactor.connectSSL(host, port, factory, pappyproxy.proxy.ClientTLSContext()) + reactor.connectSSL(host, port, factory, ClientTLSContext()) else: reactor.connectTCP(host, port, factory) new_req = yield factory.data_defer @@ -1494,6 +1867,8 @@ class Request(object): """ new_req = yield Request.submit_new(self.host, self.port, self.is_ssl, self.full_request) + 
self.set_metadata(new_req.get_metadata()) + self.unmangled = new_req.unmangled self.response = new_req.response self.time_start = new_req.time_start self.time_end = new_req.time_end @@ -1508,28 +1883,13 @@ class Request(object): Cannot be called in async functions. This is what you should use to submit your requests in macros. """ - new_req = yield Request.submit_new(self.host, self.port, self.is_ssl, - self.full_request) - self.response = new_req.response - self.time_start = new_req.time_start - self.time_end = new_req.time_end + yield self.async_submit() -class Response(object): +class Response(HTTPMessage): """ - :ivar complete: When creating the response with :func:`~pappyproxy.http.Response.add_line` - and :func:`~pappyproxy.http.Response.add_data`, returns whether - the request is complete. - :vartype complete: Bool :ivar cookies: Cookies set by the response :vartype cookies: RepeatableDict of ResponseCookie objects - :ivar headers: The headers of the response - :vartype headers: RepeatableDict - :ivar headers_complete: When creating the response with - :func:`~pappyproxy.http.Response.add_line` and - :func:`~pappyproxy.http.Response.add_data`, returns whether the headers - are complete - :vartype headers_complete: Bool :ivar response_code: The response code of the response :vartype response_code: Integer :ivar response_text: The text associated with the response code (ie OK, NOT FOUND, etc) @@ -1542,60 +1902,35 @@ class Response(object): :vartype version: String """ - def __init__(self, full_response=None, update_content_length=False): + def __init__(self, full_response=None, update_content_length=True): self.complete = False self.cookies = RepeatableDict() - self.headers = RepeatableDict(case_insensitive=True) - self.headers_complete = False - self._raw_data = '' self.response_code = 0 self.response_text = '' self.rspid = None self.unmangled = None self.version = '' + self._saving = False - self._encoding_type = ENCODE_NONE - self._first_line = True - 
self._data_obj = None - self._end_after_headers = False + # Called after instance vars since some callbacks depend on + # instance vars + HTTPMessage.__init__(self, full_response, update_content_length) + # After message init so that other instance vars are initialized self._set_dict_callbacks() - if full_response is not None: - self._from_full_response(full_response, update_content_length) - - def __copy__(self): - if not self.complete: - raise PappyException("Cannot copy incomplete responses") - retrsp = Response(self.full_response) - if self.unmangled: - retrsp.unmangled = self.unmangled.copy() - return retrsp - - def copy(self): - return self.__copy__() - - def __eq__(self, other): - if self.full_response != other.full_response: - return False - return True - @property def raw_headers(self): """ - The raw text of the headers including the extra newline at the end. + Alias for Response.headers_section :getter: Returns the raw text of the headers including the extra newline at the end. :type: string """ - ret = self.status_line + '\r\n' - for k, v in self.headers.all_pairs(): - ret = ret + "%s: %s\r\n" % (k, v) - ret = ret + '\r\n' - return ret + return self.headers_section @property - def status_line(self): + def start_line(self): """ The status line of the response. 
ie `HTTP/1.1 200 OK` @@ -1607,46 +1942,43 @@ class Response(object): return '' return '%s %d %s' % (self.version, self.response_code, self.response_text) + @start_line.setter + def start_line(self, val): + self.handle_start_line(val) + + @property + def status_line(self): + return self.start_line + @status_line.setter def status_line(self, val): - self._handle_statusline(val) + self.start_line = val @property def raw_data(self): """ - The data portion of the response + Alias for Response.body :getter: Returns the data portion of the response :setter: Set the data of the response and update metadata :type: string """ - return self._raw_data + return self.body @raw_data.setter def raw_data(self, val): - self._raw_data = val - self._data_obj = LengthData(len(val)) - if len(val) > 0: - self._data_obj.add_data(val) - self._encoding_type = ENCODE_NONE - self.complete = True - self._update_from_data() + self.body = val @property def full_response(self): """ The full text of the response including the headers and data. - Response is automatically converted from compressed/chunked into an - uncompressed response with a Content-Length header. + Alias for Response.full_message :getter: Returns the full text of the response :type: string """ - if not self.status_line: - return '' - ret = self.raw_headers - ret = ret + self.raw_data - return ret + return self.full_message @property def soup(self): @@ -1655,38 +1987,33 @@ class Response(object): :getter: Returns a BeautifulSoup object representing the html of the response """ - return bs4.BeautifulSoup(self.raw_data, 'lxml') + return bs4.BeautifulSoup(self.body, 'lxml') - def _from_full_response(self, full_response, update_content_length=False): - # Get rid of leading CRLF. 
Not in spec, should remove eventually - full_response = _strip_leading_newlines(full_response) - if full_response == '': - return - - remaining = full_response - while remaining and not self.headers_complete: - line, remaining = _consume_line(remaining) - self.add_line(line) + ########### + ## Metadata - if not self.headers_complete: - self.add_line('') + def get_metadata(self): + data = {} + data['rspid'] = self.rspid + return data - if update_content_length: - self.raw_data = remaining - if not self.complete: - self.add_data(remaining) - assert(self.complete) + def set_metadata(self, data): + if 'rspid' in data: + self.rspid = data['rspid'] + def reset_metadata(self): + self.rspid = None + ############################ ## Internal update functions def _set_dict_callbacks(self): # Add callbacks to dicts - self.headers.set_modify_callback(self._update_from_text) + self.headers.set_modify_callback(self.update_from_headers) self.cookies.set_modify_callback(self._update_from_objects) - def _update_from_data(self): - self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False) + def update_from_body(self): + HTTPMessage.update_from_body(self) def _update_from_objects(self): # Updates headers from objects @@ -1715,7 +2042,7 @@ class Response(object): self.headers = new_headers self._set_dict_callbacks() - def _update_from_text(self): + def update_from_headers(self): self.cookies = RepeatableDict() self._set_dict_callbacks() for k, v in self.headers.all_pairs(): @@ -1727,17 +2054,26 @@ class Response(object): ############### ## Data parsing - def _handle_statusline(self, status_line): + def handle_start_line(self, start_line): + if start_line == '': + self.response_code = 0 + self.version = '' + self.response_text = '' + return self._first_line = False self.version, self.response_code, self.response_text = \ - status_line.split(' ', 2) + start_line.split(' ', 2) self.response_code = int(self.response_code) if self.response_code == 304 or 
self.response_code == 204 or \ self.response_code/100 == 1: self._end_after_headers = True - def _handle_header(self, key, val): + def handle_header(self, key, val): + keep = HTTPMessage.handle_header(self, key, val) + if not keep: + return False + stripped = False if key.lower() == 'content-encoding': if val in ('gzip', 'x-gzip'): @@ -1763,67 +2099,8 @@ class Response(object): if stripped: return False else: - self.headers.append(key, val, do_callback=False) return True - ############### - ## Data loading - - def add_line(self, line): - """ - Used for building a response from a Twisted protocol. - Add a line (for status line and headers). Lines must be added in order - and the first line must be the status line. The line should not contain - the trailing carriage return/newline. I do not suggest you use this for - anything. - - :param line: The line to add - :type line: string - """ - assert(not self.headers_complete) - if not line and self._first_line: - return - if not line: - self.headers_complete = True - - if self._end_after_headers: - self.complete = True - return - - if not self._data_obj: - self._data_obj = LengthData(0) - self.complete = self._data_obj.complete - return - - if self._first_line: - self._handle_statusline(line) - self._first_line = False - else: - key, val = line.split(':', 1) - val = val.strip() - self._handle_header(key, val) - - def add_data(self, data): - """ - Used for building a response from a Twisted protocol. - Add data to the response. The data must conform to the content encoding - and transfer encoding given in the headers passed in to - :func:`~pappyproxy.http.Response.add_line`. Can be any fragment of the data. - I do not suggest that you use this function ever. 
- - :param data: The data to add - :type data: string - """ - assert(self._data_obj) - assert(not self._data_obj.complete) - assert not self.complete - self._data_obj.add_data(data) - if self._data_obj.complete: - self._raw_data = _decode_encoded(self._data_obj.raw_data, - self._encoding_type) - self.complete = True - self._update_from_data() - #################### ## Cookie management @@ -1858,41 +2135,6 @@ class Response(object): """ del self.cookies[key] - ############## - ## Serializing - - def to_json(self): - """ - Return a JSON encoding of the response that can be used by - :func:`~pappyproxy.http.Response.from_json` to recreate the response. - The ``full_response`` portion is base64 encoded because json doesn't play - nice with binary blobs. - """ - data = { - 'rspid': self.rspid, - 'full_response': base64.b64encode(self.full_response), - } - if self.unmangled: - data['unmangled_id'] = self.unmangled.rspid - - return json.dumps(data) - - - def from_json(self, json_string): - """ - Update the metadata of the response to match data from - :func:`~pappyproxy.http.Response.to_json` - - :param json_string: The JSON data to use - :type json_string: JSON data in a string - """ - data = json.loads(json_string) - self._from_full_response(base64.b64decode(data['full_response'])) - self._update_from_text() - self._update_from_data() - if data['rspid']: - self.rspid = str(data['rspid']) - ####################### ## Database interaction @@ -1907,15 +2149,19 @@ class Response(object): :rtype: twisted.internet.defer.Deferred """ assert(dbpool) - try: - # Check for intyness - _ = int(self.rspid) + if not self._saving: + # Not thread safe... 
I know, but YOLO + self._saving = True + try: + # Check for intyness + _ = int(self.rspid) - # If we have rspid, we're updating - yield dbpool.runInteraction(self._update) - except (ValueError, TypeError): - yield dbpool.runInteraction(self._insert) - assert(self.rspid is not None) + # If we have rspid, we're updating + yield dbpool.runInteraction(self._update) + except (ValueError, TypeError): + yield dbpool.runInteraction(self._insert) + self._saving = False + assert(self.rspid is not None) # Right now responses without requests are unviewable # @crochet.wait_for(timeout=180.0) diff --git a/pappyproxy/iter.py b/pappyproxy/iter.py index 343abd8..6932ab2 100644 --- a/pappyproxy/iter.py +++ b/pappyproxy/iter.py @@ -1,5 +1,5 @@ import os -import itertools + from .config import PAPPY_DIR def from_file(fname, intro=False): diff --git a/pappyproxy/macros.py b/pappyproxy/macros.py index 98b5be3..ffb602c 100644 --- a/pappyproxy/macros.py +++ b/pappyproxy/macros.py @@ -3,11 +3,12 @@ import imp import os import random import re +import stat -from pappyproxy import http +from jinja2 import Environment, FileSystemLoader from pappyproxy import config +from pappyproxy.util import PappyException from twisted.internet import defer -from jinja2 import Environment, FileSystemLoader class Macro(object): """ @@ -39,6 +40,9 @@ class Macro(object): if self.filename: match = re.findall('.*macro_(.*).py$', self.filename) self.file_name = match[0] + st = os.stat(self.filename) + if (st.st_mode & stat.S_IWOTH): + raise PappyException("Refusing to load world-writable macro: %s" % self.filename) module_name = os.path.basename(os.path.splitext(self.filename)[0]) self.source = imp.load_source('%s'%module_name, self.filename) if not hasattr(self.source, 'MACRO_NAME'): @@ -57,17 +61,49 @@ class Macro(object): # Execute the macro if self.source: self.source.run_macro(args) - + class InterceptMacro(object): """ A class representing a macro that modifies requests as they pass through the proxy """ 
- def __init__(self, filename=''): + def __init__(self): self.name = '' self.short_name = None - self.intercept_requests = True - self.intercept_responses = True + self.intercept_requests = False + self.intercept_responses = False + + self.do_req = False + self.do_rsp = False + self.do_async_req = False + self.do_async_rsp = False + + def __repr__(self): + return "" % self.name + + def init(self, args): + pass + + def mangle_request(self, request): + return request + + def mangle_response(self, request): + return request.response + + @defer.inlineCallbacks + def async_mangle_request(self, request): + defer.returnValue(request) + + @defer.inlineCallbacks + def async_mangle_response(self, request): + defer.returnValue(request.response) + +class FileInterceptMacro(InterceptMacro): + """ + An intercepting macro that loads a macro from a file. + """ + def __init__(self, filename=''): + InterceptMacro.__init__(self) self.file_name = '' # name from the file self.filename = filename or '' # filename we load from self.source = None @@ -85,36 +121,6 @@ class InterceptMacro(object): s += ' (%s)' % ('/'.join(names)) return "" % s - @property - def do_req(self): - if (self.source and hasattr(self.source, 'async_mangle_request') or \ - self.source and hasattr(self.source, 'mangle_request')) and \ - self.intercept_requests: - return True - return False - - @property - def do_rsp(self): - if (self.source and hasattr(self.source, 'async_mangle_response') or \ - self.source and hasattr(self.source, 'mangle_response')) and \ - self.intercept_responses: - return True - return False - - @property - def async_req(self): - if self.source and hasattr(self.source, 'async_mangle_request'): - return True - else: - return False - - @property - def async_rsp(self): - if self.source and hasattr(self.source, 'async_mangle_response'): - return True - else: - return False - def load(self): if self.filename: match = re.findall('.*int_(.*).py$', self.filename) @@ -122,6 +128,9 @@ class 
InterceptMacro(object): self.file_name = match[0] else: self.file_name = self.filename + st = os.stat(self.filename) + if (st.st_mode & stat.S_IWOTH): + raise PappyException("Refusing to load world-writable macro: %s" % self.filename) module_name = os.path.basename(os.path.splitext(self.filename)[0]) self.source = imp.load_source('%s'%module_name, self.filename) self.name = self.source.MACRO_NAME @@ -141,9 +150,28 @@ class InterceptMacro(object): else: self.source = None - def init(self, line): + # Update what we can do + if self.source and hasattr(self.source, 'mangle_request'): + self.intercept_requests = True + self.async_req = False + elif self.source and hasattr(self.source, 'async_mangle_request'): + self.intercept_requests = True + self.async_req = True + else: + self.intercept_requests = True + + if self.source and hasattr(self.source, 'mangle_response'): + self.intercept_responses = True + self.async_rsp = False + elif self.source and hasattr(self.source, 'async_mangle_response'): + self.intercept_responses = True + self.async_rsp = True + else: + self.intercept_responses = False + + def init(self, args): if hasattr(self.source, 'init'): - self.source.init(line) + self.source.init(args) def mangle_request(self, request): if hasattr(self.source, 'mangle_request'): @@ -178,12 +206,18 @@ def load_macros(loc): macro_files = glob.glob(loc + "/macro_*.py") macro_objs = [] for f in macro_files: - macro_objs.append(Macro(f)) + try: + macro_objs.append(Macro(f)) + except PappyException as e: + print str(e) int_macro_files = glob.glob(loc + "/int_*.py") int_macro_objs = [] for f in int_macro_files: - int_macro_objs.append(InterceptMacro(f)) + try: + int_macro_objs.append(FileInterceptMacro(f)) + except PappyException as e: + print str(e) return (macro_objs, int_macro_objs) def req_obj_def(req): @@ -198,6 +232,8 @@ def req_obj_def(req): else: if req.port != 80: params.append('port=%d'%req.port) + if 'host' in req.headers and req.host != req.headers['host']: + 
params.append('host=%d'%req.host) if params: req_params = ', '+', '.join(params) else: @@ -223,7 +259,6 @@ def macro_from_requests(reqs, short_name='', long_name=''): subs['short_name'] = short_name - n = 0 req_lines = [] req_params = [] for req in reqs: diff --git a/pappyproxy/pappy.py b/pappyproxy/pappy.py index 97a20fe..38a213d 100755 --- a/pappyproxy/pappy.py +++ b/pappyproxy/pappy.py @@ -1,31 +1,54 @@ #!/usr/bin/env python2 import argparse -import cmd2 import crochet import datetime -import imp import os import schema.update import shutil import sys -import sqlite3 import tempfile -from pappyproxy import console -from pappyproxy import config -from pappyproxy import comm -from pappyproxy import http -from pappyproxy import context -from pappyproxy import proxy + +from . import comm +from . import config +from . import context +from . import http +from . import plugin +from . import proxy +from .console import ProxyCmd from twisted.enterprise import adbapi from twisted.internet import reactor, defer -from twisted.internet.threads import deferToThread -from twisted.internet.protocol import ServerFactory from twisted.internet.error import CannotListenError - +from twisted.internet.protocol import ServerFactory +from twisted.internet.threads import deferToThread crochet.no_setup() +server_factory = None +main_context = context.Context() +all_contexts = [main_context] +plugin_loader = None +cons = None +@defer.inlineCallbacks +def wait_for_saves(ignored): + reset = True + printed = False + lastprint = 0 + while reset: + reset = False + togo = 0 + for c in all_contexts: + for r in c.all_reqs: + if r.reqid == '--': + reset = True + togo += 1 + d = defer.Deferred() + d.callback(None) + yield d + if togo % 10 == 0 and lastprint != togo: + lastprint = togo + print '%d requests left to be saved (probably won\'t work)' % togo + def parse_args(): # parses sys.argv and returns a settings dictionary @@ -51,6 +74,9 @@ def delete_datafile(): @defer.inlineCallbacks def main(): 
+ global server_factory + global plugin_loader + global cons settings = parse_args() load_start = datetime.datetime.now() @@ -77,7 +103,12 @@ def main(): check_same_thread=False, cp_openfun=set_text_factory, cp_max=1) - yield schema.update.update_schema(dbpool) + try: + yield schema.update.update_schema(dbpool, config.DATAFILE) + except Exception as e: + print 'Error updating schema: %s' % e + print 'Exiting...' + reactor.stop() http.init(dbpool) yield context.init() @@ -85,17 +116,17 @@ def main(): if config.DEBUG_DIR and os.path.exists(config.DEBUG_DIR): shutil.rmtree(config.DEBUG_DIR) print 'Removing old debugging output' - serv_factory = proxy.ProxyServerFactory(save_all=True) + server_factory = proxy.ProxyServerFactory(save_all=True) listen_strs = [] - listening = False + ports = [] for listener in config.LISTENERS: try: - reactor.listenTCP(listener[0], serv_factory, interface=listener[1]) - listening = True + port = reactor.listenTCP(listener[0], server_factory, interface=listener[1]) listener_str = 'port %d' % listener[0] if listener[1] not in ('127.0.0.1', 'localhost'): listener_str += ' (bound to %s)' % listener[1] listen_strs.append(listener_str) + ports.append(port) except CannotListenError as e: print repr(e) if listen_strs: @@ -112,19 +143,31 @@ def main(): # Load the scope yield context.load_scope(http.dbpool) - context.reset_to_scope() + context.reset_to_scope(main_context) # Apologize for slow start times load_end = datetime.datetime.now() load_time = (load_end - load_start) if load_time.total_seconds() > 20: print 'Startup was slow (%s)! Sorry!' 
% load_time - print 'Database has {0} requests (~{1:.2f}ms per request)'.format(len(context.active_requests), ((load_time.total_seconds()/len(context.active_requests))*1000)) + print 'Database has {0} requests (~{1:.2f}ms per request)'.format(len(main_context.active_requests), ((load_time.total_seconds()/len(main_context.active_requests))*1000)) sys.argv = [sys.argv[0]] # cmd2 tries to parse args - cons = console.ProxyCmd() - console.set_proxy_server_factory(serv_factory) + cons = ProxyCmd() + plugin_loader = plugin.PluginLoader(cons) + for d in config.PLUGIN_DIRS: + if not os.path.exists(d): + os.makedirs(d) + plugin_loader.load_directory(d) + + @defer.inlineCallbacks + def close_listeners(ignored): + for port in ports: + yield port.stopListening() + d = deferToThread(cons.cmdloop) + d.addCallback(close_listeners) + d.addCallback(wait_for_saves) d.addCallback(lambda ignored: reactor.stop()) if delete_data_on_quit: d.addCallback(lambda ignored: delete_datafile()) diff --git a/pappyproxy/plugin.py b/pappyproxy/plugin.py new file mode 100644 index 0000000..13e0f6f --- /dev/null +++ b/pappyproxy/plugin.py @@ -0,0 +1,150 @@ +""" +This module contains all the api calls written for use in plugins. If you want +to do anything that is't allowed through these function calls or through the +functions provided for macros, contact me and I'll see what I can do to add some +more functionality into the next version. 
+""" + +import glob +import imp +import os +import pappyproxy +import stat + +from .proxy import add_intercepting_macro as proxy_add_intercepting_macro +from .proxy import remove_intercepting_macro as proxy_remove_intercepting_macro +from .util import PappyException + +class Plugin(object): + + def __init__(self, cmd, fname=None): + self.cmd = cmd + self.filename = '' + self.source = None + self.module_name = '' + + if fname: + self.filename = fname + self.load_file(fname) + + def load_file(self, fname): + module_name = os.path.basename(os.path.splitext(fname)[0]) + if os.path.basename(fname) == '__init__.py': + return + st = os.stat(fname) + if (st.st_mode & stat.S_IWOTH): + raise PappyException("Refusing to load world-writable plugin: %s" % fname) + self.source = imp.load_source('%s'%module_name, fname) + if hasattr(self.source, 'load_cmds'): + self.source.load_cmds(self.cmd) + else: + print ('WARNING: %s does not define load_cmds. It will not be ' + 'possible to interact with the plugin through the console.' % fname) + self.module_name = module_name + + +class PluginLoader(object): + + def __init__(self, cmd): + self.cmd = cmd + self.loaded_plugins = [] + self.plugins_by_name = {} + + def load_plugin(self, fname): + p = Plugin(self.cmd, fname) + self.loaded_plugins.append(p) + self.plugins_by_name[p.module_name] = p + + def load_directory(self, directory): + fnames = glob.glob(os.path.join(directory, '*.py')) + for fname in fnames: + try: + self.load_plugin(fname) + except PappyException as e: + print str(e) + +########################## +## Plugin helper functions + +def plugin_by_name(name): + """ + Returns an interface to access the methods of a plugin from its name. + For example, to call the ``foo`` function from the ``bar`` plugin + you would call ``plugin_by_name('bar').foo()``. 
+ """ + import pappyproxy.pappy + if name in pappyproxy.pappy.plugin_loader.plugins_by_name: + return pappyproxy.pappy.plugin_loader.plugins_by_name[name].source + else: + raise PappyException('No plugin with name %s is loaded' % name) + +def add_intercepting_macro(name, macro): + """ + Adds an intercepting macro to the proxy. You can either use a + :class:`pappyproxy.macros.FileInterceptMacro` to load an intercepting macro + from the disk, or you can create your own using an :class:`pappyproxy.macros.InterceptMacro` + for a base class. You must give a unique name that will be used in + :func:`pappyproxy.plugin.remove_intercepting_macro` to deactivate it. Remember + that activating an intercepting macro will disable request streaming and will + affect performance. So please try and only use this if you may need to modify + messages before they are passed along. + """ + proxy_add_intercepting_macro(name, macro, pappyproxy.pappy.server_factory.intercepting_macros) + +def remove_intercepting_macro(name): + """ + Stops an active intercepting macro. You must pass in the name that you used + when calling :func:`pappyproxy.plugin.add_intercepting_macro` to identify + which macro you would like to stop. + """ + proxy_remove_intercepting_macro(name, pappyproxy.pappy.server_factory.intercepting_macros) + +def active_intercepting_macros(): + """ + Returns a list of the active intercepting macro objects. Modifying this list + will not affect which macros are active. + """ + return pappyproxy.pappy.server_factory.intercepting_macros[:] + +def in_memory_reqs(): + """ + Returns a list containing all out of the requests which exist in memory only + (requests with an m## style id). + You can call either :func:`pappyproxy.http.Request.save` or + :func:`pappyproxy.http.Request.async_save` to save the request to the data file. 
+ """ + return list(pappyproxy.context.Context.in_memory_requests) + +def all_reqs(): + """ + Returns a list containing all the requests in history (including requests + that only exist in memory). Modifying this list will not modify requests + included in the history. However, you can edit the requests + in this list then call either :func:`pappyproxy.http.Request.save` or + :func:`pappyproxy.http.Request.async_save` to modify the actual request. + """ + return list(pappyproxy.context.Context.all_reqs) + +def main_context(): + """ + Returns the context object representing the main context. Use this to interact + with the context. The returned object can be modified + at will. Avoid modifying any class values (ie all_reqs, in_memory_requests) + and use the class methods to add/remove requests. See the documentation on + :class:`pappyproxy.context.Context` for more information. + """ + return pappyproxy.pappy.main_context + +def add_req(req): + """ + Adds a request to the history. Will not do anything to requests which are + already in history. If the request is not saved, it will be given an m## id. + """ + pappyproxy.pappy.main_context.add_request(req) + +def run_cmd(cmd): + """ + Run a command as if you typed it into the console. Try and use existing APIs + to do what you want before using this. 
+ """ + pappyproxy.pappy.cons.onecmd(cmd) diff --git a/pappyproxy/plugins/__init__.py b/pappyproxy/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pappyproxy/plugins/filter.py b/pappyproxy/plugins/filter.py new file mode 100644 index 0000000..366c32f --- /dev/null +++ b/pappyproxy/plugins/filter.py @@ -0,0 +1,192 @@ +import crochet +import pappyproxy + +from pappyproxy.console import confirm +from pappyproxy.util import PappyException +from twisted.internet import defer + +class BuiltinFilters(object): + _filters = { + 'not_image': ( + ['path nctr "(\.png$|\.jpg$|\.gif$)"'], + 'Filter out image requests', + ), + 'not_jscss': ( + ['path nctr "(\.js$|\.css$)"'], + 'Filter out javascript and css files', + ), + } + + @staticmethod + @defer.inlineCallbacks + def get(name): + if name not in BuiltinFilters._filters: + raise PappyException('%s not a bult in filter' % name) + if name in BuiltinFilters._filters: + filters = [pappyproxy.context.Filter(f) for f in BuiltinFilters._filters[name][0]] + for f in filters: + yield f.generate() + defer.returnValue(filters) + raise PappyException('"%s" is not a built-in filter' % name) + + @staticmethod + def list(): + return [k for k, v in BuiltinFilters._filters.iteritems()] + + @staticmethod + def help(name): + if name not in BuiltinFilters._filters: + raise PappyException('"%s" is not a built-in filter' % name) + return pappyproxy.context.Filter(BuiltinFilters._filters[name][1]) + + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def filtercmd(line): + """ + Apply a filter to the current context + Usage: filter + See README.md for information on filter strings + """ + if not line: + raise PappyException("Filter string required") + + filter_to_add = pappyproxy.context.Filter(line) + yield filter_to_add.generate() + pappyproxy.pappy.main_context.add_filter(filter_to_add) + +def complete_builtin_filter(text, line, begidx, endidx): + all_names = BuiltinFilters.list() + if not text: + ret = 
all_names[:] + else: + ret = [n for n in all_names if n.startswith(text)] + return ret + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def builtin_filter(line): + if not line: + raise PappyException("Filter name required") + + filters_to_add = yield BuiltinFilters.get(line) + for f in filters_to_add: + print f.filter_string + pappyproxy.pappy.main_context.add_filter(f) + defer.returnValue(None) + +def filter_up(line): + """ + Remove the last applied filter + Usage: filter_up + """ + pappyproxy.pappy.main_context.filter_up() + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def filter_clear(line): + """ + Reset the context so that it contains no filters (ignores scope) + Usage: filter_clear + """ + pappyproxy.pappy.main_context.active_filters = [] + yield pappyproxy.context.reload_from_storage() + +def filter_list(line): + """ + Print the filters that make up the current context + Usage: filter_list + """ + for f in pappyproxy.pappy.main_context.active_filters: + print f.filter_string + + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def scope_save(line): + """ + Set the scope to be the current context. 
Saved between launches + Usage: scope_save + """ + pappyproxy.context.save_scope(pappyproxy.pappy.main_context) + yield pappyproxy.context.store_scope(pappyproxy.http.dbpool) + +def scope_reset(line): + """ + Set the context to be the scope (view in-scope items) + Usage: scope_reset + """ + pappyproxy.context.reset_to_scope(pappyproxy.pappy.main_context) + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def scope_delete(line): + """ + Delete the scope so that it contains all request/response pairs + Usage: scope_delete + """ + pappyproxy.context.set_scope([]) + yield pappyproxy.context.store_scope(pappyproxy.http.dbpool) + +def scope_list(line): + """ + Print the filters that make up the scope + Usage: scope_list + """ + pappyproxy.context.print_scope() + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def filter_prune(line): + """ + Delete all out of context requests from the data file. + CANNOT BE UNDONE!! Be careful! + Usage: filter_prune + """ + # Delete filtered items from datafile + print '' + print 'Currently active filters:' + for f in pappyproxy.pappy.main_context.active_filters: + print '> %s' % f.filter_string + + # We copy so that we're not removing items from a set we're iterating over + reqs = list(pappyproxy.pappy.main_context.inactive_requests) + act_reqs = list(pappyproxy.pappy.main_context.active_requests) + message = 'This will delete %d/%d requests. You can NOT undo this!! Continue?' 
% (len(reqs), (len(reqs) + len(act_reqs))) + if not confirm(message, 'n'): + defer.returnValue(None) + + for r in reqs: + yield r.deep_delete() + print 'Deleted %d requests' % len(reqs) + defer.returnValue(None) + +############### +## Plugin hooks + +def load_cmds(cmd): + cmd.set_cmds({ + 'filter_prune': (filter_prune, None), + 'scope_list': (scope_list, None), + 'scope_delete': (scope_delete, None), + 'scope_reset': (scope_reset, None), + 'scope_save': (scope_save, None), + 'filter_list': (filter_list, None), + 'filter_clear': (filter_clear, None), + 'filter_up': (filter_up, None), + 'builtin_filter': (builtin_filter, complete_builtin_filter), + 'filter': (filtercmd, None), + }) + cmd.add_aliases([ + #('filter_prune', ''), + ('scope_list', 'sls'), + #('scope_delete', ''), + ('scope_reset', 'sr'), + #('scope_save', ''), + ('filter_list', 'fls'), + ('filter_clear', 'fc'), + ('filter_up', 'fu'), + ('builtin_filter', 'fbi'), + ('filter', 'f'), + ('filter', 'fl'), + ]) diff --git a/pappyproxy/plugins/macrocmds.py b/pappyproxy/plugins/macrocmds.py new file mode 100644 index 0000000..1a9bbfc --- /dev/null +++ b/pappyproxy/plugins/macrocmds.py @@ -0,0 +1,215 @@ +import crochet +import pappyproxy +import shlex + +from pappyproxy.plugin import active_intercepting_macros, add_intercepting_macro, remove_intercepting_macro +from pappyproxy.console import load_reqlist +from pappyproxy.macros import load_macros, macro_from_requests, gen_imacro +from pappyproxy.util import PappyException +from twisted.internet import defer + +loaded_macros = [] +loaded_int_macros = [] +macro_dict = {} +int_macro_dict = {} + +def load_macros_cmd(line): + """ + Load macros from a directory. By default loads macros in the current directory. + Usage: load_macros [dir] + """ + global macro_dict + global int_macro_dict + global loaded_macros + global loaded_int_macros + + if line: + load_dir = line + else: + load_dir = '.' 
+ (to_load, int_to_load) = load_macros(load_dir) + if not to_load and not int_to_load: + raise PappyException('No macros to load.') + + macro_dict = {} + loaded_macros = [] + int_macro_dict = {} + loaded_int_macros = [] + + for macro in to_load: + if macro.name in macro_dict: + print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.name) + elif macro.short_name and macro.short_name in macro_dict: + print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.short_name) + elif macro.file_name in macro_dict: + print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.file_name) + else: + macro_dict[macro.name] = macro + macro_dict[macro.file_name] = macro + if macro.short_name: + macro_dict[macro.short_name] = macro + loaded_macros.append(macro) + print 'Loaded "%s"' % macro + + for macro in int_to_load: + if macro.name in int_macro_dict: + print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.name) + elif macro.short_name and macro.short_name in int_macro_dict: + print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.short_name) + elif macro.file_name in int_macro_dict: + print 'Name conflict in %s! "%s" already in use, not loading.' % (macro.filename, macro.file_name) + else: + int_macro_dict[macro.name] = macro + int_macro_dict[macro.file_name] = macro + if macro.short_name: + int_macro_dict[macro.short_name] = macro + loaded_int_macros.append(macro) + print 'Loaded "%s"' % macro + +def run_macro(line): + """ + Run a macro + Usage: run_macro + """ + global macro_dict + global loaded_macros + args = shlex.split(line) + if not args: + raise PappyException('You must give a macro to run. 
You can give its short name, or the name in the filename.') + mname = args[0] + if mname not in macro_dict: + raise PappyException('%s not a loaded macro' % mname) + macro = macro_dict[mname] + macro.execute(args[1:]) + +def run_int_macro(line): + """ + Activate an intercepting macro + Usage: run_int_macro + Macro can be stopped with stop_int_macro + """ + global int_macro_dict + global loaded_int_macros + args = shlex.split(line) + if len(args) == 0: + raise PappyException('You must give an intercepting macro to run. You can give its short name, or the name in the filename.') + if args[0] not in int_macro_dict: + raise PappyException('%s not a loaded intercepting macro' % line) + macro = int_macro_dict[args[0]] + macro.init(args[1:]) + add_intercepting_macro(macro.name, macro) + print '"%s" started' % macro.name + +def stop_int_macro(line): + """ + Stop a running intercepting macro + Usage: stop_int_macro + """ + global int_macro_dict + global loaded_int_macros + if not line: + raise PappyException('You must give an intercepting macro to run. 
You can give its short name, or the name in the filename.') + if line not in int_macro_dict: + raise PappyException('%s not a loaded intercepting macro' % line) + macro = int_macro_dict[line] + remove_intercepting_macro(macro.name) + print '"%s" stopped' % macro.name + +def list_int_macros(line): + """ + List all active/inactive intercepting macros + """ + global int_macro_dict + global loaded_int_macros + running = [] + not_running = [] + for macro in loaded_int_macros: + if macro.name in active_intercepting_macros(): + running.append(macro) + else: + not_running.append(macro) + + if not running and not not_running: + print 'No loaded intercepting macros' + + if running: + print 'Active intercepting macros:' + for m in running: + print ' %s' % m + + if not_running: + print 'Inactive intercepting macros:' + for m in not_running: + print ' %s' % m + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def generate_macro(line): + """ + Generate a macro script with request objects + Usage: generate_macro [reqs] + """ + if line == '': + raise PappyException('Macro name is required') + args = shlex.split(line) + name = args[0] + if len(args) > 1: + reqs = yield load_reqlist(args[1]) + else: + reqs = [] + script_str = macro_from_requests(reqs) + fname = 'macro_%s.py' % name + with open(fname, 'wc') as f: + f.write(script_str) + print 'Wrote script to %s' % fname + +def generate_int_macro(line): + """ + Generate an intercepting macro script + Usage: generate_int_macro + """ + if line == '': + raise PappyException('Macro name is required') + args = shlex.split(line) + name = args[0] + script_str = gen_imacro() + fname = 'int_%s.py' % name + with open(fname, 'wc') as f: + f.write(script_str) + print 'Wrote script to %s' % fname + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def rpy(line): + """ + Copy python object definitions of requests. 
+ Usage: rpy + """ + reqs = yield load_reqlist(line) + for req in reqs: + print pappyproxy.macros.req_obj_def(req) + +############### +## Plugin hooks + +def load_cmds(cmd): + cmd.set_cmds({ + 'rpy': (rpy, None), + 'generate_int_macro': (generate_int_macro, None), + 'generate_macro': (generate_macro, None), + 'list_int_macros': (list_int_macros, None), + 'stop_int_macro': (stop_int_macro, None), + 'run_int_macro': (run_int_macro, None), + 'run_macro': (run_macro, None), + 'load_macros': (load_macros_cmd, None), + }) + cmd.add_aliases([ + #('rpy', ''), + ('generate_int_macro', 'gima'), + ('generate_macro', 'gma'), + ('list_int_macros', 'lsim'), + ('stop_int_macro', 'sim'), + ('run_int_macro', 'rim'), + ('run_macro', 'rma'), + ('load_macros', 'lma'), + ]) diff --git a/pappyproxy/plugins/manglecmds.py b/pappyproxy/plugins/manglecmds.py new file mode 100644 index 0000000..8aa0398 --- /dev/null +++ b/pappyproxy/plugins/manglecmds.py @@ -0,0 +1,243 @@ +import crochet +import curses +import os +import pappyproxy +import shlex +import subprocess +import tempfile + +from pappyproxy.util import PappyException +from pappyproxy.macros import InterceptMacro +from pappyproxy.http import Request, Response +from pappyproxy.plugin import add_intercepting_macro, remove_intercepting_macro +from pappyproxy import comm, config +from twisted.internet import defer + +PLUGIN_ID="manglecmds" + +edit_queue = [] + +class MangleInterceptMacro(InterceptMacro): + """ + A class representing a macro that modifies requests as they pass through the + proxy + """ + def __init__(self): + InterceptMacro.__init__(self) + self.name = 'Pappy Interceptor Macro' + self.intercept_requests = False + self.intercept_responses = False + self.async_req = True + self.async_rsp = True + + def __repr__(self): + return "" % self.name + + @defer.inlineCallbacks + def async_mangle_request(self, request): + # This function gets called to mangle/edit requests passed through the proxy + + retreq = request + # Write 
import crochet
import curses
import os
import pappyproxy
import shlex
import subprocess
import tempfile

from pappyproxy.util import PappyException
from pappyproxy.macros import InterceptMacro
from pappyproxy.http import Request, Response
from pappyproxy.plugin import add_intercepting_macro, remove_intercepting_macro
from pappyproxy import comm, config
from twisted.internet import defer

PLUGIN_ID="manglecmds"

# Queue of (filename, deferred) pairs waiting for the user to edit them in
# the interceptor loop. The deferred fires once editing is done.
edit_queue = []

class MangleInterceptMacro(InterceptMacro):
    """
    An intercepting macro that lets the user hand-edit requests and/or
    responses in an external editor as they pass through the proxy.
    """
    def __init__(self):
        InterceptMacro.__init__(self)
        self.name = 'Pappy Interceptor Macro'
        self.intercept_requests = False
        self.intercept_responses = False
        self.async_req = True
        self.async_rsp = True

    def __repr__(self):
        # Bug fix: this previously returned "" % self.name (always the empty
        # string); the angle-bracketed repr text was evidently lost.
        return "<MangleInterceptMacro (%s)>" % self.name

    @defer.inlineCallbacks
    def async_mangle_request(self, request):
        # Called to mangle/edit requests passed through the proxy.
        # Returns the (possibly replaced) request, or None to drop it.

        retreq = request
        # Write original request to the temp file
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tfName = tf.name
            tf.write(request.full_request)

        # Have the console edit the file
        yield edit_file(tfName)

        # Create new mangled request from edited file
        with open(tfName, 'r') as f:
            text = f.read()

        os.remove(tfName)

        # An emptied file means the user dropped the request
        if text == '':
            pappyproxy.proxy.log('Request dropped!')
            defer.returnValue(None)

        mangled_req = Request(text, update_content_length=True)
        mangled_req.port = request.port
        mangled_req.is_ssl = request.is_ssl

        # Only swap in the new object if the user actually changed something
        if mangled_req.full_request != request.full_request:
            retreq = mangled_req

        defer.returnValue(retreq)

    @defer.inlineCallbacks
    def async_mangle_response(self, request):
        # Called to mangle/edit responses passed through the proxy.
        # Returns the (possibly replaced) response, or None to drop it.

        retrsp = request.response
        # Write original response to the temp file
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tfName = tf.name
            tf.write(request.response.full_response)

        # Have the console edit the file; front=True jumps the queue so the
        # response gets edited before any already-queued requests
        yield edit_file(tfName, front=True)

        # Create new mangled response from edited file
        with open(tfName, 'r') as f:
            text = f.read()

        os.remove(tfName)

        # An emptied file means the user dropped the response
        if text == '':
            pappyproxy.proxy.log('Response dropped!')
            defer.returnValue(None)

        mangled_rsp = Response(text, update_content_length=True)

        if mangled_rsp.full_response != request.response.full_response:
            mangled_rsp.unmangled = request.response
            retrsp = mangled_rsp

        defer.returnValue(retrsp)


###############
## Helper funcs

def edit_file(fname, front=False):
    global edit_queue
    # Adds the filename to the edit queue. Returns a deferred that is fired once
    # the file is edited and the editor is closed
    d = defer.Deferred()
    if front:
        edit_queue = [(fname, d)] + edit_queue
    else:
        edit_queue.append((fname, d))
    return d

@crochet.wait_for(timeout=None)
@defer.inlineCallbacks
def check_reqid(reqid):
    # Used for the repeater command. Must not be async
    try:
        yield pappyproxy.http.Request.load_request(reqid)
    except:
        raise PappyException('"%s" is not a valid request id' % reqid)
    defer.returnValue(None)

def start_editor(reqid):
    # Launch vim with the repeater script and hand it the request id plus
    # the comm port it should talk back to the proxy on.
    script_loc = os.path.join(config.PAPPY_DIR, "plugins", "vim_repeater", "repeater.vim")
    subprocess.call(["vim", "-S", script_loc, "-c", "RepeaterSetup %s %d"%(reqid, comm.comm_port)])

####################
## Command functions

def repeater(line):
    """
    Open a request in the repeater
    Usage: repeater <reqid>
    """
    # This is not async on purpose. start_editor acts up if this is called
    # with inline callbacks. As a result, check_reqid and get_unmangled
    # cannot be async
    args = shlex.split(line)
    # Robustness fix: an empty line previously raised a bare IndexError
    if not args:
        raise PappyException('Usage: repeater <reqid>')
    reqid = args[0]

    check_reqid(reqid)
    start_editor(reqid)

def intercept(line):
    """
    Intercept requests and/or responses and edit them before passing them along
    Usage: intercept <req|rsp>
    """
    global edit_queue
    args = shlex.split(line)
    intercept_requests = False
    intercept_responses = False

    req_names = ('req', 'request', 'requests')
    rsp_names = ('rsp', 'response', 'responses')

    if any(a in req_names for a in args):
        intercept_requests = True
    if any(a in rsp_names for a in args):
        intercept_responses = True

    if intercept_requests and intercept_responses:
        intercept_str = 'Requests and responses'
    elif intercept_requests:
        intercept_str = 'Requests'
    elif intercept_responses:
        intercept_str = 'Responses'
    else:
        intercept_str = 'NOTHING'

    mangle_macro = MangleInterceptMacro()
    mangle_macro.intercept_requests = intercept_requests
    mangle_macro.intercept_responses = intercept_responses

    add_intercepting_macro('pappy_intercept', mangle_macro)

    ## Interceptor loop
    stdscr = curses.initscr()
    curses.noecho()
    curses.cbreak()

    try:
        editnext = False
        stdscr.nodelay(True)
        while True:
            stdscr.addstr(0, 0, "Currently intercepting: %s" % intercept_str)
            stdscr.clrtoeol()
            stdscr.addstr(1, 0, "%d item(s) in queue." % len(edit_queue))
            stdscr.clrtoeol()
            if editnext:
                stdscr.addstr(2, 0, "Waiting for next item... Press 'q' to quit or 'b' to quit waiting")
            else:
                stdscr.addstr(2, 0, "Press 'n' to edit the next item or 'q' to quit interceptor.")
            stdscr.clrtoeol()

            c = stdscr.getch()
            if c == ord('q'):
                break
            elif c == ord('n'):
                editnext = True
            elif c == ord('b'):
                editnext = False

            if editnext and edit_queue:
                editnext = False
                (to_edit, deferred) = edit_queue.pop(0)
                editor = 'vi'
                if 'EDITOR' in os.environ:
                    editor = os.environ['EDITOR']
                subprocess.call([editor, to_edit])
                stdscr.clear()
                deferred.callback(None)
    finally:
        # Always restore the terminal, even if the loop blew up
        curses.nocbreak()
        stdscr.keypad(0)
        curses.echo()
        curses.endwin()
        try:
            remove_intercepting_macro('pappy_intercept')
        except PappyException:
            pass
        # Send remaining requests along unedited
        while len(edit_queue) > 0:
            (fname, deferred) = edit_queue.pop(0)
            deferred.callback(None)

###############
## Plugin hooks

def load_cmds(cmd):
    # Register the intercept/repeater console commands and their aliases.
    cmd.set_cmds({
        'intercept': (intercept, None),
        'repeater': (repeater, None),
    })
    cmd.add_aliases([
        ('intercept', 'ic'),
        ('repeater', 'rp'),
    ])
% dest_dir + if not confirm(message, 'n'): + return False + print "Generating certs to %s" % dest_dir + pappyproxy.proxy.generate_ca_certs(dest_dir) + +def log(line): + """ + Display the log in real time. Honestly it probably doesn't work. + Usage: log [verbosity (default is 1)] + verbosity=1: Show connections as they're made/lost, some additional info + verbosity=3: Show full requests/responses as they are processed by the proxy + """ + try: + verbosity = int(line.strip()) + except: + verbosity = 1 + pappyproxy.config.DEBUG_VERBOSITY = verbosity + raw_input() + pappyproxy.config.DEBUG_VERBOSITY = 0 + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def export(line): + """ + Write the full request/response of a request/response to a file. + Usage: export [req|rsp] + """ + args = shlex.split(line) + if len(args) < 2: + print 'Requires req/rsp and and request id(s)' + defer.returnValue(None) + + if args[0] not in ('req', 'rsp'): + raise PappyException('Request or response not specified') + + reqs = yield load_reqlist(args[1]) + for req in reqs: + try: + if args[0] == 'req': + fname = 'req_%s.txt'%req.reqid + with open(fname, 'w') as f: + f.write(req.full_request) + print 'Full request written to %s' % fname + elif args[0] == 'rsp': + fname = 'rsp_%s.txt'%req.reqid + with open(fname, 'w') as f: + f.write(req.full_response) + print 'Full response written to %s' % fname + except PappyException as e: + print 'Unable to export %s: %s' % (req.reqid, e) + +def load_cmds(cmd): + cmd.set_cmds({ + 'clrmem': (clrmem, None), + 'gencerts': (gencerts, None), + 'export': (export, None), + 'log': (log, None), + }) + cmd.add_aliases([ + #('rpy', ''), + ]) diff --git a/pappyproxy/plugins/tagcmds.py b/pappyproxy/plugins/tagcmds.py new file mode 100644 index 0000000..d81b254 --- /dev/null +++ b/pappyproxy/plugins/tagcmds.py @@ -0,0 +1,102 @@ +import crochet +import pappyproxy +import shlex + +from pappyproxy.plugin import main_context +from pappyproxy.console import 
load_reqlist +from pappyproxy.util import PappyException +from twisted.internet import defer + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def tag(line): + """ + Add a tag to requests. + Usage: tag [request ids] + You can tag as many requests as you want at the same time. If no + ids are given, the tag will be applied to all in-context requests. + """ + args = shlex.split(line) + if len(args) == 0: + raise PappyException('Tag name is required') + tag = args[0] + + if len(args) > 1: + reqs = yield load_reqlist(args[1], False) + ids = [r.reqid for r in reqs] + print 'Tagging %s with %s' % (', '.join(ids), tag) + else: + print "Tagging all in-context requests with %s" % tag + reqs = main_context().active_requests + + for req in reqs: + if tag not in req.tags: + req.tags.append(tag) + if req.saved: + yield req.async_save() + add_req(req) + else: + print 'Request %s already has tag %s' % (req.reqid, tag) + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def untag(line): + """ + Remove a tag from requests + Usage: untag + You can provide as many request ids as you want and the tag will + be removed from all of them. If no ids are given, the tag will + be removed from all in-context requests. 
+ """ + args = shlex.split(line) + if len(args) == 0: + raise PappyException("Tag and request ids are required") + tag = args[0] + + ids = [] + if len(args) > 1: + reqs = yield load_reqlist(args[1], False) + ids = [r.reqid for r in reqs] + else: + print "Untagging all in-context requests with tag %s" % tag + reqs = main_context().active_requests + + for req in reqs: + if tag in req.tags: + req.tags.remove(tag) + if req.saved: + yield req.async_save() + if ids: + print 'Tag %s removed from %s' % (tag, ', '.join(ids)) + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def clrtag(line): + """ + Clear all the tags from requests + Usage: clrtag + """ + args = shlex.split(line) + if len(args) == 0: + raise PappyException('No request IDs given') + reqs = yield load_reqlist(args[0], False) + + for req in reqs: + if req.tags: + req.tags = [] + print 'Tags cleared from request %s' % (req.reqid) + if req.saved: + yield req.async_save() + +############### +## Plugin hooks + +def load_cmds(cmd): + cmd.set_cmds({ + 'clrtag': (clrtag, None), + 'untag': (untag, None), + 'tag': (tag, None), + }) + cmd.add_aliases([ + #('rpy', ''), + ]) diff --git a/pappyproxy/plugins/view.py b/pappyproxy/plugins/view.py new file mode 100644 index 0000000..2f08b97 --- /dev/null +++ b/pappyproxy/plugins/view.py @@ -0,0 +1,328 @@ +import crochet +import datetime +import pappyproxy +import shlex + +from pappyproxy.console import load_reqlist, print_table, print_requests +from pappyproxy.util import PappyException +from pappyproxy.plugin import main_context +from pappyproxy.http import Request +from twisted.internet import defer + +################### +## Helper functions + +def view_full_message(request, headers_only=False): + if headers_only: + print request.headers_section_pretty + else: + print request.full_message_pretty + +def print_request_extended(request): + # Prints extended info for the request + title = "Request Info (reqid=%s)" % request.reqid + print title + print '-'*len(title) + 
reqlen = len(request.body) + reqlen = '%d bytes' % reqlen + rsplen = 'No response' + + mangle_str = 'Nothing mangled' + if request.unmangled: + mangle_str = 'Request' + + if request.response: + response_code = str(request.response.response_code) + \ + ' ' + request.response.response_text + rsplen = len(request.response.body) + rsplen = '%d bytes' % rsplen + + if request.response.unmangled: + if mangle_str == 'Nothing mangled': + mangle_str = 'Response' + else: + mangle_str += ' and Response' + else: + response_code = '' + + time_str = '--' + if request.time_start and request.time_end: + time_delt = request.time_end - request.time_start + time_str = "%.2f sec" % time_delt.total_seconds() + + if request.is_ssl: + is_ssl = 'YES' + else: + is_ssl = 'NO' + + if request.time_start: + time_made_str = request.time_start.strftime('%a, %b %d, %Y, %I:%M:%S %p') + else: + time_made_str = '--' + + print 'Made on %s' % time_made_str + print 'ID: %s' % request.reqid + print 'Verb: %s' % request.verb + print 'Host: %s' % request.host + print 'Path: %s' % request.full_path + print 'Status Code: %s' % response_code + print 'Request Length: %s' % reqlen + print 'Response Length: %s' % rsplen + if request.response and request.response.unmangled: + print 'Unmangled Response Length: %s bytes' % len(request.response.unmangled.full_response) + print 'Time: %s' % time_str + print 'Port: %s' % request.port + print 'SSL: %s' % is_ssl + print 'Mangled: %s' % mangle_str + print 'Tags: %s' % (', '.join(request.tags)) + if request.plugin_data: + print 'Plugin Data: %s' % (request.plugin_data) + +def get_site_map(reqs): + # Takes in a list of requests and returns a tree representing the site map + paths_set = set() + for req in reqs: + paths_set.add(req.path_tuple) + paths = sorted(list(paths_set)) + return paths + +def print_tree(tree): + # Prints a tree. 
Takes in a sorted list of path tuples + _print_tree_helper(tree, 0, []) + +def _get_tree_prefix(depth, print_bars, last): + if depth == 0: + return u'' + else: + ret = u'' + pb = print_bars + [True] + for i in range(depth): + if pb[i]: + ret += u'\u2502 ' + else: + ret += u' ' + if last: + ret += u'\u2514\u2500\u2500 ' + else: + ret += u'\u251c\u2500\u2500 ' + return ret + +def _print_tree_helper(tree, depth, print_bars): + # Takes in a tree and prints it at the given depth + if tree == [] or tree == [()]: + return + while tree[0] == (): + tree = tree[1:] + if tree == [] or tree == [()]: + return + if len(tree) == 1 and len(tree[0]) == 1: + print _get_tree_prefix(depth, print_bars + [False], True) + tree[0][0] + return + + curkey = tree[0][0] + subtree = [] + for row in tree: + if row[0] != curkey: + if curkey == '': + curkey = '/' + print _get_tree_prefix(depth, print_bars, False) + curkey + if depth == 0: + _print_tree_helper(subtree, depth+1, print_bars + [False]) + else: + _print_tree_helper(subtree, depth+1, print_bars + [True]) + curkey = row[0] + subtree = [] + subtree.append(row[1:]) + if curkey == '': + curkey = '/' + print _get_tree_prefix(depth, print_bars, True) + curkey + _print_tree_helper(subtree, depth+1, print_bars + [False]) + + +#################### +## Command functions + +def list_reqs(line): + """ + List the most recent in-context requests. By default shows the most recent 25 + Usage: list [a|num] + + If `a` is given, all the in-context requests are shown. If a number is given, + that many requests will be shown. 
+ """ + args = shlex.split(line) + if len(args) > 0: + if args[0][0].lower() == 'a': + print_count = -1 + else: + try: + print_count = int(args[0]) + except: + print "Please enter a valid argument for list" + return + else: + print_count = 25 + + def key_reqtime(req): + if req.time_start is None: + return -1 + else: + return (req.time_start-datetime.datetime(1970,1,1)).total_seconds() + + to_print = sorted(main_context().active_requests, key=key_reqtime, reverse=True) + if print_count > 0: + to_print = to_print[:print_count] + print_requests(to_print) + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def view_request_info(line): + """ + View information about request + Usage: view_request_info [u] + If 'u' is given as an additional argument, the unmangled version + of the request will be displayed. + """ + args = shlex.split(line) + reqids = args[0] + + reqs = yield load_reqlist(reqids) + + for req in reqs: + print '' + print_request_extended(req) + print '' + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def view_request_headers(line): + """ + View the headers of the request + Usage: view_request_headers [u] + If 'u' is given as an additional argument, the unmangled version + of the request will be displayed. + """ + args = shlex.split(line) + reqid = args[0] + + reqs = yield load_reqlist(reqid) + for req in reqs: + if len(reqs) > 1: + print 'Request %s:' % req.reqid + print '' + view_full_message(req, True) + if len(reqs) > 1: + print '-'*30 + + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def view_full_request(line): + """ + View the full data of the request + Usage: view_full_request [u] + If 'u' is given as an additional argument, the unmangled version + of the request will be displayed. 
+ """ + args = shlex.split(line) + reqid = args[0] + + reqs = yield load_reqlist(reqid) + for req in reqs: + if len(reqs) > 1: + print 'Request %s:' % req.reqid + print '' + view_full_message(req) + if len(reqs) > 1: + print '-'*30 + + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def view_response_headers(line): + """ + View the headers of the response + Usage: view_response_headers + """ + reqs = yield load_reqlist(line) + for req in reqs: + if req.response: + if len(reqs) > 1: + print '-'*15 + (' %s ' % req.reqid) + '-'*15 + view_full_message(req.response, True) + else: + print "Request %s does not have a response" % req.reqid + + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def view_full_response(line): + """ + View the full data of the response associated with a request + Usage: view_full_response + """ + reqs = yield load_reqlist(line) + for req in reqs: + if req.response: + if len(reqs) > 1: + print '-'*15 + (' %s ' % req.reqid) + '-'*15 + view_full_message(req.response) + else: + print "Request %s does not have a response" % req.reqid + + +@crochet.wait_for(timeout=None) +@defer.inlineCallbacks +def dump_response(line): + """ + Dump the data of the response to a file. + Usage: dump_response + """ + # dump the data of a response + args = shlex.split(line) + reqid = args[0] + req = yield Request.load_request(reqid) + rsp = req.response + if len(args) >= 2: + fname = args[1] + else: + fname = req.path.split('/')[-1] + + with open(fname, 'w') as f: + f.write(rsp.body) + print 'Response data written to %s' % fname + +def site_map(line): + """ + Print the site map. Only includes requests in the current context. 
+ Usage: site_map + """ + to_print = [r for r in main_context().active_requests if not r.response or r.response.response_code != 404] + tree = get_site_map(to_print) + print_tree(tree) + + +############### +## Plugin hooks + +def load_cmds(cmd): + cmd.set_cmds({ + 'list': (list_reqs, None), + 'view_request_info': (view_request_info, None), + 'view_request_headers': (view_request_headers, None), + 'view_full_request': (view_full_request, None), + 'view_response_headers': (view_response_headers, None), + 'view_full_response': (view_full_response, None), + 'site_map': (site_map, None), + 'dump_response': (dump_response, None), + }) + cmd.add_aliases([ + ('list', 'ls'), + ('view_request_info', 'viq'), + ('view_request_headers', 'vhq'), + ('view_full_request', 'vfq'), + ('view_response_headers', 'vhs'), + ('site_map', 'sm'), + ('view_full_response', 'vfs'), + #('dump_response', 'dr'), + ]) diff --git a/pappyproxy/plugins/vim_repeater/__init__.py b/pappyproxy/plugins/vim_repeater/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pappyproxy/plugins/vim_repeater/repeater.py b/pappyproxy/plugins/vim_repeater/repeater.py new file mode 100644 index 0000000..97bc455 --- /dev/null +++ b/pappyproxy/plugins/vim_repeater/repeater.py @@ -0,0 +1,135 @@ +import base64 +import vim +import sys +import socket +import json + +class CommError(Exception): + pass + +def escape(s): + return s.replace("'", "''") + +def communicate(data): + global PAPPY_PORT + # Submits data to the comm port of the proxy + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect(('127.0.0.1', int(vim.eval('s:commport')))) + datastr = json.dumps(data) + + # Send our data + total_sent = 0 + while total_sent < len(data): + sent = s.send(datastr[total_sent:]) + assert sent != 0 + total_sent += sent + s.send('\n') + + # Get our response + retstr = '' + c = '' + while c != '\n': + retstr = retstr + c + c = s.recv(1) + assert c != '' + result = json.loads(retstr) + if not 
result['success']: + vim.command("echoerr '%s'" % escape(result['message'])) + raise CommError(result['message']) + return result + +def read_line(conn): + data = '' + c = '' + while c != '\n': + data = data + c + c = conn.read(1) + return data + +def run_command(command): + funcs = { + "setup": set_up_windows, + "submit": submit_current_buffer, + } + if command in funcs: + funcs[command]() + +def set_buffer_content(buf, text): + buf[:] = None + first = True + for l in text.split('\n'): + if first: + buf[0] = l + first = False + else: + buf.append(l) + +def set_up_windows(): + reqid = vim.eval("a:2") + comm_port = vim.eval("a:3") + vim.command("let s:commport=%d"%int(comm_port)) + # Get the left buffer + vim.command("new") + vim.command("only") + b2 = vim.current.buffer + vim.command("let s:b2=bufnr('$')") + + # Vsplit new file + vim.command("vnew") + b1 = vim.current.buffer + vim.command("let s:b1=bufnr('$')") + + # Get the request + comm_data = {"action": "get_request", "reqid": reqid} + try: + reqdata = communicate(comm_data) + except CommError: + return + + comm_data = {"action": "get_response", "reqid": reqid} + try: + rspdata = communicate(comm_data) + except CommError: + return + + # Set up the buffers + set_buffer_content(b1, base64.b64decode(reqdata['full_message'])) + if 'full_message' in rspdata: + set_buffer_content(b2, base64.b64decode(rspdata['full_message'])) + + # Save the port, ssl, host setting + vim.command("let s:repport=%d" % int(reqdata['port'])) + vim.command("let s:rephost='%s'" % escape(reqdata['host'])) + + if reqdata['is_ssl']: + vim.command("let s:repisssl=1") + else: + vim.command("let s:repisssl=0") + +def submit_current_buffer(): + curbuf = vim.current.buffer + b2_id = int(vim.eval("s:b2")) + b2 = vim.buffers[b2_id] + vim.command("let s:b1=bufnr('$')") + vim.command("only") + vim.command("rightbelow vertical new") + vim.command("b %d" % b2_id) + vim.command("wincmd h") + + full_request = '\n'.join(curbuf) + commdata = {'action': 
'submit', + 'full_message': base64.b64encode(full_request), + 'port': int(vim.eval("s:repport")), + 'host': vim.eval("s:rephost")} + if vim.eval("s:repisssl") == '1': + commdata["is_ssl"] = True + else: + commdata["is_ssl"] = False + result = communicate(commdata) + set_buffer_content(b2, base64.b64decode(result['response']['full_message'])) + +# (left, right) = set_up_windows() +# set_buffer_content(left, 'Hello\nWorld') +# set_buffer_content(right, 'Hello\nOther\nWorld') +#print "Arg is %s" % vim.eval("a:arg") +run_command(vim.eval("a:1")) diff --git a/pappyproxy/plugins/vim_repeater/repeater.vim b/pappyproxy/plugins/vim_repeater/repeater.vim new file mode 100644 index 0000000..737605c --- /dev/null +++ b/pappyproxy/plugins/vim_repeater/repeater.vim @@ -0,0 +1,17 @@ +if !has('python') + echo "Vim must support python in order to use the repeater" + finish +endif + +let s:pyscript = resolve(expand(':p:h') . '/repeater.py') + +function! RepeaterAction(...) + execute 'pyfile ' . s:pyscript +endfunc + +command! -nargs=* RepeaterSetup call RepeaterAction('setup', ) +command! 
RepeaterSubmitBuffer call RepeaterAction('submit') + +" Bind forward to f +nnoremap f :RepeaterSubmitBuffer + diff --git a/pappyproxy/proxy.py b/pappyproxy/proxy.py index 86fa030..74c69ff 100644 --- a/pappyproxy/proxy.py +++ b/pappyproxy/proxy.py @@ -1,30 +1,18 @@ import copy import datetime -import gzip import os import random -import re -import schema.update -import shutil -import string -import StringIO -import sys -import urlparse -import zlib + from OpenSSL import SSL +from OpenSSL import crypto from pappyproxy import config -from pappyproxy import console from pappyproxy import context from pappyproxy import http -from pappyproxy import mangle -from pappyproxy.util import PappyException -from twisted.enterprise import adbapi +from pappyproxy.util import PappyException, printable_data +from twisted.internet import defer from twisted.internet import reactor, ssl from twisted.internet.protocol import ClientFactory, ServerFactory from twisted.protocols.basic import LineReceiver -from twisted.internet import defer - -from OpenSSL import crypto next_connection_id = 1 @@ -43,7 +31,7 @@ def add_intercepting_macro(key, macro, int_macro_dict): def remove_intercepting_macro(key, int_macro_dict): if not key in int_macro_dict: - raise PappyException('Macro with key %s not loaded' % key) + raise PappyException('Macro with key %s not currently running' % key) del int_macro_dict[key] def log(message, id=None, symbol='*', verbosity_level=1): @@ -94,6 +82,12 @@ class ProxyClient(LineReceiver): line = '' self._response_obj.add_line(line) self.log(line, symbol='r<', verbosity_level=3) + if self.factory.stream_response: + self.log('Returning line back through stream') + self.factory.return_transport.write(line+'\r\n') + else: + self.log('Not streaming, not returning') + self.log(self.factory.stream_response) if self._response_obj.headers_complete: if self._response_obj.complete: self.handle_response_end() @@ -103,9 +97,12 @@ class ProxyClient(LineReceiver): def 
rawDataReceived(self, *args, **kwargs): data = args[0] + self.log('Returning data back through stream') + if self.factory.stream_response: + self.factory.return_transport.write(data) if not self._response_obj.complete: if data: - s = console.printable_data(data) + s = printable_data(data) dlines = s.split('\n') for l in dlines: self.log(l, symbol=' 0: + # Back up data file + if verbose_update: + print 'Backing up data file' + backup = create_backup(filename) if verbose_update: - print "Updating datafaile schema to version %d" % (i+1) - yield schemas[i][1].update(dbpool) + print 'Backed up to %s' % backup + try: + for i in to_run: + # schemas[0] is v1, schemas[1] is v2, etc + if verbose_update: + print "Updating datafaile schema to version %d" % (i+1) + yield schemas[i][1].update(dbpool) + # Delete backup + os.remove(backup) + if verbose_update: + print 'Update successful! Deleted backup' + except Exception as e: + # restore the backup + print 'Update failed, restoring backup' + with open(filename, 'w') as a: + with open(backup, 'r') as b: + copy_file(b, a) + os.remove(backup) + raise e @defer.inlineCallbacks def main(): diff --git a/pappyproxy/templates/intmacro.py b/pappyproxy/templates/intmacro.py index d1d95fb..861ad22 100644 --- a/pappyproxy/templates/intmacro.py +++ b/pappyproxy/templates/intmacro.py @@ -2,9 +2,15 @@ from pappyproxy.session import Session MACRO_NAME = '{{macro_name}}' SHORT_NAME = '{{short_name}}' +runargs = [] + +def init(args): + runargs = args def mangle_request(request): + global runargs return request def mangle_response(request): + global runargs return request.response diff --git a/pappyproxy/templates/macro.py b/pappyproxy/templates/macro.py index 9096c45..32fafe0 100644 --- a/pappyproxy/templates/macro.py +++ b/pappyproxy/templates/macro.py @@ -1,4 +1,4 @@ -from pappyproxy.http import Request, get_request, post_request +from pappyproxy.http import Request, get_request, post_request, request_by_id from pappyproxy.context import set_tag 
from pappyproxy.iter import * diff --git a/pappyproxy/tests/old_test_mangle.py b/pappyproxy/tests/old_test_mangle.py new file mode 100644 index 0000000..b556785 --- /dev/null +++ b/pappyproxy/tests/old_test_mangle.py @@ -0,0 +1,211 @@ +import pytest +import mock +import pappyproxy + +from pappyproxy.mangle import async_mangle_request, async_mangle_response +from pappyproxy.http import Request, Response +from testutil import no_tcp, no_database, func_deleted, mock_deferred, mock_deep_save, fake_saving + +def retf(r): + return False + +@pytest.fixture +def ignore_edit(mocker): + new_edit = mock.MagicMock() + new_edit.return_value = mock_deferred(None) + new_plugin = mock.MagicMock() + new_plugin.return_value = new_edit + mocker.patch('pappyproxy.plugin.plugin_by_name', new=new_plugin) + +@pytest.fixture +def ignore_delete(mocker): + new_os_remove = mock.MagicMock() + mocker.patch('os.remove', new=new_os_remove) + return new_os_remove + +@pytest.fixture(autouse=True) +def no_logging(mocker): + mocker.patch('pappyproxy.proxy.log') + +@pytest.fixture +def req(): + r = Request() + r.start_line = 'GET / HTTP/1.1' + r.host = 'www.ffffff.eeeeee' + r.body = 'AAAA' + return r + +@pytest.fixture +def req_w_rsp(req): + r = Response() + r.start_line = 'HTTP/1.1 200 OK' + r.headers['Test-Header'] = 'ABC123' + r.body = 'AAAA' + req.response = r + return req + +@pytest.fixture +def mock_tempfile(mocker): + new_tfile_obj = mock.MagicMock() + tfile_instance = mock.MagicMock() + new_tfile_obj.return_value.__enter__.return_value = tfile_instance + + tfile_instance.name = 'mockTemporaryFile' + mocker.patch('tempfile.NamedTemporaryFile', new=new_tfile_obj) + + new_open = mock.MagicMock() + fake_file = mock.MagicMock(spec=file) + new_open.return_value.__enter__.return_value = fake_file + mocker.patch('__builtin__.open', new_open) + + return (new_tfile_obj, tfile_instance, new_open, fake_file) + + +######################## +## Test request mangling + +@pytest.inlineCallbacks +def 
test_mangle_request_edit(req, mock_deep_save, mock_tempfile, + ignore_edit, ignore_delete): + tfile_obj, tfile_instance, new_open, fake_file = mock_tempfile + r = req + new_contents = ('GET / HTTP/1.1\r\n' + 'Content-Length: 4\r\n\r\n' + 'BBBB') + fake_file.read.return_value = new_contents + new_req = yield async_mangle_request(r) + assert not mock_deep_save.called + assert tfile_obj.called + assert tfile_instance.write.called + assert tfile_instance.write.call_args == ((r.full_request,),) + assert new_open.called + assert fake_file.read.called + + assert new_req.full_request == new_contents + +@pytest.inlineCallbacks +def test_mangle_request_edit_newlines(req, mock_deep_save, mock_tempfile, + ignore_edit, ignore_delete): + # Intercepting is off, request in scope + tfile_obj, tfile_instance, new_open, fake_file = mock_tempfile + r = req + new_contents = ('GET / HTTP/1.1\r\n' + 'Test-Head: FOOBIE\n' + 'Content-Length: 4\n\r\n' + 'BBBB') + fake_file.read.return_value = new_contents + new_req = yield async_mangle_request(r) + + assert new_req.full_request == ('GET / HTTP/1.1\r\n' + 'Test-Head: FOOBIE\r\n' + 'Content-Length: 4\r\n\r\n' + 'BBBB') + assert new_req.headers['Test-Head'] == 'FOOBIE' + +@pytest.inlineCallbacks +def test_mangle_request_drop(req, mock_deep_save, mock_tempfile, + ignore_edit, ignore_delete): + # Intercepting is off, request in scope + tfile_obj, tfile_instance, new_open, fake_file = mock_tempfile + r = req + new_contents = '' + fake_file.read.return_value = new_contents + new_req = yield async_mangle_request(r) + + assert new_req is None + +@pytest.inlineCallbacks +def test_mangle_request_edit_len(req, mock_deep_save, mock_tempfile, + ignore_edit, ignore_delete): + # Intercepting is off, request in scope + tfile_obj, tfile_instance, new_open, fake_file = mock_tempfile + r = req + new_contents = ('GET / HTTP/1.1\r\n' + 'Test-Head: FOOBIE\n' + 'Content-Length: 4\n\r\n' + 'BBBBAAAA') + fake_file.read.return_value = new_contents + new_req = yield 
async_mangle_request(r) + + assert new_req.full_request == ('GET / HTTP/1.1\r\n' + 'Test-Head: FOOBIE\r\n' + 'Content-Length: 8\r\n\r\n' + 'BBBBAAAA') + + +######################### +## Test response mangling + +@pytest.inlineCallbacks +def test_mangle_response_edit(req_w_rsp, mock_deep_save, mock_tempfile, + ignore_edit, ignore_delete): + # Intercepting is on, edit + tfile_obj, tfile_instance, new_open, fake_file = mock_tempfile + r = req_w_rsp + old_rsp = r.response.full_response + new_contents = ('HTTP/1.1 403 NOTOKIEDOKIE\r\n' + 'Content-Length: 4\r\n' + 'Other-Header: foobles\r\n\r\n' + 'BBBB') + fake_file.read.return_value = new_contents + mangled_rsp = yield async_mangle_response(r) + assert not mock_deep_save.called + assert tfile_obj.called + assert tfile_instance.write.called + assert tfile_instance.write.call_args == ((old_rsp,),) + assert new_open.called + assert fake_file.read.called + + assert mangled_rsp.full_response == new_contents + +@pytest.inlineCallbacks +def test_mangle_response_newlines(req_w_rsp, mock_deep_save, mock_tempfile, + ignore_edit, ignore_delete): + # Intercepting is off, request in scope + tfile_obj, tfile_instance, new_open, fake_file = mock_tempfile + r = req_w_rsp + old_rsp = r.response.full_response + new_contents = ('HTTP/1.1 403 NOTOKIEDOKIE\n' + 'Content-Length: 4\n' + 'Other-Header: foobles\r\n\n' + 'BBBB') + fake_file.read.return_value = new_contents + mangled_rsp = yield async_mangle_response(r) + + assert mangled_rsp.full_response == ('HTTP/1.1 403 NOTOKIEDOKIE\r\n' + 'Content-Length: 4\r\n' + 'Other-Header: foobles\r\n\r\n' + 'BBBB') + assert mangled_rsp.headers['Other-Header'] == 'foobles' + +@pytest.inlineCallbacks +def test_mangle_response_drop(req_w_rsp, mock_deep_save, mock_tempfile, + ignore_edit, ignore_delete): + # Intercepting is off, request in scope + tfile_obj, tfile_instance, new_open, fake_file = mock_tempfile + r = req_w_rsp + old_rsp = r.response.full_response + new_contents = '' + 
fake_file.read.return_value = new_contents + mangled_rsp = yield async_mangle_response(r) + + assert mangled_rsp is None + +@pytest.inlineCallbacks +def test_mangle_response_new_len(req_w_rsp, mock_deep_save, mock_tempfile, + ignore_edit, ignore_delete): + # Intercepting is off, request in scope + tfile_obj, tfile_instance, new_open, fake_file = mock_tempfile + r = req_w_rsp + old_rsp = r.response.full_response + new_contents = ('HTTP/1.1 403 NOTOKIEDOKIE\n' + 'Content-Length: 4\n' + 'Other-Header: foobles\r\n\n' + 'BBBBAAAA') + fake_file.read.return_value = new_contents + mangled_rsp = yield async_mangle_response(r) + + assert mangled_rsp.full_response == ('HTTP/1.1 403 NOTOKIEDOKIE\r\n' + 'Content-Length: 8\r\n' + 'Other-Header: foobles\r\n\r\n' + 'BBBBAAAA') diff --git a/pappyproxy/tests/test_context.py b/pappyproxy/tests/test_context.py index 5dd4bfe..8bb3efb 100644 --- a/pappyproxy/tests/test_context.py +++ b/pappyproxy/tests/test_context.py @@ -11,8 +11,8 @@ def test_filter_reqs(): pass def test_gen_filter_by_all_request(): - f = context.gen_filter_by_all(context.cmp_contains, 'hello') - fn = context.gen_filter_by_all(context.cmp_contains, 'hello', negate=True) + f = context.gen_filter_by_all(['ct', 'hello']) + fn = context.gen_filter_by_all(['nct', 'hello']) # Nowhere r = Request('GET / HTTP/1.1\r\n') @@ -31,7 +31,7 @@ def test_gen_filter_by_all_request(): # Data r = Request('GET / HTTP/1.1\r\n') - r.raw_data = 'hello' + r.body = 'hello' assert f(r) assert not fn(r) @@ -73,8 +73,8 @@ def test_gen_filter_by_all_request(): def test_gen_filter_by_all_response(http_request): - f = context.gen_filter_by_all(context.cmp_contains, 'hello') - fn = context.gen_filter_by_all(context.cmp_contains, 'hello', negate=True) + f = context.gen_filter_by_all(['ct', 'hello']) + fn = context.gen_filter_by_all(['nct', 'hello']) # Nowhere r = Response('HTTP/1.1 200 OK\r\n') @@ -91,7 +91,7 @@ def test_gen_filter_by_all_response(http_request): # Data r = Response('HTTP/1.1 200 
OK\r\n') http_request.response = r - r.raw_data = 'hello' + r.body = 'hello' assert f(http_request) assert not fn(http_request) @@ -138,8 +138,8 @@ def test_gen_filter_by_all_response(http_request): assert fn(http_request) def test_filter_by_host(http_request): - f = context.gen_filter_by_host(context.cmp_contains, 'sexy') - fn = context.gen_filter_by_host(context.cmp_contains, 'sexy', negate=True) + f = context.gen_filter_by_host(['ct', 'sexy']) + fn = context.gen_filter_by_host(['nct', 'sexy']) http_request.headers['Host'] = 'google.com' http_request.headers['MiscHeader'] = 'vim.sexy' @@ -152,55 +152,55 @@ def test_filter_by_host(http_request): assert not fn(http_request) def test_filter_by_body(): - f = context.gen_filter_by_body(context.cmp_contains, 'sexy') - fn = context.gen_filter_by_body(context.cmp_contains, 'sexy', negate=True) + f = context.gen_filter_by_body(['ct', 'sexy']) + fn = context.gen_filter_by_body(['nct', 'sexy']) # Test request bodies r = Request() - r.status_line = 'GET /sexy HTTP/1.1' + r.start_line = 'GET /sexy HTTP/1.1' r.headers['Header'] = 'sexy' - r.raw_data = 'foo' + r.body = 'foo' assert not f(r) assert fn(r) - r.raw_data = 'sexy' + r.body = 'sexy' assert f(r) assert not fn(r) # Test response bodies r = Request() rsp = Response() - rsp.status_line = 'HTTP/1.1 200 OK' + rsp.start_line = 'HTTP/1.1 200 OK' rsp.headers['sexy'] = 'sexy' - r.status_line = 'GET /sexy HTTP/1.1' + r.start_line = 'GET /sexy HTTP/1.1' r.headers['Header'] = 'sexy' r.response = rsp assert not f(r) assert fn(r) - rsp.raw_data = 'sexy' + rsp.body = 'sexy' assert f(r) assert not fn(r) def test_filter_by_response_code(http_request): - f = context.gen_filter_by_response_code(context.cmp_eq, 200) - fn = context.gen_filter_by_response_code(context.cmp_eq, 200, negate=True) + f = context.gen_filter_by_response_code(['eq', '200']) + fn = context.gen_filter_by_response_code(['neq', '200']) r = Response() http_request.response = r - r.status_line = 'HTTP/1.1 404 Not Found' 
+ r.start_line = 'HTTP/1.1 404 Not Found' assert not f(http_request) assert fn(http_request) - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' assert f(http_request) assert not fn(http_request) def test_filter_by_raw_headers_request(): - f1 = context.gen_filter_by_raw_headers(context.cmp_contains, 'Sexy:') - fn1 = context.gen_filter_by_raw_headers(context.cmp_contains, 'Sexy:', negate=True) - f2 = context.gen_filter_by_raw_headers(context.cmp_contains, 'sexy\r\nHeader') - fn2 = context.gen_filter_by_raw_headers(context.cmp_contains, 'sexy\r\nHeader', negate=True) + f1 = context.gen_filter_by_raw_headers(['ct', 'Sexy:']) + fn1 = context.gen_filter_by_raw_headers(['nct', 'Sexy:']) + f2 = context.gen_filter_by_raw_headers(['ct', 'sexy\r\nHeader']) + fn2 = context.gen_filter_by_raw_headers(['nct', 'sexy\r\nHeader']) r = Request('GET / HTTP/1.1\r\n') rsp = Response('HTTP/1.1 200 OK\r\n') @@ -228,10 +228,10 @@ def test_filter_by_raw_headers_request(): assert not fn2(r) def test_filter_by_raw_headers_response(): - f1 = context.gen_filter_by_raw_headers(context.cmp_contains, 'Sexy:') - fn1 = context.gen_filter_by_raw_headers(context.cmp_contains, 'Sexy:', negate=True) - f2 = context.gen_filter_by_raw_headers(context.cmp_contains, 'sexy\r\nHeader') - fn2 = context.gen_filter_by_raw_headers(context.cmp_contains, 'sexy\r\nHeader', negate=True) + f1 = context.gen_filter_by_raw_headers(['ct', 'Sexy:']) + fn1 = context.gen_filter_by_raw_headers(['nct', 'Sexy:']) + f2 = context.gen_filter_by_raw_headers(['ct', 'sexy\r\nHeader']) + fn2 = context.gen_filter_by_raw_headers(['nct', 'sexy\r\nHeader']) r = Request('GET / HTTP/1.1\r\n') rsp = Response('HTTP/1.1 200 OK\r\n') @@ -259,25 +259,24 @@ def test_filter_by_raw_headers_response(): assert not fn2(r) def test_filter_by_path(http_request): - f = context.gen_filter_by_path(context.cmp_contains, 'porn') # find the fun websites - fn = context.gen_filter_by_path(context.cmp_contains, 'porn', negate=True) # find the 
boring websites + f = context.gen_filter_by_path(['ct', 'porn']) # find the fun websites + fn = context.gen_filter_by_path(['nct', 'porn']) # find the boring websites - http_request.status_line = 'GET / HTTP/1.1' + http_request.start_line = 'GET / HTTP/1.1' assert not f(http_request) assert fn(http_request) - http_request.status_line = 'GET /path/to/great/porn HTTP/1.1' + http_request.start_line = 'GET /path/to/great/porn HTTP/1.1' assert f(http_request) assert not fn(http_request) - http_request.status_line = 'GET /path/to/porn/great HTTP/1.1' + http_request.start_line = 'GET /path/to/porn/great HTTP/1.1' assert f(http_request) assert not fn(http_request) def test_gen_filter_by_submitted_cookies(): - f1 = context.gen_filter_by_submitted_cookies(context.cmp_contains, 'Session') - f2 = context.gen_filter_by_submitted_cookies(context.cmp_contains, 'Cookie', - context.cmp_contains, 'CookieVal') + f1 = context.gen_filter_by_submitted_cookies(['ct', 'Session']) + f2 = context.gen_filter_by_submitted_cookies(['ct', 'Cookie', 'nct', 'CookieVal']) r = Request(('GET / HTTP/1.1\r\n' 'Cookie: foo=bar\r\n' '\r\n')) @@ -294,18 +293,17 @@ def test_gen_filter_by_submitted_cookies(): 'Cookie: Session=bar; CookieThing=NoMatch\r\n' '\r\n')) assert f1(r) - assert not f2(r) + assert f2(r) r = Request(('GET / HTTP/1.1\r\n' 'Cookie: Session=bar; CookieThing=CookieValue\r\n' '\r\n')) assert f1(r) - assert f2(r) + assert not f2(r) def test_gen_filter_by_set_cookies(): - f1 = context.gen_filter_by_set_cookies(context.cmp_contains, 'Session') - f2 = context.gen_filter_by_set_cookies(context.cmp_contains, 'Cookie', - context.cmp_contains, 'CookieVal') + f1 = context.gen_filter_by_set_cookies(['ct', 'Session']) + f2 = context.gen_filter_by_set_cookies(['ct', 'Cookie', 'ct', 'CookieVal']) r = Request('GET / HTTP/1.1\r\n\r\n') rsp = Response(('HTTP/1.1 200 OK\r\n' @@ -345,9 +343,8 @@ def test_gen_filter_by_set_cookies(): assert f2(r) def test_filter_by_params_get(): - f1 = 
context.gen_filter_by_params(context.cmp_contains, 'Session') - f2 = context.gen_filter_by_params(context.cmp_contains, 'Cookie', - context.cmp_contains, 'CookieVal') + f1 = context.gen_filter_by_params(['ct', 'Session']) + f2 = context.gen_filter_by_params(['ct', 'Cookie', 'ct', 'CookieVal']) r = Request('GET / HTTP/1.1\r\n\r\n') assert not f1(r) @@ -366,30 +363,29 @@ def test_filter_by_params_get(): assert f2(r) def test_filter_by_params_post(): - f1 = context.gen_filter_by_params(context.cmp_contains, 'Session') - f2 = context.gen_filter_by_params(context.cmp_contains, 'Cookie', - context.cmp_contains, 'CookieVal') + f1 = context.gen_filter_by_params(['ct', 'Session']) + f2 = context.gen_filter_by_params(['ct', 'Cookie', 'ct', 'CookieVal']) r = Request(('GET / HTTP/1.1\r\n' 'Content-Type: application/x-www-form-urlencoded\r\n\r\n')) - r.raw_data = 'foo=bar' + r.body = 'foo=bar' assert not f1(r) assert not f2(r) r = Request(('GET / HTTP/1.1\r\n' 'Content-Type: application/x-www-form-urlencoded\r\n\r\n')) - r.raw_data = 'Session=bar' + r.body = 'Session=bar' assert f1(r) assert not f2(r) r = Request(('GET / HTTP/1.1\r\n' 'Content-Type: application/x-www-form-urlencoded\r\n\r\n')) - r.raw_data = 'Session=bar&Cookie=foo' + r.body = 'Session=bar&Cookie=foo' assert f1(r) assert not f2(r) r = Request(('GET / HTTP/1.1\r\n' 'Content-Type: application/x-www-form-urlencoded\r\n\r\n')) - r.raw_data = 'Session=bar&CookieThing=CookieValue' + r.body = 'Session=bar&CookieThing=CookieValue' assert f1(r) assert f2(r) diff --git a/pappyproxy/tests/test_http.py b/pappyproxy/tests/test_http.py index 0e060f1..5f1fd02 100644 --- a/pappyproxy/tests/test_http.py +++ b/pappyproxy/tests/test_http.py @@ -86,7 +86,7 @@ def test_chunked_simple(): full_data += '0\r\n\r\n' c.add_data(full_data) assert c.complete - assert c.raw_data == 'A'*5 + assert c.body == 'A'*5 def test_chunked_hex(): # Test hex lengths @@ -97,7 +97,7 @@ def test_chunked_hex(): full_data += '0\r\n\r\n' 
c.add_data(full_data) assert c.complete - assert c.raw_data == 'A'*0xAF + assert c.body == 'A'*0xAF c = http.ChunkedData() full_data = 'AF\r\n' @@ -106,7 +106,7 @@ def test_chunked_hex(): full_data += '0\r\n\r\n' c.add_data(full_data) assert c.complete - assert c.raw_data == 'A'*0xAF + assert c.body == 'A'*0xAF c = http.ChunkedData() full_data = 'aF\r\n' @@ -115,7 +115,7 @@ def test_chunked_hex(): full_data += '0\r\n\r\n' c.add_data(full_data) assert c.complete - assert c.raw_data == 'A'*0xAF + assert c.body == 'A'*0xAF def test_chunked_leading_zeros(): # Test leading zeros @@ -126,7 +126,7 @@ def test_chunked_leading_zeros(): full_data += '0\r\n\r\n' c.add_data(full_data) assert c.complete - assert c.raw_data == 'A'*0xAF + assert c.body == 'A'*0xAF def test_chunked_one_char_add(): # Test adding one character at a time @@ -138,7 +138,7 @@ def test_chunked_one_char_add(): for ch in full_data: c.add_data(ch) assert c.complete - assert c.raw_data == 'A'*0xAF + assert c.body == 'A'*0xAF def test_chunked_incomplete(): # Tests that complete isn't true until the data is received @@ -168,11 +168,11 @@ def test_length_data_simple(): assert not l.complete l.add_data('A'*100) assert l.complete - assert l.raw_data == 'A'*100 + assert l.body == 'A'*100 l = http.LengthData(0) assert l.complete - assert l.raw_data == '' + assert l.body == '' # Test incomplete l = http.LengthData(100) @@ -185,7 +185,7 @@ def test_length_one_character(): for i in range(100): l.add_data('A') assert l.complete - assert l.raw_data == 'A'*100 + assert l.body == 'A'*100 # Test adding one character at a time (incomplete) l = http.LengthData(100) @@ -198,7 +198,7 @@ def test_length_overflow(): l = http.LengthData(100) l.add_data('A'*400) assert l.complete - assert l.raw_data == 'A'*100 + assert l.body == 'A'*100 # Test throwing an exception when adding data after complete l = http.LengthData(100) @@ -369,7 +369,80 @@ def test_response_cookie_blank(): assert c.val == '' assert c.path == '/' assert c.secure 
- + +#################### +## HTTPMessage tests + +def test_message_simple(): + raw = ('foobar\r\n' + 'a: b\r\n' + 'Content-Length: 100\r\n\r\n') + raw += 'A'*100 + m = http.HTTPMessage(raw) + assert m.complete + assert m.malformed == False + assert m.start_line == 'foobar' + assert m.body == 'A'*100 + assert m.headers.all_pairs() == [('a', 'b'), ('Content-Length', '100')] + assert m.headers['A'] == 'b' + assert m.headers_section == ('foobar\r\n' + 'a: b\r\n' + 'Content-Length: 100\r\n\r\n') + assert m.full_message == raw + +def test_message_build(): + raw = ('foobar\r\n' + 'a: b\r\n' + 'Content-Length: 100\r\n\r\n') + raw += 'A'*100 + m = http.HTTPMessage() + m.add_line('foobar') + m.add_line('a: b') + m.add_line('Content-Length: 100') + m.add_line('') + assert not m.complete + m.add_data('A'*50) + assert not m.complete + m.add_data('A'*50) + assert m.complete + assert m.malformed == False + assert m.start_line == 'foobar' + assert m.body == 'A'*100 + assert m.headers.all_pairs() == [('a', 'b'), ('Content-Length', '100')] + assert m.headers['A'] == 'b' + assert m.headers_section == ('foobar\r\n' + 'a: b\r\n' + 'Content-Length: 100\r\n\r\n') + assert m.full_message == raw + +def test_message_build_chunked(): + raw = ('foobar\r\n' + 'a: b\r\n' + 'Content-Length: 100\r\n\r\n') + raw += 'A'*100 + m = http.HTTPMessage() + m.add_line('foobar') + m.add_line('a: b') + m.add_line('Transfer-Encoding: chunked') + m.add_line('') + assert not m.complete + m.add_data('%x\r\n' % 50) + m.add_data('A'*50) + m.add_data('\r\n') + m.add_data('%x\r\n' % 50) + m.add_data('A'*50) + m.add_data('\r\n') + m.add_data('0\r\n') + assert m.complete + assert m.malformed == False + assert m.start_line == 'foobar' + assert m.body == 'A'*100 + assert m.headers.all_pairs() == [('a', 'b'), ('Content-Length', '100')] + assert m.headers['A'] == 'b' + assert m.headers_section == ('foobar\r\n' + 'a: b\r\n' + 'Content-Length: 100\r\n\r\n') + assert m.full_message == raw #################### ## Request 
tests @@ -398,7 +471,7 @@ def test_request_simple(): assert r.is_ssl == False assert r.path == '/' assert r.port == 80 - assert r.status_line == 'GET / HTTP/1.1' + assert r.start_line == 'GET / HTTP/1.1' assert r.verb == 'GET' assert r.version == 'HTTP/1.1' assert r.headers['Content-Length'] == '100' @@ -409,7 +482,7 @@ def test_request_simple(): assert r.headers['Host'] == 'www.test.com' assert r.headers['Connection'] == 'Keep-Alive' assert r.headers['Cache-Control'] == 'no-cache' - assert r.raw_data == 'A'*100 + assert r.body == 'A'*100 test(rf) test(rl) test(ru) @@ -536,6 +609,7 @@ def test_request_parse_host(): rf, rl, ru, rj = req_by_lines_and_full(header_lines) def test(r): assert r.complete + assert r.port == 443 assert r.host == 'www.test.com' assert r.is_ssl test(rf) @@ -574,7 +648,7 @@ def test_repeated_request_headers(): def test_request_update_statusline(): r = http.Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' assert r.verb == 'GET' assert r.path == '/' assert r.version == 'HTTP/1.1' @@ -584,7 +658,7 @@ def test_request_update_statusline(): def test_request_update_cookies(): r = http.Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' # Check new cookies r.cookies['foo'] = 'bar' @@ -607,7 +681,7 @@ def test_request_update_cookies(): def test_request_update_headers(): r = http.Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' r.headers['Content-Length'] = '0' r.headers['Test-Header'] = 'Test Value' r.headers['Other-Header'] = 'Other Value' @@ -624,11 +698,11 @@ def test_request_update_headers(): def test_request_modified_headers(): r = http.Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' r.headers['content-length'] = '100' r.headers['cookie'] = 'abc=123' r.cookies['abc'] = '456' - r.raw_data = 'AAAA' + r.body = 'AAAA' assert r.full_request == ('GET / HTTP/1.1\r\n' 'content-length: 4\r\n' 'cookie: abc=456\r\n\r\n' @@ -638,33 +712,34 @@ 
def test_request_modified_headers(): def test_request_update_data(): r = http.Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' r.headers['content-length'] = 500 - r.raw_data = 'AAAA' + r.body = 'AAAA' assert r.full_request == ('GET / HTTP/1.1\r\n' 'content-length: 4\r\n' '\r\n' 'AAAA') def test_request_to_json(): r = http.Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' r.headers['content-length'] = 500 r.tags = ['foo', 'bar'] - r.raw_data = 'AAAA' + r.body = 'AAAA' r.reqid = '1' rsp = http.Response() - rsp.status_line = 'HTTP/1.1 200 OK' + rsp.start_line = 'HTTP/1.1 200 OK' rsp.rspid = '2' r.response = rsp - expected_reqdata = {u'full_request': unicode(base64.b64encode(r.full_request)), + expected_reqdata = {u'full_message': unicode(base64.b64encode(r.full_request)), u'response_id': str(rsp.rspid), u'port': 80, u'is_ssl': False, u'tags': ['foo', 'bar'], u'reqid': str(r.reqid), + u'host': '', } assert json.loads(r.to_json()) == expected_reqdata @@ -764,7 +839,7 @@ def test_request_copy(): def test_request_url_blankpath(): r = http.Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' r.url = 'https://www.google.com' r.headers['Host'] = r.host r.url_params.from_dict({'foo': 'bar'}) @@ -789,10 +864,10 @@ def test_response_simple(): rf, rl, ru, rj = rsp_by_lines_and_full(header_lines, data) def test(r): assert r.complete - assert r.raw_data == data + assert r.body == data assert r.response_code == 200 assert r.response_text == 'OK' - assert r.status_line == 'HTTP/1.1 200 OK' + assert r.start_line == 'HTTP/1.1 200 OK' assert r.version == 'HTTP/1.1' assert r.headers['Date'] == 'Thu, 22 Oct 2015 00:37:17 GMT' @@ -826,7 +901,7 @@ def test_response_chunked(): rf, rl, ru, rj = rsp_by_lines_and_full(header_lines, data) def test(r): assert r.complete - assert r.raw_data == 'A'*0xAF + 'B'*0xBF + assert r.body == 'A'*0xAF + 'B'*0xBF test(rf) test(rl) @@ -851,7 +926,7 @@ def 
test_response_gzip(): rf, rl, ru, rj = rsp_by_lines_and_full(header_lines, data_comp) def test(r): assert r.complete - assert r.raw_data == data_decomp + assert r.body == data_decomp test(rf) test(rl) @@ -876,7 +951,7 @@ def test_response_deflate(): rf, rl, ru, rj = rsp_by_lines_and_full(header_lines, data_comp) def test(r): assert r.complete - assert r.raw_data == data_decomp + assert r.body == data_decomp test(rf) test(rl) @@ -907,7 +982,7 @@ def test_response_chunked_gzip(): rf, rl, ru, rj = rsp_by_lines_and_full(header_lines, data_chunked) def test(r): assert r.complete - assert r.raw_data == data_decomp + assert r.body == data_decomp assert r.headers['Content-Length'] == str(len(data_decomp)) assert r.full_response == ('HTTP/1.1 200 OK\r\n' 'Date: Thu, 22 Oct 2015 00:37:17 GMT\r\n' @@ -924,7 +999,7 @@ def test_response_chunked_gzip(): def test_response_early_completion(): r = http.Response() - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' r.add_line('Content-Length: 0') assert not r.complete r.add_line('') @@ -992,7 +1067,7 @@ def test_repeated_response_headers(): def test_response_update_statusline(): r = http.Response() - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' assert r.version == 'HTTP/1.1' assert r.response_code == 200 assert r.response_text == 'OK' @@ -1002,7 +1077,7 @@ def test_response_update_statusline(): def test_response_update_headers(): r = http.Response() - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' r.headers['Test-Header'] = 'Test Value' r.headers['Other-Header'] = 'Other Value' @@ -1018,9 +1093,9 @@ def test_response_update_headers(): def test_response_update_modified_headers(): r = http.Response() - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' r.headers['content-length'] = '500' - r.raw_data = 'AAAA' + r.body = 'AAAA' assert r.full_response == ('HTTP/1.1 200 OK\r\n' 'content-length: 4\r\n\r\n' 'AAAA') @@ -1028,7 +1103,7 @@ def 
test_response_update_modified_headers(): def test_response_update_cookies(): r = http.Response() - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' # Test by adding headers r.headers['Set-Cookie'] = 'abc=123' assert r.full_response == ('HTTP/1.1 200 OK\r\n' @@ -1042,7 +1117,7 @@ def test_response_update_cookies(): assert r.cookies['abc'].val == '456' r = http.Response() - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' # Test by adding cookie objects c = http.ResponseCookie('abc=123; secure') r.cookies['abc'] = c @@ -1060,10 +1135,10 @@ def test_response_update_content_length(): def test_response_to_json(): rsp = http.Response() - rsp.status_line = 'HTTP/1.1 200 OK' + rsp.start_line = 'HTTP/1.1 200 OK' rsp.rspid = 2 - expected_reqdata = {'full_response': base64.b64encode(rsp.full_response), + expected_reqdata = {'full_message': base64.b64encode(rsp.full_response), 'rspid': rsp.rspid, #'tag': r.tag, } diff --git a/pappyproxy/tests/test_proxy.py b/pappyproxy/tests/test_proxy.py index 512fc02..d29573d 100644 --- a/pappyproxy/tests/test_proxy.py +++ b/pappyproxy/tests/test_proxy.py @@ -6,7 +6,6 @@ import twisted.test from pappyproxy import http from pappyproxy import macros -from pappyproxy import mangle from pappyproxy import config from pappyproxy.proxy import ProxyClient, ProxyClientFactory, ProxyServerFactory from testutil import mock_deferred, func_deleted, func_ignored_deferred, func_ignored, no_tcp @@ -18,7 +17,7 @@ from twisted.internet import defer, reactor ## Fixtures MANGLED_REQ = 'GET /mangled HTTP/1.1\r\n\r\n' -MANGLED_RSP = 'HTTP/1.1 500 MANGLED\r\n\r\n' +MANGLED_RSP = 'HTTP/1.1 500 MANGLED\r\nContent-Length: 0\r\n\r\n' @pytest.fixture def unconnected_proxyserver(mocker): @@ -140,25 +139,25 @@ def gen_mangle_macro(modified_req=None, modified_rsp=None, macro = mock.MagicMock() if modified_req or drop_req: macro.async_req = True - macro.do_req = True + macro.intercept_requests = True if drop_req: newreq = None else: 
newreq = http.Request(modified_req) macro.async_mangle_request.return_value = mock_deferred(newreq) else: - macro.do_req = False + macro.intercept_requests = False if modified_rsp or drop_rsp: macro.async_rsp = True - macro.do_rsp = True + macro.intercept_responses = True if drop_rsp: newrsp = None else: newrsp = http.Response(modified_rsp) macro.async_mangle_response.return_value = mock_deferred(newrsp) else: - macro.do_rsp = False + macro.intercept_responses = False return macro def notouch_mangle_req(request): @@ -255,7 +254,7 @@ def test_proxy_client_mangle_rsp(mocker, proxy_connection, in_scope_true): prot.lineReceived('') req = yield retreq_deferred response = req.response.full_response - assert response == 'HTTP/1.1 500 MANGLED\r\n\r\n' + assert response == 'HTTP/1.1 500 MANGLED\r\nContent-Length: 0\r\n\r\n' @pytest.inlineCallbacks def test_proxy_drop_req(mocker, proxy_connection, in_scope_true): diff --git a/pappyproxy/tests/test_session.py b/pappyproxy/tests/test_session.py index 0b96514..0d6847c 100644 --- a/pappyproxy/tests/test_session.py +++ b/pappyproxy/tests/test_session.py @@ -5,13 +5,13 @@ from pappyproxy.http import Request, Response, ResponseCookie @pytest.fixture def req(): r = Request() - r.status_line = 'GET / HTTP/1.1' + r.start_line = 'GET / HTTP/1.1' return r @pytest.fixture def rsp(): r = Response() - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' return r def test_session_basic(req, rsp): @@ -96,7 +96,7 @@ def test_session_mixed(req, rsp): assert 'auth' not in rsp.headers r = Response() - r.status_line = 'HTTP/1.1 200 OK' + r.start_line = 'HTTP/1.1 200 OK' r.set_cookie(ResponseCookie('state=bazzers')) r.set_cookie(ResponseCookie('session=buzzers')) s.get_rsp(r) diff --git a/pappyproxy/util.py b/pappyproxy/util.py index eff2114..ecdcec1 100644 --- a/pappyproxy/util.py +++ b/pappyproxy/util.py @@ -1,3 +1,24 @@ +import string class PappyException(Exception): + """ + The exception class for Pappy. 
If a plugin command raises one of these, the + message will be printed to the console rather than displaying a traceback. + """ pass + +def printable_data(data): + """ + Return ``data``, but replaces unprintable characters with periods. + + :param data: The data to make printable + :type data: String + :rtype: String + """ + chars = [] + for c in data: + if c in string.printable: + chars += c + else: + chars += '.' + return ''.join(chars) diff --git a/setup.py b/setup.py index 2f453ea..f80cba2 100755 --- a/setup.py +++ b/setup.py @@ -4,12 +4,12 @@ import pkgutil from setuptools import setup, find_packages setup(name='pappyproxy', - version='0.1.1', + version='0.2.0', description='The Pappy Intercepting Proxy', author='Rob Glew', author_email='rglew56@gmail.com', url='https://www.github.com/roglew/pappy-proxy', - packages=['pappyproxy', 'pappyproxy.schema'], + packages=['pappyproxy', 'pappyproxy.schema', 'pappyproxy.plugins'], include_package_data = True, license='MIT', entry_points = {