From f6ebcd271bec161b1b7acbf46b6bd2bbedc7bc89 Mon Sep 17 00:00:00 2001 From: Rob Glew Date: Tue, 17 Nov 2015 19:27:41 -0600 Subject: [PATCH] Initial release --- .gitignore | 11 + LICENSE.txt | 21 + README.md | 255 +++++ pappy-proxy/.coveragerc | 3 + pappy-proxy/Makefile | 9 + pappy-proxy/__init__.py | 0 pappy-proxy/__main__.py | 7 + pappy-proxy/certs/certificate.crt | 22 + pappy-proxy/certs/private.key | 28 + pappy-proxy/comm.py | 106 +++ pappy-proxy/config.py | 51 + pappy-proxy/console.py | 626 +++++++++++++ pappy-proxy/context.py | 490 ++++++++++ pappy-proxy/default_user_config.json | 7 + pappy-proxy/http.py | 1129 +++++++++++++++++++++++ pappy-proxy/mangle.py | 104 +++ pappy-proxy/pappy.py | 76 ++ pappy-proxy/proxy.py | 362 ++++++++ pappy-proxy/repeater.py | 8 + pappy-proxy/schema/__init__.py | 0 pappy-proxy/schema/schema_1.py | 54 ++ pappy-proxy/schema/update.py | 53 ++ pappy-proxy/tests/__init__.py | 0 pappy-proxy/tests/test_context.py | 402 ++++++++ pappy-proxy/tests/test_http.py | 994 ++++++++++++++++++++ pappy-proxy/tests/test_proxy.py | 36 + pappy-proxy/tests/testutil.py | 15 + pappy-proxy/util.py | 3 + pappy-proxy/vim_repeater/.#repeater.vim | 1 + pappy-proxy/vim_repeater/__init__.py | 0 pappy-proxy/vim_repeater/repeater.py | 116 +++ pappy-proxy/vim_repeater/repeater.vim | 17 + setup.py | 22 + 33 files changed, 5028 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE.txt create mode 100644 pappy-proxy/.coveragerc create mode 100644 pappy-proxy/Makefile create mode 100644 pappy-proxy/__init__.py create mode 100644 pappy-proxy/__main__.py create mode 100644 pappy-proxy/certs/certificate.crt create mode 100644 pappy-proxy/certs/private.key create mode 100644 pappy-proxy/comm.py create mode 100644 pappy-proxy/config.py create mode 100644 pappy-proxy/console.py create mode 100644 pappy-proxy/context.py create mode 100644 pappy-proxy/default_user_config.json create mode 100644 pappy-proxy/http.py create mode 100644 pappy-proxy/mangle.py 
create mode 100755 pappy-proxy/pappy.py create mode 100644 pappy-proxy/proxy.py create mode 100644 pappy-proxy/repeater.py create mode 100644 pappy-proxy/schema/__init__.py create mode 100644 pappy-proxy/schema/schema_1.py create mode 100644 pappy-proxy/schema/update.py create mode 100644 pappy-proxy/tests/__init__.py create mode 100644 pappy-proxy/tests/test_context.py create mode 100644 pappy-proxy/tests/test_http.py create mode 100644 pappy-proxy/tests/test_proxy.py create mode 100644 pappy-proxy/tests/testutil.py create mode 100644 pappy-proxy/util.py create mode 120000 pappy-proxy/vim_repeater/.#repeater.vim create mode 100644 pappy-proxy/vim_repeater/__init__.py create mode 100644 pappy-proxy/vim_repeater/repeater.py create mode 100644 pappy-proxy/vim_repeater/repeater.vim create mode 100755 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..52926f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.pyc +certs/* +debug_out/* +data.db +.coverage +.cache +tests/.cache +.DS_Store +TAGS +config.json +build/* \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..49948ff --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Robert Glew + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index e69de29..5247f4f 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,255 @@ +The Pappy Proxy +=============== + +Introduction +------------ +The Pappy (**P**roxy **A**ttack **P**roxy **P**rox**Y**) Proxy is an intercepting proxy for performing web application security testing. Its features are often similar, or straight up rip-offs from [Burp Suite](https://portswigger.net/burp/). However, Burp Suite is neither open source nor a command line tool, thus making a proxy like Pappy inevitable. The project is still in its early stages, so there are bugs and not a ton of features, but it should be ready for the bigtime soon (I'm already trying to use it as a replacement for Burp Suite). + +Contributing +------------ +**I am taking any and all feature requests.** If you've used Burp and had any inconvenience with it, tell me about it and I'll do everything in my power to make sure Pappy doesn't have those issues. Or even better, if you want Burp to do something that it doesn't already, let me know so that I can ~~use it to stomp them into the dust~~ improve my project. + +If you're brave and want to try and contribute code, please let me know. Right now the codebase is a giant clusterfun which I have refactored a few times already, but I would be more than happy to find a stable part of the codebase that you can contribute to. 
+ +How to Use It +============= + +Installation +------------ +Installation requires `pip` or some other command that can handle a `setup.py` with requirements. Once the requirements are installed, you can run the `pappy.py` script to run the proxy. You're on your own to link it somewhere in your PATH. +``` +$ git clone +$ cd pappy-proxy +$ pip install -e . +``` + +Quickstart +---------- +Pappy projects take up an entire directory. While a full directory may seem like a dumb idea compared to storing everything in a zip file, future releases will do fun stuff like generate attack scripts or other files that need to be used by other programs on a regular basis. To start a project, do something like: + +``` +$ mkdir test_project +$ cd test_project +$ /path/to/pappy.py +Copying default config to directory +Updating schema to version 1 +Proxy is listening on port 8000 +itsPappyTime> exit +$ ls +data.db project_config.json project_config.pyc +$ +``` + +And that's it! The proxy will by default be running on port 8000 and bound to localhost (to keep the hackers out). You can modify the port/interface in `config.json`. You can list all your intercepted requests with `ls`, view a full request with `vfq ` or view a full response with `vfs `. No you can't delete them yet. I'm working on it. + +Adding The CA Cert to Your Browser +---------------------------------- +In order for Pappy to view data sent using HTTPS, you need to add a generated CA cert (`certificate.crt`) to your browser. Certificates are generated using the `gencerts` command and are by default stored in the same directory as `pappy.py`. This allows Pappy to act as a CA and MITM HTTPS connections. I believe that Firefox and Chrome ignore keychain/system certs, so you will have to install the CA cert to the browsers instead of (or in addition to) adding the cert to your keychain. 
+ +### Firefox +You can add the CA cert to Firefox by going to `Preferences -> Advanced -> View Certificates -> Authorities -> Import` and selecting the `certificate.crt` file in the `certs` directory. + +### Chrome +You can add the CA cert to Chrome by going to `Settings -> Show advanced settings -> HTTPS/SSL -> Manage Certificates -> Authorities -> Import` and selecting the `certificate.crt` file in the `certs` directory. + +### Safari +For Safari (on macs, obviously), you need to add the CA cert to your system keychain. You can do this by double clicking on the CA cert and following the prompts. + +### Internet Explorer +I didn't search too hard for instructions on this and I don't own a Windows machine to try this, so if you have trouble, hit me up and I'll see if I can help and add real instructions. According to Google you can double-click the cert to install it to the system, or you can do `Tools -> Content -> Certificates -> Trusted Root Certificates -> Import` + +Configuration +------------- +Configuration for each project is done in the `config.json` file. The file is a JSON-formatted dictionary that contains settings for the proxy. The following fields can be used to configure the proxy: + +| Key | Value | +|:--|:--| +| data_file | The file where requests and images will be stored | +| debug_dir (optional) | Where connection debug info should be stored. If not present, debug info is not saved to a file. | +| cert_dir | Where the CA cert and the private key for the CA cert are stored | +| proxy_listeners | A list of dicts which describe which ports the proxy will listen on. Each item is a dict with "port" and "interface" values which determine which port and interface to listen on. For example, if port=8000 and the interface is 127.0.0.1, the proxy will only accept connections from localhost on port 8000. To accept connections from anywhere, set the interface to 0.0.0.0. 
| + +The following tokens will also be replaced with values: + +| Token | Replaced with | +|:--|:--| +| {PAPPYDIR} | The directory where Pappy's files are stored | + +Generating Pappy's CA Cert +-------------------------- +In order to intercept and modify requests to sites that use HTTPS, you have to generate and install CA certs to your browser. You can do this by running the `gencerts` command in Pappy. By default, certs are stored in the same directory as Pappy's script files. However, you can change where Pappy will look for the private key file in the config file. In addition, you can give the `gencerts` command an argument to have it put the generated certs in a different directory. + +| Command | Description | +|:--------|:------------| +| gencerts [/path/to/put/certs/in] | Generate a CA cert that can be added to your browser to let Pappy decrypt HTTPS traffic. Also generates the private key for that cert in the same directory. | + +Browsing Recorded Requests/Responses +------------------------------------ +The following commands can be used to view requests and responses + +| Command | Aliases | Description | +|:--------|:--------|:------------| +| ls [a|]| list, ls |List requests that are in the current context (see Context section). Has information like the host, target path, and status code. With no arguments, it will print the 50 most recent requests in the current context. If you pass 'a' or 'all' as an argument, it will print all the requests in the current context. If you pass a number "n" as an argument, it will print the n most recent requests in the current context. | +| vfq [u] | view_full_request, vfq | [V]iew [F]ull Re[Q]uest, prints the full request including headers and data. If 'u' is given as an additional argument, it will print the unmangled version of the request. | +| vhq [u] | view_request_headers, vhq | [V]iew [H]eaders of a Re[Q]uest. Prints just the headers of a request. 
If 'u' is given as an additional argument, it will print the unmangled version of the request. | +| vfs [u] | view_full_response, vfs |[V]iew [F]ull Re[S]ponse, prints the full response associated with a request including headers and data. If 'u' is given as an additional argument, it will print the unmangled version of the response. | +| vhs [u] | view_response_headers, vhs | [V]iew [H]eaders of a Re[S]ponse. Prints just the headers of a response associated with a request. If 'u' is given as an additional argument, it will print the unmangled version of the response. | + +The table shown will have the following columns: + +| Label | Description | +|:------|:------------| +| ID | The request ID of that request. Used to identify the request for other commands. | +| Method | The method(/http verb) for the request | +| Host | The host that the request was sent to | +| Path | The path of the request | +| S-Code | The status code of the response | +| Req Len | The length of the data submitted | +| Rsp Len | The length of the data returned in the response | +| Time | The time in seconds it took to complete the request | +| Mngl | If the request or response were mangled with the interceptor. If the request was mangled, the column will show 'q'. If the response was mangled, the column will show 's'. If both were mangled, it will show 'q/s'. | + +Context +------- +The context is a set of filters that define which requests are considered "active". Only requests in the current context are displayed with `ls`, and eventually contexts will be how Pappy will manage requests for group operations. By default, the context includes every single request that passes through the proxy. You can limit down the current context by applying filters. Filters apply rules such as "the response code must equal 500" or "the host must contain google.com". Once you apply one or more filters, only requests/responses which pass every active filter will be a part of the current context. 
+ +| Command | Aliases | Description | +|:--------|:------------|:---| +| fl | filter, fl |Add a filter that limits which requests are included in the current context. See the Filter String section for how to create a filter string | +| fc | filter_clear, fc | Clears the filters and resets the context to contain all requests and responses. Ignores scope | +| fls | filter_list, fls | Print the filters that make up the current context | + +Filter Strings +-------------- +Filter strings define a condition that a request/response pair must pass to be part of a context. Most filter strings have the following format: + +``` + +``` + +Where `` is some part of the request/response, `` is some comparison to ``. Also **if you prefix a comparer with 'n' it turns it into a negation.** For example, if you wanted a filter that only matches requests to target.org, you could use the following filter string: + +``` +host is target.org + +field = "host" +comparer = "is" +value = "target.org" +``` + +For fields that are a list of key/value pairs (headers, get params, post params, and cookies) you can use the following format: + +``` + [ ] +``` + +This is a little more complicated. If you don't give comparer2/value2, the filter will pass any pair where the key or the value matches comparer1 and value1. 
If you do give comparer2/value2, the key must match comparer1/value1 and the value must match comparer2/value2 For example: + +``` +Filter A: + cookie contains Session + +Filter B: + cookie contains Session contains 456 + +Filter C: + cookie ncontains Ultra + +Cookie: SuperSession=abc123 +Matches A and C but not B + +Cookie: UltraSession=abc123456 +Matches both A and B but not C +``` + +### List of fields +| Field Name | Aliases | Description | Format | +|:--------|:------------|:-----|:------| +| all | all | The entire request represented as one string | String | +| host | host, domain, hs, dm | The target host (ie www.target.com) | String | +| path | path, pt | The path of the url (ie /path/to/secrets.php) | String | +| body | body, data, bd, dt | The body (data section) of either the request or the response | String | +| verb | verb, vb | The HTTP verb of the request (ie GET, POST) | String | +| param | param, pm | Either the get or post parameters | Key/Value | +| header | header, hd | An HTTP header (ie User-Agent, Basic-Authorization) in the request or response | Key/Value | +| rawheaders | rawheaders, rh | The entire header section (as one string) of either the head or the response | String | +| sentcookie | sentcookie, sck | A cookie sent in a request | Key/Value | +| setcookie | setcookie, stck | A cookie set by a response | Key/Value | +| statuscode | statuscode, sc, responsecode | The response code of the response | Numeric | + +### List of comparers +| Field Name | Aliases | Description | +|:--------|:------------|:-----| +| is | is | Exact string match | +| contains | contains, ct | A contain B is true if B is a substring of A | +| containsr | containsr, ctr | A containr B is true if A matches regexp B (NOT IMPLEMENTED) | +| exists | exists, ex | A exists B if A is not an empty string (likely buggy) | +| Leq | Leq, L= | A Leq B if A's length equals B (B must be a number) | +| Lgt | Lgt, L> | A Lgt B if A's length is greater than B (B must be a number ) 
| +| Llt | Llt, L< | A Llt B if A's length is less than B (B must be a number) | +| eq | eq, = | A eq B if A = B (A and B must be a number) | +| gt | gt, > | A gt B if A > B (A and B must be a number) | +| lt | lt, < | A lt B if A < B (A and B must be a number) | + +Scope +----- +Scope is a set of rules to define whether Pappy should mess with a request. You define the scope by setting the context to what you want the scope to be and running `scope_save`. The scope is saved in data.db and is automatically restored when using the same project directory. + +Any requests which don't match all the filters in the scope will be passed straight to the browser and will not be caught by the interceptor or recorded in the database. This is useful to make sure you don't accidentally do something like log in to your email through the proxy and have your plaintext username/password stored and accidentally shown to your coworkers. + +| Command | Aliases | Description | +|:--------|:--------|:------------| +| scope_save |scope_save| Set the current context to be the scope | +| sr |scope_reset, sr| Set the current context to the scope | +| scope_delete |scope_delete| Clear the scope (everything's in scope!) | +| scope_list |scope_list, sls| List all the filters that are applied to the scope | + + +Interceptor +----------- +This feature is like Burp's proxy with "Intercept Mode" turned on, except it's not turned on unless you explicitly turn it on. When the proxy gets a request while in intercept mode, it lets you edit it with vim before it forwards it to the server. In addition, it can stop responses from the server and let you edit them with vim before they get forwarded to the browser. When you run the command, you can pass `request` and/or `response` as arguments to say whether you would like to intercept requests and/or responses. Only in-scope requests/responses will be intercepted (see Scope section) + +To forward a request, edit it, save the file, then quit. 
+ +| Command | Aliases | Description | +|:--------|:--------|:------------| +| ic + | intercept, ic | Begins interception mode. Press enter to leave interception mode and return to the command prompt. Pass in `request` to intercept requests, `response` to intercept responses, or both to intercept both. | + +``` +Intercept both requests and responses: +> ic requests responses +> ic req rsp + +Intercept just requests: +> ic requests +> ic req + +Intercept just responses: +> ic responses +> ic rsp + +Be totally useless: +> ic +``` + +Repeater +-------- +This feature is like Burp's repeater (yes, really). You choose a request and Pappy will open vim in a split window with your request on the left and the original response on the right. You can make changes to the request and then run ":RepeaterSubmitBuffer" to submit the modified request. The response will be displayed on the right. This command is bound to `f` by default, but you can rebind it in your vimrc (I think, dunno if vim will complain if it's undefined). This command will submit whatever buffer your cursor is in, so make sure it's in the request buffer. + +When you're done with repeater, run ":qa!" to avoid having to save changes to nonexistent files. + +| Command | Aliases | Description | +|:--------|:--------|:------------| +| rp | repeater, rp | Open the specified request in the repeater | + +| Vim Command | Keybinding | Action | +|:--------|:-----------|:-------| +| RepeaterSubmitBuffer | f | Submit the current buffer, split the windows vertically, and show the result in the right window | + +Logging +------- +You can watch in real-time what requests are going through the proxy. Verbosity defaults to 1 which just states when connections are made/lost and some information on what is happening. If verbosity is set to 3, it includes all the data which is sent through the proxy and processed. It will print the raw response from the server, what it decodes it to, etc. 
Even if you don't run this command, all the information is stored in the debug directory (the directory is cleared every start though!) + +| Command | Description | +|:--------|:------------| +| log [verbosity] | View the log at the given verbosity. Default verbosity is 1 which just shows connections being made/lost and some other info, verbosity 3 shows full requests/responses as they pass through and are processed by the proxy | diff --git a/pappy-proxy/.coveragerc b/pappy-proxy/.coveragerc new file mode 100644 index 0000000..da3e126 --- /dev/null +++ b/pappy-proxy/.coveragerc @@ -0,0 +1,3 @@ +[run] +omit = tests/*, schema/* + diff --git a/pappy-proxy/Makefile b/pappy-proxy/Makefile new file mode 100644 index 0000000..1c9c6fc --- /dev/null +++ b/pappy-proxy/Makefile @@ -0,0 +1,9 @@ + +install-third-party: + pip install -r requirements.txt + +test: + py.test -rw --twisted --cov-config .coveragerc --cov=. tests/ + +test-verbose: + py.test -v -rw --twisted --cov-config .coveragerc --cov-report term-missing --cov=. 
tests/ diff --git a/pappy-proxy/__init__.py b/pappy-proxy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pappy-proxy/__main__.py b/pappy-proxy/__main__.py new file mode 100644 index 0000000..7c09aa9 --- /dev/null +++ b/pappy-proxy/__main__.py @@ -0,0 +1,7 @@ +import pappy + +from twisted.internet import reactor + +if __name__ == '__main__': + reactor.callWhenRunning(pappy.main) + reactor.run() diff --git a/pappy-proxy/certs/certificate.crt b/pappy-proxy/certs/certificate.crt new file mode 100644 index 0000000..9e4ef97 --- /dev/null +++ b/pappy-proxy/certs/certificate.crt @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDjzCCAncCFQDmrLdMg37vTWXeF9Zp0WjQmQWF1jANBgkqhkiG9w0BAQsFADBg +MQswCQYDVQQGEwJVUzERMA8GA1UECAwITWljaGlnYW4xEjAQBgNVBAcMCUFubiBB +cmJvcjEUMBIGA1UECgwLUGFwcHkgUHJveHkxFDASBgNVBAMMC1BhcHB5IFByb3h5 +MB4XDTE1MTAyNjE2MDYxMVoXDTI1MTAyMzE2MDYxMVowYDELMAkGA1UEBhMCVVMx +ETAPBgNVBAgMCE1pY2hpZ2FuMRIwEAYDVQQHDAlBbm4gQXJib3IxFDASBgNVBAoM +C1BhcHB5IFByb3h5MRQwEgYDVQQDDAtQYXBweSBQcm94eTCCASIwDQYJKoZIhvcN +AQEBBQADggEPADCCAQoCggEBAPNQo64jLgvKVKNqqLi0cDBfWqp+ZhEDaGdm3Rjl +AFerqmDHyAeCu1GENQAwcmmeXCwMYSbjcMHSrExR+rcQRxvJ8OOp2doP43+T9hd8 +rZt+PPOiBVG0cUrfdsVdbUyGjPmZFtWaiSVG2gUOdO2m7jK5WwIEcW5u6vEfmgco +/JLvtdgGZGIlsZGeQGcJdeZ6LaPKLHxPAkgRQduQTpK5nKiFi0Aqj4AsqddcZ4fo +X3zGsypkt0NVTn4nMZLR9Ml5mwzTltr9BBtSVqMIMwqVkKLkGFdaIFsY5dK3UYUV +vqLGB6ubheULLjmkv9FJLmaHfnLb2jjA17K+y3QKosMVldcCAwEAAaNFMEMwEgYD +VR0TAQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFNo5o+5e +a0sNMlW/75VgGJCv2AcJMA0GCSqGSIb3DQEBCwUAA4IBAQBMbpA8XkEvtpErHsy/ +FCtzQGmn88idU43fFSi0bcsWWc1ekapd7iTramItvZ8OCZD3/oVE4VIwumuJuoVk +OU/Tip0e+haPV5f1JImdsk2f20WJ0lJ5CyrrRcddqgVrcQbB8DwaJSJRXzrSD9Cp +UDfJhIh2zxRolGql29X6QiFukV3CIHn2hF+QYlMrxkoI0e4r6sDtmN4/VccgADdH +pQeVz4z/ZxKBIh7Xol8K6Qr+gXnlkbp3n5WXGHbv4YsK995z9yVZpuLPUHbpnSzr +KVJ5I4joA22uc2tqeKvfp4QsE8fa/nVNRv/LZZeCdg0zrXXpE9RoxNirwEcQwAo1 +x25g +-----END CERTIFICATE----- diff --git a/pappy-proxy/certs/private.key 
b/pappy-proxy/certs/private.key new file mode 100644 index 0000000..f0af364 --- /dev/null +++ b/pappy-proxy/certs/private.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDzUKOuIy4LylSj +aqi4tHAwX1qqfmYRA2hnZt0Y5QBXq6pgx8gHgrtRhDUAMHJpnlwsDGEm43DB0qxM +Ufq3EEcbyfDjqdnaD+N/k/YXfK2bfjzzogVRtHFK33bFXW1Mhoz5mRbVmoklRtoF +DnTtpu4yuVsCBHFuburxH5oHKPyS77XYBmRiJbGRnkBnCXXmei2jyix8TwJIEUHb +kE6SuZyohYtAKo+ALKnXXGeH6F98xrMqZLdDVU5+JzGS0fTJeZsM05ba/QQbUlaj +CDMKlZCi5BhXWiBbGOXSt1GFFb6ixgerm4XlCy45pL/RSS5mh35y29o4wNeyvst0 +CqLDFZXXAgMBAAECggEBAJxlD+ClkjpX4lFsBGk86gPdtrxyJI74/snAD4up3q97 +kzdEEuno+Rhrf1nQyinjdWGGz4ecl+St0rv30cyLdPmCswjTK0mD/voJFByCsmCJ +IwqC8SJUdqHmw0QXSmLu9XyWD1xbSZ4hTZAEe9op+1+1Tq8cRgDy4Kb+ZhYGHVsf +4o1RFGBCtSGLFBC908xZnQlqzGHtCuiBecJiWqoFK+mm3TgEUp4VDPRSPsWDWYnJ +KxciTSE9roBF7VAe5ocTRdn+tj9GVaNaBLqb1XhkU41wZxVMoid0OVgxkmyEdAyR +lL1/zVyQDgJbke4t6dgu4NCAoPWXKZP1zxNa1Ied51kCgYEA+h2X7MO8rYyWHGT7 +EZoPpHSrR3F1MnsRgXnkVt5dSrwAQlLmQmmWnjVtEQM72Eox1Czdz+GjILpvfwNF +fktzDa1GghO5TdDibcchG01qLeqEj0vgvtCP1YFLeCBZJv4yPxpaHWhyUOYPWoXq +Mze7yYbkh2uYORPKgu+N4b4oH90CgYEA+QoWQ+44j2jld4DLvYpW/tf2kvKkmFl5 +43KSVXkDHSnEfO+RFpFQ8rCOKetlMbcuQMakTz++fh3smHWGZ/S1Hm1ZUIRQqCzq +m1dTg8PX6pH9e7/0gebFqQWtGhWQdnSWmGZAEnAnmFq6DrDB0FHvfS+VePC1knEJ +/Aw4l+YFy0MCgYA60YLM1ysj1Q/oFYdFmGldT2KIJpJdELwJKtUb6Kcf0B5vendT +3ujgw8emXJBSSQB22SZAoNtv8ugNgoNxM+UWrk0KggDt39Wf41hRx17U9XW/DSUJ +OprYptNMqK7OkLDYTiYrDEj15WRu8VcmPFEZD3PmtNLTeWgCart+/u0IsQKBgQCG +xSirdl1xbmjPtQmM9zKBE0pC18CvGazWo4gBbU18GMBWhCbWOam+zEEC+np23xTO +xTDiGjLyeSsyjldAJrNlVfPBmPk1KamEi0uMwQ01ye+NaqHdMo/BGmtE9GqLUCi3 +LI576+nhjyelD46zN8QM0RVor4rzRu0KU2rE+RwllQKBgQDZ1j5Uhblxn+WJ1/z3 +xZfP23VJLVCCvBIXaHENCl01/9hSBFqH0K+EUUfeJesWoh7KSdaiHXGRR1XdB1rs +Bmzh4wPgIlcc8CPmJxZ09fM2ggHSZf1baV8lEf64/N3OnENDvUAepzwIe0IhKs1i +pzpCgCGttWxEZJvcug4AOulfQA== +-----END PRIVATE KEY----- diff --git a/pappy-proxy/comm.py b/pappy-proxy/comm.py new file mode 100644 index 0000000..838beca --- /dev/null +++ 
b/pappy-proxy/comm.py @@ -0,0 +1,106 @@ +import base64 +import http +import json + +from twisted.protocols.basic import LineReceiver +from twisted.internet import defer +from util import PappyException + +""" +comm.py +Handles creating a listening server bound to localhost that other processes can +use to interact with the proxy. +""" + +comm_port = 0 +debug = True + +def set_comm_port(port): + global comm_port + comm_port = port + +class CommServer(LineReceiver): + + def __init__(self): + self.delimiter = '\n' + self.action_handlers = { + 'ping': self.action_ping, + 'get_request': self.action_get_request, + 'get_response': self.action_get_response, + 'submit': self.action_submit_request, + } + + def lineReceived(self, line): + if line == '': + return + try: + command_data = json.loads(line) + command = command_data['action'] + valid = False + if command in self.action_handlers: + valid = True + result = {'success': True} + func_defer = self.action_handlers[command](command_data) + func_defer.addCallback(self.action_result_handler, result) + func_defer.addErrback(self.action_error_handler, result) + if not valid: + raise PappyException('%s is an invalid command' % command_data['action']) + except PappyException as e: + return_data = {'success': False, 'message': str(e)} + self.sendLine(json.dumps(return_data)) + + def action_result_handler(self, data, result): + result.update(data) + self.sendLine(json.dumps(result)) + + def action_error_handler(self, error, result): + if debug: + print error.getTraceback() + return_data = {'success': False, 'message': 'Debug mode enabled, traceback on main terminal'} + else: + return_data = {'success': False, 'message': str(error.getErrorMessage())} + result.update(result) + self.sendLine(json.dumps(return_data)) + error.trap(Exception) + return True + + def action_ping(self, data): + return defer.succeed({'ping': 'pong'}) + + @defer.inlineCallbacks + def action_get_request(self, data): + try: + reqid = int(data['reqid']) + except 
KeyError: + raise PappyException("Request with given ID does not exist") + + req = yield http.Request.load_request(reqid) + dat = json.loads(req.to_json()) + defer.returnValue(dat) + + @defer.inlineCallbacks + def action_get_response(self, data): + try: + reqid = int(data['reqid']) + except KeyError: + raise PappyException("Request with given ID does not exist, cannot fetch associated response.") + + req = yield http.Request.load_request(reqid) + rsp = yield http.Response.load_response(req.response.rspid) + dat = json.loads(rsp.to_json()) + defer.returnValue(dat) + + @defer.inlineCallbacks + def action_submit_request(self, data): + try: + req = http.Request(base64.b64decode(data['full_request'])) + except: + raise PappyException("Error parsing request") + req_sub = yield req.submit_self() + yield req_sub.deep_save() + + retdata = {} + retdata['request'] = json.loads(req_sub.to_json()) + if req_sub.response: + retdata['response'] = json.loads(req_sub.response.to_json()) + defer.returnValue(retdata) diff --git a/pappy-proxy/config.py b/pappy-proxy/config.py new file mode 100644 index 0000000..372170b --- /dev/null +++ b/pappy-proxy/config.py @@ -0,0 +1,51 @@ +import imp +import json +import os +import shutil + +# Make sure we have a config file +if not os.path.isfile('./config.json'): + print "Copying default config to directory" + default_config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'default_user_config.json') + shutil.copyfile(default_config_file, './config.json') + +# Load local project config +with open('./config.json', 'r') as f: + proj_config = json.load(f) + +# Substitution dictionary +subs = {} +subs['PAPPYDIR'] = os.path.dirname(os.path.realpath(__file__)) + +# Data file settings +if 'data_file' in proj_config: + DATAFILE = proj_config["data_file"].format(**subs) +else: + DATAFILE = 'data.db' + +# Debug settings +if 'debug_dir' in proj_config: + DEBUG_TO_FILE = True + DEBUG_DIR = proj_config["debug_dir"].format(**subs) +else: + 
DEBUG_DIR = None + DEBUG_TO_FILE = False +DEBUG_VERBOSITY = 0 + +# Cert directory settings +if 'cert_dir' in proj_config: + CERT_DIR = proj_config["cert_dir"].format(**subs) +else: + CERT_DIR = './certs' +SSL_PKEY_FILE = 'private.key' +SSL_CA_FILE = 'certificate.crt' + +# Listener settings +if "proxy_listeners" in proj_config: + LISTENERS = [] + for l in proj_config["proxy_listeners"]: + LISTENERS.append((l['port'], l['interface'])) +else: + LISTENERS = [(8000, '127.0.0.1')] + diff --git a/pappy-proxy/console.py b/pappy-proxy/console.py new file mode 100644 index 0000000..10c69ab --- /dev/null +++ b/pappy-proxy/console.py @@ -0,0 +1,626 @@ +import cmd2 +import config +import context +import crochet +import mangle +import proxy +import repeater +import select +import shlex +import string +import subprocess +import sys +import termios +import time + +import http +from twisted.internet import defer, reactor +from util import PappyException + +""" +console.py + +Functions and classes involved with interacting with console input and output +""" + +# http://www.termsys.demon.co.uk/vtansi.htm#cursor +SAVE_CURSOR = '\x1b[7' +UNSAVE_CURSOR = '\x1b[8' +LINE_UP = '\x1b[1A' +LINE_ERASE = '\x1b[2K' +PRINT_LINE = '\x1b[1i' + +edit_queue = [] + +def print_pappy_errors(func): + def catch(*args, **kwargs): + try: + func(*args, **kwargs) + except PappyException as e: + print str(e) + return catch + +class ProxyCmd(cmd2.Cmd): + + def __init__(self, *args, **kwargs): + self.alerts = [] + self.prompt = 'itsPappyTime> ' + self.debug = True + cmd2.Cmd.__init__(self, *args, **kwargs) + + def add_alert(self, alert): + self.alerts.append(alert) + + def postcmd(self, stop, line): + for l in self.alerts: + print '[!] 
', l + self.alerts = [] + return stop + + def help_view_request_headers(self): + print ("View the headers of the request\n" + "Usage: view_request_headers [u]" + "If 'u' is given as an additional argument, the unmangled version " + "of the request will be displayed.") + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_view_request_headers(self, line): + args = shlex.split(line) + try: + reqid = int(args[0]) + showid = reqid + except: + raise PappyException("Enter a valid number for the request id") + + req = yield http.Request.load_request(reqid) + showreq = req + + show_unmangled = False + if len(args) > 1 and args[1][0].lower() == 'u': + if not req.unmangled: + raise PappyException("Request was not mangled") + show_unmangled = True + showreq = req.unmangled + + print '' + print_requests([showreq]) + if show_unmangled: + print '' + print 'UNMANGLED --------------------' + print '' + view_full_request(showreq, True) + + def help_view_full_request(self): + print ("View the full data of the request\n" + "Usage: view_full_request [u]\n" + "If 'u' is given as an additional argument, the unmangled version " + "of the request will be displayed.") + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_view_full_request(self, line): + args = shlex.split(line) + try: + reqid = int(args[0]) + showid = reqid + except: + raise PappyException("Enter a valid number for the request id") + + req = yield http.Request.load_request(reqid) + showreq = req + + show_unmangled = False + if len(args) > 1 and args[1][0].lower() == 'u': + if not req.unmangled: + raise PappyException("Request was not mangled") + show_unmangled = True + showreq = req.unmangled + + print '' + print_requests([showreq]) + if show_unmangled: + print '' + print 'UNMANGLED --------------------' + print '' + view_full_request(showreq) + + def help_view_response_headers(self): + print ("View the headers of the response\n" + "Usage: 
view_response_headers ") + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_view_response_headers(self, line): + args = shlex.split(line) + try: + reqid = int(args[0]) + showid = reqid + except: + raise PappyException("Enter a valid number for the request id") + + req = yield http.Request.load_request(reqid) + showrsp = req.response + + show_unmangled = False + if len(args) > 1 and args[1][0].lower() == 'u': + if not req.response.unmangled: + raise PappyException("Response was not mangled") + show_unmangled = True + showrsp = req.response.unmangled + + print '' + print_requests([req]) + if show_unmangled: + print '' + print 'UNMANGLED --------------------' + print '' + view_full_response(showrsp, True) + + def help_view_full_response(self): + print ("View the full data of the response associated with a request\n" + "Usage: view_full_response ") + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_view_full_response(self, line): + args = shlex.split(line) + try: + reqid = int(args[0]) + showid = reqid + except: + raise PappyException("Enter a valid number for the request id") + + req = yield http.Request.load_request(reqid) + showrsp = req.response + + show_unmangled = False + if len(args) > 1 and args[1][0].lower() == 'u': + if not req.response.unmangled: + raise PappyException("Response was not mangled") + show_unmangled = True + showrsp = req.response.unmangled + + print '' + print_requests([req]) + if show_unmangled: + print '' + print 'UNMANGLED --------------------' + print '' + view_full_response(showrsp) + + def help_list(self): + print ("List request/response pairs in the current context\n" + "Usage: list") + + @print_pappy_errors + def do_list(self, line): + args = shlex.split(line) + if len(args) > 0: + if args[0][0].lower() == 'a': + print_count = -1 + else: + try: + print_count = int(args[0]) + except: + print "Please enter a valid argument for list" + return + else: + 
print_count = 50 + + context.sort() + if print_count > 0: + to_print = context.active_requests[:] + to_print = sorted(to_print, key=lambda x: x.reqid, reverse=True) + to_print = to_print[:print_count] + print_requests(to_print) + else: + print_requests(context.active_requests) + + def help_filter(self): + print ("Apply a filter to the current context\n" + "Usage: filter \n" + "See README.md for information on filter strings") + + @print_pappy_errors + def do_filter(self, line): + if not line: + raise PappyException("Filter string required") + + filter_to_add = context.Filter(line) + context.add_filter(filter_to_add) + + def help_filter_clear(self): + print ("Reset the context so that it contains no filters (ignores scope)\n" + "Usage: filter_clear") + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_filter_clear(self, line): + context.active_filters = [] + yield context.reload_from_storage() + + def help_filter_list(self): + print ("Print the filters that make up the current context\n" + "Usage: filter_list") + + @print_pappy_errors + def do_filter_list(self, line): + for f in context.active_filters: + print f.filter_string + + + def help_scope_save(self): + print ("Set the scope to be the current context. 
Saved between launches\n" + "Usage: scope_save") + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_scope_save(self, line): + context.save_scope() + yield context.store_scope(http.dbpool) + + def help_scope_reset(self): + print ("Set the context to be the scope (view in-scope items)\n" + "Usage: scope_reset") + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_scope_reset(self, line): + yield context.reset_to_scope() + + def help_scope_delete(self): + print ("Delete the scope so that it contains all request/response pairs\n" + "Usage: scope_delete") + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_scope_delete(self, line): + context.set_scope([]) + yield context.store_scope(http.dbpool) + + def help_scope_list(self): + print ("Print the filters that make up the scope\n" + "Usage: scope_list") + + @print_pappy_errors + def do_scope_list(self, line): + context.print_scope() + + def help_repeater(self): + print ("Open a request in the repeater\n" + "Usage: repeater ") + + @print_pappy_errors + def do_repeater(self, line): + repeater.start_editor(int(line)) + + def help_submit(self): + print "Submit a request again (NOT IMPLEMENTED)" + + @print_pappy_errors + @crochet.wait_for(timeout=5.0) + @defer.inlineCallbacks + def do_submit(self, line): + pass + # reqid = int(line) + # req = yield http.Request.load_request(reqid) + # rsp = yield req.submit() + # print printable_data(rsp.full_response) + + def help_intercept(self): + print ("Intercept requests and/or responses and edit them with vim before passing them along\n" + "Usage: intercept ") + + @print_pappy_errors + def do_intercept(self, line): + global edit_queue + args = shlex.split(line) + intercept_requests = False + intercept_responses = False + + req_names = ('req', 'request', 'requests') + rsp_names = ('rsp', 'response', 'responses') + + if any(a in req_names for a in args): + 
intercept_requests = True + if any(a in rsp_names for a in args): + intercept_responses = True + + if intercept_requests: + print "Intercepting reqeusts" + if intercept_responses: + print "Intercepting responses" + + mangle.set_intercept_requests(intercept_requests) + mangle.set_intercept_responses(intercept_responses) + while 1: + if select.select([sys.stdin,],[],[],0.0)[0]: + break; + else: + if len(edit_queue) > 0: + (to_edit, deferred) = edit_queue.pop(0) + # Edit the file + subprocess.call(['vim', to_edit]) + # Fire the callback + deferred.callback(None) + time.sleep(0.2) + + # Send remaining requests along + while len(edit_queue) > 0: + (fname, deferred) = edit_queue.pop(0) + deferred.callback(None) + + # Flush stdin so that anything we typed doesn't go into the prompt + termios.tcflush(sys.stdin, termios.TCIOFLUSH) + mangle.set_intercept_requests(False) + mangle.set_intercept_responses(False) + + def help_gencerts(self): + print ("Generate CA cert and private CA file\n" + "Usage: gencerts [/path/to/put/certs/in]") + + @print_pappy_errors + def do_gencerts(self, line): + dest_dir = line or config.CERT_DIR + print "This will overwrite any existing certs in %s. Are you sure?" 
% dest_dir + print "(y/N)", + answer = raw_input() + if not answer or answer[0].lower() != 'y': + return False + print "Generating certs to %s" % dest_dir + proxy.generate_ca_certs(dest_dir) + + def help_log(self): + print ("View the log\n" + "Usage: log [verbosity (default is 1)]\n" + "verbosity=1: Show connections as they're made/lost, some additional info\n" + "verbosity=3: Show full requests/responses as they are processed by the proxy") + + @print_pappy_errors + def do_log(self, line): + try: + verbosity = int(line.strip()) + except: + verbosity = 1 + config.DEBUG_VERBOSITY = verbosity + raw_input() + config.DEBUG_VERBOSITY = 0 + + @print_pappy_errors + def do_testerror(self, line): + raise PappyException("Test error") + + @print_pappy_errors + def do_EOF(self): + print "EOF" + return True + + ### ABBREVIATIONS + def help_ls(self): + self.help_list() + + @print_pappy_errors + def do_ls(self, line): + self.onecmd('list %s' % line) + + def help_sr(self): + self.help_scope_reset() + + @print_pappy_errors + def do_sr(self, line): + self.onecmd('scope_reset %s' % line) + + def help_sls(self): + self.help_scope_list() + + @print_pappy_errors + def do_sls(self, line): + self.onecmd('scope_list %s' % line) + + def help_vhq(self): + self.help_view_request_headers() + + @print_pappy_errors + def do_vhq(self, line): + self.onecmd('view_request_headers %s' % line) + + def help_vfq(self): + self.help_view_full_request() + + @print_pappy_errors + def do_vfq(self, line): + self.onecmd('view_full_request %s' % line) + + def help_vhs(self): + self.help_view_response_headers() + + @print_pappy_errors + def do_vhs(self, line): + self.onecmd('view_response_headers %s' % line) + + def help_vfs(self): + self.help_view_full_response() + + @print_pappy_errors + def do_vfs(self, line): + self.onecmd('view_full_response %s' % line) + + def help_fl(self): + self.help_filter() + + @print_pappy_errors + def do_fl(self, line): + self.onecmd('filter %s' % line) + + def help_fls(self): + 
def print_table(coldata, rows):
    """Print rows as an aligned table on stdout.

    coldata: list of dicts describing the columns:
        name:  heading for the column (optional; if every column's name is
               blank, no header row is printed)
        width: (optional) maximum width before truncating; 0 for unlimited
    rows: list of tuples/lists holding the cell data.

    Bug fix: the truncation slice previously ran inside a pointless
    3-iteration loop (``for i in range(len(printstr)-4, len(printstr)-1)``)
    that reapplied the same idempotent transform; it is applied once now,
    producing byte-identical output.
    """
    headers = [col.get('name', '') for col in coldata]
    # Only prepend a header row when at least one column actually has a name.
    if any(h != '' for h in headers):
        rows = [headers] + rows

    # Display width per column: the widest cell, clamped to the column's
    # optional 'width' limit.
    widths = []
    for i, col in enumerate(coldata):
        if 'width' in col and col['width'] > 0:
            maxwidth = col['width']
        else:
            maxwidth = 0
        colwidth = max(len(str(row[i])) for row in rows) if rows else 0
        if maxwidth > 0 and colwidth > maxwidth:
            widths.append(maxwidth)
        else:
            widths.append(colwidth)

    # Print rows
    padding = 2
    for row in rows:
        for (cell, width) in zip(row, widths):
            printstr = str(cell)
            if len(printstr) > width:
                # Truncate to the column width and mark with an ellipsis.
                printstr = printstr[:width][:-3] + '...'
            sys.stdout.write(printstr)
            sys.stdout.write(' ' * (width - len(printstr)))
            sys.stdout.write(' ' * padding)
        sys.stdout.write('\n')
    sys.stdout.flush()
def printable_data(data):
    """Return *data* with every non-printable character replaced by '.'.

    Characters in ``string.printable`` (which includes whitespace such as
    '\\n' and '\\t') are kept as-is; everything else becomes a dot.
    """
    return ''.join(c if c in string.printable else '.' for c in data)
+from util import PappyException +import http +import shlex + + +""" +context.py + +Functions and classes involved with managing the current context and filters +""" + +scope = [] +base_filters = [] +active_filters = [] +active_requests = [] + +class FilterParseError(PappyException): + pass + +class Filter(object): + + def __init__(self, filter_string): + self.filter_func = self.from_filter_string(filter_string) + self.filter_string = filter_string + + def __call__(self, *args, **kwargs): + return self.filter_func(*args, **kwargs) + + @staticmethod + def from_filter_string(filter_string): + args = shlex.split(filter_string) + field = args[0] + relation = args[1] + new_filter = None + + negate = False + if relation[0] == 'n' and len(relation) > 1: + negate = True + relation = relation[1:] + + # Raises exception if invalid + comparer = get_relation(relation) + + if field in ("all",): + new_filter = gen_filter_by_all(comparer, args[2], negate) + elif field in ("host", "domain", "hs", "dm"): + new_filter = gen_filter_by_host(comparer, args[2], negate) + elif field in ("path", "pt"): + new_filter = gen_filter_by_path(comparer, args[2], negate) + elif field in ("body", "bd", "data", "dt"): + new_filter = gen_filter_by_body(comparer, args[2], negate) + elif field in ("verb", "vb"): + new_filter = gen_filter_by_verb(comparer, args[2], negate) + elif field in ("param", "pm"): + if len(args) > 4: + comparer2 = get_relation(args[3]) + new_filter = gen_filter_by_params(comparer, args[2], + comparer2, args[4], negate) + else: + new_filter = gen_filter_by_params(comparer, args[2], + negate=negate) + elif field in ("header", "hd"): + if len(args) > 4: + comparer2 = get_relation(args[3]) + new_filter = gen_filter_by_headers(comparer, args[2], + comparer2, args[4], negate) + else: + new_filter = gen_filter_by_headers(comparer, args[2], + negate=negate) + elif field in ("rawheaders", "rh"): + new_filter = gen_filter_by_raw_headers(comparer, args[2], negate) + elif field in 
def filter_reqs(requests, filters):
    """Return the requests that pass every filter.

    Each filter is a callable taking a request and returning truthiness.
    Order is preserved and a new list is returned; the input list is not
    modified.

    Improvement: a single pass with ``all()`` replaces the original
    rebuild-per-filter loop, whose ``r not in to_delete`` list-membership
    test made it accidentally O(filters * len(requests)**2).
    """
    return [req for req in requests if all(filt(req) for filt in filters)]
def gen_filter_by_host(comparer, val, negate=False):
    """Build a filter matching a request by its ``host`` attribute.

    ``comparer(req.host, val)`` decides the match; ``negate`` inverts it.
    """
    def host_filter(req):
        matched = comparer(req.host, val)
        return (not matched) if negate else matched

    return host_filter
def gen_filter_by_get_params(keycomparer, keyval, valcomparer=None, valval=None,
                             negate=False):
    """Build a filter matching requests by their GET parameters.

    Matches when some parameter key satisfies ``keycomparer``/``keyval``;
    if ``valcomparer`` is given, that parameter's value must additionally
    satisfy ``valcomparer``/``valval``. ``negate`` inverts the result.
    """
    def param_filter(req):
        matched = any(
            valcomparer is None or valcomparer(v, valval)
            for k, v in req.get_params.all_pairs()
            if keycomparer(k, keyval)
        )
        return (not matched) if negate else matched

    return param_filter
def get_relation(s):
    """Map a relation name from a filter string to its comparison function.

    Raises FilterParseError for unknown relations. ``containsr``/``ctr``
    (regex contains) is recognized but not implemented yet and yields
    None, matching the existing TODO behavior.
    """
    lookup = {
        'is': cmp_is,
        'contains': cmp_contains, 'ct': cmp_contains,
        'containsr': None, 'ctr': None,  # TODO: regex contains
        'exists': cmp_exists, 'ex': cmp_exists,
        'Leq': cmp_len_eq, 'L=': cmp_len_eq,
        'Lgt': cmp_len_gt, 'L>': cmp_len_gt,
        'Llt': cmp_len_lt, 'L<': cmp_len_lt,
        'eq': cmp_eq, '=': cmp_eq,
        'gt': cmp_gt, '>': cmp_gt,
        'lt': cmp_lt, '<': cmp_lt,
    }
    if s in lookup:
        return lookup[s]
    raise FilterParseError("Invalid relation: %s" % s)
def passes_filters(request, filters):
    """Return True iff *request* satisfies every filter in *filters*.

    An empty filter list passes everything. Uses the idiomatic ``all()``
    short-circuit instead of a manual loop.
    """
    return all(filt(request) for filt in filters)
def repeatable_parse_qs(s):
    """Parse a urlencoded query string into a RepeatableDict.

    Duplicate keys and their order are preserved; a bare token with no
    '=' is stored with a value of None. No percent-decoding is done.
    """
    result = RepeatableDict()
    for token in s.split('&'):
        key, sep, value = token.partition('=')
        result.append(key, value if sep else None)
    return result
+ self._pairs = [] + self._keys = set() + self._modify_callback = None + self.case_insensitive = case_insensitive + + if from_pairs: + for k, v in from_pairs: + self.append(k, v) + + def _ef_key(self, key): + # "effective key", returns key.lower() if we're case insensitive, + # otherwise it returns the same key + if self.case_insensitive: + return key.lower() + return key + + def _mod_callback(self): + # Calls the modify callback if we have one + if self._modify_callback: + self._modify_callback() + + def __contains__(self, val): + return self._ef_key(val) in self._keys + + def __getitem__(self, key): + for p in reversed(self._pairs): + if self._ef_key(p[0]) == self._ef_key(key): + return p[1] + raise KeyError + + def __setitem__(self, key, val): + # Replaces first instance of `key` and deletes the rest + self.set_val(key, val) + + def __delitem__(self, key): + self._keys.remove(key) + self._pairs = [p for p in self._pairs if self._ef_key(p[0]) != self._ef_key(key)] + self._mod_callback() + + def __nonzero__(self): + if self._pairs: + return True + else: + return False + + def _add_key(self, key): + self._keys.add(self._ef_key(key)) + + def _remove_key(self, key): + self._keys.remove(self._ef_key(key)) + + def all_pairs(self): + return self._pairs[:] + + def append(self, key, val, do_callback=True): + # Add a duplicate entry for key + self._add_key(key) + self._pairs.append((key, val)) + if do_callback: + self._mod_callback() + + def set_val(self, key, val, do_callback=True): + new_pairs = [] + added = False + self._add_key(key) + for p in self._pairs: + if self._ef_key(p[0]) == self._ef_key(key): + if not added: + # only add the first instance + new_pairs.append((key, val)) + added = True + else: + new_pairs.append(p) + if not added: + new_pairs.append((key, val)) + self._pairs = new_pairs + + if do_callback: + self._mod_callback() + + def update(self, key, val, do_callback=True): + # If key is already in the dict, replace that value with the new value + if key in 
self: + for k, v in self.all_pairs(): + if self._ef_key(k) == self._ef_key(key): + self.set_val(k, val, do_callback=do_callback) + break + else: + self.set_val(key, val, do_callback=do_callback) + + def clear(self, do_callback=True): + self._pairs = [] + if do_callback: + self._mod_callback() + + def all_vals(self, key): + return [p[1] for p in self._pairs if self._ef_key(p[0]) == self._ef_key(key)] + + def add_pairs(self, pairs, do_callback=True): + for pair in pairs: + self._add_key(pair[0]) + self._pairs += pairs + if do_callback: + self._mod_callback() + + def sort(self): + # Sorts pairs by key alphabetaclly + pairs = sorted(pairs, key=lambda x: x[0]) + + def set_modify_callback(self, callback): + # Add a function to be called whenever an element is added, changed, or + # deleted. Set to None to remove + self._modify_callback = callback + + +class LengthData: + def __init__(self, length=None): + self.raw_data = '' + self.complete = False + self.length = length or 0 + + if self.length == 0: + self.complete = True + + def add_data(self, data): + if self.complete: + raise DataAlreadyComplete() + remaining_length = self.length-len(self.raw_data) + if len(data) >= remaining_length: + self.raw_data += data[:remaining_length] + assert(len(self.raw_data) == self.length) + self.complete = True + else: + self.raw_data += data + +class ChunkedData: + + def __init__(self): + self.raw_data = '' + self._pos = 0 + self._state = 0 # 0=reading length, 1=reading data, 2=going over known string + self._len_str = '' + self._chunk_remaining = 0 + self._known_str = '' + self._known_str_pos = 0 + self._next_state = 0 + self._raw_data = '' + self.complete = False + self.unchunked_data = '' + + def add_data(self, data): + self._raw_data += data + self.scan_forward() + + def scan_forward(self): + # Don't add more data if we're already done + if self.complete: + return + + while self._pos < len(self._raw_data): + curchar = self._raw_data[self._pos] + if self._state == 0: + if 
curchar.lower() in '0123456789abcdef': + # Read the next char of the length + self._len_str += curchar + + # Move to the next char + self._pos += 1 + elif curchar == '\r': + # Save how much chunk to read + self._chunk_remaining = int(self._len_str, 16) + + # If the length is 0, chunked encoding is done! + if self._chunk_remaining == 0: + self.complete = True + # I should probably just rename raw_data since it's what + # you use to look at unchunked data, but you're not + # supposed to look at it until after it's complete + # anyways + self._raw_data = self.unchunked_data + self.raw_data = self._raw_data # Expose raw_data + return + + # There should be a newline after the \r + self._known_str = '\n' + self._state = 2 + self._next_state = 1 + + # Reset the length str + self._len_str = '' + + # Move to the next char + self._pos += 1 + else: + raise Exception("Malformed chunked encoding!") + + elif self._state == 1: + if self._chunk_remaining > 0: + # Read next byte of data + self.unchunked_data += curchar + self._chunk_remaining -= 1 + self._pos += 1 + else: + # Read newline then read a new chunk + self._known_str = '\r\n' + self._next_state = 0 # Read len after newlines + self._state = 2 # Read newlines + # Don't move to the next char because we didn't do anything + elif self._state == 2: + # Read a char of an expected string + + # If the expected char doesn't match, throw an error + if self._known_str[self._known_str_pos] != curchar: + raise Exception("Unexpected data") + + # Move to the next char in the raw data and in our known string + self._known_str_pos += 1 + self._pos += 1 + + # If we've reached the end of the known string, go to the next state + if self._known_str_pos == len(self._known_str): + self._known_str_pos = 0 + self._state = self._next_state + +class ResponseCookie(object): + + def __init__(self, set_cookie_string=None): + self.key = None + self.val = None + self.expires = None + self.max_age = None + self.domain = None + self.path = None + 
self.secure = False + self.http_only = False + + if set_cookie_string: + self.from_cookie(set_cookie_string) + + @property + def cookie_av(self): + av = '%s=%s' % (self.key, self.val) + to_add = [av] + if self.expires: + to_add.append('expires=%s'%self.expires) + if self.max_age: + to_add.append('Max-Age=%d'%self.max_age) + if self.domain: + to_add.append('Domain=%s'%self.domain) + if self.path: + to_add.append('Path=%s'%self.path) + if self.secure: + to_add.append('secure') + if self.http_only: + to_add.append('httponly') + return '; '.join(to_add) + + def parse_cookie_av(self, cookie_av): + if '=' in cookie_av: + key, val = cookie_av.split('=', 1) + key = key.lstrip() + if key.lower() == 'expires': + self.expires = val + if key.lower() == 'max-age': + self.max_age = int(val) + if key.lower() == 'domain': + self.domain = val + if key.lower() == 'path': + self.path = val + elif cookie_av.lstrip().lower() == 'secure': + self.secure = True + elif cookie_av.lstrip().lower() == 'httponly': + self.http_only = True + + def from_cookie(self, set_cookie_string): + if ';' in set_cookie_string: + cookie_pair, rest = set_cookie_string.split(';', 1) + self.key, self.val = cookie_pair.split('=',1) + cookie_avs = rest.split(';') + for cookie_av in cookie_avs: + cookie_av.lstrip() + self.parse_cookie_av(cookie_av) + else: + self.key, self.val = set_cookie_string.split('=',1) + + +class Request(object): + + def __init__(self, full_request=None, update_content_length=False): + self.time_end = None + self.time_start = None + self.complete = False + self.cookies = RepeatableDict() + self.fragment = None + self.get_params = RepeatableDict() + self.header_len = 0 + self.headers = RepeatableDict(case_insensitive=True) + self.headers_complete = False + self.host = None + self.is_ssl = False + self.path = '' + self.port = None + self.post_params = RepeatableDict() + self._raw_data = '' + self.reqid = None + self.response = None + self.submitted = False + self.unmangled = None + self.verb 
= '' + self.version = '' + + self._first_line = True + #self._connect_response = False + #self._encoding_type = ENCODE_NONE + self._data_length = 0 + self._partial_data = '' + + self.set_dict_callbacks() + + # Get values from the raw request + if full_request is not None: + self.from_full_request(full_request, update_content_length) + + @property + def rsptime(self): + if self.time_start and self.time_end: + return self.time_end-self.time_start + else: + return None + + @property + def status_line(self): + path = self.path + if self.get_params: + path += '?' + pairs = [] + for pair in self.get_params.all_pairs(): + if pair[1] is None: + pairs.append(pair[0]) + else: + pairs.append('='.join(pair)) + path += '&'.join(pairs) + if self.fragment: + path += '#' + path += self.fragment + return '%s %s %s' % (self.verb, path, self.version) + + @status_line.setter + def status_line(self, val): + self.handle_statusline(val) + + @property + def raw_headers(self): + ret = self.status_line + '\r\n' + for k, v in self.headers.all_pairs(): + ret = ret + "%s: %s\r\n" % (k, v) + ret = ret + '\r\n' + return ret + + @property + def full_request(self): + ret = self.raw_headers + ret = ret + self.raw_data + return ret + + @property + def raw_data(self): + return self._raw_data + + @raw_data.setter + def raw_data(self, val): + self._raw_data = val + self.update_from_data() + self.complete = True + + def set_dict_callbacks(self): + # Add callbacks to dicts + self.headers.set_modify_callback(self.update_from_text) + self.cookies.set_modify_callback(self.update_from_objects) + self.post_params.set_modify_callback(self.update_from_data) + + def from_full_request(self, full_request, update_content_length=False): + # Get rid of leading CRLF. Not in spec, should remove eventually + # technically doesn't treat \r\n same as \n, but whatever. 
+ while full_request[0:2] == '\r\n': + full_request = full_request[2:] + + # We do redundant splits, but whatever + lines = full_request.splitlines() + for line in lines: + if self.headers_complete: + break + self.add_line(line) + + if not self.headers_complete: + self.add_line('') + + if not self.complete: + data = full_request[self.header_len:] + if update_content_length: + self.raw_data = data + else: + self.add_data(data) + assert(self.complete) + + def update_from_data(self): + # Updates metadata that's based off of data + self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False) + if 'content-type' in self.headers: + if self.headers['content-type'] == 'application/x-www-form-urlencoded': + self.post_params = repeatable_parse_qs(self.raw_data) + self.set_dict_callbacks() + + def update_from_objects(self): + # Updates text values that depend on objects. + # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION + if self.cookies: + assignments = [] + for ck, cv in self.cookies.all_pairs(): + asn = '%s=%s' % (ck, cv) + assignments.append(asn) + header_val = '; '.join(assignments) + self.headers.update('Cookie', header_val, do_callback=False) + if self.post_params: + pairs = [] + for k, v in self.post_params: + pairs.append('%s=%s' % (k, v)) + self.raw_data = '&'.join(pairs) + + def update_from_text(self): + # Updates metadata that depends on header/status line values + self.cookies = RepeatableDict() + self.set_dict_callbacks() + for k, v in self.headers.all_pairs(): + self.handle_header(k, v) + + def add_data(self, data): + # Add data (headers must be complete) + len_remaining = self._data_length - len(self._partial_data) + if len(data) >= len_remaining: + self._partial_data += data[:len_remaining] + self._raw_data = self._partial_data + self.complete = True + self.handle_data_end() + else: + self._partial_data += data + + def _process_host(self, hostline, overwrite=False): + # Only overwrite if told to since we may set it during 
the CONNECT request and we don't want to + # overwrite that + # Get address and port + if ':' in hostline: + self.host, self.port = hostline.split(':') + self.port = int(self.port) + if self.port == 443: + self.is_ssl = True + else: + self.host = hostline + if not self.port or overwrite: # could be changed by connect request + self.port = 80 + self.host.strip() + + def add_line(self, line): + # Add a line (for status line and headers) + # Modifies first line if it is in full url form + + if self._first_line and line == '': + # Ignore leading newlines because fuck the spec + return + + if self._first_line: + self.handle_statusline(line) + self._first_line = False + else: + # Either header or newline (end of headers) + if line == '': + self.headers_complete = True + if self._data_length == 0: + self.complete = True + else: + key, val = line.split(':', 1) + val = val.strip() + if self.handle_header(key, val): + self.headers.append(key, val, do_callback=False) + self.header_len += len(line)+2 + + def handle_statusline(self, status_line): + parts = status_line.split() + uri = None + if len(parts) == 3: + self.verb, uri, self.version = parts + elif len(parts) == 2: + self.verb, self.version = parts + else: + raise Exception("Unexpected format of first line of request") + + # Get path using urlparse + if uri is not None: + if not re.match('(?:^.+)://', uri): + uri = '//' + uri + parsed_path = urlparse.urlparse(uri) + netloc = parsed_path.netloc + self._process_host(netloc) + + # Check for https + if re.match('^https://', uri) or self.port == 443: + self.is_ssl = True + self.port = 443 + + reqpath = parsed_path.path + self.path = parsed_path.path + if parsed_path.query: + reqpath += '?' 
+ reqpath += parsed_path.query + self.get_params = repeatable_parse_qs(parsed_path.query) + if parsed_path.fragment: + reqpath += '#' + reqpath += parsed_path.fragment + self.fragment = parsed_path.fragment + + def handle_header(self, key, val): + # We may have duplicate headers + stripped = False + + if key.lower() == 'content-length': + self._data_length = int(val) + elif key.lower() == 'cookie': + # We still want the raw key/val for the cookies header + # because it's still a header + cookie_strs = val.split('; ') + + # The only whitespace that matters is the space right after the + # semicolon. If actual implementations mess this up, we could + # probably strip whitespace around the key/value + for cookie_str in cookie_strs: + splitted = cookie_str.split('=',1) + assert(len(splitted) == 2) + (cookie_key, cookie_val) = splitted + # we want to parse duplicate cookies + self.cookies.append(cookie_key, cookie_val, do_callback=False) + elif key.lower() == 'host': + self._process_host(val) + elif key.lower() == 'connection': + #stripped = True + pass + + return (not stripped) + + def handle_data_end(self): + if 'content-type' in self.headers: + if self.headers['content-type'] == 'application/x-www-form-urlencoded': + self.post_params = repeatable_parse_qs(self.raw_data) + self.set_dict_callbacks() + + @defer.inlineCallbacks + def save(self): + assert(dbpool) + if self.reqid: + # If we have reqid, we're updating + yield dbpool.runInteraction(self._update) + assert(self.reqid is not None) + else: + yield dbpool.runInteraction(self._insert) + assert(self.reqid is not None) + + @defer.inlineCallbacks + def deep_save(self): + "Saves self, unmangled, response, and unmangled response" + if self.response: + if self.response.unmangled: + yield self.response.unmangled.save() + yield self.response.save() + if self.unmangled: + yield self.unmangled.save() + yield self.save() + + def _update(self, txn): + # If we don't have an reqid, we're creating a new reuqest row + setnames = 
["full_request=?"] + queryargs = [self.full_request] + if self.response: + setnames.append('response_id=?') + assert(self.response.rspid is not None) # should be saved first + queryargs.append(self.response.rspid) + if self.unmangled: + setnames.append('unmangled_id=?') + assert(self.unmangled.reqid is not None) # should be saved first + queryargs.append(self.unmangled.reqid) + if self.time_start: + setnames.append('start_datetime=?') + queryargs.append(self.time_start.isoformat()) + if self.time_end: + setnames.append('end_datetime=?') + queryargs.append(self.time_end.isoformat()) + + setnames.append('submitted=?') + if self.submitted: + queryargs.append('1') + else: + queryargs.append('0') + + queryargs.append(self.reqid) + txn.execute( + """ + UPDATE requests SET %s WHERE id=?; + """ % ','.join(setnames), + tuple(queryargs) + ) + + def _insert(self, txn): + # If we don't have an reqid, we're creating a new reuqest row + colnames = ["full_request"] + colvals = [self.full_request] + if self.response: + colnames.append('response_id') + assert(self.response.rspid is not None) # should be saved first + colvals.append(self.response.rspid) + if self.unmangled: + colnames.append('unmangled_id') + assert(self.unmangled.reqid is not None) # should be saved first + colvals.append(self.unmangled.reqid) + if self.time_start: + colnames.append('start_datetime') + colvals.append(self.time_start.isoformat()) + if self.time_end: + colnames.append('end_datetime') + colvals.append(self.time_end.isoformat()) + colnames.append('submitted') + if self.submitted: + colvals.append('1') + else: + colvals.append('0') + + txn.execute( + """ + INSERT INTO requests (%s) VALUES (%s); + """ % (','.join(colnames), ','.join(['?']*len(colvals))), + tuple(colvals) + ) + self.reqid = txn.lastrowid + assert txn.lastrowid is not None + assert self.reqid is not None + + def to_json(self): + # We base64 encode the full response because json doesn't paly nice with + # binary blobs + data = { + 
'full_request': base64.b64encode(self.full_request), + 'reqid': self.reqid, + } + if self.response: + data['response_id'] = self.response.rspid + else: + data['response_id'] = None + + if self.unmangled: + data['unmangled_id'] = self.unmangled.reqid + + if self.time_start: + data['start'] = self.time_start.isoformat() + if self.time_end: + data['end'] = self.time_end.isoformat() + + return json.dumps(data) + + def from_json(self, json_string): + data = json.loads(json_string) + self.from_full_request(base64.b64decode(data['full_request'])) + self.update_from_text() + self.update_from_data() + if data['reqid']: + self.reqid = int(data['reqid']) + + def delete(self): + assert(self.reqid is not None) + row = yield dbpool.runQuery( + """ + DELETE FROM requests WHERE id=?; + """, + (self.reqid,) + ) + + def duplicate(self): + return Request(self.full_request) + + @staticmethod + @defer.inlineCallbacks + def submit(host, port, is_ssl, full_request): + new_obj = Request(full_request) + factory = proxy.ProxyClientFactory(new_obj) + factory.connection_id = proxy.get_next_connection_id() + if is_ssl: + reactor.connectSSL(host, port, factory, proxy.ClientTLSContext()) + else: + reactor.connectTCP(host, port, factory) + new_req = yield factory.data_defer + defer.returnValue(new_req) + + def submit_self(self): + new_req = Request.submit(self.host, self.port, self.is_ssl, + self.full_request) + return new_req + + @staticmethod + @defer.inlineCallbacks + def load_request(reqid): + assert(dbpool) + rows = yield dbpool.runQuery( + """ + SELECT full_request, response_id, id, unmangled_id, start_datetime, end_datetime + FROM requests + WHERE id=?; + """, + (reqid,) + ) + if len(rows) != 1: + raise PappyException("Request with id %d does not exist" % reqid) + full_request = rows[0][0] + req = Request(full_request) + if rows[0][1]: + rsp = yield Response.load_response(int(rows[0][1])) + req.response = rsp + if rows[0][3]: + unmangled_req = yield Request.load_request(int(rows[0][3])) + 
req.unmangled = unmangled_req + if rows[0][4]: + req.time_start = datetime.datetime.strptime(rows[0][4], "%Y-%m-%dT%H:%M:%S.%f") + if rows[0][5]: + req.time_end = datetime.datetime.strptime(rows[0][5], "%Y-%m-%dT%H:%M:%S.%f") + req.reqid = int(rows[0][2]) + defer.returnValue(req) + + @staticmethod + @defer.inlineCallbacks + def load_from_filters(filters): + # Not efficient in any way + # But it stays this way until we hit performance issues + assert(dbpool) + rows = yield dbpool.runQuery( + """ + SELECT r1.id FROM requests r1 + LEFT JOIN requests r2 ON r1.id=r2.unmangled_id + WHERE r2.id is NULL; + """, + ) + reqs = [] + for r in rows: + newreq = yield Request.load_request(int(r[0])) + reqs.append(newreq) + + reqs = context.filter_reqs(reqs, filters) + + defer.returnValue(reqs) + + + +class Response(object): + + def __init__(self, full_response=None, update_content_length=False): + self.complete = False + self.cookies = RepeatableDict() + self.header_len = 0 + self.headers = RepeatableDict(case_insensitive=True) + self.headers_complete = False + self.host = None + self._raw_data = '' + self.response_code = 0 + self.response_text = '' + self.rspid = None + self._status_line = '' + self.unmangled = None + self.version = '' + + self._encoding_type = ENCODE_NONE + self._first_line = True + self._data_obj = None + self._end_after_headers = False + + self.set_dict_callbacks() + + if full_response is not None: + self.from_full_response(full_response, update_content_length) + + @property + def raw_headers(self): + ret = self.status_line + '\r\n' + for k, v in self.headers.all_pairs(): + ret = ret + "%s: %s\r\n" % (k, v) + ret = ret + '\r\n' + return ret + + @property + def status_line(self): + return self._status_line + + @status_line.setter + def status_line(self, val): + self._status_line = val + self.handle_statusline(val) + + @property + def raw_data(self): + return self._raw_data + + @raw_data.setter + def raw_data(self, val): + self._raw_data = val + self._data_obj = 
LengthData(len(val)) + self._data_obj.add_data(val) + self._encoding_type = ENCODE_NONE + self.complete = True + self.update_from_data() + + @property + def full_response(self): + ret = self.raw_headers + ret = ret + self.raw_data + return ret + + def set_dict_callbacks(self): + # Add callbacks to dicts + self.headers.set_modify_callback(self.update_from_text) + self.cookies.set_modify_callback(self.update_from_objects) + + def from_full_response(self, full_response, update_content_length=False): + # Get rid of leading CRLF. Not in spec, should remove eventually + while full_response[0:2] == '\r\n': + full_response = full_response[2:] + + # We do redundant splits, but whatever + lines = full_response.splitlines() + for line in lines: + if self.headers_complete: + break + self.add_line(line) + if not self.headers_complete: + self.add_line('') + + if not self.complete: + data = full_response[self.header_len:] + if update_content_length: + self.raw_data = data + else: + self.add_data(data) + assert(self.complete) + + def add_line(self, line): + assert(not self.headers_complete) + self.header_len += len(line)+2 + if not line and self._first_line: + return + if not line: + self.headers_complete = True + + if self._end_after_headers: + self.complete = True + return + + if not self._data_obj: + self._data_obj = LengthData(0) + self.complete = self._data_obj.complete + return + + if self._first_line: + self.handle_statusline(line) + self._first_line = False + else: + key, val = line.split(':', 1) + val = val.strip() + self.handle_header(key, val) + + def handle_statusline(self, status_line): + self._first_line = False + self._status_line = status_line + self.version, self.response_code, self.response_text = \ + status_line.split(' ', 2) + self.response_code = int(self.response_code) + + if self.response_code == 304 or self.response_code == 204 or \ + self.response_code/100 == 1: + self._end_after_headers = True + + def handle_header(self, key, val): + stripped = False + if 
key.lower() == 'content-encoding': + if val in ('gzip', 'x-gzip'): + self._encoding_type = ENCODE_GZIP + elif val in ('deflate'): + self._encoding_type = ENCODE_DEFLATE + + # We send our requests already decoded, so we don't want a header + # saying it's encoded + if self._encoding_type != ENCODE_NONE: + stripped = True + elif key.lower() == 'transfer-encoding' and val.lower() == 'chunked': + self._data_obj = ChunkedData() + self.complete = self._data_obj.complete + stripped = True + elif key.lower() == 'content-length': + # We use our own content length + self._data_obj = LengthData(int(val)) + elif key.lower() == 'set-cookie': + cookie = ResponseCookie(val) + self.cookies.append(cookie.key, cookie, do_callback=False) + elif key.lower() == 'host': + self.host = val + + if stripped: + return False + else: + self.headers.append(key, val, do_callback=False) + return True + + def update_from_data(self): + self.headers.update('Content-Length', str(len(self.raw_data)), do_callback=False) + + def update_from_objects(self): + # Updates headers from objects + # DOES NOT MAINTAIN HEADER DUPLICATION, ORDER, OR CAPITALIZATION + + # Cookies + new_headers = RepeatableDict() + cookies_added = False + for pair in self.headers.all_pairs(): + if pair[0].lower() == 'set-cookie': + # If we haven't added our cookies, add them all. 
Otherwise + # strip the header (do nothing) + if not cookies_added: + # Add all our cookies here + for k, c in self.cookies.all_pairs(): + new_headers.append('Set-Cookie', c.cookie_av) + cookies_added = True + else: + new_headers.append(pair[0], pair[1]) + + if not cookies_added: + # Add all our cookies to the end + for k, c in self.cookies.all_pairs(): + new_headers.append('Set-Cookie', c.cookie_av) + + self.headers = new_headers + self.set_dict_callbacks() + + def update_from_text(self): + self.cookies = RepeatableDict() + self.set_dict_callbacks() + for k, v in self.headers.all_pairs(): + if k.lower() == 'set-cookie': + # Parse the cookie + cookie = ResponseCookie(v) + self.cookies.append(cookie.key, cookie, do_callback=False) + + def add_data(self, data): + assert(self._data_obj) + assert(not self._data_obj.complete) + assert not self.complete + self._data_obj.add_data(data) + if self._data_obj.complete: + self._raw_data = decode_encoded(self._data_obj.raw_data, + self._encoding_type) + self.complete = True + self.update_from_data() + + def add_cookie(self, cookie): + self.cookies.append(cookie.key, cookie, do_callback=False) + + def to_json(self): + # We base64 encode the full response because json doesn't paly nice with + # binary blobs + data = { + 'rspid': self.rspid, + 'full_response': base64.b64encode(self.full_response), + } + if self.unmangled: + data['unmangled_id'] = self.unmangled.rspid + + return json.dumps(data) + + def from_json(self, json_string): + data = json.loads(json_string) + self.from_full_response(base64.b64decode(data['full_response'])) + self.update_from_text() + self.update_from_data() + if data['rspid']: + self.rspid = int(data['rspid']) + + @defer.inlineCallbacks + def save(self): + assert(dbpool) + if self.rspid: + # If we have rspid, we're updating + yield dbpool.runInteraction(self._update) + else: + yield dbpool.runInteraction(self._insert) + assert(self.rspid is not None) + + def _update(self, txn): + setnames = 
["full_response=?"] + queryargs = [self.full_response] + if self.unmangled: + setnames.append('unmangled_id=?') + assert(self.unmangled.rspid is not None) # should be saved first + queryargs.append(self.unmangled.rspid) + + queryargs.append(self.rspid) + txn.execute( + """ + UPDATE responses SET %s WHERE id=?; + """ % ','.join(setnames), + tuple(queryargs) + ) + assert(self.rspid is not None) + + def _insert(self, txn): + # If we don't have an rspid, we're creating a new one + colnames = ["full_response"] + colvals = [self.full_response] + if self.unmangled is not None: + colnames.append('unmangled_id') + assert(self.unmangled.rspid is not None) # should be saved first + colvals.append(self.unmangled.rspid) + + txn.execute( + """ + INSERT INTO responses (%s) VALUES (%s); + """ % (','.join(colnames), ','.join(['?']*len(colvals))), + tuple(colvals) + ) + self.rspid = txn.lastrowid + assert(self.rspid is not None) + + def delete(self): + assert(self.rspid is not None) + row = yield dbpool.runQuery( + """ + DELETE FROM responses WHERE id=?; + """, + (self.rspid,) + ) + + @staticmethod + @defer.inlineCallbacks + def load_response(respid): + assert(dbpool) + rows = yield dbpool.runQuery( + """ + SELECT full_response, id, unmangled_id + FROM responses + WHERE id=?; + """, + (respid,) + ) + if len(rows) != 1: + raise PappyException("Response with request id %d does not exist" % respid) + full_response = rows[0][0] + resp = Response(full_response) + resp.rspid = int(rows[0][1]) + if rows[0][2]: + unmangled_response = yield Response.load_response(int(rows[0][2])) + resp.unmangled = unmangled_response + defer.returnValue(resp) + + diff --git a/pappy-proxy/mangle.py b/pappy-proxy/mangle.py new file mode 100644 index 0000000..e8d48e8 --- /dev/null +++ b/pappy-proxy/mangle.py @@ -0,0 +1,104 @@ +import console +import context +import proxy +import string +import subprocess +import tempfile +import http + +from twisted.internet import defer + +active_requests = {} + 
+intercept_requests = False +intercept_responses = False + +def set_intercept_requests(val): + global intercept_requests + intercept_requests = val + +def set_intercept_responses(val): + global intercept_responses + intercept_responses = val + +@defer.inlineCallbacks +def mangle_request(request, connection_id): + # This function gets called to mangle/edit requests passed through the proxy + global intercept_requests + + orig_req = http.Request(request.full_request) + retreq = orig_req + + if context.in_scope(orig_req): + if intercept_requests: # if we want to mangle... + # Write original request to the temp file + with tempfile.NamedTemporaryFile(delete=False) as tf: + tfName = tf.name + tf.write(orig_req.full_request) + + # Have the console edit the file + yield console.edit_file(tfName) + + # Create new mangled request from edited file + with open(tfName, 'r') as f: + mangled_req = http.Request(f.read(), update_content_length=True) + + # Check if it changed + if mangled_req.full_request != orig_req.full_request: + # Set the object's metadata + mangled_req.unmangled = orig_req + retreq = mangled_req + + # Add our request to the context + context.add_request(retreq) + else: + proxy.log('Out of scope! Request passed along unharmed', id=connection_id) + + active_requests[connection_id] = retreq + retreq.submitted = True + defer.returnValue(retreq) + +@defer.inlineCallbacks +def mangle_response(response, connection_id): + # This function gets called to mangle/edit respones passed through the proxy + global intercept_responses + #response = string.replace(response, 'cloud', 'butt') + #response = string.replace(response, 'Cloud', 'Butt') + + myreq = active_requests[connection_id] + + orig_rsp = http.Response(response.full_response) + retrsp = orig_rsp + + if context.in_scope(myreq): + if intercept_responses: # If we want to mangle... 
+ # Write original request to the temp file + with tempfile.NamedTemporaryFile(delete=False) as tf: + tfName = tf.name + tf.write(orig_rsp.full_response) + + # Have the console edit the file + yield console.edit_file(tfName) + + # Create new mangled request from edited file + with open(tfName, 'r') as f: + mangled_rsp = http.Response(f.read(), update_content_length=True) + + if mangled_rsp.full_response != orig_rsp.full_response: + mangled_rsp.unmangled = orig_rsp + retrsp = mangled_rsp + + if not myreq.reqid: + myreq.save() + if myreq.unmangled: + myreq.unmangled.save() + myreq.response = retrsp + else: + proxy.log('Out of scope! Response passed along unharmed', id=connection_id) + del active_requests[connection_id] + myreq.response = retrsp + context.filter_recheck() + defer.returnValue(myreq) + +def connection_lost(connection_id): + del active_requests[connection_id] diff --git a/pappy-proxy/pappy.py b/pappy-proxy/pappy.py new file mode 100755 index 0000000..0fd6121 --- /dev/null +++ b/pappy-proxy/pappy.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python2 + +import cmd2 +import config +import console +import comm +import context +import crochet +import http +import imp +import os +import schema.update +import proxy +import shutil +import sys +import sqlite3 +from twisted.enterprise import adbapi +from twisted.internet import reactor, defer +from twisted.internet.threads import deferToThread +from twisted.internet.protocol import ServerFactory + + +crochet.no_setup() + +def set_text_factory(conn): + conn.text_factory = str + +@defer.inlineCallbacks +def main(): + # If the data file doesn't exist, create it with restricted permissions + if not os.path.isfile(config.DATAFILE): + with os.fdopen(os.open(config.DATAFILE, os.O_CREAT, 0o0600), 'r') as f: + pass + + # Set up data store + dbpool = adbapi.ConnectionPool("sqlite3", config.DATAFILE, + check_same_thread=False, + cp_openfun=set_text_factory, + cp_max=1) + yield schema.update.update_schema(dbpool) + http.init(dbpool) + 
yield context.init() + + # Run the proxy + if config.DEBUG_DIR and os.path.exists(config.DEBUG_DIR): + shutil.rmtree(config.DEBUG_DIR) + print 'Removing old debugging output' + factory = ServerFactory() + factory.protocol = proxy.ProxyServer + listen_strs = [] + for listener in config.LISTENERS: + reactor.listenTCP(listener[0], factory, interface=listener[1]) + listener_str = 'port %d' % listener[0] + if listener[1] not in ('127.0.0.1', 'localhost'): + listener_str += ' (bound to %s)' % listener[1] + listen_strs.append(listener_str) + if listen_strs: + print 'Proxy is listening on %s' % (', '.join(listen_strs)) + + com_factory = ServerFactory() + com_factory.protocol = comm.CommServer + # Make the port different for every instance of pappy, then pass it to + # anything we run. Otherwise we can only have it running once on a machine + comm_port = reactor.listenTCP(0, com_factory, interface='127.0.0.1') + comm.set_comm_port(comm_port.getHost().port) + + d = deferToThread(console.ProxyCmd().cmdloop) + d.addCallback(lambda ignored: reactor.stop()) + + # Load the scope + yield context.load_scope(http.dbpool) + context.reset_to_scope() + +if __name__ == '__main__': + reactor.callWhenRunning(main) + reactor.run() diff --git a/pappy-proxy/proxy.py b/pappy-proxy/proxy.py new file mode 100644 index 0000000..833bdc6 --- /dev/null +++ b/pappy-proxy/proxy.py @@ -0,0 +1,362 @@ +import config +import console +import datetime +import gzip +import mangle +import http +import os +import random +import re +import schema.update +import shutil +import string +import StringIO +import sys +import urlparse +import zlib +from OpenSSL import SSL +from twisted.enterprise import adbapi +from twisted.internet import reactor, ssl +from twisted.internet.protocol import ClientFactory +from twisted.protocols.basic import LineReceiver +from twisted.internet import defer + +from OpenSSL import crypto + +next_connection_id = 1 + +cached_certs = {} + +def get_next_connection_id(): + global 
next_connection_id + ret_id = next_connection_id + next_connection_id += 1 + return ret_id + +def log(message, id=None, symbol='*', verbosity_level=1): + + if config.DEBUG_TO_FILE and not os.path.exists(config.DEBUG_DIR): + os.makedirs(config.DEBUG_DIR) + if id: + debug_str = '[%s](%d) %s' % (symbol, id, message) + if config.DEBUG_TO_FILE: + with open(config.DEBUG_DIR+'/connection_%d.log' % id, 'a') as f: + f.write(debug_str+'\n') + else: + debug_str = '[%s] %s' % (symbol, message) + if config.DEBUG_TO_FILE: + with open(config.DEBUG_DIR+'/debug.log', 'a') as f: + f.write(debug_str+'\n') + if config.DEBUG_VERBOSITY >= verbosity_level: + print debug_str + +def log_request(request, id=None, symbol='*', verbosity_level=3): + r_split = request.split('\r\n') + for l in r_split: + log(l, id, symbol, verbosity_level) + +class ClientTLSContext(ssl.ClientContextFactory): + isClient = 1 + def getContext(self): + return SSL.Context(SSL.TLSv1_METHOD) + + +class ProxyClient(LineReceiver): + + def __init__(self, request): + self.factory = None + self._response_sent = False + self._sent = False + self.request = request + + self._response_obj = http.Response() + + def log(self, message, symbol='*', verbosity_level=1): + log(message, id=self.factory.connection_id, symbol=symbol, verbosity_level=verbosity_level) + + def lineReceived(self, *args, **kwargs): + line = args[0] + if line is None: + line = '' + self._response_obj.add_line(line) + self.log(line, symbol='r<', verbosity_level=3) + if self._response_obj.headers_complete: + if self._response_obj.complete: + self.handle_response_end() + return + self.log("Headers end, length given, waiting for data", verbosity_level=3) + self.setRawMode() + + def rawDataReceived(self, *args, **kwargs): + data = args[0] + if not self._response_obj.complete: + if data: + s = console.printable_data(data) + dlines = s.split('\n') + for l in dlines: + self.log(l, symbol=' 3 + data_chunked = '3\r\n' + data_chunked += data_comp[:3] + data_chunked += 
'\r\n%x\r\n' % (len(data_comp[3:])) + data_chunked += data_comp[3:] + data_chunked += '\r\n0\r\n' + + header_lines = [ + 'HTTP/1.1 200 OK', + 'Date: Thu, 22 Oct 2015 00:37:17 GMT', + 'Cache-Control: private, max-age=0', + 'Content-Type: text/html; charset=UTF-8', + 'Server: gws', + 'Content-Encoding: gzip', + 'Transfer-Encoding: chunked', + '', + ] + + rf, rl, ru, rj = rsp_by_lines_and_full(header_lines, data_chunked) + def test(r): + assert r.complete + assert r.raw_data == data_decomp + assert r.headers['Content-Length'] == str(len(data_decomp)) + assert r.full_response == ('HTTP/1.1 200 OK\r\n' + 'Date: Thu, 22 Oct 2015 00:37:17 GMT\r\n' + 'Cache-Control: private, max-age=0\r\n' + 'Content-Type: text/html; charset=UTF-8\r\n' + 'Server: gws\r\n' + 'Content-Length: %d\r\n\r\n' + '%s') % (len(data_decomp), data_decomp) + + test(rf) + test(rl) + test(ru) + test(rj) + +def test_response_early_completion(): + r = http.Response() + r.status_line = 'HTTP/1.1 200 OK' + r.add_line('Content-Length: 0') + assert not r.complete + r.add_line('') + assert r.complete + +def test_response_cookies(): + header_lines = [ + 'HTTP/1.1 200 OK', + 'Content-Length: 0', + 'Set-Cookie: ck=1234=567;Expires=Wed, 09 Jun 2021 10:18:14 GMT;secure;httponly;path=/;max-age=12;domain=.foo.bar', + 'Set-Cookie: abc=123', + 'Set-Cookie: def=456', + '', + ] + + rf, rl, ru, rj = rsp_by_lines_and_full(header_lines) + def test(r): + assert r.complete + assert r.cookies['ck'].key == 'ck' + assert r.cookies['ck'].val == '1234=567' + assert r.cookies['ck'].domain == '.foo.bar' + assert r.cookies['ck'].expires == 'Wed, 09 Jun 2021 10:18:14 GMT' + assert r.cookies['ck'].http_only + assert r.cookies['ck'].max_age == 12 + assert r.cookies['ck'].path == '/' + assert r.cookies['ck'].secure + + assert r.cookies['abc'].val == '123' + assert r.cookies['def'].val == '456' + + test(rf) + test(rl) + test(ru) + test(rj) + +def test_response_repeated_cookies(): + r = http.Response(('HTTP/1.1 200 OK\r\n' + 'Set-Cookie: 
foo=bar\r\n' + 'Set-Cookie: baz=buzz\r\n' + 'Set-Cookie: foo=buzz\r\n' + '\r\n')) + expected_pairs = [('foo', 'bar'), ('baz', 'buzz'), ('foo', 'buzz')] + check_response_cookies(expected_pairs, r) + +def test_repeated_response_headers(): + # Repeated headers can be used for attacks, so ironically we have to handle + # them well. We always use the last header as the correct one. + header_lines = [ + 'HTTP/1.1 200 OK', + 'Content-Length: 0', + 'Test-Head: WRONG', + 'Test-Head: RIGHTish', + '', + ] + + rf, rl, ru, rj = rsp_by_lines_and_full(header_lines) + def test(r): + assert r.complete + assert r.headers['test-head'] == 'RIGHTish' + + test(rf) + test(rl) + test(ru) + test(rj) + +def test_response_update_statusline(): + r = http.Response() + r.status_line = 'HTTP/1.1 200 OK' + assert r.version == 'HTTP/1.1' + assert r.response_code == 200 + assert r.response_text == 'OK' + assert not r.complete + + assert r.full_response == 'HTTP/1.1 200 OK\r\n\r\n' + +def test_response_update_headers(): + r = http.Response() + r.status_line = 'HTTP/1.1 200 OK' + r.headers['Test-Header'] = 'Test Value' + r.headers['Other-Header'] = 'Other Value' + + assert r.full_response == ('HTTP/1.1 200 OK\r\n' + 'Test-Header: Test Value\r\n' + 'Other-Header: Other Value\r\n\r\n') + + r.headers.append('Test-Header', 'Other Test Value') + assert r.full_response == ('HTTP/1.1 200 OK\r\n' + 'Test-Header: Test Value\r\n' + 'Other-Header: Other Value\r\n' + 'Test-Header: Other Test Value\r\n\r\n') + +def test_response_update_modified_headers(): + r = http.Response() + r.status_line = 'HTTP/1.1 200 OK' + r.headers['content-length'] = '500' + r.raw_data = 'AAAA' + assert r.full_response == ('HTTP/1.1 200 OK\r\n' + 'content-length: 4\r\n\r\n' + 'AAAA') + assert r.headers['content-length'] == '4' + +def test_response_update_cookies(): + r = http.Response() + r.status_line = 'HTTP/1.1 200 OK' + # Test by adding headers + r.headers['Set-Cookie'] = 'abc=123' + assert r.full_response == ('HTTP/1.1 200 OK\r\n' 
+                               'Set-Cookie: abc=123\r\n\r\n')
+    assert r.cookies['abc'].val == '123'
+    r.headers.append('Set-Cookie', 'abc=456')
+    assert r.full_response == ('HTTP/1.1 200 OK\r\n'
+                               'Set-Cookie: abc=123\r\n'
+                               'Set-Cookie: abc=456\r\n\r\n'
+                               )
+    assert r.cookies['abc'].val == '456'
+
+    r = http.Response()
+    r.status_line = 'HTTP/1.1 200 OK'
+    # Cookies assigned as ResponseCookie objects must serialize into headers
+    c = http.ResponseCookie('abc=123; secure')
+    r.cookies['abc'] = c
+    assert r.full_response == ('HTTP/1.1 200 OK\r\n'
+                               'Set-Cookie: abc=123; secure\r\n\r\n')
+
+def test_response_update_content_length():
+    r = http.Response(('HTTP/1.1 200 OK\r\n'
+                       'Content-Length: 4\r\n\r\n'
+                       'AAAAAAAAAA'), update_content_length=True)
+
+    assert r.full_response == (('HTTP/1.1 200 OK\r\n'
+                                'Content-Length: 10\r\n\r\n'
+                                'AAAAAAAAAA'))
+
+def test_response_to_json():
+    rsp = http.Response()
+    rsp.status_line = 'HTTP/1.1 200 OK'
+    rsp.rspid = 2
+
+    expected_reqdata = {'full_response': base64.b64encode(rsp.full_response),
+                        'rspid': rsp.rspid,
+                        #'tag': r.tag,
+                        }
+
+    assert json.loads(rsp.to_json()) == expected_reqdata
+
+def test_response_update_from_objects_cookies():
+    r = http.Response(('HTTP/1.1 200 OK\r\n'
+                       'Set-Cookie: foo=bar\r\n'
+                       'Set-Cookie: baz=buzz\r\n'
+                       'Header: out of fucking nowhere\r\n'
+                       'Set-Cookie: foo=buzz\r\n'
+                       '\r\n'))
+    expected_pairs = [('foo', 'bar'), ('baz', 'buzz'), ('foo', 'buzz')]
+    check_response_cookies(expected_pairs, r)
+
+    new_pairs = [('foo', http.ResponseCookie('foo=banana')),
+                 ('baz', http.ResponseCookie('baz=buzz')),
+                 ('scooby', http.ResponseCookie('scooby=doo')),
+                 ('foo', http.ResponseCookie('foo=boo'))]
+    r.cookies.clear()
+    r.cookies.add_pairs(new_pairs)
+
+    assert r.full_response == ('HTTP/1.1 200 OK\r\n'
+                               'Header: out of fucking nowhere\r\n'
+                               'Set-Cookie: foo=banana\r\n'
+                               'Set-Cookie: baz=buzz\r\n'
+                               'Set-Cookie: scooby=doo\r\n'
+                               'Set-Cookie: foo=boo\r\n'
+                               '\r\n')
+    expected_pairs = [('foo', 'banana'), ('baz', 'buzz'), ('scooby', 'doo'), ('foo', 'boo')]
+    check_response_cookies(expected_pairs, r)
+
+def test_response_update_from_objects_cookies_replace():
+    r = http.Response(('HTTP/1.1 200 OK\r\n'
+                       'Set-Cookie: foo=bar\r\n'
+                       'Set-Cookie: baz=buzz\r\n'
+                       'Header: out of fucking nowhere\r\n'
+                       'Set-Cookie: foo=buzz\r\n'
+                       '\r\n'))
+    expected_pairs = [('foo', 'bar'), ('baz', 'buzz'), ('foo', 'buzz')]
+    check_response_cookies(expected_pairs, r)
+
+
+    r.cookies['foo'] = http.ResponseCookie('foo=banana')
+
+    assert r.full_response == ('HTTP/1.1 200 OK\r\n'
+                               'Set-Cookie: foo=banana\r\n'
+                               'Set-Cookie: baz=buzz\r\n'
+                               'Header: out of fucking nowhere\r\n'
+                               '\r\n')
diff --git a/pappy-proxy/tests/test_proxy.py b/pappy-proxy/tests/test_proxy.py
new file mode 100644
index 0000000..e558922
--- /dev/null
+++ b/pappy-proxy/tests/test_proxy.py
@@ -0,0 +1,35 @@
+import pytest
+
+from proxy import ProxyClient, ProxyClientFactory, ProxyServer
+from testutil import mock_deferred
+from twisted.internet.protocol import ServerFactory
+from twisted.test import proto_helpers
+from twisted.internet import defer
+
+####################
+## Fixtures: an in-memory ProxyServer wired to a StringTransport
+
+@pytest.fixture
+def proxyserver():
+    factory = ServerFactory()
+    factory.protocol = ProxyServer
+    protocol = factory.buildProtocol(('127.0.0.1', 0))
+    transport = proto_helpers.StringTransport()
+    protocol.makeConnection(transport)
+    return (protocol, transport)
+
+####################
+## Smoke tests for the fixtures themselves
+
+def test_proxy_server_fixture(proxyserver):
+    prot = proxyserver[0]
+    tr = proxyserver[1]
+    prot.transport.write('hello')
+    assert tr.value() == 'hello'
+
+@pytest.inlineCallbacks
+def test_mock_deferreds(mock_deferred):
+    d = mock_deferred('Hello!')
+    r = yield d
+    assert r == 'Hello!'
+
diff --git a/pappy-proxy/tests/testutil.py b/pappy-proxy/tests/testutil.py
new file mode 100644
index 0000000..750f431
--- /dev/null
+++ b/pappy-proxy/tests/testutil.py
@@ -0,0 +1,15 @@
+import pytest
+from twisted.internet import defer
+
+@pytest.fixture
+def mock_deferred():
+    # Return a factory for pre-fired Deferreds: each call yields a deferred
+    # that immediately fires with `value`, for mocking async responses.
+    def f(value):
+        def g(data):
+            return value
+        d = defer.Deferred()
+        d.addCallback(g)
+        d.callback(None)
+        return d
+    return f
diff --git a/pappy-proxy/util.py b/pappy-proxy/util.py
new file mode 100644
index 0000000..eff2114
--- /dev/null
+++ b/pappy-proxy/util.py
@@ -0,0 +1,3 @@
+
+class PappyException(Exception):
+    pass
diff --git a/pappy-proxy/vim_repeater/.#repeater.vim b/pappy-proxy/vim_repeater/.#repeater.vim
new file mode 120000
index 0000000..2d09db2
--- /dev/null
+++ b/pappy-proxy/vim_repeater/.#repeater.vim
@@ -0,0 +1 @@
+glew@localhost.787:1446907770
\ No newline at end of file
diff --git a/pappy-proxy/vim_repeater/__init__.py b/pappy-proxy/vim_repeater/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pappy-proxy/vim_repeater/repeater.py b/pappy-proxy/vim_repeater/repeater.py
new file mode 100644
index 0000000..5a0303c
--- /dev/null
+++ b/pappy-proxy/vim_repeater/repeater.py
@@ -0,0 +1,116 @@
+import base64
+import vim
+import sys
+import socket
+import json
+
+class CommError(Exception):
+    pass
+
+def communicate(data):
+    global PAPPY_PORT
+    # Submit `data` to the proxy's comm port as JSON; return the decoded reply
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    s.connect(('127.0.0.1', int(vim.eval('s:commport'))))
+    datastr = json.dumps(data)
+
+    # Send the JSON string; loop on len(datastr), not len(data) (dict keys)
+    total_sent = 0
+    while total_sent < len(datastr):
+        sent = s.send(datastr[total_sent:])
+        assert sent != 0
+        total_sent += sent
+    s.send('\n')
+
+    # Read the newline-terminated JSON reply one byte at a time
+    retstr = ''
+    c = ''
+    while c != '\n':
+        retstr = retstr + c
+        c = s.recv(1)
+        assert c != ''
+    result = json.loads(retstr)
+    if not result['success']:
+        vim.command('echoerr %s' % result['message'])
+        raise CommError(result['message'])
+    return result
+
+def read_line(conn):
+    data = ''
+    c = ''
+    while c != '\n':
+        data = data + c
+        c = conn.read(1)
+    return data
+
+def run_command(command):
+    funcs = {
+        "setup": set_up_windows,
+        "submit": submit_current_buffer,
+    }
+    if command in funcs:
+        funcs[command]()
+
+def set_buffer_content(buf, text):
+    buf[:] = None
+    first = True
+    for l in text.split('\n'):
+        if first:
+            buf[0] = l
+            first = False
+        else:
+            buf.append(l)
+
+def set_up_windows():
+    reqid = vim.eval("a:2")
+    comm_port = vim.eval("a:3")
+    vim.command("let s:commport=%d"%int(comm_port))
+    # Create the response buffer (b2) in a fresh, solitary window
+    vim.command("new")
+    vim.command("only")
+    b2 = vim.current.buffer
+    vim.command("let s:b2=bufnr('$')")
+
+    # Vertical split: left-hand buffer (b1) will hold the request text
+    vim.command("vnew")
+    b1 = vim.current.buffer
+    vim.command("let s:b1=bufnr('$')")
+
+    # Ask the proxy for the request identified by reqid
+    comm_data = {"action": "get_request", "reqid": reqid}
+    try:
+        reqdata = communicate(comm_data)
+    except CommError:
+        return
+
+    comm_data = {"action": "get_response", "reqid": reqid}
+    try:
+        rspdata = communicate(comm_data)
+    except CommError:
+        return
+
+    # Fill both buffers with the base64-decoded request/response bodies
+    set_buffer_content(b1, base64.b64decode(reqdata['full_request']))
+    set_buffer_content(b2, base64.b64decode(rspdata['full_response']))
+
+def submit_current_buffer():
+    curbuf = vim.current.buffer
+    b2_id = vim.eval("s:b2")
+    b2 = vim.buffers[int(b2_id)]
+    vim.command("let s:b1=bufnr('$')")
+    vim.command("only")
+    vim.command("rightbelow vertical new")
+    vim.command("b %s" % b2_id)
+    vim.command("wincmd h")
+
+    full_request = '\n'.join(curbuf)
+    commdata = {'action': 'submit',
+                'full_request': base64.b64encode(full_request)}
+    result = communicate(commdata)
+    set_buffer_content(b2, base64.b64decode(result['response']['full_response']))
+
+# (left, right) = set_up_windows()
+# set_buffer_content(left, 'Hello\nWorld')
+# set_buffer_content(right, 
'Hello\nOther\nWorld')
+#print "Arg is %s" % vim.eval("a:arg")
+run_command(vim.eval("a:1"))
diff --git a/pappy-proxy/vim_repeater/repeater.vim b/pappy-proxy/vim_repeater/repeater.vim
new file mode 100644
index 0000000..737605c
--- /dev/null
+++ b/pappy-proxy/vim_repeater/repeater.vim
@@ -0,0 +1,17 @@
+if !has('python')
+    echo "Vim must support python in order to use the repeater"
+    finish
+endif
+
+let s:pyscript = resolve(expand('<sfile>:p:h') . '/repeater.py')
+
+function! RepeaterAction(...)
+    execute 'pyfile ' . s:pyscript
+endfunc
+
+command! -nargs=* RepeaterSetup call RepeaterAction('setup', <f-args>)
+command! RepeaterSubmitBuffer call RepeaterAction('submit')
+
+" Bind forward to <leader>f
+nnoremap <leader>f :RepeaterSubmitBuffer<CR>
+
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..f80a435
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+
+from setuptools import setup
+
+setup(name='Pappy',
+      version='0.0.1',
+      description='The Pappy Intercepting Proxy',
+      author='Rob Glew',
+      author_email='rglew56@gmail.com',
+      url='https://www.github.com/roglew/pappy-proxy',
+      packages=['pappy-proxy'],
+      license='MIT',
+      install_requires=[
+        'twisted',
+        'crochet',
+        'cmd2',
+        'service_identity',
+        'pytest',
+        'pytest-cov',
+        'pytest-twisted',
+      ]
+      )