PythonでHTTPS接続する

Python 2.5 では、プロキシ経由の HTTPS 接続(CONNECT メソッド)が素直にはできないらしい。
urrlib2 opener for SSL proxy (CONNECT method) « Python recipes « ActiveState Code に参考ソースがあった。コメント欄に以下のパッチが書かれていたのでそれを取り込みつつちょっと手直し。

  • HTTPSのリダイレクト対応
  • 認証プロキシ対応

最終的なソースは以下の通り。

## {{{ http://code.activestate.com/recipes/456195/ (r2)
# urllib2 opener that connects through a proxy using the CONNECT method (useful for SSL)
# tested with python 2.4

import base64
import cookielib
import urllib2
import urllib
import httplib
import socket


class ProxyHTTPConnection(httplib.HTTPConnection):
    """HTTP connection that opens a CONNECT tunnel through a proxy.

    The connection itself is made to the proxy (the host/port given to the
    constructor).  ``request()`` must be called with an absolute URL; the
    real destination host and port are extracted from it and used for the
    CONNECT request that ``connect()`` sends to the proxy.
    """

    # Optional credentials for "Proxy-Authorization: Basic".  These are class
    # attributes, so a value assigned on the class is seen by every instance.
    _proxyuser = None
    _proxypass = None
    # Default port per URL scheme, used when the URL has no explicit port.
    _ports = {'http': 80, 'https': 443}

    def request(self, method, url, body=None, headers=None):
        """Record the real destination from ``url`` and send the request.

        ``url`` must be absolute (``scheme://host[:port]/...``).

        Raises:
            ValueError: if ``url`` has no scheme, no host, or has neither an
                explicit port nor a scheme listed in ``_ports``.
        """
        # Avoid sharing one mutable dict between calls.
        if headers is None:
            headers = {}
        # request() runs before connect(), so this is where the URL can be
        # interpreted to find the host/port for the CONNECT request.
        proto, rest = urllib.splittype(url)
        if proto is None:
            raise ValueError("unknown URL type: %s" % url)
        host, rest = urllib.splithost(rest)
        if not host:
            # splithost() yields no host when the URL lacks a "//host" part;
            # fail clearly instead of passing None on to splitport().
            raise ValueError("no host given in URL: %s" % url)
        host, port = urllib.splitport(host)
        # No explicit port: fall back to the scheme's default.
        if port is None:
            try:
                port = self._ports[proto]
            except KeyError:
                raise ValueError("unknown protocol for: %s" % url)
        self._real_host = host
        self._real_port = port
        httplib.HTTPConnection.request(self, method, url, body, headers)

    def connect(self):
        """Connect to the proxy and establish the CONNECT tunnel.

        Raises:
            socket.error: if the proxy answers the CONNECT request with a
                status code other than 200.
        """
        httplib.HTTPConnection.connect(self)
        # Build the CONNECT request for the real destination.
        lines = [
            'CONNECT %s:%s HTTP/1.1\r\n' % (self._real_host, self._real_port),
            'Proxy-Connection: keep-alive\r\n',
            'Connection: keep-alive\r\n',
            'Host: %s\r\n' % self._real_host,
        ]
        if self._proxyuser:
            credentials = base64.b64encode(
                '%s:%s' % (self._proxyuser, self._proxypass))
            lines.append('Proxy-Authorization: Basic %s\r\n' % credentials)
        lines.append('\r\n')
        self.send(''.join(lines))

        # Expect a "200 Connection established" status line from the proxy.
        response = self.response_class(
            self.sock, strict=self.strict, method=self._method)
        (version, code, message) = response._read_status()
        # NOTE: proxy authentication challenges are not handled here.
        if code != 200:
            # The proxy refused the tunnel: drop the connection and report.
            self.close()
            raise socket.error(
                "Proxy connection failed: %d %s" % (code, message.strip()))
        # Discard the proxy's response headers up to the blank line.  An
        # empty read means the proxy closed the connection; stop then too,
        # otherwise this loop would never terminate.
        while True:
            line = response.fp.readline()
            if not line or line in ('\r\n', '\n'):
                break


class ProxyHTTPSConnection(ProxyHTTPConnection):
    """Proxy-tunnelled connection that switches to SSL once the tunnel is up."""

    default_port = 443

    def __init__(self, host, port=None, key_file=None, cert_file=None, strict=None):
        """Store the optional client key/certificate files.

        ``host``, ``port`` and ``strict`` are passed on to the base class.
        """
        # Forward ``strict`` too; it was previously accepted but ignored,
        # although the base class's connect() reads ``self.strict``.
        ProxyHTTPConnection.__init__(self, host, port, strict)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        """Establish the proxy tunnel, then wrap the socket in SSL."""
        ProxyHTTPConnection.connect(self)
        # Make the socket SSL-aware.
        # NOTE(review): no CA certificates or verification options are passed
        # here, so the server certificate is presumably not verified - confirm.
        ssl_conn = socket.ssl(self.sock, self.key_file, self.cert_file)
        self.sock = httplib.FakeSocket(self.sock, ssl_conn)
        
class ConnectHTTPHandler(urllib2.HTTPHandler):
    """urllib2 HTTP handler that sends requests through a CONNECT proxy.

    ``proxy`` is a dict with ``'host'`` and ``'port'`` keys and optional
    ``'user'``/``'pass'`` keys for Basic proxy authentication.  With no proxy
    the handler behaves like the stock ``urllib2.HTTPHandler``.
    """

    def __init__(self, proxy=None, debuglevel=0):
        if not proxy:
            self.proxy = None
        else:
            # '%s' for the port so that both 8080 and '8080' are accepted.
            self.proxy = '%s:%s' % (proxy['host'], proxy['port'])
            if 'user' in proxy and 'pass' in proxy:
                # Credentials are stored on the connection class itself, so
                # they apply to every ProxyHTTPConnection (and subclasses).
                ProxyHTTPConnection._proxyuser = proxy['user']
                ProxyHTTPConnection._proxypass = proxy['pass']

        urllib2.HTTPHandler.__init__(self, debuglevel)

    def do_open(self, http_class, req):
        """Open ``req``, tunnelling through the proxy when one is configured."""
        if self.proxy is None:
            # No proxy: use the connection class supplied by the caller.
            # ProxyHTTPConnection would send CONNECT to the origin server and
            # needs an absolute URL in request(), so it cannot be used here.
            return urllib2.HTTPHandler.do_open(self, http_class, req)
        req.set_proxy(self.proxy, 'http')
        return urllib2.HTTPHandler.do_open(self, ProxyHTTPConnection, req)

class ConnectHTTPSHandler(urllib2.HTTPSHandler):
    """urllib2 HTTPS handler that sends requests through a CONNECT proxy.

    ``proxy`` is a dict with ``'host'`` and ``'port'`` keys and optional
    ``'user'``/``'pass'`` keys for Basic proxy authentication.  With no proxy
    the handler behaves like the stock ``urllib2.HTTPSHandler``.
    """

    def __init__(self, proxy=None, debuglevel=0):
        if not proxy:
            self.proxy = None
        else:
            # '%s' for the port so that both 8080 and '8080' are accepted.
            self.proxy = '%s:%s' % (proxy['host'], proxy['port'])
            if 'user' in proxy and 'pass' in proxy:
                # Credentials are stored on the connection class itself, so
                # they apply to every ProxyHTTPConnection (and subclasses).
                ProxyHTTPConnection._proxyuser = proxy['user']
                ProxyHTTPConnection._proxypass = proxy['pass']

        urllib2.HTTPSHandler.__init__(self, debuglevel)

    def do_open(self, http_class, req):
        """Open ``req``, tunnelling through the proxy when one is configured."""
        if self.proxy is None:
            # No proxy: use the connection class supplied by the caller.
            # ProxyHTTPSConnection would send CONNECT to the origin server and
            # needs an absolute URL in request(), so it cannot be used here.
            return urllib2.HTTPSHandler.do_open(self, http_class, req)
        req.set_proxy(self.proxy, 'https')
        return urllib2.HTTPSHandler.do_open(self, ProxyHTTPSConnection, req)

if __name__ == '__main__':
    # Demo: fetch a page over HTTPS through the proxy described below.
    proxy_settings = {'host': 'myproxy', 'port': 8080}
    http_handler = ConnectHTTPHandler(proxy=proxy_settings)
    https_handler = ConnectHTTPSHandler(proxy=proxy_settings, debuglevel=0)
    proxy_opener = urllib2.build_opener(http_handler, https_handler)
    page = proxy_opener.open("https://mixi.jp")
    print(page.read())
    
## end of http://code.activestate.com/recipes/456195/ }}}