从GoogleCode的svn下载文件的脚本

因为想下源码,又没装svn客户端,于是做了一个从GoogleCode的svn下载文件的脚本(开发平台:windows 7),初学者,写得比较乱:

下面是主要代码:

# -*- coding: utf-8 -*-
import re
import urllib,os,time
from openlib import opener
# Root of the SVN web listing to mirror; must end with '/'.
baseurl = '''http://3rgbcom.googlecode.com/svn/trunk/n3c_webpy_v0.2/'''
dirs = [baseurl]       # queue of directory URLs still to be listed
downloadlist = []      # file URLs collected and pending download
# Zero-padded YYYYMMDD; the old str(year)+str(month)+str(day) form was
# ambiguous (2011-1-11 and 2011-11-1 both gave "2011111").
basedir = time.strftime('%Y%m%d') + '/'

if not os.path.exists(basedir):
    os.mkdir(basedir)

def GetFilelist(dirs,downloadlist,baseurl):
    if len(dirs) == 0:
        return 0
    else:
        mainurl=dirs.pop()
        print mainurl,22 #22是为了看程序执行过程中的变量情况,下同
        temppath = mainurl[len(baseurl):-1].replace('/','\\')
        #file[len(mainurl):][:-len(file.split('/')[-1])].replace('/','\\')[:-1]
        if not os.path.exists(basedir+temppath):
            os.mkdir(basedir+temppath)
            print basedir+temppath,33
        svn = opener.open(mainurl).read()
        myre=re.compile('<a href="(.+)"')
        urls = myre.findall(svn)
        n=0
        for url in urls:
            url = url.strip()
            if url.startswith('http:'):
                continue
            elif url == '''../''':
                continue
            elif url.endswith('.pyc'):
                continue
            elif url.endswith('.pyo'):
                continue              
            elif url.endswith('/'):
                dirs.append(mainurl+url)
                #print dirs
            else:
                downloadlist.append(mainurl+url)
                #print downloadlist,n
                n+=1
        DownFiles(downloadlist)
        return GetFilelist(dirs,downloadlist,baseurl)        

def MakePath(file, downloadlist):
    """Map a remote file URL to its local save path under basedir.

    The part of the URL after ``baseurl`` mirrors the local directory
    layout created by GetFilelist, so a single slice covers both
    top-level files and files inside subdirectories.

    The original guard ``file.strip(file.split('/')[-1]) == baseurl``
    misused str.strip (it removes a *character set* from both ends, not
    a suffix), so the comparison was unreliable; the slice below is what
    its else-branch already did correctly for every case.
    """
    return basedir + file[len(baseurl):]


def DownFiles(downloadlist):
    while len(downloadlist)>0:
        file = downloadlist.pop()
        print file
        downpath = MakePath(file,downloadlist)
        content=urllib.urlretrieve(file,downpath)
# Entry point: start the crawl with the initial directory queue; files
# are downloaded as each directory listing is processed.
if __name__=='__main__':
    GetFilelist(dirs,downloadlist,baseurl)
    print 'Finish!'

下面是从网上借来的代码(原始出处链接已失效),用于为 urllib2 开启 gzip/deflate 压缩支持

import urllib2
from gzip import GzipFile
from StringIO import StringIO
class ContentEncodingProcessor(urllib2.BaseHandler):
    """A handler that adds transparent gzip/deflate support to urllib2.

    Advertises Accept-Encoding on outgoing requests and, when the server
    responds with a compressed body, wraps the response so callers can
    .read() plain bytes as usual.
    """

    def http_request(self, req):
        # Tell the server we accept compressed bodies.
        req.add_header("Accept-Encoding", "gzip, deflate")
        return req

    def http_response(self, req, resp):
        old_resp = resp
        encoding = resp.headers.get("content-encoding")
        if encoding == "gzip":
            gz = GzipFile(
                        fileobj=StringIO(resp.read()),
                        mode="r"
                      )
            resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url,
                                      old_resp.code)
            resp.msg = old_resp.msg
        # elif is equivalent to the original back-to-back ifs: after the
        # gzip branch the (unchanged) headers still say "gzip", so the
        # deflate test could never match anyway.
        elif encoding == "deflate":
            # deflate() (module level) copes with both raw and
            # zlib-framed streams.
            gz = StringIO(deflate(resp.read()))
            resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url,
                                      old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # Apply the same treatment to HTTPS traffic; the original recipe
    # only hooked plain HTTP.
    https_request = http_request
    https_response = http_response


# deflate support
import zlib
def deflate(data):
    """Decompress *data*, accepting raw-deflate or zlib-wrapped input.

    Servers that declare Content-Encoding: deflate sometimes send a raw
    DEFLATE stream and sometimes a zlib-framed one; zlib itself only
    speaks the framed format by default, hence this two-step attempt.
    """
    raw_window = -zlib.MAX_WBITS  # negative wbits => headerless raw stream
    try:
        return zlib.decompress(data, raw_window)
    except zlib.error:
        # Fall back to the standard zlib-framed format.
        return zlib.decompress(data)


# build_opener accepts handler *classes* as well as instances (it
# instantiates classes itself), so passing the class is sufficient here.
encoding_support = ContentEncodingProcessor
opener = urllib2.build_opener( encoding_support, urllib2.HTTPHandler )

# Demo when run directly: fetch a page through the gzip-aware opener.
if __name__=="__main__":
    url = 'http://www.baidu.com'
    # Open the page with the custom opener; if the server supports
    # gzip/deflate the body is decompressed transparently.
    content = opener.open(url).read()
    print  content
2011-04-09 15:48