从googlecode的svn下载文件的脚本
因为想下载源码,但又没有安装 SVN 客户端,于是写了一个从 Google Code 的 SVN 仓库下载文件的脚本(开发平台:Windows 7)。本人是初学者,代码写得比较乱:
下面是主要代码:
# -*- coding: utf-8 -*-
import re
import urllib,os,time
from openlib import opener
# Root of the remote SVN tree to mirror; must end with '/'.
baseurl = '''http://3rgbcom.googlecode.com/svn/trunk/n3c_webpy_v0.2/'''
# Work queues shared by the crawler: directory URLs still to visit and
# file URLs still to download.
dirs = [baseurl]
downloadlist = []
get_time = time.localtime()
# Local output directory named after today's date. The date is zero-padded
# (YYYYMMDD) so that e.g. 2011-1-12 and 2011-11-2 cannot map to the same name.
basedir = time.strftime('%Y%m%d', get_time) + '/'
if not os.path.exists(basedir):
    os.mkdir(basedir)
def GetFilelist(dirs,downloadlist,baseurl):
    """Walk the remote directory listing and download every file found.

    Directory URLs are taken from the end of ``dirs`` one at a time. For each
    one, the matching local directory is created under the module-level
    ``basedir``, the listing page is fetched through the module-level
    ``opener``, and its links are sorted into sub-directories (pushed back
    onto ``dirs``) and files (pushed onto ``downloadlist`` and downloaded
    right away via ``DownFiles``).

    Args:
        dirs: list of directory URLs still to visit; consumed in place.
        downloadlist: list of file URLs waiting to be downloaded; filled and
            drained in place.
        baseurl: URL prefix that is removed from a directory URL to obtain
            its path relative to ``basedir``.

    Returns:
        0 once ``dirs`` is empty.
    """
    # Non-greedy on purpose: stop at the first closing quote so that further
    # attributes on the same line are not swallowed into the href value.
    link_re = re.compile('<a href="([^"]+)"')
    # Iterate instead of recursing once per directory, so the number of
    # directories is not bounded by the interpreter's recursion limit.
    while dirs:
        mainurl = dirs.pop()
        print('%s %s' % (mainurl, 22))  # 22: debug marker for the URL being visited
        # Relative path of this directory, using the host's path separator.
        temppath = mainurl[len(baseurl):-1].replace('/', os.sep)
        localdir = basedir + temppath
        if not os.path.exists(localdir):
            os.mkdir(localdir)
            print('%s %s' % (localdir, 33))  # 33: debug marker for a created directory
        svn = opener.open(mainurl).read()
        for url in link_re.findall(svn):
            url = url.strip()
            # Skip absolute (external) links and the parent-directory link.
            if url.startswith(('http:', 'https:')) or url == '../':
                continue
            # Compiled Python artefacts are not worth downloading.
            if url.endswith(('.pyc', '.pyo')):
                continue
            if url.endswith('/'):
                dirs.append(mainurl + url)
            else:
                downloadlist.append(mainurl + url)
        # The local directory for these files exists now, so fetch them
        # before moving on to the next directory.
        DownFiles(downloadlist)
    return 0
def MakePath(file,downloadlist):
    """Return the local path a remote file URL should be saved to.

    The path is the module-level ``basedir`` followed by whatever comes after
    the first ``len(baseurl)`` characters of ``file`` (``baseurl`` being the
    module-level root URL).

    Args:
        file: full URL of the remote file.
        downloadlist: unused; kept so existing callers keep working.

    Returns:
        The local path as a string.
    """
    # The previous version special-cased files directly under baseurl by
    # comparing file.strip(<file name>) with baseurl. str.strip() removes a
    # *set of characters* from both ends rather than a suffix, so that test
    # was unreliable. For URLs that start with baseurl both branches produced
    # this same value, so a single slice is enough.
    return basedir + file[len(baseurl):]
def DownFiles(downloadlist):
    """Download every URL queued in ``downloadlist``, emptying the list.

    URLs are taken from the end of the list. Each one is printed and then
    saved with ``urllib.urlretrieve`` to the path given by ``MakePath``.
    """
    while downloadlist:
        remote = downloadlist.pop()
        print(remote)
        target = MakePath(remote, downloadlist)
        urllib.urlretrieve(remote, target)
if __name__ == '__main__':
    # Script entry point: crawl everything queued in dirs (initially just
    # baseurl), then report completion.
    GetFilelist(dirs, downloadlist, baseurl)
    print('Finish!')
<p>下面是从<a href="http://obmem.info/?p=753">这里</a>借来的代码,用于为 urllib2 开启 gzip/deflate 支持(即上面脚本中 <code>from openlib import opener</code> 所导入的 openlib 模块):</p>
import urllib2
from gzip import GzipFile
from StringIO import StringIO
class ContentEncodingProcessor(urllib2.BaseHandler):
    """A handler to add gzip and deflate capabilities to urllib2 requests.

    Outgoing requests advertise ``Accept-Encoding: gzip, deflate``; incoming
    responses whose ``content-encoding`` header is exactly ``gzip`` or
    ``deflate`` are replaced by a response object that yields the
    decompressed body. Other responses are passed through untouched.
    """

    def http_request(self, req):
        """Add the Accept-Encoding header to ``req`` and return it."""
        req.add_header("Accept-Encoding", "gzip, deflate")
        return req

    def http_response(self, req, resp):
        """Return ``resp`` with its body decompressed when it is encoded.

        The whole body is read into memory before decompression. The new
        response reuses the original headers, URL, status code and ``msg``.
        """
        old_resp = resp
        encoding = resp.headers.get("content-encoding")
        if encoding == "gzip":
            body = GzipFile(fileobj=StringIO(resp.read()), mode="r")
        elif encoding == "deflate":
            # ``deflate`` is the module-level helper that copes with both
            # raw-deflate and zlib-wrapped payloads.
            body = StringIO(deflate(resp.read()))
        else:
            return resp
        # addinfourl wraps the decompressed stream so it still offers
        # info()/geturl() like a normal response object.
        resp = urllib2.addinfourl(body, old_resp.headers, old_resp.url, old_resp.code)
        resp.msg = old_resp.msg
        return resp

    # urllib2 looks handlers up by "<protocol>_request"/"<protocol>_response",
    # so HTTPS traffic needs its own entries to get the same treatment.
    https_request = http_request
    https_response = http_response
# deflate support
import zlib
def deflate(data):
    """Decompress a ``deflate``-encoded payload.

    zlib only provides the zlib compress format, not the bare deflate
    format, so this works around it: the data is first decoded as a raw
    deflate stream (negative window bits), and if zlib rejects that it is
    decoded again as a regular zlib-wrapped stream.
    """
    try:
        plain = zlib.decompress(data, -zlib.MAX_WBITS)
    except zlib.error:
        plain = zlib.decompress(data)
    return plain
# Module-level opener for importers: built from ContentEncodingProcessor plus
# the plain HTTP handler, so responses read through it are decompressed.
encoding_support = ContentEncodingProcessor
opener = urllib2.build_opener(encoding_support, urllib2.HTTPHandler)

if __name__ == "__main__":
    url = 'http://www.baidu.com'
    # Open the page directly with the opener; if the server supports
    # gzip/deflate the body is decompressed automatically.
    page = opener.open(url)
    content = page.read()
    print(content)
作者: Lerry
发表时间:2011-04-09
版权说明:CC BY-NC-ND 4.0 DEED