# -*- coding: utf-8 -*-
import re

old_url = 'http://www.jikexueyuan.com/course/android/?pageNum=2'
total_page = 20

# Read the saved page text. The context manager closes the file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open('1.wenben.txt', 'r+') as f:
    html = f.read()

# re.S makes '.' match newline characters as well.
# Grab the title: re.search stops at the first match, whereas re.findall
# walks the whole text and returns every match.
# NOTE(review): this pattern looks like its HTML tags were stripped when the
# notes were pasted (possibly '<title>(.*?)</title>' originally) -- confirm.
# As written it captures everything before the first space, and raises
# AttributeError when there is no match (re.search returns None).
title = re.search('(.*?) ', html, re.S).group(1)
print(title)

# Using re.sub: the pattern spans the whole '123...123' run, so the entire
# match is replaced by the formatted replacement string.
s = '123adsg123'
output = re.sub('123(.*?)123', 'houzhong%d' % 88, s)
print(output)
# Do not use re.compile.
# Match digits: r'\d' matches exactly one digit, so each digit in the input
# comes back as its own list element.
a = 'asdfsf12313dfadfad'
b = re.findall(r'\d', a)  # raw string keeps \d from being an invalid str escape
print(b)
# Result: ['1', '2', '3', '1', '3']
# Match runs of digits: r'\d+' is greedy, so each maximal run of consecutive
# digits is returned as a single element.
a = 'asdfsf12313dfadfad2131'
b = re.findall(r'\d+', a)  # raw string keeps \d from being an invalid str escape
print(b)
# Result: ['12313', '2131']
# Pagination using re.sub
import re

# Build the URL of every page by rewriting the pageNum query parameter of a
# known page URL.
old_url = 'http://www.jikexueyuan.com/course/android/?pageNum=2'
total_page = 20

# Page numbers start at 1, so iterate 1..total_page inclusive.
for i in range(1, total_page + 1):
    # Swap whatever page number is currently in the URL for page i.
    new_url = re.sub(r'pageNum=\d+', 'pageNum=%d' % i, old_url)
    print(new_url)