网络爬虫

找标签最快的方法

from  bs4 import  BeautifulSoup
# Collect the text of every <th> cell and print the first 11 of them on one
# tab-separated line.
# NOTE(review): `soup` is not created in this snippet; presumably it is a
# BeautifulSoup document parsed earlier (e.g. from the fetched HTML) - confirm.
b = soup.find_all('th')
ulist = [th.string for th in b]

# Initialise the counter up front; the original incremented it without ever
# defining it, which raises NameError on the first iteration.
count2 = 0

# Slice instead of indexing with range(11) so that a page with fewer than 11
# header cells prints what is available rather than raising IndexError.
for count2, header in enumerate(ulist[:11], start=1):
    print(header, end='\t')
    # The first two columns get one extra tab.
    # NOTE(review): presumably to widen those columns for alignment - confirm.
    if count2 < 3:
        print(end='\t')

爬取网页的通用代码框架

# General-purpose skeleton for fetching a web page
def getHTMLText(url, code="utf-8"):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        code: Encoding assigned to the response before its body is decoded.

    Returns:
        The decoded response text, or the literal string "产生异常" when the
        request fails (connection problem, timeout, invalid URL, or a
        non-success HTTP status).
    """
    # Send a browser-like User-Agent header with the request.
    kv = {'user-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx status codes
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Only request-level failures are turned into the sentinel string;
        # KeyboardInterrupt, SystemExit and programming errors now propagate
        # instead of being silently swallowed by a bare `except:`.
        return "产生异常"
赞赏