1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
| #单个新闻信息 import requests from bs4 import BeautifulSoup
def _strip_editor_label(text):
    """Remove a leading '责任编辑' label (and its colon) from *text*.

    This is a true prefix removal: ``str.lstrip(chars)`` treats its argument
    as a set of characters and would also eat the start of a name that
    happens to begin with one of them.
    """
    label = '责任编辑'
    if text.startswith(label):
        text = text[len(label):]
        # Tolerate both the ASCII and the full-width colon after the label.
        if text[:1] in (':', '：'):
            text = text[1:]
    return text


def getNewsDetail(newsurl, timeout=10):
    """Download one news page and extract its main fields.

    Args:
        newsurl: URL of the article page to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with these keys:
            'title'      -- text of the first ``.main-title`` element
            'newssource' -- text of the <a> inside the first ``.date-source``
            'dt'         -- ``datetime`` parsed from the <span> inside
                            ``.date-source`` ('%Y年%m月%d日 %H:%M')
            'article'    -- stripped text of every ``#article p`` except the
                            last one, joined with single spaces
            'editor'     -- ``.show_author`` text without the '责任编辑' label
            'comments'   -- whatever ``getCommentCount(newsurl)`` returns

    Raises:
        IndexError: if an expected element is missing from the page.
        ValueError: if the date text does not match the expected format.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    # Imported locally so the function works even when the module header
    # does not import datetime.
    from datetime import datetime

    res = requests.get(newsurl, timeout=timeout)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')

    # The source link and the timestamp live in the same element; select once.
    date_source = soup.select('.date-source')[0]
    date = date_source.span.text

    result = {}
    result['title'] = soup.select('.main-title')[0].text
    result['newssource'] = date_source.a.text
    result['dt'] = datetime.strptime(date, '%Y年%m月%d日 %H:%M')  # str -> datetime
    # The last <p> is skipped -- presumably the editor line, which is
    # captured separately below (TODO confirm against the page layout).
    result['article'] = ' '.join(
        p.text.strip() for p in soup.select('#article p')[:-1]
    )
    result['editor'] = _strip_editor_label(soup.select('.show_author')[0].text)
    result['comments'] = getCommentCount(newsurl)
    return result
# Quick manual check: fetch and parse one sample Sina news article.
news = 'http://news.sina.com.cn/c/nd/2018-07-24/doc-ihftenhz7547208.shtml'
getNewsDetail(newsurl=news)
|