[Python_bigdata] 파이썬을 활용한 크롤링

#day06_01_crawling.py

#크롤링 예제, JTBC 뉴스 검색 사이트에서 검색어에 따른 기사 개수 구하기

from bs4 import BeautifulSoup

import requests as req

# Search term whose matching article count is looked up.
keyword = '빅데이터'

# Base address of the JTBC news search page; the term is sent as the
# `term` query parameter instead of being glued onto the URL by hand.
url = 'http://jtbc.joins.com/search/news'

# Request the search page. Passing the term through `params` lets requests
# percent-encode it (non-ASCII text, spaces, '&', '#'), and the timeout keeps
# the script from waiting forever on an unresponsive server.
response = req.get(url, params={'term': keyword}, timeout=10)

# Fail right away on an HTTP error status instead of parsing an error page.
response.raise_for_status()

# Parse the HTML text of the response.
soup = BeautifulSoup(response.text, 'html.parser')

# In a CSS selector '.' matches a class and '#' matches an id.
# select_one() gives the first match, or None when nothing matches, so the
# missing-element case can be reported clearly. IndexError is raised on
# purpose: it is the same exception type the former `[0]` indexing produced.
cnt = soup.select_one('.txt_result')
if cnt is None:
    raise IndexError("no element with class 'txt_result' found in the search page")

# The article count is the text inside the first <strong> tag of that element.
strong = cnt.select_one('strong')
if strong is None:
    raise IndexError("no <strong> tag found inside the 'txt_result' element")

print(keyword, ':', strong.text, '건')

Stay Hungry Stay Foolish