이제 네이버에 영화 제목을 입력한 후, 각종 정보를 scraping해 보자.
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Fetch the Naver search result page for the movie query and print the
# text of the element that holds the movie title.
url='https://search.naver.com/search.naver?&query=%EC%8A%A4%ED%8C%8C%EC%9D%B4%EB%8D%94%EB%A7%A8%EB%85%B8%EC%9B%A8%EC%9D%B4%ED%99%88'

# Use the response as a context manager so the connection is closed as
# soon as the document has been parsed.
with urlopen(url) as response:
    html_rsp = BeautifulSoup(response, 'html.parser')

# find() returns None when no <strong class="_text"> exists (for example
# if the page markup changes), so guard before calling get_text().
title = html_rsp.find('strong', {'class':'_text'})
if title is None:
    print('title element not found')
else:
    print(title.get_text())
스파이더맨 노웨이 홈의 제목을 출력하는 코드
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Fetch the Naver search result page for the movie query and print the
# text of every link inside the <div class="list_info"> element
# (presumably the cast list, per the surrounding notes).
url='https://search.naver.com/search.naver?&query=%EC%8A%A4%ED%8C%8C%EC%9D%B4%EB%8D%94%EB%A7%A8%EB%85%B8%EC%9B%A8%EC%9D%B4%ED%99%88'

# Close the HTTP response once the page has been parsed.
with urlopen(url) as response:
    html_rsp = BeautifulSoup(response, 'html.parser')

# find() returns None when the container is missing; without this guard
# the chained find_all() call would fail with an AttributeError.
info_box = html_rsp.find('div', {'class':'list_info'})
if info_box is None:
    print('list_info element not found')
else:
    for link in info_box.find_all('a'):
        print(link.get_text())
영화의 출연진을 출력하는 코드
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Download a Seattle Times article and save the text of each paragraph
# of its body to seattle.txt, one paragraph per line.
url = 'https://www.seattletimes.com/business/real-estate/zillows-zestimate-overvalued-a-washington-home-by-700-percent-in-a-case-of-algorithms-gone-wrong/'

# Close the HTTP response once the page has been parsed.
with urlopen(url) as response:
    html_rsp = BeautifulSoup(response, 'html.parser')

# find_all('p') collects every paragraph in the article container;
# find('p') would return only the first one, and iterating over that
# single tag walks its child nodes rather than the paragraphs.
article = html_rsp.find('div', {'id' : 'article-content'})
if article is None:
    print('article-content element not found')
    paragraphs = []
else:
    paragraphs = article.find_all('p')

# The codec name must be 'utf-8'; a misspelled name raises LookupError
# as soon as open() is called.
with open('seattle.txt', 'w', encoding='utf-8') as f:
    for paragraph in paragraphs:
        f.write(paragraph.get_text()+'\n')
시애틀 뉴스 내용을 txt 파일로 저장하는 코드
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Download one brunch post, echo each paragraph of its body to stdout
# and save the paragraphs to brunch.txt.
url = 'https://brunch.co.kr/@imagineer/267'

# Close the HTTP response once the page has been parsed.
with urlopen(url) as response:
    http_rsp = BeautifulSoup(response, 'html.parser')

# find() returns None when no <div class="wrap_body"> exists, so guard
# before asking it for its paragraphs.
body = http_rsp.find('div', {'class' : 'wrap_body'})
if body is None:
    print('wrap_body element not found')
    paragraphs = []
else:
    paragraphs = body.find_all('p')

with open('brunch.txt', 'w', encoding = 'utf-8') as f:
    for paragraph in paragraphs:
        text = paragraph.get_text()
        print(text)
        # Terminate each paragraph with a newline so consecutive
        # paragraphs do not run together in the saved file.
        f.write(text + '\n')
brunch 블로그 내용을 txt 파일로 저장하는 코드
자동화를 실시
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Try brunch post ids 0 through 9 in turn; for each post that can be
# fetched, echo its paragraphs and append them to brunch.txt. A failure
# on one post is reported and the loop moves on to the next id.
for i in range(10):
    url = 'https://brunch.co.kr/@imagineer/'+ str(i)
    try:
        # Close the HTTP response once the page has been parsed.
        with urlopen(url) as response:
            http_rsp = BeautifulSoup(response, 'html.parser')

        # A page without <div class="wrap_body"> is reported the same
        # way as any other failure for this post.
        body = http_rsp.find('div', {'class' : 'wrap_body'})
        if body is None:
            print(str(i)+'번째 글에서 오류가 발생')
            continue

        with open('brunch.txt', 'a', encoding = 'utf-8') as f:
            for paragraph in body.find_all('p'):
                text = paragraph.get_text()
                print(text)
                # Terminate each paragraph with a newline so paragraphs
                # (and successive posts) do not run together in the file.
                f.write(text + '\n')
    except Exception:
        # Stay best-effort for ordinary errors (HTTP 404, network or file
        # problems), but unlike a bare except this lets KeyboardInterrupt
        # and SystemExit stop the loop.
        print(str(i)+'번째 글에서 오류가 발생')
자동화로 저장