from urllib.parse import urljoin

from bs4 import BeautifulSoup
import requests
#import threading
# Front page that scrapping_site() downloads and scans for headline links.
GOOGLE_NEWS = 'https://news.google.com.mx/'
# Substring an article URL must contain for scrapping_site() to fetch it.
# NOTE(review): presumably an identifier for one specific publisher/article
# inside Google News links -- confirm what this token selects.
CUSTOM_TARGET = 'EKhAIACoHCAowob_vCjCR'
def get_beautiful_soup(href, timeout=10):
    """Fetch *href* and parse the response body as HTML.

    Args:
        href: URL to request with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend when
        the server answers with status 200. ``None`` when the status is
        anything else, or when the connection fails or times out.

    Raises:
        requests.RequestException: For request errors other than connection
            failures and timeouts (for example a malformed URL), so that
            programming mistakes are not silently hidden.
    """
    try:
        response = requests.get(href, timeout=timeout)
    except (requests.ConnectionError, requests.Timeout):
        # Callers already treat None as "page unavailable", so an
        # unreachable server is reported the same way as a non-200 answer.
        return None
    if response.status_code != 200:
        return None
    return BeautifulSoup(response.text, 'html.parser')
def scrapping_site():
    """Print the body paragraphs of the targeted Google News articles.

    Downloads the GOOGLE_NEWS front page, walks every ``<h3>`` headline that
    carries the expected CSS classes, and for each headline whose link
    contains CUSTOM_TARGET fetches the linked page and prints every ``<p>``
    element found inside its body container.

    Headlines without a usable link, pages that cannot be fetched, and pages
    that lack the body container are skipped instead of raising.

    Returns:
        None. Results are written to standard output.
    """
    front_page = get_beautiful_soup(GOOGLE_NEWS)
    if front_page is None:
        return

    # find_all returns an iterable collection of matching tags.
    headlines = front_page.find_all(
        'h3', {'class': 'ipQwMb ekueJc gEATFF RD0gLb'}
    )
    for headline in headlines:
        link = headline.find('a')
        href = link.get('href') if link is not None else None
        if not href:
            # Headline without an anchor or without an href attribute.
            continue

        # Build an absolute URL including the scheme; requests rejects
        # scheme-less URLs. urljoin copes with './path', '/path' and
        # already-absolute hrefs alike.
        article_url = urljoin('https://news.google.com/', href)
        if CUSTOM_TARGET not in article_url:
            continue

        # Use a separate name so the front-page tree is not shadowed.
        article_page = get_beautiful_soup(article_url)
        if article_page is None:
            continue

        container = article_page.find(
            'div',
            {'class': 'field field-name-body field-type-text-with-summary field-label-hidden'},
        )
        if container is None:
            # Page layout differs from what is expected; nothing to print.
            continue

        for paragraph in container.find_all('p'):
            print(paragraph)
# Run the scraper only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    scrapping_site()