From 9dca3a8bb9ab50354f5cb73a80ea9d3b8bad85b0 Mon Sep 17 00:00:00 2001
From: Xaloc
Date: Thu, 28 Jan 2021 12:52:33 +0100
Subject: [PATCH] fix scraping for both AND and world

---
 and.py   | 40 ++++++++++++++++++++++++----------------
 world.py | 30 +++++++++++++++++-------------
 2 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/and.py b/and.py
index 1c0a331..73fe007 100755
--- a/and.py
+++ b/and.py
@@ -1,28 +1,36 @@
 #!/usr/bin/python3
-#-*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 import requests
+from bs4 import BeautifulSoup
 
 urlAnd = 'https://www.govern.ad/covid/taula.php'
 headers = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0'}
 
 
 resp = requests.get(urlAnd, headers=headers)
-webAnd = resp.text.split("\n")
-
-
-conf = webAnd[10]
-acti = webAnd[11]
-hosp = webAnd[12]
-
-
-conf = conf.split(">",1)[1]
-conf = conf.split("<",1)[0]
-acti = acti.split(">",1)[1]
-acti = acti.split("<",1)[0]
-hosp = hosp.split(">",1)[1]
-hosp = hosp.split("<",1)[0]
+webAnd = BeautifulSoup(resp.text, "lxml")
+webAnd = webAnd.get_text()
+webAnd = webAnd.split('\n')
+webAnd = [line for line in webAnd if line.strip() != ""]
 
 outfile = open("and.txt", "w")
-print("Situació Andorra \nCasos confirmats: %s \nCasos actius: %s \nHospitalitzats: %s" %(conf, acti, hosp), file=outfile)
+i=0
+tmp=''
+for txt in webAnd:
+    if i == 0:
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i==11:
+        outfile.write('\n-------\n')
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i%2==1:
+        tmp=txt+': '
+    elif i%2==0:
+        tmp=tmp+txt+'\n'
+        outfile.write(tmp)
+        tmp=''
+
+    i+=1
 outfile.close()
 
diff --git a/world.py b/world.py
index 1d23eb9..df42027 100755
--- a/world.py
+++ b/world.py
@@ -1,26 +1,30 @@
 #!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
+# -*- coding: latin-1 -*-
 import requests
+from bs4 import BeautifulSoup as sp
 
 url='https://www.worldometers.info/coronavirus/'
 
 
 response = requests.get(url)
-web=response.text.split("\n")
+soup = sp(response.text, "lxml")
 
-total = web[360]
-deaths = web[370]
-recovered = web[378]
+divs = soup.findAll('div', {'id':'maincounter-wrap'})
 
-total = total.split(">",1)[1]
-deaths = deaths.split(">",1)[1]
-recovered = recovered.split(">",1)[1]
+spans=[]
 
-total = total.split(" ",1)[0]
-deaths = deaths.split("<",1)[0]
-recovered = recovered.split("<",1)[0]
+for div in divs:
+    spans.append(div.find('span').get_text())
+
+for i in range(len(spans)):
+    spans[i]=spans[i].replace(',','')
+
+total = int(spans[0])
+deaths = int(spans[1])
+recovered = int(spans[2])
+
+active = int(total)-int(deaths)-int(recovered)
 
 outfile = open("world.txt","w")
-print("World situation\nTotal coronavirus cases: %s\nTotal deaths: %s\nRecovered people: %s" %(total, deaths, recovered), file=outfile)
+print('Overall data about the COVID-19 pandemic\n\nTotal cases: {:,}\nActive cases: {:,}\nRecovered people: {:,}\nDeaths: {:,}'.format(total, active, recovered, deaths), file=outfile)
 
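
Note (not part of the patch): the and.py change stops indexing raw HTML
lines and instead strips all markup with get_text(), drops blank lines,
and pairs the remaining lines by parity, odd index = label, even index =
value. A minimal sketch of that pairing rule on made-up sample lines,
so it can be checked without hitting govern.ad (the i==11 branch in the
patch, which starts a second section, is left out here):

    # Lines as they would come out of get_text() after the patch's
    # blank-line filter: a title first, then alternating label/value
    # pairs. Sample values are invented for illustration.
    lines = ['Situació Andorra',
             'Casos confirmats', '9972',
             'Casos actius', '1019',
             'Hospitalitzats', '23']

    print(lines[0] + '\n')
    # Same parity rule as the i%2 branches the patch adds to and.py.
    for label, value in zip(lines[1::2], lines[2::2]):
        print('{}: {}'.format(label, value))

The parity pairing still depends on the page's table keeping its
label/value alternation, but it no longer breaks when the markup around
those values changes.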
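
Note (not part of the patch): a standalone sketch of the new world.py
extraction path, for review. The div#maincounter-wrap selector and the
comma stripping come from the patch; the assumption that the page still
renders exactly three such counters, in total / deaths / recovered
order, is carried over from it. find_all is the modern spelling of the
findAll alias the patch uses.

    import requests
    from bs4 import BeautifulSoup

    resp = requests.get('https://www.worldometers.info/coronavirus/')
    soup = BeautifulSoup(resp.text, 'lxml')

    # Each headline counter sits in a div with id="maincounter-wrap";
    # the figure itself is the text of the span inside it.
    counters = [div.find('span').get_text()
                for div in soup.find_all('div', {'id': 'maincounter-wrap'})]

    # Strip thousands separators before converting, as the patch does.
    total, deaths, recovered = (int(c.replace(',', '')) for c in counters)

    active = total - deaths - recovered
    print('Total: {:,}  Active: {:,}'.format(total, active))

Keying on the element id rather than fixed line offsets (web[360],
web[370], web[378]) is what makes this survive layout changes; the
{:,} format specifier reinserts the thousands separators on output.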