fix scraping for both AND and world

Xaloc 2021-01-28 12:52:33 +01:00
parent db1a8269b2
commit 9dca3a8bb9
2 changed files with 41 additions and 29 deletions

and.py

@@ -1,28 +1,36 @@
 #!/usr/bin/python3
-#-*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 import requests
+from bs4 import BeautifulSoup

 urlAnd = 'https://www.govern.ad/covid/taula.php'
 headers = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0'}
 resp = requests.get(urlAnd, headers=headers)
-webAnd = resp.text.split("\n")
-
-conf = webAnd[10]
-acti = webAnd[11]
-hosp = webAnd[12]
-
-conf = conf.split(">",1)[1]
-conf = conf.split("<",1)[0]
-acti = acti.split(">",1)[1]
-acti = acti.split("<",1)[0]
-hosp = hosp.split(">",1)[1]
-hosp = hosp.split("<",1)[0]
+webAnd = BeautifulSoup(resp.text, "lxml")
+webAnd = webAnd.get_text()
+webAnd = webAnd.split('\n')
+webAnd = [line for line in webAnd if line.strip() != ""]

 outfile = open("and.txt", "w")
-print("Situació Andorra \nCasos confirmats: %s \nCasos actius: %s \nHospitalitzats: %s" %(conf, acti, hosp), file=outfile)
+i=0
+tmp=''
+for txt in webAnd:
+    if i == 0:
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i==11:
+        outfile.write('\n-------\n')
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i%2==1:
+        tmp=txt+': '
+    elif i%2==0:
+        tmp=tmp+txt+'\n'
+        outfile.write(tmp)
+        tmp=''
+    i+=1
 outfile.close()
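For reference, a minimal offline sketch of the parsing strategy this commit adopts in and.py: let BeautifulSoup strip the markup, drop blank lines, and treat what remains as a title line followed by alternating label/value pairs. The HTML snippet and figures below are invented stand-ins for the govern.ad table (assuming each figure lands on its own line once the tags are stripped), and it uses html.parser so the sketch runs even without lxml installed:

#!/usr/bin/python3
# Offline sketch of the new and.py parsing: strip markup, drop blank
# lines, then pair the remaining lines as alternating labels and values.
from bs4 import BeautifulSoup

# Made-up stand-in for the govern.ad table; not real data.
sample_html = """
<table>
<tr><th>Situació Andorra</th></tr>
<tr>
<td>Casos confirmats</td>
<td>9937</td>
</tr>
<tr>
<td>Casos actius</td>
<td>512</td>
</tr>
</table>
"""

soup = BeautifulSoup(sample_html, "html.parser")  # and.py uses "lxml"
lines = [line for line in soup.get_text().split('\n') if line.strip() != ""]

print(lines[0])  # title line
for label, value in zip(lines[1::2], lines[2::2]):
    print(label + ': ' + value)

The real script additionally writes a '-------' separator once it reaches line 11 of the stripped text; this sketch leaves that out.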

world.py

@@ -1,26 +1,30 @@
 #!/usr/bin/python3
-# -*- coding: utf-8 -*-
+# -*- coding: latin-1 -*-
 import requests
+from bs4 import BeautifulSoup as sp

 url='https://www.worldometers.info/coronavirus/'
 response = requests.get(url)
-web=response.text.split("\n")
-
-total = web[360]
-deaths = web[370]
-recovered = web[378]
-
-total = total.split(">",1)[1]
-deaths = deaths.split(">",1)[1]
-recovered = recovered.split(">",1)[1]
-
-total = total.split(" ",1)[0]
-deaths = deaths.split("<",1)[0]
-recovered = recovered.split("<",1)[0]
+soup = sp(response.text, "lxml")
+divs = soup.findAll('div', {'id':'maincounter-wrap'})
+
+spans=[]
+
+for div in divs:
+    spans.append(div.find('span').get_text())
+
+for i in range(len(spans)):
+    spans[i]=spans[i].replace(',','')
+
+total = int(spans[0])
+deaths = int(spans[1])
+recovered = int(spans[2])
+active = int(total)-int(deaths)-int(recovered)

 outfile = open("world.txt","w")
-print("World situation\nTotal coronavirus cases: %s\nTotal deaths: %s\nRecovered people: %s" %(total, deaths, recovered), file=outfile)
+print('Overall data about the COVID-19 pandemic\n\nTotal cases: {:,}\nActive cases: {:,}\nRecovered people: {:,}\nDeaths: {:,}'.format(total, active, recovered, deaths), file=outfile)