fix scraping for both AND and world
parent db1a8269b2
commit 9dca3a8bb9
and.py (40 changed lines)

@@ -1,28 +1,36 @@
 #!/usr/bin/python3
-#-*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 import requests
+from bs4 import BeautifulSoup
 
 urlAnd = 'https://www.govern.ad/covid/taula.php'
 headers = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0'}
 
 resp = requests.get(urlAnd, headers=headers)
-webAnd = resp.text.split("\n")
-conf = webAnd[10]
-acti = webAnd[11]
-hosp = webAnd[12]
-
-conf = conf.split(">",1)[1]
-conf = conf.split("<",1)[0]
-acti = acti.split(">",1)[1]
-acti = acti.split("<",1)[0]
-hosp = hosp.split(">",1)[1]
-hosp = hosp.split("<",1)[0]
+webAnd = BeautifulSoup(resp.text, "lxml")
+webAnd = webAnd.get_text()
+webAnd = webAnd.split('\n')
+webAnd = [line for line in webAnd if line.strip() != ""]
 
 outfile = open("and.txt", "w")
 
-print("Situació Andorra \nCasos confirmats: %s \nCasos actius: %s \nHospitalitzats: %s" %(conf, acti, hosp), file=outfile)
+i=0
+tmp=''
+for txt in webAnd:
+    if i == 0:
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i==11:
+        outfile.write('\n-------\n')
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i%2==1:
+        tmp=txt+': '
+    elif i%2==0:
+        tmp=tmp+txt+'\n'
+        outfile.write(tmp)
+        tmp=''
+    i+=1
+
 outfile.close()
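For context, a minimal standalone sketch of the new and.py approach: strip all markup with BeautifulSoup, drop blank lines, and treat the remaining rows as alternating label/value pairs. This simplifies away the special cases in the committed loop (the title at index 0 and the separator inserted at index 11), and it assumes the govern.ad page still renders as a flat label/value table and that the lxml parser is installed.

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup

urlAnd = 'https://www.govern.ad/covid/taula.php'
headers = {'User-Agent': 'Mozilla/5.0'}  # any browser-like UA works here

resp = requests.get(urlAnd, headers=headers)
# Strip every tag, then drop blank lines so the row order is stable.
text = BeautifulSoup(resp.text, "lxml").get_text()
lines = [line for line in text.split('\n') if line.strip() != ""]

print(lines[0])  # page title
# Assumption: remaining rows alternate label, value.
for label, value in zip(lines[1::2], lines[2::2]):
    print('%s: %s' % (label, value))

Parsing the rendered text this way is what the commit message means by fixing the scraping: the old code indexed raw HTML lines (webAnd[10] and friends), which broke whenever the page's markup shifted by a line.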
world.py (30 changed lines)

@@ -1,26 +1,30 @@
 #!/usr/bin/python3
-# -*- coding: utf-8 -*-
+# -*- coding: latin-1 -*-
 
 import requests
+from bs4 import BeautifulSoup as sp
 
 url='https://www.worldometers.info/coronavirus/'
 
 response = requests.get(url)
 
-web=response.text.split("\n")
-
-total = web[360]
-deaths = web[370]
-recovered = web[378]
-
-total = total.split(">",1)[1]
-deaths = deaths.split(">",1)[1]
-recovered = recovered.split(">",1)[1]
-
-total = total.split(" ",1)[0]
-deaths = deaths.split("<",1)[0]
-recovered = recovered.split("<",1)[0]
+soup = sp(response.text, "lxml")
+
+divs = soup.findAll('div', {'id':'maincounter-wrap'})
+
+spans=[]
+for div in divs:
+    spans.append(div.find('span').get_text())
+
+for i in range(len(spans)):
+    spans[i]=spans[i].replace(',','')
+
+total = int(spans[0])
+deaths = int(spans[1])
+recovered = int(spans[2])
+
+active = int(total)-int(deaths)-int(recovered)
 
 outfile = open("world.txt","w")
 
-print("World situation\nTotal coronavirus cases: %s\nTotal deaths: %s\nRecovered people: %s" %(total, deaths, recovered), file=outfile)
+print('Overall data about the COVID-19 pandemic\n\nTotal cases: {:,}\nActive cases: {:,}\nRecovered people: {:,}\nDeaths: {:,}'.format(total, active, recovered, deaths), file=outfile)
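Likewise, a minimal sketch of the new world.py scraping, assuming the worldometers page still exposes its three headline counters (total cases, deaths, recovered, in that order) as one <span> with a comma-grouped number inside each div with id maincounter-wrap:

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup

resp = requests.get('https://www.worldometers.info/coronavirus/')
soup = BeautifulSoup(resp.text, "lxml")

# Assumed page structure: one span per div#maincounter-wrap,
# ordered total cases, deaths, recovered.
spans = [div.find('span').get_text()
         for div in soup.find_all('div', {'id': 'maincounter-wrap'})]
total, deaths, recovered = (int(s.replace(',', '')) for s in spans)
active = total - deaths - recovered  # derived, not scraped

print('Total: {:,}  Active: {:,}  Recovered: {:,}  Deaths: {:,}'
      .format(total, active, recovered, deaths))

Computing active cases as total minus deaths minus recovered, as the commit does, avoids depending on a fourth page element that the old line-index approach would have had to locate.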