fix scraping for both AND and world
This commit is contained in:
parent db1a8269b2
commit 9dca3a8bb9
and.py (38 changed lines)
@@ -1,28 +1,36 @@
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
 import requests
+from bs4 import BeautifulSoup
 
 urlAnd = 'https://www.govern.ad/covid/taula.php'
 headers = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0'}
 
 resp = requests.get(urlAnd, headers=headers)
-webAnd = resp.text.split("\n")
-conf = webAnd[10]
-acti = webAnd[11]
-hosp = webAnd[12]
-
-conf = conf.split(">",1)[1]
-conf = conf.split("<",1)[0]
-acti = acti.split(">",1)[1]
-acti = acti.split("<",1)[0]
-hosp = hosp.split(">",1)[1]
-hosp = hosp.split("<",1)[0]
+webAnd = BeautifulSoup(resp.text, "lxml")
+webAnd = webAnd.get_text()
+webAnd = webAnd.split('\n')
+webAnd = [line for line in webAnd if line.strip() != ""]
 
 outfile = open("and.txt", "w")
-
-print("Situació Andorra \nCasos confirmats: %s \nCasos actius: %s \nHospitalitzats: %s" %(conf, acti, hosp), file=outfile)
+i=0
+tmp=''
+for txt in webAnd:
+    if i == 0:
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i==11:
+        outfile.write('\n-------\n')
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i%2==1:
+        tmp=txt+': '
+    elif i%2==0:
+        tmp=tmp+txt+'\n'
+        outfile.write(tmp)
+        tmp=''
+    i+=1
 
 outfile.close()
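The rewritten and.py no longer slices the raw response at fixed line numbers; it parses the page, flattens it to text, drops blank lines, and pairs alternating lines as label and value. A minimal offline sketch of that technique, using stand-in markup (the divs and figures below are illustrative, not the real govern.ad table):

from bs4 import BeautifulSoup

# Stand-in for the govern.ad table; the real markup and numbers differ.
sample = """<div>Situació COVID-19 Andorra</div>
<div>Casos confirmats</div>
<div>713</div>
<div>Casos actius</div>
<div>224</div>"""

# Same technique as the new and.py: flatten to text, drop blank lines,
# then pair alternating lines as "label: value".
lines = BeautifulSoup(sample, "lxml").get_text().split("\n")
lines = [line.strip() for line in lines if line.strip() != ""]

print(lines[0])
for label, value in zip(lines[1::2], lines[2::2]):
    print(label + ": " + value)

This is still sensitive to the order of lines on the page, but far less brittle than indexing into resp.text at hard-coded offsets.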
world.py (30 changed lines)
@@ -1,26 +1,30 @@
 #!/usr/bin/python3
-# -*- coding: utf-8 -*-
+# -*- coding: latin-1 -*-
 
 import requests
+from bs4 import BeautifulSoup as sp
 
 url='https://www.worldometers.info/coronavirus/'
 
 response = requests.get(url)
 
-web=response.text.split("\n")
+soup = sp(response.text, "lxml")
 
-total = web[360]
-deaths = web[370]
-recovered = web[378]
+divs = soup.findAll('div', {'id':'maincounter-wrap'})
 
-total = total.split(">",1)[1]
-deaths = deaths.split(">",1)[1]
-recovered = recovered.split(">",1)[1]
+spans=[]
 
-total = total.split(" ",1)[0]
-deaths = deaths.split("<",1)[0]
-recovered = recovered.split("<",1)[0]
+for div in divs:
+    spans.append(div.find('span').get_text())
+for i in range(len(spans)):
+    spans[i]=spans[i].replace(',','')
+
+total = int(spans[0])
+deaths = int(spans[1])
+recovered = int(spans[2])
+
+active = int(total)-int(deaths)-int(recovered)
 
 outfile = open("world.txt","w")
 
-print("World situation\nTotal coronavirus cases: %s\nTotal deaths: %s\nRecovered people: %s" %(total, deaths, recovered), file=outfile)
+print('Overall data about the COVID-19 pandemic\n\nTotal cases: {:,}\nActive cases: {:,}\nRecovered people: {:,}\nDeaths: {:,}'.format(total, active, recovered, deaths), file=outfile)
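The rewritten world.py likewise stops indexing into the raw response and instead locates the three counters by id. A minimal offline sketch, assuming markup shaped like the worldometers.info counters at the time of this commit (the figures are placeholders):

from bs4 import BeautifulSoup

# Placeholder numbers in markup shaped like the worldometers counters.
snippet = """<div id="maincounter-wrap"><span>2,500,000</span></div>
<div id="maincounter-wrap"><span>170,000</span></div>
<div id="maincounter-wrap"><span>650,000</span></div>"""

soup = BeautifulSoup(snippet, "lxml")
# findAll matches every div with that id, even though the id repeats.
spans = [div.find('span').get_text() for div in soup.findAll('div', {'id': 'maincounter-wrap'})]
total, deaths, recovered = (int(s.replace(',', '')) for s in spans)
active = total - deaths - recovered

print('Total: {:,}  Active: {:,}  Recovered: {:,}  Deaths: {:,}'.format(total, active, recovered, deaths))

Deriving active cases as total minus deaths minus recovered matches what the script writes to world.txt, since the page does not expose that figure as a counter of its own.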