fix scraping for both AND and world

2021-01-28 12:52:33 +01:00 · 2021-01-28 12:52:33 +01:00 · 9dca3a8bb9
commit 9dca3a8bb9
parent db1a8269b2
2 changed files with 41 additions and 29 deletions
--- a/and.py
+++ b/and.py
@ -1,28 +1,36 @@
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
 import requests
+from bs4 import BeautifulSoup

 urlAnd = 'https://www.govern.ad/covid/taula.php'
 headers = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0'}

 resp = requests.get(urlAnd, headers=headers)
-webAnd = resp.text.split("\n")
-
-
-conf = webAnd[10]
-acti = webAnd[11]
-hosp = webAnd[12]
-
-
-conf = conf.split(">",1)[1]
-conf = conf.split("<",1)[0]
-acti = acti.split(">",1)[1]
-acti = acti.split("<",1)[0]
-hosp = hosp.split(">",1)[1]
-hosp = hosp.split("<",1)[0]
+webAnd = BeautifulSoup(resp.text, "lxml")
+webAnd = webAnd.get_text()
+webAnd = webAnd.split('\n')
+webAnd = [line for line in webAnd if line.strip() != ""]

 outfile = open("and.txt", "w")

-print("Situació Andorra \nCasos confirmats: %s \nCasos actius: %s \nHospitalitzats: %s" %(conf, acti, hosp), file=outfile)
+i=0
+tmp=''
+for txt in webAnd:
+    if i == 0:
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i==11:
+        outfile.write('\n-------\n')
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i%2==1:
+        tmp=txt+': '
+    elif i%2==0:
+        tmp=tmp+txt+'\n'
+        outfile.write(tmp)
+        tmp=''
+
+    i+=1

 outfile.close()
--- a/world.py
+++ b/world.py
@ -1,26 +1,30 @@
 #!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
+# -*- coding: latin-1 -*-
 import requests
+from bs4 import BeautifulSoup as sp

 url='https://www.worldometers.info/coronavirus/'

 response = requests.get(url)

-web=response.text.split("\n")
+soup = sp(response.text, "lxml")

-total = web[360]
-deaths = web[370]
-recovered = web[378]
+divs = soup.findAll('div', {'id':'maincounter-wrap'})

-total = total.split(">",1)[1]
-deaths = deaths.split(">",1)[1]
-recovered = recovered.split(">",1)[1]
+spans=[]

-total = total.split(" ",1)[0]
-deaths = deaths.split("<",1)[0]
-recovered = recovered.split("<",1)[0]
+for div in divs:
+    spans.append(div.find('span').get_text())
+
+for i in range(len(spans)):
+    spans[i]=spans[i].replace(',','')
+
+total = int(spans[0])
+deaths = int(spans[1])
+recovered = int(spans[2])
+
+active = int(total)-int(deaths)-int(recovered)

 outfile = open("world.txt","w")

-print("World situation\nTotal coronavirus cases: %s\nTotal deaths: %s\nRecovered people: %s" %(total, deaths, recovered), file=outfile)
+print('Overall data about the COVID-19 pandemic\n\nTotal cases: {:,}\nActive cases: {:,}\nRecovered people: {:,}\nDeaths: {:,}'.format(total, active, recovered, deaths), file=outfile)