From 9dca3a8bb9ab50354f5cb73a80ea9d3b8bad85b0 Mon Sep 17 00:00:00 2001
From: Xaloc
Date: Thu, 28 Jan 2021 12:52:33 +0100
Subject: [PATCH] fix scraping for both AND and world

---
 and.py   | 40 ++++++++++++++++++++++++----------------
 world.py | 30 +++++++++++++++++-------------
 2 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/and.py b/and.py
index 1c0a331..73fe007 100755
--- a/and.py
+++ b/and.py
@@ -1,28 +1,36 @@
 #!/usr/bin/python3
-#-*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 import requests
+from bs4 import BeautifulSoup
 
 urlAnd = 'https://www.govern.ad/covid/taula.php'
 headers = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0'}
 
 
 resp = requests.get(urlAnd, headers=headers)
-webAnd = resp.text.split("\n")
-
-
-conf = webAnd[10]
-acti = webAnd[11]
-hosp = webAnd[12]
-
-
-conf = conf.split(">",1)[1]
-conf = conf.split("<",1)[0]
-acti = acti.split(">",1)[1]
-acti = acti.split("<",1)[0]
-hosp = hosp.split(">",1)[1]
-hosp = hosp.split("<",1)[0]
+webAnd = BeautifulSoup(resp.text, "lxml")
+webAnd = webAnd.get_text()
+webAnd = webAnd.split('\n')
+webAnd = [line for line in webAnd if line.strip() != ""]
 
 outfile = open("and.txt", "w")
-print("Situació Andorra \nCasos confirmats: %s \nCasos actius: %s \nHospitalitzats: %s" %(conf, acti, hosp), file=outfile)
+i=0
+tmp=''
+for txt in webAnd:
+    if i == 0:
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i==11:
+        outfile.write('\n-------\n')
+        outfile.write(txt+'\n')
+        outfile.write('\n')
+    elif i%2==1:
+        tmp=txt+': '
+    elif i%2==0:
+        tmp=tmp+txt+'\n'
+        outfile.write(tmp)
+        tmp=''
+
+    i+=1
 outfile.close()
 
diff --git a/world.py b/world.py
index 1d23eb9..df42027 100755
--- a/world.py
+++ b/world.py
@@ -1,26 +1,30 @@
 #!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
+# -*- coding: latin-1 -*-
 import requests
+from bs4 import BeautifulSoup as sp
 
 url='https://www.worldometers.info/coronavirus/'
 
 
 response = requests.get(url)
-web=response.text.split("\n")
+soup = sp(response.text, "lxml")
 
-total = web[360]
-deaths = web[370]
-recovered = web[378]
+divs = soup.findAll('div', {'id':'maincounter-wrap'})
 
-total = total.split(">",1)[1]
-deaths = deaths.split(">",1)[1]
-recovered = recovered.split(">",1)[1]
+spans=[]
 
-total = total.split(" ",1)[0]
-deaths = deaths.split("<",1)[0]
-recovered = recovered.split("<",1)[0]
+for div in divs:
+    spans.append(div.find('span').get_text())
+
+for i in range(len(spans)):
+    spans[i]=spans[i].replace(',','')
+
+total = int(spans[0])
+deaths = int(spans[1])
+recovered = int(spans[2])
+
+active = int(total)-int(deaths)-int(recovered)
 
 outfile = open("world.txt","w")
-print("World situation\nTotal coronavirus cases: %s\nTotal deaths: %s\nRecovered people: %s" %(total, deaths, recovered), file=outfile)
+print('Overall data about the COVID-19 pandemic\n\nTotal cases: {:,}\nActive cases: {:,}\nRecovered people: {:,}\nDeaths: {:,}'.format(total, active, recovered, deaths), file=outfile)
 
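
Note (not part of the patch): the and.py change stops indexing raw HTML
lines and instead strips all markup with get_text(), drops blank lines,
and pairs the remaining lines by parity, odd index = label, even index =
value. A minimal sketch of that pairing rule on made-up sample lines,
so it can be checked without hitting govern.ad (the i==11 branch in the
patch, which starts a second section, is left out here):

    # Lines as they would come out of get_text() after the patch's
    # blank-line filter: a title first, then alternating label/value
    # pairs. Sample values are invented for illustration.
    lines = ['Situació Andorra',
             'Casos confirmats', '9972',
             'Casos actius', '1019',
             'Hospitalitzats', '23']

    print(lines[0] + '\n')
    # Same parity rule as the i%2 branches the patch adds to and.py.
    for label, value in zip(lines[1::2], lines[2::2]):
        print('{}: {}'.format(label, value))

The parity pairing still depends on the page's table keeping its
label/value alternation, but it no longer breaks when the markup around
those values changes.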
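
Note (not part of the patch): a standalone sketch of the new world.py
extraction path, for review. The div#maincounter-wrap selector and the
comma stripping come from the patch; the assumption that the page still
renders exactly three such counters, in total / deaths / recovered
order, is carried over from it. find_all is the modern spelling of the
findAll alias the patch uses.

    import requests
    from bs4 import BeautifulSoup

    resp = requests.get('https://www.worldometers.info/coronavirus/')
    soup = BeautifulSoup(resp.text, 'lxml')

    # Each headline counter sits in a div with id="maincounter-wrap";
    # the figure itself is the text of the span inside it.
    counters = [div.find('span').get_text()
                for div in soup.find_all('div', {'id': 'maincounter-wrap'})]

    # Strip thousands separators before converting, as the patch does.
    total, deaths, recovered = (int(c.replace(',', '')) for c in counters)

    active = total - deaths - recovered
    print('Total: {:,}  Active: {:,}'.format(total, active))

Keying on the element id rather than fixed line offsets (web[360],
web[370], web[378]) is what makes this survive layout changes; the
{:,} format specifier reinserts the thousands separators on output.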