--- a/data/script/scrapeso.py Thu Jun 28 17:26:15 2018 +0200
+++ b/data/script/scrapeso.py Thu Jun 28 18:50:20 2018 +0200
@@ -64,6 +64,13 @@
infosdict['image'] = {}
infosdict['object'] = {}
realurl = "https://www.eso.org" + j['url']
+
+ tempo_imgid = realurl.strip('/').split('/')[-1]
+ tempo_imgdirectory = directory + tempo_imgid
+ if os.path.isdir(tempo_imgdirectory) and os.path.isfile(os.path.join(tempo_imgdirectory, tempo_imgid+".json")):
+ print("Image already processed skipping %s" % realurl)
+ continue
+
page = requests.get(realurl)
#print(realurl)
soup = BeautifulSoup(page.text, "html5lib")