|
45
|
1 |
# -*- coding: utf-8 -*- |
|
|
2 |
''' |
|
|
3 |
Created on Nov 14, 2012 |
|
|
4 |
|
|
|
5 |
@author: ymh |
|
|
6 |
''' |
|
|
7 |
import urlparse |
|
|
8 |
import requests |
|
|
9 |
from . import settings |
|
|
10 |
import logging |
|
|
11 |
|
|
|
12 |
logger = logging.getLogger(__name__) |
|
|
13 |
|
|
|
14 |
def get_abs_url(url, default_domain, default_scheme='http'):
    """Return *url* as an absolute URL, supplying a host when it has none.

    :param url: the URL to resolve; may be absolute, host-relative, or
        empty/``None``.
    :param default_domain: network location (``host`` or ``host:port``) used
        when *url* does not carry one.
    :param default_scheme: scheme used together with *default_domain*;
        defaults to ``'http'``, which was previously hard-coded.
    :return: *url* unchanged when it is falsy or already names a host,
        otherwise a URL rebuilt from *default_scheme*, *default_domain* and
        the path, params, query and fragment of *url*.
    """
    if not url:
        # Falsy values (None, '') are handed back as-is, not turned into a URL.
        return url

    url_part = urlparse.urlparse(url)
    if url_part.netloc:
        # A host is already present (this includes scheme-relative
        # '//host/path' URLs): nothing to add.
        return url

    # Only the netloc decides whether the URL is rebuilt: any scheme found on
    # a host-less URL is replaced by default_scheme.
    pr = urlparse.ParseResult(
        default_scheme,
        default_domain,
        url_part.path,
        url_part.params,
        url_part.query,
        url_part.fragment,
    )
    return pr.geturl()
|
|
24 |
|
|
|
25 |
|
|
|
26 |
def get_all_objects(res_url, field_filter):
    """Collect the objects of a paginated JSON resource, following ``next`` links.

    Starting at *res_url*, each page is fetched with ``requests.get``. The
    entries of the page's ``objects`` list whose *field_filter* value is
    truthy are kept, and the loop continues with the page's ``meta.next``
    link (made absolute against ``settings.LDT_NETLOC``) until there is none.

    :param res_url: URL of the first page; a falsy value yields an empty list.
    :param field_filter: key that must map to a truthy value for an object to
        be kept.
    :return: list of the retained objects, in the order they were received.
        When a page cannot be fetched or decoded, the error is logged and the
        objects gathered so far are returned.
    """
    contents = []
    url = res_url
    while url:
        r = requests.get(url)
        if r.status_code != requests.codes.ok:  #@UndefinedVariable
            logger.error("Error when requesting contents %r : %r", r.status_code, r.text)
            break

        # ``Response.json`` was a property in requests < 1.0 and has been a
        # method since; support both so the code works across versions.
        try:
            payload = r.json() if callable(r.json) else r.json
        except ValueError:
            payload = None
        if not isinstance(payload, dict):
            # Undecodable or unexpected body: stop paging like for a bad
            # status code, keeping whatever was already collected.
            logger.error("Error when decoding contents from %r : %r", url, r.text)
            break

        contents.extend(c for c in (payload.get('objects') or []) if c.get(field_filter))

        # A missing or null ``meta``/``next`` ends the loop (get_abs_url
        # returns falsy input unchanged).
        next_url = (payload.get('meta') or {}).get('next')
        url = get_abs_url(next_url, settings.LDT_NETLOC)
    return contents