| author | ymh <ymh.work@gmail.com> |
| Sat, 22 Sep 2018 15:25:25 +0200 | |
| changeset 1473 | 9939b42f1eb4 |
| parent 1137 | 5c757e167687 |
| permissions | -rw-r--r-- |
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
1 |
import argparse |
|
888
6fc6637d8403
update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
693
diff
changeset
|
2 |
import logging |
|
6fc6637d8403
update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
693
diff
changeset
|
3 |
import math |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
4 |
import re |
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
5 |
import time |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
6 |
import urllib |
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
7 |
|
|
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
8 |
from blessings import Terminal |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
9 |
import requests |
|
888
6fc6637d8403
update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
693
diff
changeset
|
10 |
import twitter |
|
6fc6637d8403
update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
693
diff
changeset
|
11 |
|
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
12 |
from iri_tweet import models, utils |
|
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
13 |
from iri_tweet.processor import TwitterProcessorStatus |
|
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
14 |
|
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
15 |
from selenium import webdriver |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
16 |
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
17 |
from selenium.webdriver.common.by import By |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
18 |
from selenium.webdriver.support.ui import WebDriverWait |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
19 |
from selenium.webdriver.support import expected_conditions as EC |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
20 |
|
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
21 |
from lxml import html |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
22 |
import json |
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
23 |
|
|
888
6fc6637d8403
update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
693
diff
changeset
|
24 |
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Application name handed to utils.get_oauth_token when the OAuth token
# is requested by the script entry point.
APPLICATION_NAME = "Tweet recorder user"

# Capabilities used to start PhantomJS: a private copy of selenium's
# PhantomJS defaults, with the user agent overridden by a desktop Chrome
# user-agent string.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap.update({
    "phantomjs.page.settings.userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.103 Safari/537.36",
})
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
30 |
|
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
31 |
class TopsyResource(object):
    """Iterable over the results of a Topsy search, scraped with PhantomJS.

    Iterating over an instance loads the result pages one after the other
    and yields one dict per result with the keys ``'user'``, ``'content'``
    and ``'url'``. Results without a url are skipped.

    The instance owns a PhantomJS web driver; call :meth:`close` once the
    results have been consumed to release it.
    """

    def __init__(self, query, **kwargs):
        """Store the search options and start the PhantomJS driver.

        :param query: the search query, sent as the ``q`` url parameter.
        :param kwargs: extra url parameters sent with every request. The
            ``perpage`` entry, when present, is also used to compute the
            ``offset`` parameter of the pages after the first one.
        """
        self.options = {}
        self.options['q'] = query
        self.options.update(kwargs)
        self.base_url = "http://topsy.com/s"
        self.driver = webdriver.PhantomJS(desired_capabilities=dcap)
        self.driver.set_window_size(1024, 768)
        # -1 means that no page has been requested yet.
        self.page = -1
        # lxml tree of the last loaded page, None when the load failed.
        self.tree = None

    def close(self):
        """Quit the PhantomJS driver if it is still running.

        Safe to call several times. The instance can not request any new
        page after this call.
        """
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.quit()
            self.driver = None

    def _build_url(self, params):
        """Return the search url for the given dict of url parameters.

        Spaces are encoded as ``%20`` instead of ``+``.
        """
        # urlencode moved to urllib.parse in Python 3; import it locally so
        # that the class works with both major versions.
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        return "%s?%s" % (self.base_url, urlencode(params).replace('+', '%20'))

    def __do_request(self, params):
        """Load the search page for ``params`` and parse it into ``self.tree``.

        ``self.tree`` is set to None when no element with the
        ``result-tweet`` class shows up within 60 seconds, or when waiting
        for it fails for any other reason.
        """
        url = self._build_url(params)
        print('Requesting %s' % url)
        self.driver.get(url)
        try:
            WebDriverWait(self.driver, 60).until(
                EC.presence_of_element_located((By.CLASS_NAME, "result-tweet"))
            )
        except Exception as e:
            # Best effort: a failed page ends the iteration instead of
            # crashing the caller.
            print('Exception requesting %s : %s' % (url, e))
            self.tree = None
        else:
            self.tree = html.fromstring(self.driver.page_source)

    def __check_last(self):
        """Return True when there is no further page to request.

        This is the case when the last request failed, or when the pager
        of the current page has its "next" entry disabled. Always False
        before the first request.
        """
        if self.page < 0:
            return False
        if self.tree is None:
            return True
        disabled_next = self.tree.xpath("//*[@id=\"module-pager\"]/div/ul/li[@data-page=\"next\"and @class=\"disabled\"]")
        return bool(disabled_next)

    def __next_page(self):
        """Request the next page of results.

        :return: True when a new page was loaded and parsed, False when
            the last page was already reached or when the request failed.
        """
        if self.__check_last():
            return False
        self.page += 1
        params = {}
        params.update(self.options)
        # The first page (page 0) is requested without any offset.
        if self.page:
            params['offset'] = self.page*self.options.get('perpage',10)
        self.__do_request(params)
        return self.tree is not None

    def __iter__(self):
        """Yield a dict (``user``, ``content``, ``url``) for each result."""
        result_xpath = "//*[@id=\"results\"]/div"
        while self.__next_page():
            for res_node in self.tree.xpath(result_xpath):
                res_obj = {
                    'user': "".join(res_node.xpath("./div/div/h5/a/text()")),
                    'content': "".join(res_node.xpath("./div/div/div/text()")),
                    'url': "".join(res_node.xpath("./div/div/ul/li[1]/small/a/@href"))
                }
                # Nodes without a link are not tweets results, skip them.
                if res_obj['url']:
                    yield res_obj
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
90 |
|
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
91 |
|
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
92 |
def get_options(argv=None):
    """Parse the command line of the script.

    :param argv: list of arguments to parse. When None (the default),
        argparse reads the arguments from ``sys.argv``.
    :return: the ``argparse.Namespace`` holding ``conn_str``, ``query``,
        ``consumer_key``, ``consumer_secret``, ``token_filename`` and
        ``topsy_apikey``, plus whatever options
        ``utils.set_logging_options`` registers on the parser.
    """
    # argparse prepends "usage: " by itself; repeating it in the string
    # made the help start with "usage: usage: ...".
    usage = "%(prog)s [options] <connection_str_or_filepath>"

    parser = argparse.ArgumentParser(usage=usage)

    parser.add_argument(dest="conn_str",
                        help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
    parser.add_argument("-Q", dest="query",
                        help="query", metavar="QUERY")
    parser.add_argument("-k", "--key", dest="consumer_key",
                        help="Twitter consumer key", metavar="CONSUMER_KEY")
    parser.add_argument("-s", "--secret", dest="consumer_secret",
                        help="Twitter consumer secret", metavar="CONSUMER_SECRET")
    parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                        help="Token file name")
    parser.add_argument("-T", dest="topsy_apikey", metavar="TOPSY_APIKEY", default=None,
                        help="Topsy apikey")

    # Let the project helper register its own (logging related) options.
    utils.set_logging_options(parser)

    return parser.parse_args(argv)
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
114 |
|
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
115 |
|
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
116 |
|
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
117 |
if __name__ == "__main__": |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
118 |
|
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
119 |
options = get_options() |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
120 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
121 |
utils.set_logging(options); |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
122 |
|
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
123 |
|
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
124 |
acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME) |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
125 |
|
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
126 |
t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret), secure=True) |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
127 |
t.secure = True |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
128 |
|
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
129 |
conn_str = options.conn_str.strip() |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
130 |
if not re.match("^\w+://.+", conn_str): |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
131 |
conn_str = 'sqlite:///' + conn_str |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
132 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
133 |
engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True) |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
134 |
session = None |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
135 |
|
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
136 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
137 |
topsy_parameters = { |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
138 |
'perpage': 10, |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
139 |
'window': 'a', |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
140 |
'type': 'tweet', |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
141 |
'hidden': 1, |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
142 |
'sort': 'date' |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
143 |
} |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
144 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
145 |
term = Terminal() |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
146 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
147 |
try: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
148 |
session = Session() |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
149 |
|
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
150 |
results = None |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
151 |
page = 1 |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
152 |
print options.query |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
153 |
|
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
154 |
tr = TopsyResource(options.query, **topsy_parameters) |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
155 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
156 |
move_up = 0 |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
157 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
158 |
for i,item in enumerate(tr): |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
159 |
# get id |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
160 |
url = item.get("url") |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
161 |
tweet_id = url.split("/")[-1] |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
162 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
163 |
if move_up > 0: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
164 |
print((move_up+1)*term.move_up()) |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
165 |
move_up = 0 |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
166 |
|
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
167 |
print ("%d: %s - %r" % (i+1, tweet_id, item.get("content") ) + term.clear_eol()) |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
168 |
move_up += 1 |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
169 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
170 |
count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count() |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
171 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
172 |
if count_tweet: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
173 |
continue |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
174 |
try: |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
175 |
tweet = t.statuses.show(id=tweet_id, include_entities=True) |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
176 |
except twitter.api.TwitterHTTPError as e: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
177 |
if e.e.code == 404 or e.e.code == 403: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
178 |
continue |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
179 |
else: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
180 |
raise |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
181 |
|
|
888
6fc6637d8403
update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
693
diff
changeset
|
182 |
processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger) |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
183 |
processor.process() |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
184 |
session.flush() |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
185 |
session.commit() |
|
982
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
186 |
|
|
11c1322cffe6
correct search twitter and topsy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
888
diff
changeset
|
187 |
print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('X-Rate-Limit-Limit'))) + term.clear_eol()) |
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
188 |
move_up += 1 |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
189 |
rate_limit_limit = int(tweet.headers.getheader('X-Rate-Limit-Limit')) |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
190 |
rate_limit_remaining = int(tweet.rate_limit_remaining) |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
191 |
|
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
192 |
if rate_limit_remaining < rate_limit_limit: |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
193 |
time_to_sleep = 0 |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
194 |
else: |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
195 |
time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining)) |
|
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
196 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
197 |
for i in xrange(time_to_sleep): |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
198 |
if i: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
199 |
print(2*term.move_up()) |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
200 |
else: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
201 |
move_up += 1 |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
202 |
print(("Sleeping for %d seconds, %d remaining" % (time_to_sleep, time_to_sleep-i)) + term.clear_eol()) |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
203 |
time.sleep(1) |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
204 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
205 |
except twitter.api.TwitterHTTPError as e: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
206 |
fmt = ("." + e.format) if e.format else "" |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
207 |
print "Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)) |
|
1137
5c757e167687
improve topsy search
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
982
diff
changeset
|
208 |
|
|
693
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
209 |
finally: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
210 |
if session: |
|
2ef837069108
Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
211 |
session.close() |