# Reconstructed from a corrupted interleaved two-revision diff; the newer
# revision (right-hand line numbering) is kept and properly re-indented.

# Select every tweet carrying the requested hashtag inside the export window,
# excluding any tweet whose id appears in the exclusion table.
query_res = (session.query(Tweet)
             .join(EntityHashtag)
             .join(Hashtag)
             .filter(~Tweet.id.in_(select([tweet_exclude_table.c.id])))
             .filter(Hashtag.text.contains(options.hashtag))
             .filter(Tweet.created_at >= start_date)
             .filter(Tweet.created_at <= end_date)
             .all())

root = None
ensemble_parent = None

# TODO: analyse the situation — ldt or iri file? filename set or not?
if content_file and content_file.find("http") == 0:
    # Remote project: fetch the JSON wrapper over HTTP and parse the
    # embedded "ldt" XML payload.
    logging.debug("url : " + content_file)
    h = httplib2.Http()
    resp, content = h.request(content_file)
    logging.debug("url response " + repr(resp) + " content " + repr(content))
    project = anyjson.deserialize(content)
    root = etree.fromstring(project["ldt"])
elif content_file and os.path.exists(content_file):
    # Local project file: parse the XML document directly.
    doc = etree.parse(content_file)
    root = doc.getroot()

if root is None:
    # No existing project could be loaded: build a minimal "iri" skeleton
    # (project / medias / annotations) and attach tweets under its content.
    root = etree.Element(u"iri")
    project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())})
    medias = etree.SubElement(root, u"medias")
    media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""})
    annotations = etree.SubElement(root, u"annotations")
    content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)})
    ensemble_parent = content
|
186 |
|
187 |
|
# When the project was loaded from an existing document, locate the node that
# should receive the tweet ensembles; abort if no attachment point is found.
if ensemble_parent is None:
    # Sniff the document flavour from its first recognisable child:
    # a <project> node marks an "ldt" file, a <head> node an "iri" file.
    file_type = None
    for node in root:
        if node.tag == "project":
            file_type = "ldt"
            break
        elif node.tag == "head":
            file_type = "iri"
            break

    if file_type == "ldt":
        # ldt layout: annotations/content[@id = media id] holds the ensembles.
        media_nodes = root.xpath("//media")
        media = None
        if len(media_nodes) > 0:
            media = media_nodes[0]
        annotations_node = root.find(u"annotations")
        if annotations_node is None:
            annotations_node = etree.SubElement(root, u"annotations")
        content_node = annotations_node.find(u"content")
        if content_node is None:
            # BUG FIX: lxml elements are not subscriptable by attribute name —
            # the original media["id"] raised TypeError (and media was unbound
            # when no //media node existed). Read the attribute with .get().
            content_node = etree.SubElement(annotations_node, u"content",
                                            id=media.get(u"id") if media is not None else u"")
        ensemble_parent = content_node
    elif file_type == "iri":
        # iri layout: body/ensembles holds the ensembles; create missing levels.
        body_node = root.find(u"body")
        if body_node is None:
            body_node = etree.SubElement(root, u"body")
        ensembles_node = body_node.find(u"ensembles")
        if ensembles_node is None:
            ensembles_node = etree.SubElement(body_node, u"ensembles")
        ensemble_parent = ensembles_node

if ensemble_parent is None:
    logging.error("Can not process file")
    sys.exit()
169 |
222 |
# Optionally drop ensembles produced by a previous export run, then create a
# fresh ensemble/decoupage pair to receive this run's tweets.
if options.replace:
    for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
        # Only remove ensembles this exporter created (id prefixed "tweet_").
        if ens.get("id","").startswith("tweet_"):
            ensemble_parent.remove(ens)

ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"})
decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})

etree.SubElement(decoupage, u"title").text = unicode(options.name)
etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
180 |
233 |
# NOTE(review): the first statements below continue the per-tweet export loop
# whose opening (newer-revision lines 234-265) falls outside this chunk;
# indentation here is relative to that loop body.
etree.SubElement(polemics_element, u'polemic').text = pol_link
if polemic_added:
    meta_element.append(polemics_element)

# Keep the raw tweet JSON alongside the annotation for traceability.
etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.original_json))

# Serialize the whole document once; both write paths below reuse it.
output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)

if content_file and content_file.find("http") == 0:
    # Remote project: push the updated "ldt" payload back with an HTTP PUT.
    project["ldt"] = output_data
    body = anyjson.serialize(project)
    logging.debug("write http " + content_file)
    logging.debug("write http " + repr(body))
    h = httplib2.Http()
    resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
    logging.debug("write http " + repr(resp) + " content " + content)
else:
    # Local file: write next to the source project when it exists, otherwise
    # fall back to the filename supplied on the command line.
    if content_file and os.path.exists(content_file):
        dest_file_name = content_file
    else:
        dest_file_name = options.filename

    logging.debug("WRITE : " + dest_file_name)
    # BUG FIX: the original opened content_file here, ignoring the
    # dest_file_name it had just computed — content_file may be None or
    # non-existent on the options.filename path. Also use a context manager
    # so the handle is closed (and flushed) even if the write raises.
    with open(dest_file_name, "w") as output:
        output.write(output_data)
231 finally: |
295 finally: |
232 session.close() |
296 session.close() |
233 finally: |
297 finally: |
234 conn.close() |
298 conn.close() |