/*
 * tweetcast.js
 *
 * Recovered from the Mercurial patch that added tweetcast/nodejs-bis/tweetcast.js
 * (diff -r 863871f4c44c -r d49991fe4892, Thu Nov 10 17:54:37 2011 +0100).
 *
 * Small Node.js server that
 *   - serves static files from ./client over HTTP, plus the configuration
 *     file itself at /config;
 *   - when READ_OLD_TWEETS is set, loads previously recorded tweets from the
 *     tweet file (one JSON document per line);
 *   - when RECORD_NEW_TWEETS is set, tracks keyword(s) on
 *     https://stream.twitter.com/1/statuses/filter.json, trims each tweet and
 *     appends it to the tweet file;
 *   - pushes tweets to connected browsers through socket.io
 *     ('initial_data' on connection, 'update' for new tweets).
 *
 * Command-line flags: -c <conf file>  -f <tweet file>  -t <keywords>  -u <user:pass>
 *
 * This file deliberately stays in sloppy mode: the configuration file is
 * eval'd in this scope and is expected to define plain variables
 * (tweet_file, tracking_keyword, user_pass, app_port, annotations).
 */

var fs = require('fs'),
    path = require('path');

// Declared before the configuration is evaluated, so the configuration code
// (which runs in this scope) can read or reassign them.
var READ_OLD_TWEETS = true;
var RECORD_NEW_TWEETS = false;

var conf_file = flagOption('-c', 'conf.js');
console.log('Reading Configuration from ' + conf_file);
// NOTE(review): eval executes the configuration file as code with full access
// to this process. Only point -c at files you trust.
eval(fs.readFileSync(conf_file,'utf8'));

// Command-line flags take precedence over values set by the configuration.
var tweet_file = flagOption('-f', (typeof tweet_file == "undefined" ? 'tweets.txt' : tweet_file ));
var tracking_keyword = flagOption('-t', (typeof tracking_keyword == "undefined" ? null : tracking_keyword ));
var user_pass = flagOption('-u', (typeof user_pass == "undefined" ? null : user_pass ));

// A configuration without annotations simply yields tweets with no annotation.
var annotation_sets = (typeof annotations == "undefined" ? {} : annotations);

if (!user_pass) {
    console.log("You must provide user credentials in " + conf_file + " or on the command-line, e.g. node tweetcast.js -u user:pass");
    process.exit(1);
}
if (!tracking_keyword) {
    console.log("You must provide keyword(s) in " + conf_file + " or on the command-line, e.g. node tweetcast.js -t Bieber");
    process.exit(1);
}
if (typeof app_port == "undefined") {
    console.log("You must provide app_port in " + conf_file);
    process.exit(1);
}

console.log("Tracking keyword "+tracking_keyword);

var http = require('http'),
    https = require('https'),
    socketio = require('socket.io'),
    app = http.createServer(httpHandler),
    io = socketio.listen(app),
    // Every tweet currently held in memory, in arrival order.
    tweets = [],
    // Ids of the tweets in `tweets`, used as a set for duplicate detection.
    known_tweet_ids = Object.create(null),
    // Directory static files are served from; the trailing separator is kept
    // so it can be used as a strict path prefix.
    client_root = path.normalize(__dirname + "/client/"),
    // Top-level tweet fields removed before a tweet is stored or broadcast.
    keys_to_delete = [
        'in_reply_to_screen_name',
        'in_reply_to_user_id',
        'retweeted',
        'place',
        'geo',
        'source',
        'contributors',
        'coordinates',
        'retweet_count',
        'favorited',
        'truncated',
        'possibly_sensitive'
    ],
    // Fields removed from tweet.user before a tweet is stored or broadcast.
    user_keys_to_delete = [
        'default_profile_image',
        'show_all_inline_media',
        'contributors_enabled',
        'profile_sidebar_fill_color',
        'created_at',
        'lang',
        'time_zone',
        'profile_sidebar_border_color',
        'follow_request_sent',
        'profile_background_image_url',
        'profile_background_image_url_https',
        'followers_count',
        'description',
        'url',
        'geo_enabled',
        'profile_use_background_image',
        'default_profile',
        'following',
        'profile_text_color',
        'is_translator',
        'favourites_count',
        'listed_count',
        'friends_count',
        'profile_link_color',
        'protected',
        'location',
        'notifications',
        'profile_image_url_https',
        'statuses_count',
        'verified',
        'profile_background_color',
        'profile_background_tile',
        'utc_offset'
    ];

/**
 * Ends the response with the 404 reply used for every failed file lookup.
 */
function sendNotFound(res) {
    console.log("Error 404");
    res.writeHead(404);
    return res.end('File not found');
}

/**
 * HTTP request handler: "/config" returns the configuration file, any other
 * URL is resolved to a file below ./client (URLs ending in "/" get
 * "index.html" appended). Anything that cannot be read yields a 404.
 */
function httpHandler(req, res) {
    console.log("HTTP Request for URL "+req.url);
    var file_path;
    if (req.url == "/config") {
        // NOTE(review): this returns the configuration file verbatim; if
        // credentials (user_pass) are kept in that file they are readable by
        // any HTTP client. Confirm, and prefer passing them with -u.
        file_path = conf_file;
    } else {
        file_path = path.normalize(__dirname + "/client" + req.url + ( req.url[req.url.length - 1] == "/" ? "index.html" : "" ));
        // After normalization a URL containing "../" segments may point
        // outside the client directory; refuse those.
        if (file_path.indexOf(client_root) !== 0) {
            return sendNotFound(res);
        }
    }
    fs.readFile( file_path, function(err, data) {
        if (err) {
            return sendNotFound(res);
        }
        res.writeHead(200);
        res.end(data);
    });
}

/**
 * Returns the command-line argument following `flag`, or `defaultValue` when
 * the flag is absent or is the last argument.
 */
function flagOption(flag, defaultValue) {
    var flagPos = process.argv.indexOf(flag);
    return ( flagPos != -1 && flagPos < process.argv.length - 1) ? process.argv[flagPos + 1] : defaultValue;
}

/**
 * Appends a tweet to the in-memory list unless a tweet with the same id is
 * already there.
 *
 * @returns {boolean} true when the tweet was added, false for a duplicate.
 */
function addToList(tweet) {
    if (known_tweet_ids[tweet.id]) {
        console.log("Error: Tweet already in list");
        return false;
    }
    tweets.push(tweet);
    known_tweet_ids[tweet.id] = true;
    return true;
}

/**
 * Replaces every non-ASCII character of `text` with a decimal HTML numeric
 * character reference. A UTF-16 surrogate pair is emitted as the single code
 * point it encodes, because references to lone surrogates are not valid HTML.
 */
function escapeNonAscii(text) {
    var escaped = '';
    for (var i = 0; i < text.length; i++) {
        var code = text.charCodeAt(i);
        if (code >= 0xD800 && code <= 0xDBFF && i + 1 < text.length) {
            var low = text.charCodeAt(i + 1);
            if (low >= 0xDC00 && low <= 0xDFFF) {
                code = (code - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
                i++; // the low surrogate has been consumed
            }
        }
        escaped += ( code < 128 ? text.charAt(i) : "&#" + code + ";" );
    }
    return escaped;
}

/**
 * Normalizes one object of a tweet in place:
 *   - string values get their non-ASCII characters escaped;
 *   - every "...id" field that has a "..._str" sibling takes the sibling's
 *     value, and the sibling is removed. Fields without a sibling keep their
 *     value.
 */
function textids(object) {
    if (!object) {
        return;
    }
    Object.keys(object).forEach(function(key) {
        // Workaround for Unicode bug in socket.io.
        if (typeof object[key] == "string") {
            object[key] = escapeNonAscii(object[key]);
        }
        var str_key = key + '_str';
        if (key.slice(-2) == 'id' && Object.prototype.hasOwnProperty.call(object, str_key)) {
            object[key] = object[str_key];
            delete object[str_key];
        }
    });
}

/**
 * Loads tweets from `file_name`, one JSON document per non-empty line, into
 * the in-memory list. An unreadable file is logged and ignored; a line that
 * cannot be parsed is logged and skipped without affecting the other lines.
 */
function readTweetsFromFile(file_name) {
    console.log("Trying to read tweets from " + file_name);
    var lines;
    try {
        lines = fs.readFileSync(file_name, 'utf8').split('\n');
    }
    catch (err) {
        console.log("Error opening "+file_name);
        return;
    }
    var tweetscopied = 0;
    for (var i = 0; i < lines.length; i++) {
        if (lines[i].length == 0) {
            continue;
        }
        try {
            if (addToList(JSON.parse(lines[i]))) {
                tweetscopied++;
            }
        }
        catch (err) {
            console.log("Skipping line " + (i + 1) + " of " + file_name + ": " + err.message);
        }
    }
    console.log(tweetscopied + " tweets copied");
}

/**
 * Returns the names of the annotation sets (other than "default") that have
 * at least one keyword matching `text`. Keywords are handed to
 * String.prototype.search, so a string keyword is interpreted as a regular
 * expression.
 */
function matchAnnotations(text) {
    var matched = [];
    for (var name in annotation_sets) {
        if (name != "default") {
            // The type of `keywords` comes from the configuration and is not
            // visible here, hence the generic key iteration.
            var keywords = annotation_sets[name].keywords;
            for (var k in keywords) {
                if (text.search(keywords[k]) != -1) {
                    matched.push(name);
                    break;
                }
            }
        }
    }
    return matched;
}

/**
 * Deletes each of the listed keys from `object`.
 */
function deleteKeys(object, keys) {
    for (var i = 0; i < keys.length; i++) {
        delete object[keys[i]];
    }
}

/**
 * Handles one complete message of the stream: annotates and trims the tweet,
 * adds it to the in-memory list and, when it is new, appends it to
 * `writestream`. Messages without a text and a user are ignored.
 * Throws when the line is not valid JSON.
 */
function handleStreamLine(line, writestream) {
    var tweet = JSON.parse(line);
    if (!tweet || typeof tweet.text != "string" || !tweet.user) {
        return;
    }
    // Annotations are matched against the raw text, before it is escaped.
    tweet.annotations = matchAnnotations(tweet.text);
    textids(tweet);
    deleteKeys(tweet, keys_to_delete);
    textids(tweet.user);
    deleteKeys(tweet.user, user_keys_to_delete);
    if (tweet.retweeted_status) {
        textids(tweet.retweeted_status);
        deleteKeys(tweet.retweeted_status, keys_to_delete);
    }
    if (addToList(tweet)) {
        writestream.write(JSON.stringify(tweet)+'\n');
    }
}

/**
 * Opens the streaming request for `tracking_keyword` (HTTP Basic
 * authentication with `user_pass`). Every new tweet is appended to
 * `tweet_file` and broadcast to all socket.io clients in an 'update' event.
 */
function requestTweets() {
    console.log("Fetching tweets from https://stream.twitter.com/1/statuses/filter.json");
    var req = https.request({
        host: "stream.twitter.com",
        path: "/1/statuses/filter.json",
        method: "POST",
        headers: {
            'Authorization': 'Basic ' + new Buffer( user_pass ).toString('base64'),
            'Content-Type': 'application/x-www-form-urlencoded'
        }
    }, function(res) {
        console.log('Response received, status : ' + res.statusCode);
        res.setEncoding('utf8');
        if (res.statusCode != 200) {
            // The body of an error response is not a tweet stream.
            console.log("Streaming request was not accepted, no tweets will be recorded");
            res.resume();
            return;
        }
        var writestream = fs.createWriteStream( tweet_file, { flags: 'a+', encoding: 'utf-8' } );
        writestream.on('error', function(err) {
            console.log("Error writing to " + tweet_file + ": " + err.message);
        });
        // Text received so far that is not yet terminated by "\r\n".
        var pending = '';
        res.on('data', function(chunk) {
            pending += chunk;
            var lines = pending.split('\r\n');
            // The last piece stays buffered until its terminator arrives; a
            // message may be spread over several chunks.
            pending = lines.pop();
            var tweetpos = tweets.length;
            for (var i = 0; i < lines.length; i++) {
                if (lines[i].length == 0) {
                    continue; // blank keep-alive line
                }
                try {
                    handleStreamLine(lines[i], writestream);
                }
                catch(err) {
                    console.log(err.message);
                }
            }
            if (tweets.length > tweetpos) {
                io.sockets.emit('update', {
                    "new_tweets" : tweets.slice(tweetpos)
                });
                console.log("New tweets received. We now have", tweets.length, "tweets in memory");
            }
        });
        res.on('end', function() {
            console.log("Tweet stream closed");
            writestream.end();
        });
    });

    // Without a listener, a connection failure would be thrown as an
    // unhandled 'error' event and stop the whole server.
    req.on('error', function(err) {
        console.log("Error fetching tweets: " + err.message);
    });

    req.write('track=' + encodeURIComponent((tracking_keyword)));
    req.end();
}

app.listen(app_port);
console.log("Listening on port: "+app_port);

if (READ_OLD_TWEETS) {
    readTweetsFromFile(tweet_file);
}

if (RECORD_NEW_TWEETS) {
    requestTweets();
}

io.set('log level', 0);
// Each new client immediately receives everything held in memory.
io.sockets.on('connection', function(socket) {
    console.log("New connection from "+socket.handshake.address.address);
    socket.emit('initial_data', {
        "tweets" : tweets
    });
});