web/hdabo/management/commands/querywikipedia.py
author ymh <ymh.work@gmail.com>
Thu, 09 Jun 2011 18:52:02 +0200
changeset 17 89782c9e96cf
parent 15 a9136d8f0b4a
child 19 e2f27df4e17b
permissions -rw-r--r--
update wikipedia_activated to true
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
15
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
Created on Jun 7, 2011
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
@author: ymh
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
'''
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
from django.core.management.base import NoArgsCommand
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
from django.core.management.color import no_style
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
from hdabo.models import Tag
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
from optparse import make_option
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
from wikitools import wiki, api
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
import math
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
import sys
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
class Command(NoArgsCommand):
    '''
    Query Wikipedia for each tag label and store the outcome on the tag.

    For every selected ``Tag`` the command asks the MediaWiki API (through
    ``wikitools``) for the page whose title is the tag label, then updates
    the tag's label, url status, wikipedia url and page id from the answer.
    '''
    # NOTE(review): not read anywhere in this file; kept in case external
    # code relies on the attribute.
    options = ''
    help = """query and update wikipedia for tag title."""

    option_list = NoArgsCommand.option_list + (
        # Process every tag instead of only the ones with no url_status yet.
        make_option('--force',
            action='store_true',
            dest='force',
            default=False,
            help='force all tags to be updated, not only those not yet processed'),
        # Process the tags in random order instead of ordering them by label.
        make_option('--random',
            action='store_true',
            dest='random',
            default=False,
            help='randomize query on tags'),
        # Endpoint of the MediaWiki API to query.
        make_option('--site',
            action='store',
            type='string',
            dest='site_url',
            default="http://fr.wikipedia.org/w/api.php",
            help='the url for the wikipedia site'),
        # Maximum number of tags to process; a negative value means no limit.
        make_option('--limit',
            action='store',
            type='int',
            dest='limit',
            default=-1,
            help='number of tag to process'),
        # Number of tags to skip at the beginning of the selection.
        make_option('--start',
            action='store',
            type='int',
            dest='start',
            default=0,
            help='number of tag to ignore'),
        )

    def __is_homonymie(self, page_dict):
        '''
        Tell whether a page returned by the API is a disambiguation page.

        A page qualifies when the title of one of its ``categories`` entries
        contains the French or the English disambiguation category name.
        '''
        markers = (u'Catégorie:Homonymie', u'Category:Disambiguation pages')
        for cat in page_dict.get(u"categories", []):
            title = cat.get(u"title", u"")
            if any(marker in title for marker in markers):
                return True
        return False

    def process_wp_response(self, label, response):
        '''
        Extract the tag information from a MediaWiki ``action=query`` answer.

        ``label`` is the label that was queried; it is currently unused and
        only kept so that the signature stays stable for callers.

        Returns a ``(new_label, status, url, pageid)`` tuple. When the answer
        does not contain exactly one page, or when that page is flagged
        ``invalid`` or ``missing``, the tuple is
        ``(None, <null_result status>, None, None)``.

        Shape of the answer, as recorded by the original author:

            {u'query':
              {u'redirects': [{u'to': u'\\xc9criture', u'from': u'Ecriture'}],
               u'normalized': [{u'to': u'Ecriture', u'from': u'ecriture'}],
               u'pages': {
                 u'2985492': {u'lastrevid': 66075812,
                              u'pageid': 2985492,
                              u'title': u'\\xc9criture',
                              u'editurl': u'http://fr.wikipedia.org/w/index.php?title=%C3%89criture&action=edit',
                              u'counter': u'',
                              u'length': 7968,
                              u'touched': u'2011-06-05T14:09:13Z',
                              u'ns': 0,
                              u'fullurl': u'...'}
                          }
               }
              }

        Status values come from ``Tag.TAG_URL_STATUS_DICT``; the original
        author noted them as match=3, redirection=1, homonyme=2,
        null_result=0 (verify against the model before relying on them).
        '''
        query_dict = response['query']
        pages = query_dict.get("pages", {})

        # Zero pages or several pages: the label cannot be resolved.
        if len(pages) != 1:
            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None

        # list() keeps this working whether values() returns a list or a view.
        page = list(pages.values())[0]

        if u"invalid" in page or u"missing" in page:
            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None

        url = page.get(u'fullurl', None)
        pageid = page.get(u'pageid', None)
        new_label = page[u'title']

        # Disambiguation is tested first, so it wins over a redirect flag.
        if self.__is_homonymie(page):
            status = Tag.TAG_URL_STATUS_DICT["homonyme"]
        elif u"redirect" in page:
            status = Tag.TAG_URL_STATUS_DICT["redirection"]
        else:
            status = Tag.TAG_URL_STATUS_DICT["match"]

        return new_label, status, url, pageid

    def show_progress(self, current_line, total_line, label, width):
        '''
        Draw a one-line text progress bar on standard output.

        ``current_line`` is the 1-based index of the item being processed,
        ``total_line`` the total number of items, ``label`` the item label
        (printed through ``repr``) and ``width`` the number of characters
        between the brackets of the bar. The line ends with a carriage
        return so that the next call overwrites it; a newline is written
        once 100% is reached.
        '''
        # An empty run is reported as complete instead of dividing by zero.
        if total_line > 0:
            percent = (float(current_line) / float(total_line)) * 100.0
        else:
            percent = 100.0

        marks = int(math.floor(width * (percent / 100.0)))
        spaces = int(math.floor(width - marks))

        loader = u'[' + (u'=' * marks) + (u' ' * spaces) + u']'

        # The counters are printed as given: the caller passes the 1-based
        # position and the real total, there is no header row to discount.
        sys.stdout.write(u"%s %d%% %d/%d - %s\r" % (loader, percent, current_line, total_line, repr(label)))
        if percent >= 100:
            sys.stdout.write("\n")
        sys.stdout.flush()

    def _select_tags(self, force, randomize, start, limit):
        '''Build the queryset of tags to process from the command options.'''
        if force:
            queryset = Tag.objects.all()
        else:
            # Only the tags that were never processed.
            queryset = Tag.objects.filter(url_status=None)

        if randomize:
            queryset = queryset.order_by("?")
        else:
            queryset = queryset.order_by("label")

        if limit >= 0:
            # --limit is a number of tags, so the end index is relative to
            # --start (a plain [start:limit] would process too few tags).
            return queryset[start:start + limit]
        return queryset[start:]

    def handle_noargs(self, **options):
        '''
        Entry point of the command: select the tags, query Wikipedia for
        each of them and save the result.

        Reads the ``interactive``, ``verbosity``, ``force``, ``limit``,
        ``start``, ``site_url`` and ``random`` options. With ``--force`` in
        interactive mode a confirmation is requested before anything is
        changed.
        '''
        self.style = no_style()

        interactive = options.get('interactive', True)
        verbosity = int(options.get('verbosity', '1'))
        force = options.get('force', False)
        limit = options.get("limit", -1)
        start = options.get("start", 0)
        site_url = options.get('site_url', "http://fr.wikipedia.org/w/api.php")
        randomize = options.get('random', False)

        if verbosity > 2:
            print("option passed : " + repr(options))

        if force and interactive:
            confirm = raw_input("""You have requested to query and replace the wikipedia information for all datasheets.
Are you sure you want to do this ?
    Type 'yes' to continue, or 'no' to cancel: """)
        else:
            confirm = 'yes'

        if confirm != "yes":
            print("wikipedia query cancelled")
            return

        queryset = self._select_tags(force, randomize, start, limit)

        if verbosity > 2:
            print("Tag Query is %s" % (queryset.query))

        site = wiki.Wiki(site_url)

        count = queryset.count()
        if verbosity > 1:
            print("Processing %d tags" % (count))

        for i, tag in enumerate(queryset):

            # Verbose runs log one line per tag, others get a progress bar.
            if verbosity > 1:
                print("processing tag %s (%d/%d)" % (tag.label, i + 1, count))
            else:
                self.show_progress(i + 1, count, tag.label, 60)

            params = {'action':'query', 'titles': tag.label, 'prop':'info|categories', 'inprop':'url'}
            wpquery = api.APIRequest(site, params)
            response = wpquery.query()

            if verbosity >= 2:
                print("response from query to %s with parameters %s :" % (site_url, repr(params)))
                print(repr(response))

            new_label, status, url, pageid = self.process_wp_response(tag.label, response)

            # Only overwrite the fields for which the answer gave a value.
            if new_label is not None:
                tag.label = new_label
            if status is not None:
                tag.url_status = status
            if url is not None:
                tag.wikipedia_url = url
                tag.wikipedia_activated = True
            if pageid is not None:
                tag.wikipedia_pageid = pageid

            tag.save()
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
   213
            
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
   214