src/catedit/views/utils.py
author durandn
Tue, 14 Apr 2015 13:01:25 +0200
changeset 99 a55054e72fe4
parent 97 fe8782a67fcf
child 103 ef02353dff20
permissions -rw-r--r--
Reworking cache keys registries and access

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
utils.py:
Module that groups utility functions and classes that are used by views,
partly because most of them do requests to the Github API and as such must
be cached
"""

from catedit import app, github, cache, log_api_rate, save_links, celery
from catedit.models import Category
from catedit.resources import CategoryAPI, CategoryChangesAPI
from flask import redirect, url_for, session
from flask.ext.github import GitHubError
from datetime import datetime
from rdflib import Graph
from base64 import b64decode
from io import StringIO

logger = app.logger

class Pagination(object):
    """
        Minimal pagination helper exposing the attributes and the
        iter_pages() generator expected by the pagination templates.
    """

    def __init__(self, page, per_page, last_page):
        self.page = page
        self.last_page = last_page
        self.per_page = per_page

    @property
    def pages(self):
        """Total number of pages."""
        return self.last_page

    @property
    def has_prev(self):
        """True when a page exists before the current one."""
        return self.page > 1

    @property
    def has_next(self):
        """True when a page exists after the current one."""
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
            Yield the page numbers to display, with None marking a gap
            between two non-contiguous runs of numbers.
        """
        previous = 0
        for candidate in range(1, self.pages + 1):
            in_left_edge = candidate <= left_edge
            around_current = (self.page - left_current - 1 < candidate
                              < self.page + right_current)
            # NOTE(review): the canonical Flask pagination snippet uses
            # "pages - right_edge" here; "pages + 1 - right_edge" shows one
            # page fewer at the right edge -- confirm this is intended
            in_right_edge = candidate > self.pages + 1 - right_edge
            if in_left_edge or around_current or in_right_edge:
                if previous + 1 != candidate:
                    yield None
                yield candidate
                previous = candidate


def check_user_status_and_repo_access(repository):
    """
        Function to call at the beginning of every view that would require
        authentication and editing privilege to work properly (basically
        everything save login and index page).

        Returns a redirect to the index page when the repository is not
        configured, the user is not logged in, or the user has no edit
        rights on the repository; returns None otherwise.
    """
    allowed_repos = app.config["PERSISTENCE_CONFIG"]["REPOSITORY_LIST"]
    user_logged = session.get("user_logged", None)
    can_edit = session.get("user_can_edit", {}).get(repository, False)
    if repository not in allowed_repos or not user_logged or not can_edit:
        return redirect(url_for("home.index"))


def get_comments(repository, thread_type, thread_id, page=1, per_page=30):
    """
        Function that takes the following args:
            * repository: the repository from which to get comments from
            * thread_type: either "issues" or "commits"
            * thread_id: the id of the issue or commit to get comments from
            * page: the page of comments to get
            * per_page: the number of comments per page
        then builds a comment list with each comment being a dict with the
        following format:
        {
            "author": author of the comment
            "body": body of the comment
            "date": date of the comment format dd/mm/yy hh:mm
        }
        Returns a (thread_dict, pagination) tuple where thread_dict wraps
        the thread metadata and the comment list, and pagination is a
        Pagination instance or None.
        Results are cached for an hour; the cache key is recorded in a
        per-repository registry so post_comment() can invalidate it.
    """
    # Cache key for this function and parameters
    cache_key = "get_comments_" \
                + repository + "_" \
                + thread_type + "_" \
                + thread_id + "_" \
                + str(page) + "_" + str(per_page)
    # Cache key for comments key registry for this repo
    registry_key = "comments_" + repository + "_keys"

    if (cache.get(registry_key) is None
            or cache_key not in cache.get(registry_key)) \
            and cache.get(cache_key) is None:
        github_comments_data = []

        try:
            github_comments_data = github.get(
                "repos/"
                + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
                + repository + "/"
                + thread_type + "/"
                + thread_id
                + "/comments?per_page=" + str(per_page)
                + "&page=" + str(page),
                hooks=dict(response=save_links)
            )
        except GitHubError as ghe:
            # BUGFIX: page and per_page are ints and must be str()-converted;
            # the previous code raised a TypeError inside this error handler
            logger.error(
                "Error trying to get comments with following data:"
                + " - repository : " + repository
                + " - thread_type : " + thread_type
                + " - thread_id : " + thread_id
                + " - page : " + str(page)
                + " - per_page : " + str(per_page)
            )
            logger.error(ghe.response.text)
        pagination = None
        if session.get("pagination_links", None) is not None:
            # If there are multiple pages we create a pagination class that
            # will be sent to the template
            pagination = Pagination(
                page=session["pagination_links"]["current_page"],
                per_page=per_page,
                last_page=session["pagination_links"]["last_page"]
            )
            session.pop("pagination_links", None)

        comment_list = []
        for comment in github_comments_data:
            comment_dict = {
                "author": comment["user"]["login"],
                "body": comment["body"],
                "date": convert_github_date(
                    comment["created_at"]
                )
            }
            comment_list.append(comment_dict)

        # Fetch the thread itself (commit or issue) for its metadata
        discussion_data = {}
        try:
            discussion_data = github.get(
                "repos/"
                + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
                + repository + "/"
                + thread_type + "/"
                + thread_id,
                hooks=dict(response=log_api_rate)
            )
        except GitHubError as ghe:
            logger.error(
                "Error trying to get the commit or issue of id " + thread_id
            )
            logger.error(
                "endpoint: " + "repos/"
                + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
                + repository + "/"
                + thread_type + "/"
                + thread_id
            )
            logger.error(ghe.response.text)

        thread_author = ""
        thread_opening_date = ""
        thread_title = ""
        thread_opening_post = ""

        # Commits and issues expose their metadata under different fields
        if thread_type == "commits":
            thread_author = discussion_data.get("author", {}).get("login", "")
            thread_opening_date = convert_github_date(
                discussion_data.get(
                    "commit",
                    {}
                ).get(
                    "author",
                    {}
                ).get("date", "")
            )
            thread_title = discussion_data.get("commit", {}).get("message", "")
        elif thread_type == "issues":
            thread_author = discussion_data.get("user", {}).get("login", "")
            thread_opening_date = convert_github_date(
                discussion_data.get("created_at", "0001-01-01T00:00:00Z")
            )
            thread_title = discussion_data.get("title", "")
            thread_opening_post = discussion_data.get("body", "")
        thread_dict = {
            "author": thread_author,
            "title": thread_title,
            "opening_date": thread_opening_date,
            "comment_list": comment_list,
            "opening_post": thread_opening_post,
            "per_page": per_page
        }
        rv = (thread_dict, pagination)

        # Cache operations
        # Setting key for this function
        cache.set(cache_key, rv, timeout=3600)
        # Setting key in the registry
        if cache.get(registry_key) is None:
            cache.set(registry_key, [cache_key])
        else:
            if cache_key not in cache.get(registry_key):
                key_list = cache.get(registry_key)
                key_list.append(cache_key)
                logger.debug("list of keys for "+registry_key+" registry: "+str(key_list))
                cache.set(registry_key, key_list)
        return rv
    else:
        return cache.get(cache_key)

def post_comment(repository, thread_type, thread_id,
                 comment_body, thread_title=""):
    """
        Function that posts a given comment to github.

        * repository is the repository to post in
        * thread_type is either "issues" or "commits"
        * thread_id is the id of the issue or the commit to comment, or
          "new" to open a new issue
        * thread_title is the title of the new issue, if we're posting a new
        issue
        * comment_body is the content of the comment

        Returns the id of the thread that received the comment ("" when the
        API call failed) and invalidates the repository's cached comment
        pages so the new comment becomes visible.
    """
    owner = app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"]
    comment_data = {
        "body": comment_body
    }
    return_id = ""
    if thread_id == "new":
        # We're posting a new issue
        comment_data["title"] = thread_title
        try:
            github_response = github.post(
                "repos/"
                + owner + "/"
                + repository + "/"
                + thread_type,
                data=comment_data,
                hooks=dict(response=log_api_rate)
            )
            return_id = str(github_response["number"])
        except GitHubError as ghe:
            logger.error(
                "Error posting new issue with following data: "
                + " - repository : " + repository
                + " - thread_id : " + thread_id
                + " - thread_type : " + thread_type
                + " - thread_title : " + thread_title
                + " - comment_body : " + comment_body
            )
            logger.error(ghe.response.text)
    else:
        # We're commenting an existing thread
        try:
            github.post(
                "repos/"
                + owner + "/"
                + repository + "/"
                + thread_type + "/"
                + thread_id
                + "/comments",
                data=comment_data,
                hooks=dict(response=log_api_rate)
            )
            return_id = thread_id
        except GitHubError as ghe:
            logger.error(
                "Error posting comment with following data: "
                + " - repository : " + repository
                + " - thread_id : " + thread_id
                + " - thread_type : " + thread_type
                + " - comment_body : " + comment_body
            )
            logger.error(ghe.response.text)

    # Drop every cached comment page recorded for this repository
    registry_key = "comments_" + repository + "_keys"
    registered_keys = cache.get(registry_key)
    if registered_keys is not None:
        for key in registered_keys:
            if cache.get(key) is not None:
                cache.delete(key)
        cache.delete(registry_key)
    return return_id


def get_commits(repository, per_page, page):
    """
        Function that gets the list of commits for a given repository.
        Returns a (changeset_list, pagination) tuple; changeset_list is a
        list of dicts with the format:
        {
            id : commit id
            title : commit message
            date : commit date
            author : commit author
            comment_count : commit comments count
        }
        and pagination is a Pagination instance or None.
    """
    # Cache key for this function and parameters
    cache_key = "get_commits_" \
                + repository + "_" \
                + str(page) + "_" + str(per_page)

    # NOTE(review): this registry is named "categories_..." although the
    # inline comment historically called it the comments registry -- confirm
    # which registry commit pages are meant to live in
    registry_key = "categories_" + repository + "_keys"

    registered_keys = cache.get(registry_key)
    key_is_registered = (registered_keys is not None
                         and cache_key in registered_keys)
    if key_is_registered or cache.get(cache_key) is not None:
        return cache.get(cache_key)

    commits_data = []
    try:
        commits_data = github.get(
            "repos/"
            + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
            + repository
            + "/commits?per_page=" + str(per_page) + "&page=" + str(page),
            hooks=dict(response=save_links)
        )
    except GitHubError as ghe:
        logger.error("Error getting commits for repo " + repository)
        logger.error(ghe.response.text)

    commits_pagination = None
    if session.get("pagination_links", None) is not None:
        # Multiple pages: build the helper consumed by the template
        commits_pagination = Pagination(
            page=session["pagination_links"]["current_page"],
            per_page=per_page,
            last_page=session["pagination_links"]["last_page"]
        )
        session.pop("pagination_links", None)

    changeset_list = []
    for commit in commits_data:
        changeset_list.append({
            "id": commit["sha"],
            "title": commit["commit"]["message"],
            "date": convert_github_date(
                commit["commit"]["committer"]["date"]
            ),
            "author": commit["commit"]["committer"]["name"],
            "comment_count": commit["commit"]["comment_count"],
        })
    rv = (changeset_list, commits_pagination)

    # Cache the result and record its key in the registry
    cache.set(cache_key, rv, timeout=3600)
    if registered_keys is None:
        cache.set(registry_key, [cache_key])
    elif cache_key not in registered_keys:
        registered_keys.append(cache_key)
        logger.debug("list of keys for "+registry_key+" registry: "+str(registered_keys))
        cache.set(registry_key, registered_keys)
    return rv

def get_issues(repository, per_page=30, page=1):
    """
        Function that gets the list of issues for a given repository.
        Returns a (discussion_list, pagination) tuple; discussion_list is a
        list of dicts with the format:
        {
            id: issue id
            title: issue title
            author: issue author
            opening_date: issue opening date
            last_updated: last update date
            comment_count: comments count
        }
        and pagination is a Pagination instance or None.
    """
    # Cache key for this function and parameters
    cache_key = "get_issues_" \
                + repository + "_" \
                + str(page) + "_" + str(per_page)

    # Cache key for comments key registry for this repo: issue pages are
    # invalidated alongside comment pages when a comment is posted
    registry_key = "comments_" + repository + "_keys"

    registry = cache.get(registry_key)
    if (registry is not None and cache_key in registry) \
            or cache.get(cache_key) is not None:
        return cache.get(cache_key)

    issues_data = []
    try:
        issues_data = github.get(
            "repos/"
            + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
            + repository
            + "/issues?per_page=" + str(per_page) + "&page=" + str(page),
            hooks=dict(response=save_links)
        )
    except GitHubError as ghe:
        logger.error("Error getting issues for repo " + repository)
        logger.error(ghe.response.text)

    discussions_pagination = None
    if session.get("pagination_links", None) is not None:
        # Multiple pages: build the helper consumed by the template
        discussions_pagination = Pagination(
            page=session["pagination_links"]["current_page"],
            per_page=per_page,
            last_page=session["pagination_links"]["last_page"]
        )
        session.pop("pagination_links", None)

    discussion_list = [
        {
            "id": str(issue["number"]),
            "title": issue["title"],
            "author": issue["user"]["login"],
            "opening_date": convert_github_date(issue["created_at"]),
            "last_updated": convert_github_date(issue["updated_at"]),
            "comment_count": issue["comments"],
        }
        for issue in issues_data
    ]
    rv = (discussion_list, discussions_pagination)

    # Cache the result and record its key in the registry
    cache.set(cache_key, rv, timeout=3600)
    if registry is None:
        cache.set(registry_key, [cache_key])
    elif cache_key not in registry:
        registry.append(cache_key)
        logger.debug("list of keys for "+registry_key+" registry: "+str(registry))
        cache.set(registry_key, registry)
    return rv


def get_category_list_for_commit(repository, changeset_id, get_parent=False):
    """
        Get the category list as it was following the changeset of
        id changeset_id (or following its first parent when get_parent
        is True).

        Returns a list of Category instances, cached for an hour.
    """
    # Cache key for a given changeset (or its parent) in a given repository
    # As this key will never need to be cleared we have no use putting it in a registry
    cache_key = "get_category_list_for_commit_" \
                + repository + "_" \
                + changeset_id + "_parent_" + str(get_parent)
    if cache.get(cache_key) is None:
        # First step: fetch the commit (or, below, its first parent)
        commit_data = {}
        try:
            commit_data = github.get(
                "repos/"
                + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
                + repository + "/commits/"
                + changeset_id
            )
        except GitHubError as ghe:
            logger.error(
                "Error trying to get the commit of id " + changeset_id
            )
            logger.error(ghe.response.text)

        parent_sha = ""
        if get_parent:
            parents = commit_data.get("parents", [])
            if parents != []:
                parent_sha = parents[0].get("sha", "")
            try:
                commit_data = github.get(
                    "repos/"
                    + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"]
                    + "/" + repository + "/commits/"
                    + parent_sha
                )
            except GitHubError as ghe:
                logger.error(
                    "Error trying to get the commit of id " + parent_sha
                )
                logger.error(ghe.response.text)

        tree_sha = commit_data.get("commit", {}).get("tree", {}).get("sha", "")

        # Second step: fetch the full tree of the commit
        tree_data = {}
        try:
            # BUGFIX: use the defensively-extracted tree_sha; indexing
            # commit_data["commit"]["tree"]["sha"] directly raised a
            # KeyError whenever the commit request above had failed
            tree_data = github.get(
                "repos/"
                + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
                + repository + "/git/trees/"
                + tree_sha
                + "?recursive=1"
            )
        except GitHubError as ghe:
            logger.error("Error trying to get the tree of sha " + tree_sha)
            logger.error(ghe.response.text)

        # Third step and fourth step: fetch every category blob and parse it
        cat_list = []
        for blob in tree_data.get("tree", []):
            if app.config["PERSISTENCE_CONFIG"]["CATEGORIES_PATH"] \
            in blob["path"]:
                blob_data = {}
                try:
                    blob_data = github.get(
                        "repos/"
                        + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"]
                        + "/" + repository + "/git/blobs/"
                        + blob["sha"]
                    )
                except GitHubError as ghe:
                    logger.error(
                        "Error trying to get the blob of sha " + blob["sha"]
                    )
                    logger.error(ghe.response.text)
                if "content" not in blob_data:
                    # The blob request failed (already logged above): skip
                    # this blob instead of raising a KeyError
                    continue

                cat_graph = Graph()
                cat_graph.parse(
                    source=StringIO(
                        str(b64decode(blob_data["content"]), "utf-8")
                    ),
                    format="turtle"
                )
                category = Category(graph=cat_graph)
                cat_list.append(category)
        cache.set(cache_key, cat_list, timeout=3600)
        return cat_list
    else:
        return cache.get(cache_key)

def convert_github_date(date):
    """
        Convert a Github timestamp ("%Y-%m-%dT%H:%M:%SZ") to the
        "dd/mm/yyyy à hh:mm" display format.
    """
    parsed = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")
    return parsed.strftime("%d/%m/%Y à %H:%M")


def get_current_category_list(repository, with_local_changes=True):
    """
        Shortcut function that generates the list of categories to use for
        view templates.
        Each category is a dict with the following format:
        {
            "cat_label": category label,
            "cat_description": category description,
            "cat_id": category id,
            "cat_properties": category properties,
            "state": category state ("untouched", "created", "modified" or
            "deleted" when with_local_changes is True, "original" otherwise)
        }

        Caching it is unnecessary because the API call to get the list is
        already cached
    """
    cat_api_instance = CategoryAPI()
    cat_changes_api_instance = CategoryChangesAPI()

    def _parse_category(serialized):
        # Rebuild a Category object from its turtle serialization
        graph = Graph()
        graph.parse(source=StringIO(serialized), format='turtle')
        return Category(graph=graph)

    def _as_dict(category, state):
        # Template-friendly representation of a category
        return {
            "cat_label": category.label,
            "cat_description": category.description,
            "cat_id": category.cat_id,
            "cat_properties": category.properties,
            "state": state
        }

    deleted_cat_dict = {}
    modified_cat_dict = {}
    serialized_cat_list = []
    if session.get("user_logged", None) is not None:
        serialized_cat_list = cat_api_instance.get(repository=repository)[0]
        cat_changes = cat_changes_api_instance.get(repository=repository)[0]
        modified_cat_dict = cat_changes["modified_categories"]
        deleted_cat_dict = cat_changes["deleted_categories"]

    original_cat_list = [
        _parse_category(serialized) for serialized in serialized_cat_list
    ]

    if not with_local_changes:
        # We only want the categories as they are in the repository
        return [_as_dict(category, "original")
                for category in original_cat_list]

    # We want the categories updated with the changes current user made
    edited_cat_list = [
        _parse_category(modified_cat_dict[name])
        for name in modified_cat_dict.keys()
    ]

    cat_list = []
    # first we find the untouched and deleted categories
    for category in original_cat_list:
        if category.cat_id in modified_cat_dict.keys():
            continue
        if category.cat_id in deleted_cat_dict.keys():
            state = "deleted"
        else:
            state = "untouched"
        cat_list.append(_as_dict(category, state))

    # now we must find the not yet submitted categories that were created,
    # plus the locally modified ones
    original_ids = [cat.cat_id for cat in original_cat_list]
    for category in edited_cat_list:
        if category.cat_id not in original_ids:
            state = "created"
        else:
            state = "modified"
        cat_list.append(_as_dict(category, state))
    return cat_list


def get_tasks_status(task_ids_list, delete_task_from_session, repository):
    """
        Poll the state of the given celery tasks.

        * task_ids_list: list of celery task ids to inspect
        * delete_task_from_session: when True, finished tasks are removed
          from the session's task list for this repository
        * repository: repository whose session task list is updated

        Returns a dict {'states': {task_id: state, ...},
        'running_tasks_nb': number of tasks not yet finished}.
    """
    logger.debug("get_tasks_status : %r " % task_ids_list)
    res = { 'states' : {} }
    running_tasks_nb = 0
    # BUGFIX: iterate over a snapshot -- when delete_task_from_session is
    # True the session list may be the very list being iterated, and
    # removing items from a list while iterating it skips elements
    for task_id in list(task_ids_list):
        a_res = celery.AsyncResult(task_id)
        res['states'][task_id] = a_res.state
        if a_res.ready():
            a_res.forget()
            if delete_task_from_session:
                repo_tasks = session.get('tasks', {}).get(repository, [])
                # guard against a ValueError if the id was already removed
                if task_id in repo_tasks:
                    repo_tasks.remove(task_id)
        else:
            running_tasks_nb += 1
    res['running_tasks_nb'] = running_tasks_nb

    return res