src/catedit/views/utils.py
author Nicolas DURAND <nicolas.durand@iri.centrepompidou.fr>
Fri, 20 Feb 2015 10:55:54 +0100
changeset 45 1506da593f40
parent 43 6d0e2523e17d
child 46 5bd3fb023396
permissions -rw-r--r--
Caching fix (now works on views.categories submodule) and config (dict CACHE_CONFIG, see Flask-cache doc for expected values) + Pagination for comments (changeset & issues) + Hooks to log API rate consumption and get pagination info

"""
utils.py:
Module that groups utility functions and classes that are used by views,
partly because most of them do requests to the Github API and as such must
be cached
"""

from catedit import app, github, cache, log_api_rate, save_links
from catedit.models import Category
from catedit.resources import CategoryAPI, CategoryChangesAPI
from flask import redirect, url_for, session, abort
from flask.ext.github import GitHubError
from datetime import datetime
from rdflib import Graph
from base64 import b64decode
from io import StringIO

# Module-wide logger: reuses the logger exposed by the catedit `app` object
logger = app.logger

class Pagination(object):
    """
        Holds the state needed to render pagination links.

        * page: the current page number (pages are numbered from 1)
        * per_page: the number of items on one page
        * last_page: the number of the last page, or -1 when the last page
        is not known
    """

    def __init__(self, page, per_page, last_page):
        self.page = page
        self.last_page = last_page
        self.per_page = per_page

    @property
    def pages(self):
        """
            Total number of pages, -1 when the last page is not known
        """
        return self.last_page

    @property
    def has_prev(self):
        """
            True if there is a page before the current one
        """
        return self.page > 1

    @property
    def has_next(self):
        """
            True if there is a page after the current one. When the last
            page is not known, a next page is always assumed to exist.
        """
        if self.last_page == -1:
            return True
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        """
            Generator yielding the page numbers to display, with None
            yielded wherever a run of pages is skipped.

            * left_edge: number of pages shown at the very beginning
            * left_current: number of pages shown before the current page
            * right_current: size of the window starting at the current
            page (current page included)
            * right_edge: number of pages shown at the very end

            When the last page is not known, pages are listed up to the page
            following the current one and no right edge is produced.
        """
        last = 0
        window_start = self.page - left_current - 1
        if self.last_page != -1:
            window_end = self.page + right_current
            # The last right_edge pages are those strictly above
            # pages - right_edge (mirrors the left_edge test)
            right_edge_start = self.pages - right_edge
            for num in range(1, self.pages + 1):
                if num <= left_edge or \
                   window_start < num < window_end or \
                   num > right_edge_start:
                    if last + 1 != num:
                        yield None
                    yield num
                    last = num
        else:
            for num in range(1, self.page + 2):
                if num <= left_edge or num > window_start:
                    if last + 1 != num:
                        yield None
                    yield num
                    last = num


def check_user_status_and_repo_access(repository):
    """
        Guard to call at the beginning of every view that needs an
        authenticated user with editing privilege on the given repository.

        Returns a redirect to the home page when the repository is not in
        the configured repository list, when no user is logged in, or when
        the logged user cannot edit the repository. Returns None when all
        the checks pass.
    """
    known_repositories = \
        app.config["PERSISTENCE_CONFIG"]["REPOSITORY_LIST"]
    # Evaluated left to right, the session is only read for known repos
    access_refused = (
        repository not in known_repositories
        or not session.get("user_logged", None)
        or not session.get("user_can_edit", {}).get(repository, False)
    )
    if access_refused:
        return redirect(url_for("home.index"))
    return None


def get_comments(repository, thread_type, thread_id, page=1, per_page=30):
    """
        Function that fetches one page of the comments of a thread (an issue
        or a commit) along with the data describing the thread itself.

        It takes the following args:
            * repository: the repository from which to get comments from
            * thread_type: either "issues" or "commits"
            * thread_id: the id (as a string) of the issue or commit to get
            comments from
            * page: the page of comments to get
            * per_page: the number of comments per page

        Aborts with a 404 if thread_id is not among the ids returned by
        get_commits (for "commits") or get_issues (for "issues").
        Github errors are logged and leave the matching part of the result
        empty.

        Returns a dict with the following format:
        {
            "author": author of the thread
            "title": title of the issue or message of the commit
            "opening_date": date of the thread, format dd/mm/yyyy à hh:mm
            "comment_list": list of comments, each one being a dict
            {
                "author": author of the comment
                "body": body of the comment
                "date": date of the comment, format dd/mm/yyyy à hh:mm
            }
            "opening_post": body of the issue (empty for commits)
            "per_page": the per_page argument
        }
    """
    # Date used when the thread data could not be fetched, so that
    # convert_github_date always receives a parsable value
    unknown_date = "0001-01-01T00:00:00Z"

    if thread_type == "commits":
        commits_list = get_commits(repository)
        if thread_id not in [commit["id"] for commit in commits_list]:
            abort(404)
    elif thread_type == "issues":
        issues_list = get_issues(repository)
        if thread_id not in [issue["id"] for issue in issues_list]:
            abort(404)

    thread_endpoint = (
        "repos/"
        + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
        + repository + "/"
        + thread_type + "/"
        + thread_id
    )

    github_comments_data = []
    try:
        github_comments_data = github.get(
            thread_endpoint
            + "/comments?per_page=" + str(per_page)
            + "&page=" + str(page),
            hooks=dict(response=save_links)
        )
    except GitHubError as ghe:
        # page and per_page are integers: let the logger format them
        # instead of concatenating them to the message
        logger.error(
            "Error trying to get comments with following data:"
            " - repository : %s"
            " - thread_type : %s"
            " - thread_id : %s"
            " - page : %s"
            " - per_page : %s",
            repository, thread_type, thread_id, page, per_page
        )
        logger.error(ghe.response.text)

    comment_list = [
        {
            "author": comment["user"]["login"],
            "body": comment["body"],
            "date": convert_github_date(comment["created_at"])
        }
        for comment in github_comments_data
    ]

    discussion_data = {}
    try:
        discussion_data = github.get(
            thread_endpoint,
            hooks=dict(response=log_api_rate)
        )
    except GitHubError as ghe:
        logger.error(
            "Error trying to get the commit or issue of id " + thread_id
        )
        logger.error("endpoint: " + thread_endpoint)
        logger.error(ghe.response.text)

    thread_author = ""
    thread_opening_date = ""
    thread_title = ""
    thread_opening_post = ""

    if thread_type == "commits":
        # Github sends a null author when the commit author does not match
        # a Github account
        thread_author = (
            discussion_data.get("author") or {}
        ).get("login", "")
        commit_author = discussion_data.get("commit", {}).get("author", {})
        thread_opening_date = convert_github_date(
            commit_author.get("date", unknown_date)
        )
        thread_title = discussion_data.get("commit", {}).get("message", "")
    elif thread_type == "issues":
        thread_author = (
            discussion_data.get("user") or {}
        ).get("login", "")
        thread_opening_date = convert_github_date(
            discussion_data.get("created_at", unknown_date)
        )
        thread_title = discussion_data.get("title", "")
        # The body of an issue opened without text is null
        thread_opening_post = discussion_data.get("body") or ""

    return {
        "author": thread_author,
        "title": thread_title,
        "opening_date": thread_opening_date,
        "comment_list": comment_list,
        "opening_post": thread_opening_post,
        "per_page": per_page
    }


def post_comment(repository, thread_type, thread_id,
                 comment_body, thread_title=""):
    """
        Function that posts a comment, or opens a new issue, on github.

        * repository is the repository to post in
        * thread_type is either "issues" or "commits"
        * thread_id is the id of the issue or the commit to comment, or
        "new" to open a new issue
        * comment_body is the content of the comment (or of the new issue)
        * thread_title is the title of the new issue, only used when
        thread_id is "new"

        Github errors are logged. The cache is cleared in every case.
        Returns thread_id after a posted comment, the number of the created
        issue (as a string) after a new issue, or an empty string when the
        request failed.
    """
    payload = {"body": comment_body}
    posted_id = ""
    collection_endpoint = (
        "repos/"
        + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
        + repository + "/"
        + thread_type
    )

    if thread_id == "new":
        # We're posting a new issue
        payload["title"] = thread_title
        try:
            created_issue = github.post(
                collection_endpoint,
                data=payload,
                hooks=dict(response=log_api_rate)
            )
            posted_id = str(created_issue["number"])
        except GitHubError as post_error:
            logger.error("".join([
                "Error posting new issue with following data: ",
                " - repository : ", repository,
                " - thread_id : ", thread_id,
                " - thread_type : ", thread_type,
                " - thread_title : ", thread_title,
                " - comment_body : ", comment_body,
            ]))
            logger.error(post_error.response.text)
    else:
        try:
            github.post(
                collection_endpoint + "/" + thread_id + "/comments",
                data=payload,
                hooks=dict(response=log_api_rate)
            )
            posted_id = thread_id
        except GitHubError as post_error:
            logger.error("".join([
                "Error posting comment with following data: ",
                " - repository : ", repository,
                " - thread_id : ", thread_id,
                " - thread_type : ", thread_type,
                " - comment_body : ", comment_body,
            ]))
            logger.error(post_error.response.text)

    cache.clear()
    return posted_id


@cache.memoize(timeout=3600)
def get_commits(repository):
    """
        Function that gets the list of commits of a given repository (one
        request asking for 5 commits per page). The result is memoized for
        an hour. Returns a list of dict with the format:
        {
            id : commit sha
            title : commit message
            date : commit date (committer), format dd/mm/yyyy à hh:mm
            author : name of the committer
            comment_count : commit comments count
        }
        A Github error is logged and results in an empty list.
    """
    endpoint = (
        "repos/"
        + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
        + repository
        + "/commits?per_page=5"
    )
    try:
        raw_commits = github.get(endpoint, hooks={"response": save_links})
    except GitHubError as ghe:
        logger.error("Error getting commits for repo " + repository)
        logger.error(ghe.response.text)
        raw_commits = []

    changeset_list = []
    for raw_commit in raw_commits:
        commit_sha = raw_commit["sha"]
        details = raw_commit["commit"]
        message = details["message"]
        committer = details["committer"]
        changeset_list.append({
            "id": commit_sha,
            "title": message,
            "date": convert_github_date(committer["date"]),
            "author": committer["name"],
            "comment_count": details["comment_count"],
        })
    return changeset_list


@cache.memoize(timeout=3600)
def get_issues(repository):
    """
        Function that gets the list of issues of a given repository (one
        request asking for 100 issues per page). The result is memoized for
        an hour. Returns a list of dict with the format:
        {
            id: issue number, as a string
            title: issue title
            author: login of the issue author
            opening_date: issue opening date, format dd/mm/yyyy à hh:mm
            last_updated: last update date, format dd/mm/yyyy à hh:mm
            comment_count: comments count
        }
        A Github error is logged and results in an empty list.
    """
    endpoint = (
        "repos/"
        + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
        + repository
        + "/issues?per_page=100"
    )
    try:
        raw_issues = github.get(endpoint)
    except GitHubError as ghe:
        logger.error("Error getting issues for repo " + repository)
        logger.error(ghe.response.text)
        raw_issues = []

    discussion_list = []
    for raw_issue in raw_issues:
        issue_summary = {
            "id": str(raw_issue["number"]),
            "title": raw_issue["title"],
            "author": raw_issue["user"]["login"],
            "opening_date": convert_github_date(raw_issue["created_at"]),
            "last_updated": convert_github_date(raw_issue["updated_at"]),
            "comment_count": raw_issue["comments"],
        }
        discussion_list.append(issue_summary)
    return discussion_list


@cache.memoize(timeout=3600)
def get_category_list_for_commit(repository, changeset_id):
    """
        Get the category list as it was following the changeset of
        id changeset_id. The result is memoized for an hour.

        * repository: the repository to read the categories from
        * changeset_id: the sha of the commit, it must be among the ids
        returned by get_commits, otherwise the request is aborted with a 404

        Returns a list of dict with the format:
        {
            "cat_label": category label,
            "cat_description": category description,
            "cat_id": category id,
            "cat_properties": category properties,
            "state": "original"
        }
        Github errors are logged: a commit or a tree that can't be fetched
        results in an empty list, a blob that can't be fetched is skipped.
    """
    commits_list = get_commits(repository)
    if changeset_id not in [commit["id"] for commit in commits_list]:
        abort(404)

    repository_endpoint = (
        "repos/"
        + app.config["PERSISTENCE_CONFIG"]["REPOSITORY_OWNER"] + "/"
        + repository
    )

    # First step: get the commit, to know the sha of its tree
    commit_data = {}
    try:
        commit_data = github.get(
            repository_endpoint + "/commits/" + changeset_id
        )
    except GitHubError as ghe:
        logger.error("Error trying to get the commit of id " + changeset_id)
        logger.error(ghe.response.text)

    tree_sha = commit_data.get("commit", {}).get("tree", {}).get("sha", "")
    if not tree_sha:
        # Without a tree sha there is nothing to list (the commit request
        # failed or its answer is incomplete)
        logger.error(
            "No tree sha found for the commit of id " + changeset_id
        )
        return []

    # Second step: get the whole tree of the commit
    tree_data = {}
    try:
        tree_data = github.get(
            repository_endpoint + "/git/trees/"
            + tree_sha
            + "?recursive=1"
        )
    except GitHubError as ghe:
        logger.error("Error trying to get the tree of sha " + tree_sha)
        logger.error(ghe.response.text)

    logger.debug(tree_data)

    # Third step and fourth step: get every blob located in the categories
    # path and build a category from its content
    categories_path = app.config["PERSISTENCE_CONFIG"]["CATEGORIES_PATH"]
    cat_list = []
    for blob in tree_data.get("tree", []):
        if categories_path not in blob["path"]:
            continue
        if blob.get("type", "blob") != "blob":
            # A recursive tree also lists directories, which have no
            # content to parse
            continue

        try:
            blob_data = github.get(
                repository_endpoint + "/git/blobs/" + blob["sha"]
            )
        except GitHubError as ghe:
            logger.error(
                "Error trying to get the blob of sha " + blob["sha"]
            )
            logger.error(ghe.response.text)
            # No content to parse for this blob
            continue

        cat_graph = Graph()
        cat_graph.parse(
            source=StringIO(
                str(b64decode(blob_data["content"]), "utf-8")
            ),
            format="turtle"
        )
        category = Category(graph=cat_graph)
        cat_list.append(
            {
                "cat_label": category.label,
                "cat_description": category.description,
                "cat_id": category.cat_id,
                "cat_properties": category.properties,
                "state": "original"
            }
        )

    return cat_list


def convert_github_date(date):
    """
        Function that turns a date string in the github format
        (yyyy-mm-ddThh:mm:ssZ) into a string in the
        "dd/mm/yyyy à hh:mm" format
    """
    github_format = "%Y-%m-%dT%H:%M:%SZ"
    display_format = "%d/%m/%Y à %H:%M"
    parsed_date = datetime.strptime(date, github_format)
    return parsed_date.strftime(display_format)


def get_current_category_list(repository, with_local_changes=True):
    """
        Shortcut function that generates the list of categories to use for
        view templates. The categories and the pending changes are only
        fetched when the session holds a "user_logged" value, otherwise the
        list is empty.

        Each category is a dict with the following format:
        {
            "cat_label": category label,
            "cat_description": category description,
            "cat_id": category id,
            "cat_properties": category properties,
            "state": category state
        }
        With with_local_changes set, the state is one of "untouched",
        "deleted", "created" or "modified" and the list holds the existing
        categories that were not modified, followed by the categories of
        the pending modifications. Otherwise every existing category is
        listed with the state "original".
    """
    def parse_category(turtle_text):
        # Build a Category from its turtle serialization
        graph = Graph()
        graph.parse(source=StringIO(turtle_text), format='turtle')
        return Category(graph=graph)

    def describe(category, state):
        # Dict version of a category, as expected by the templates
        return {
            "cat_label": category.label,
            "cat_description": category.description,
            "cat_id": category.cat_id,
            "cat_properties": category.properties,
            "state": state
        }

    category_api = CategoryAPI()
    changes_api = CategoryChangesAPI()

    serialized_originals = []
    modified_by_id = {}
    deleted_by_id = {}
    if session.get("user_logged", None) is not None:
        serialized_originals = category_api.get(repository=repository)[0]
        pending_changes = changes_api.get(repository=repository)[0]
        modified_by_id = pending_changes["modified_categories"]
        deleted_by_id = pending_changes["deleted_categories"]

    originals = [
        parse_category(serialized) for serialized in serialized_originals
    ]

    if not with_local_changes:
        # We only want the categories
        return [describe(category, "original") for category in originals]

    # We want the categories updated with the changes current user made
    edited = [
        parse_category(modified_by_id[modified_name])
        for modified_name in modified_by_id
    ]

    # first the untouched and deleted categories, the modified ones are
    # listed from the pending changes below
    listing = []
    for category in originals:
        if category.cat_id in modified_by_id:
            continue
        if category.cat_id in deleted_by_id:
            listing.append(describe(category, "deleted"))
        else:
            listing.append(describe(category, "untouched"))

    logger.debug("Edited cat list: "
                 + str([cat.label for cat in edited])
                 + " - Original cat list: "
                 + str([cat.label for cat in originals]))

    # then the pending changes: a category unknown to the repository was
    # created, any other one was modified
    original_ids = [cat.cat_id for cat in originals]
    for category in edited:
        if category.cat_id in original_ids:
            listing.append(describe(category, "modified"))
        else:
            listing.append(describe(category, "created"))
    return listing