src/catedit/utils.py
author ymh <ymh.work@gmail.com>
Wed, 14 Aug 2024 22:08:14 +0200
changeset 142 640fb0f13022
parent 134 fc3b0b59cf0f
permissions -rw-r--r--
server and docker migration

"""
    utils.py:
    Module that lists utility functions used throughout the app
"""
from base64 import b64decode
from catedit import app, cache, github, log_api_rate
import json
import logging

from flask import session
from rdflib import RDF, RDFS, URIRef
from rdflib.compare import to_isomorphic, graph_diff
from rdflib.namespace import split_uri

from flask_github import GitHubError


# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

def _diff_graph_to_properties(diff_graph, repository):
    """
        Converts every triple of diff_graph into a (predicate key, object)
        tuple and returns them as a list.

        * RDFS.label triples become ("label", <python value>)
        * RDF.Description triples become ("description", <python value>)
        * other triples are matched against the "rdflib_class" of each entry
        of the property list of the given repository; "uriref-link" and
        "literal" properties keep the python value of the object, any other
        object type is kept only if the object URI lives in
        CATEGORY_NAMESPACE, in which case only its local part is stored.

        Triples that match no known predicate are silently ignored.
    """
    properties = []
    for _, predicate_uri, rdf_object in diff_graph.triples((None, None, None)):
        if predicate_uri == RDFS.label:
            properties.append(("label", rdf_object.toPython()))
            continue
        if predicate_uri == RDF.Description:
            properties.append(("description", rdf_object.toPython()))
            continue

        # Fetched lazily (only for triples that need it) and once per triple
        # instead of several times per candidate predicate.
        repo_properties = get_property_list()[repository]
        for predicate, definition in repo_properties.items():
            if predicate_uri != URIRef(definition["rdflib_class"]):
                continue
            if definition["object_type"] in ("uriref-link", "literal"):
                properties.append((predicate, rdf_object.toPython()))
                continue
            # Remaining object types reference another category: keep the
            # triple only if the object can be split and belongs to the
            # category namespace. This is deliberately best-effort, so any
            # ordinary error means "skip this triple" (but interpreter-exit
            # exceptions are no longer swallowed).
            try:
                namespace, local_name = split_uri(rdf_object)
                references_category = (
                    namespace == app.config["CATEGORY_NAMESPACE"]
                )
            except Exception:
                references_category = False
            if references_category:
                properties.append((predicate, local_name))
    return properties


def compare_categories(first_category, second_category, repository, with_details=True):
    """
        Compares 2 categories and generates a dict describing their
        differences.

        The returned dict always contains:
        * "same_id" is a True/False attribute which is True if both
        categories have the same cat_id
        * "same_content" is a True/False attribute which is True if the
        graphs of both categories compare equal once passed through
        to_isomorphic

        If the contents differ and with_details is True, the dict also
        contains 3 lists of properties:
        * "only_in_first" are the properties we can only find
        in first_category
        * "only_in_second" are the properties we can only find
        in second_category
        * "in_both" are the properties that can be found in both
        categories

        Each "property" is a tuple with a predicate key ("label",
        "description" or a key of the property list of the repository) and
        an object (either the id if we're referencing another category or
        the link/text in other cases).

        If both categories have the same content, or if with_details is
        False, only "same_id" and "same_content" are returned.
    """
    first_iso_cat = to_isomorphic(first_category.cat_graph)
    second_iso_cat = to_isomorphic(second_category.cat_graph)
    compare_result = {
        "same_id": first_category.cat_id == second_category.cat_id,
        "same_content": first_iso_cat == second_iso_cat,
    }

    if compare_result["same_content"] or not with_details:
        return compare_result

    rdf_in_both, rdf_only_in_first, rdf_only_in_second = graph_diff(
        first_category.cat_graph,
        second_category.cat_graph
    )
    # TODO: check encoding before re-enabling logger.debug(rdf_in_both)
    # TODO: check encoding before re-enabling logger.debug(rdf_only_in_first)
    # TODO: check encoding before re-enabling logger.debug(rdf_only_in_second)
    in_both = _diff_graph_to_properties(rdf_in_both, repository)
    in_first = _diff_graph_to_properties(rdf_only_in_first, repository)
    in_second = _diff_graph_to_properties(rdf_only_in_second, repository)

    compare_result["only_in_first"] = in_first
    compare_result["only_in_second"] = in_second
    compare_result["in_both"] = in_both
    return compare_result


def make_differences_list(first_category_list, second_category_list, repository):
    """
        Compares 2 category lists and generates a dict that lists additions,
        modifications and deletions from first_category_list to
        second_category_list

        * "additions": list of categories that were added (their cat_id is
        only present in second_category_list)
        * "modifications" : list of couples before/after of categories that
        share the same cat_id but whose content differs according to
        compare_categories
        * "deletions": list of categories that were deleted (their cat_id is
        only present in first_category_list)

        Categories are matched on their cat_id attribute, which must be
        hashable. The repository argument is passed through to
        compare_categories.
    """
    # Index the second list by id once, instead of rescanning it for every
    # category of the first list. A list is kept per id so that duplicated
    # ids still yield one comparison per matching category, in list order.
    second_by_id = {}
    for second_list_category in second_category_list:
        second_by_id.setdefault(
            second_list_category.cat_id, []
        ).append(second_list_category)

    modified_categories = []
    deleted_categories = []
    first_ids = set()

    for first_list_category in first_category_list:
        first_ids.add(first_list_category.cat_id)
        counterparts = second_by_id.get(first_list_category.cat_id)
        if counterparts is None:
            deleted_categories.append(first_list_category)
            continue
        for second_list_category in counterparts:
            comparison = compare_categories(
                first_list_category,
                second_list_category,
                repository,
                with_details=False
            )
            if not comparison["same_content"]:
                modified_categories.append(
                    (first_list_category, second_list_category)
                )

    created_categories = [
        second_list_category
        for second_list_category in second_category_list
        if second_list_category.cat_id not in first_ids
    ]

    return {
        "additions": created_categories,
        "modifications": modified_categories,
        "deletions": deleted_categories
    }

def get_property_list():
    """
        Function to get the property list, also registered in templates

        Returns a dict mapping each repository of
        PERSISTENCE_CONFIG["REPOSITORY_LIST"] to its property list:
        * with the "PersistenceToGithub" method, the list is read from the
        "property_list" key of properties/properties.json in each
        repository, falling back to the PROPERTY_LIST config value when
        GitHub returns an error
        * with any other method, the PROPERTY_LIST config value is used for
        every repository

        The resulting dict is stored in the cache for 3600 seconds.
    """
    cache_key = "property_lists_dict"
    # Read the cache only once: checking and then fetching again could return
    # None if the entry expired between the two calls.
    cached_lists = cache.get(cache_key)
    if cached_lists is not None:
        return cached_lists

    persistence_config = app.config["PERSISTENCE_CONFIG"]
    repositories = persistence_config["REPOSITORY_LIST"]
    property_list = {repo: {} for repo in repositories}

    if persistence_config["METHOD"] == "PersistenceToGithub":
        for repo in repositories:
            try:
                json_file = github.get(
                    "repos/"
                    + persistence_config["REPOSITORY_OWNER"] + "/"
                    + repo + "/contents/properties/properties.json",
                    hooks=dict(response=log_api_rate)
                )
                # TODO: check encoding before re-enabling a debug log of the
                # decoded properties here
                # The "content" field is base64-decoded, then read as UTF-8
                # JSON.
                property_list[repo] = json.loads(
                    b64decode(json_file["content"]).decode("utf-8")
                )["property_list"]
            except GitHubError:
                logger.debug(
                    "GitHub Error trying to get the property list. We'll assume "
                    "there is none and use default list as defined in config.py"
                )
                property_list[repo] = app.config["PROPERTY_LIST"]
    else:
        for repo in repositories:
            property_list[repo] = app.config["PROPERTY_LIST"]

    cache.set(cache_key, property_list, timeout=3600)
    return property_list
# Expose get_property_list as a global of the app's Jinja environment.
app.jinja_env.globals['get_property_list'] = get_property_list