"""
utils.py:
Module that lists utility functions used throughout the app
"""
from base64 import b64decode
from catedit import app, cache, github, log_api_rate
import json
import logging
from flask import session
from rdflib import RDF, RDFS, URIRef
from rdflib.compare import to_isomorphic, graph_diff
from rdflib.namespace import split_uri
from flask_github import GitHubError
# Module-level logger, named after this module so its output can be filtered.
logger = logging.getLogger(__name__)
def _category_local_name(uri):
    """
    Return the local name of uri if it lives in the configured
    CATEGORY_NAMESPACE, None otherwise (including when uri can't be split).
    """
    try:
        namespace, local_name = split_uri(uri)
        if namespace != app.config["CATEGORY_NAMESPACE"]:
            return None
    except Exception:
        # Best effort: an object that cannot be split into
        # namespace/local name is not a reference to a category, so it is
        # skipped. Unlike a bare "except:", this lets KeyboardInterrupt
        # and SystemExit propagate.
        return None
    return local_name


def compare_categories(first_category, second_category, repository, with_details=True):
    """
    Compares 2 categories and generates a dict describing their
    differences.

    Parameters:
    * first_category, second_category: objects exposing a "cat_id"
    attribute and a "cat_graph" rdflib graph
    * repository: key of the repository in the dict returned by
    get_property_list(), used to map predicates to property keys
    * with_details: if False, only the two boolean attributes below are
    computed

    The returned dict always contains:
    * "same_id" is a True/False attribute which is True if both
    categories have the same id
    * "same_content" is a True/False attribute which is True if both
    category graphs compare equal once converted with to_isomorphic

    Only when the contents differ and with_details is True, it also
    contains 3 lists of properties:
    * "only_in_first" are the properties we can only find
    in first_category
    * "only_in_second" are the properties we can only find
    in second_category
    * "in_both" are the properties that can be found in both
    categories

    Each "property" is a couple (key, value):
    * ("label", text) for RDFS.label triples
    * ("description", text) for RDF.Description triples
    * (property key, value) for predicates matching the "rdflib_class"
    of an entry of the repository property list. The value is the python
    value of the object when the property "object_type" is "uriref-link"
    or "literal"; otherwise it is the local name of the object, and the
    property is skipped if the object is not in
    app.config["CATEGORY_NAMESPACE"].
    Triples with any other predicate are ignored.
    """
    compare_result = {}
    first_iso_cat = to_isomorphic(first_category.cat_graph)
    second_iso_cat = to_isomorphic(second_category.cat_graph)
    compare_result["same_id"] = (
        first_category.cat_id == second_category.cat_id
    )
    compare_result["same_content"] = (
        first_iso_cat == second_iso_cat
    )
    if compare_result["same_content"] or not with_details:
        return compare_result

    rdf_in_both, rdf_only_in_first, rdf_only_in_second = graph_diff(
        first_category.cat_graph,
        second_category.cat_graph
    )
    in_first = []
    in_both = []
    in_second = []
    # TODO: check encoding before debug-logging rdf_in_both,
    # rdf_only_in_first and rdf_only_in_second

    # Fetched lazily (only if a triple actually needs it) and only once,
    # instead of hitting the cache several times per predicate per triple.
    repo_properties = None
    for (final_list, diff_graph) in [
            (in_both, rdf_in_both),
            (in_first, rdf_only_in_first),
            (in_second, rdf_only_in_second)
    ]:
        for _, triple_predicate, triple_object in diff_graph.triples(
                (None, None, None)
        ):
            if triple_predicate == RDFS.label:
                final_list.append(("label", triple_object.toPython()))
                continue
            if triple_predicate == RDF.Description:
                final_list.append(
                    ("description", triple_object.toPython())
                )
                continue

            if repo_properties is None:
                repo_properties = get_property_list()[repository]
            for property_key, definition in repo_properties.items():
                if triple_predicate == URIRef(definition["rdflib_class"]):
                    if definition["object_type"] in ("uriref-link", "literal"):
                        final_list.append(
                            (property_key, triple_object.toPython())
                        )
                    else:
                        # The object is expected to reference another
                        # category: keep only its id (local name).
                        local_name = _category_local_name(triple_object)
                        if local_name is not None:
                            final_list.append((property_key, local_name))

    compare_result["only_in_first"] = in_first
    compare_result["only_in_second"] = in_second
    compare_result["in_both"] = in_both
    return compare_result
def make_differences_list(first_category_list, second_category_list, repository):
    """
    Compares 2 category lists and generates a dict that lists additions,
    modifications and deletions from first_category_list to
    second_category_list
    * "additions": list of categories of second_category_list whose id
    is not in first_category_list
    * "modifications" : list of couples before/after of categories that
    share the same id but for which compare_categories reports a
    different content
    * "deletions": list of categories of first_category_list whose id
    is not in second_category_list

    Categories are matched on their "cat_id" attribute, which is used
    as set member / dict key here and therefore has to be hashable.
    repository is passed through to compare_categories.
    """
    # Build the id lookups once instead of rebuilding an id list for every
    # category. Second-list categories are grouped by id, in order, so that
    # duplicated ids are still each compared like a full scan would do.
    first_ids = {category.cat_id for category in first_category_list}
    second_by_id = {}
    for category in second_category_list:
        second_by_id.setdefault(category.cat_id, []).append(category)

    modified_categories = []
    deleted_categories = []
    for first_list_category in first_category_list:
        same_id_categories = second_by_id.get(first_list_category.cat_id)
        if same_id_categories is None:
            deleted_categories.append(first_list_category)
            continue
        for second_list_category in same_id_categories:
            comparison = compare_categories(
                first_list_category,
                second_list_category,
                repository,
                with_details=False
            )
            if not comparison["same_content"]:
                modified_categories.append(
                    (first_list_category, second_list_category)
                )

    created_categories = [
        category for category in second_category_list
        if category.cat_id not in first_ids
    ]
    return {
        "additions": created_categories,
        "modifications": modified_categories,
        "deletions": deleted_categories
    }
def get_property_list():
    """
    Function to get the property list, also registered in templates

    Returns a dict mapping every repository of
    app.config["PERSISTENCE_CONFIG"]["REPOSITORY_LIST"] to its property
    list.

    When the persistence method is "PersistenceToGithub", the property
    list of each repository is read from the "property_list" entry of its
    properties/properties.json file, fetched through the GitHub API. If
    that request raises a GitHubError, or with any other persistence
    method, app.config["PROPERTY_LIST"] is used instead.

    The result is stored in the cache under "property_lists_dict" with
    timeout=3600 and served from there on later calls.
    """
    cache_key = "property_lists_dict"
    # Read the cache only once: checking and then reading again could
    # return None if the entry expired between the two calls.
    cached_property_list = cache.get(cache_key)
    if cached_property_list is not None:
        return cached_property_list

    persistence_config = app.config["PERSISTENCE_CONFIG"]
    from_github = persistence_config["METHOD"] == "PersistenceToGithub"
    property_list = {}
    for repo in persistence_config["REPOSITORY_LIST"]:
        if not from_github:
            property_list[repo] = app.config["PROPERTY_LIST"]
            continue
        try:
            json_file = github.get(
                "repos/{}/{}/contents/properties/properties.json".format(
                    persistence_config["REPOSITORY_OWNER"], repo
                ),
                hooks=dict(response=log_api_rate)
            )
            # TODO: check encoding before debug-logging the decoded
            # properties of the repository
            # The file content is returned base64-encoded.
            properties_json = b64decode(json_file["content"]).decode("utf-8")
            property_list[repo] = json.loads(properties_json)["property_list"]
        except GitHubError:
            # exc_info keeps the actual GitHub error in the debug log
            # instead of silently discarding it.
            logger.debug(
                "GitHub Error trying to get the property list. We'll assume " +
                "there is none and use default list as defined in config.py",
                exc_info=True
            )
            property_list[repo] = app.config["PROPERTY_LIST"]

    cache.set(cache_key, property_list, timeout=3600)
    return property_list
# Expose get_property_list to the Jinja templates as a global callable.
app.jinja_env.globals['get_property_list'] = get_property_list