Module: gd07

License: CC BY-NC 4.0 DE | EN

from lxml import etree # https://lxml.de/
import rdflib
import owlrl

# global variables
verbose = 1  # default verbosity; overwritten via GenDifS(..., verb=...)
NewID = 1000

GenDifS

The main class.

class GenDifS:
    """Main class managing file info; contains dict of GenDifS_taxonomy classes"""

    def __init__(self, mindmap_file_location=None, *, from_string=None, verb = 1):

        global verbose
        verbose = verb

        if mindmap_file_location != None:
            # open .mm file, parse xml
            xml_parser = etree.XMLParser(remove_blank_text=True)
            self.mindmap_file_location = mindmap_file_location
            self.mindmap_xml = etree.parse(self.mindmap_file_location, xml_parser)
        elif from_string != None:
            self.mindmap_xml = self.xml_mindmap_from_string(from_string)
        else:
            print("ERROR 53: no mindmap given, nothing to do")
            return

        self.mindmap_topnode = self.mindmap_xml.getroot().find("node") # topmost xml element with tag name "node"

        
        
        self.dict_of_all_taxonomy_xlm_nodes = None
        self.taxonomies_by_ID = None
        self.taxonomies_by_name = None
        self.code = None
        self.languages = None

        
    def xml_mindmap_from_string(self, string):

        def depth_lstrip(x):
            x = x.rstrip()
            xlstrip = x.lstrip()
            depth = len(x) - len(xlstrip)
            return depth, xlstrip

        def texttree_to_xml(current_depth, parent):
            nonlocal i, texttree, texttree_len

            #subtree = []
            new_parent = parent

            while i < texttree_len:
                d, txt = depth_lstrip(texttree[i])
                #print("10:", d, txt)
                if d == current_depth:
                    if len(txt) > 0:
                        #subtree.append(txt)
                        child = etree.SubElement(parent, "node")
                        child.set("TEXT", txt)

                        dct = string2dict(txt)
                        if "ID" in dct.keys():
                            child.set("ID", "_".join(dct["ID"]))
                        else:
                            child.set("ID", f"{i}")
                        new_parent = child
                    i += 1
                elif d > current_depth:
                    texttree_to_xml(d, new_parent)
                else:
                    return parent
            return parent
    
        i = 0
        texttree = string.split("\n")
        texttree_len = len(texttree)
        root_map = etree.Element("map")
        #node_one = etree.SubElement(root_map, "node")
        #node_one.set("TEXT", "XML mm from string")
        #root_et = etree.ElementTree(root_map)
        xml = texttree_to_xml(i, root_map)
        return etree.ElementTree(xml)


    
    def compile(self, *, language_list_list = [] ):

        self.language_list_list = language_list_list
        
        # create list of TAXONOMY xml nodes
        self.dict_of_all_taxonomy_xlm_nodes = {}
        self.search_mm_for_taxonomy_xml_nodes(self.mindmap_topnode, self.dict_of_all_taxonomy_xlm_nodes)
        # print(self.dict_of_all_taxonomy_xlm_nodes)
    
        # for each TAXONOMY node: allocate a GenDifS_taxonomy object 
        self.taxonomies_by_ID = {}
        self.taxonomies_by_name = {}
        for xml_node_ID, xml_node in self.dict_of_all_taxonomy_xlm_nodes.items():
            t = GenDifS_taxonomy(xml_node_ID, xml_node, language_list_list = self.language_list_list )
            
            # taxonomies by ID
            self.taxonomies_by_ID[xml_node_ID] = t
            
            # taxonomies by name
            # t.tree.context is the root node of each taxonomy tree, i.e. the node with "TAXONOMY"
            t_name = t.tree.context["taxonomy_name"]
            if t_name in self.taxonomies_by_name:
                print(f"WARNING 62: multiple use of taxonomy name {t_name}; using ID {xml_node_ID} instead.")
                t_name = xml_node_ID
            self.taxonomies_by_name[t_name] = t

        # collect code
        # for each taxonomy: for each GenDifsNode: for each language
        self.code = {}
        for taxonomy_name, taxonomy in self.taxonomies_by_name.items():
            taxonomy_code = { gdn_ID: gdn.molecule_per_language.code for gdn_ID, gdn in taxonomy.dict_of_all_gdn.items() }
            self.code[taxonomy_name] = taxonomy_code
        # self.languages = { lang for taxdict in self.code.values()  for codedict in taxdict.values() for lang in codedict.keys()  }
        self.languages = { l for taxonomy in self.taxonomies_by_ID.values() for l in taxonomy.languages }

        # def collect_rdflib_graphs(self, language_list = []):
        ##for taxonomy_name, taxonomy in self.taxonomies_by_name.items():
        #    taxonomy.GenDifS_taxonomy_collect_rdflib_graphs(language_list)

    
    def describe_mindmap(self):
        """Mindmap metadata: #nodes etc."""
        print(f"{self.mindmap_topnode.get('TEXT')=}")
        print(f"{len(self.mindmap_xml.getroot().xpath('.//node'))=} nodes")
        if self.languages != None:
            print(f"{self.languages=}")

    def describe_taxonomies(self, indent=True):
        for t in self.taxonomies_by_ID.values():
            t.describe()

    def content_taxonomies(self, indent=True):
        for t in self.taxonomies_by_ID.values():
            t.describe_contexts(indent)

    # Identify the xml nodes which define a taxonomy.
    # Each taxonomy will be translated into its own standalone named RDF graph.
    # All taxonomies together contribute to an RDF dataset.
    # The default graph contains the metadata of the whole ensemble.
    
    def search_mm_for_taxonomy_xml_nodes(self, xml_node, dict_of_all_taxonomy_nodes):
        global NewID
        if xml_node.get('TEXT').startswith("TAXONOMY"):
            # print(f"search_gdntree_for_taxonomy: {xml_node.get('TEXT')=}")
                
            # it's essential to have unique node IDs
            ID = xml_node.get('ID', None)
            if ID is None or ID in dict_of_all_taxonomy_nodes:
                NewID += 1
                ID = f"NewID_{NewID}"
                xml_node.set("ID", ID)  # lxml elements set attributes via .set()
            dict_of_all_taxonomy_nodes[ID] = xml_node
    
        # we allow for nested TAXONOMY trees
        for child_xml_node in xml_node.findall('node'):
            button_cancel = 'button_cancel' in [ icon.attrib['BUILTIN'] for icon in child_xml_node.findall("icon") ]
            if not button_cancel:
                self.search_mm_for_taxonomy_xml_nodes(child_xml_node, dict_of_all_taxonomy_nodes)
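
A minimal usage sketch (the indented example taxonomy is invented for illustration): build a GenDifS object from a string instead of a Freeplane .mm file, compile it, and inspect the result.

# usage sketch; the tiny example taxonomy below is hypothetical
example_tree = """TAXONOMY animals
 animal
  ISA
   mammal
   bird"""

gd = GenDifS(from_string=example_tree, verb=1)
gd.compile()
gd.describe_mindmap()        # top node text, node count, languages
gd.describe_taxonomies()     # one summary line per taxonomy
print(gd.code["animals"])    # generated ttl molecules, per node and language
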
class GenDifS_taxonomy():
    """class managing a stand alone taxonomy; contains root element of a tree of gdn objects"""

    def __init__(self, xml_ID, xml_node, *, language_list_list = []):
        self.ID = xml_ID
        self.TEXT = xml_node.get("TEXT")
        self.language_list_list = language_list_list
        self.language_set = []
        
        # Dictionary { ID: GenDifS Node } of all GenDifS nodes of the current taxonomy. 
        self.dict_of_all_gdn = {} 

        # walk the tree several times, add more and more information

        # Step 1:
        # build tree of gdn (GenDifS nodes)
        self.tree = self.build_gdntree_from_mm(xml_node, None, 0, self.dict_of_all_gdn )
        # self.dict_of_all_gdn is a dict of all gdn (GenDifS node) objects

        # Step 2:
        # collect context information of each gdn to allow for context free code generation
        [ gdn.collect_context() for gdn in self.dict_of_all_gdn.values() ]

        # Step 3:
        # generate ttl code
        [ gdn.generate_molecule_per_language() for gdn in self.dict_of_all_gdn.values() ]

        self.languages = { lang for gdn in self.dict_of_all_gdn.values() for lang in gdn.molecule_dict.keys()  }
        
        # Dictionary of the rdflib Conjunctive Graphs (cjg)
        # one cjg for each language like RDFS, SKOS etc.
        self.rdf_graphs = {}
        self.rdf_graphs_ttl = {}
        
        self.default_namespaces_dict = {
            "ex": "<http://example.net/namespace/ex#>" ,  # examples of OWL A-Box
            "cpt": "<http://example.net/namespace/cpt#>" ,    # concept; instance of skos:concept
            "sheet": "<http://example.net/namespace/sheet#>" ,    # instances from Excel-Sheet
            "": "<http://example.net/namespace/default#>" , # OWL T-Box
            
            "owl": "<http://www.w3.org/2002/07/owl#>" ,
            "rdf": "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>" ,
            "xml": "<http://www.w3.org/XML/1998/namespace>" ,
            "xsd": "<http://www.w3.org/2001/XMLSchema#>" ,
            "rdfs": "<http://www.w3.org/2000/01/rdf-schema#>" ,
            "skos": "<http://www.w3.org/2004/02/skos/core#>" ,
            "gendifs": "<http://jbusse.de/gendifs#>" ,
        }
        
        ttl_prefixes =    lambda dct: "\n".join([f"@prefix {key}: {val} ." for key, val in dct.items()])
        sparql_prefixes = lambda dct: "\n".join([f"PREFIX  {key}: {val}  " for key, val in dct.items()])            
            
        self.ttl_namespaces = ttl_prefixes(self.default_namespaces_dict)
        self.sparql_namespaces = sparql_prefixes(self.default_namespaces_dict)

        # generate one rdf graph per language set
        if self.language_list_list == []:
            self.language_set = [ [ l ] for l in self.languages ]
        else:
            self.language_set = []
            for l in self.language_list_list:
                if isinstance(l, str):
                    self.language_set.append( [ l ] )
                else:
                    self.language_set.append( l )
        
        for ls in self.language_set:
            graph_name = "_".join(ls)
            self.rdf_graphs[graph_name] = rdflib.Graph()
            for gdn in self.dict_of_all_gdn.values():
                for l in ls:   # l ... language
                    ttl = gdn.molecule_dict.get(l, "")
                    if len(ttl) > 0:
                        if verbose >= 3:
                            print(f"INFO 200 {gdn.text}\n{ttl}\n\n")
                        try:
                            self.rdf_graphs[graph_name].parse(data=self.ttl_namespaces + ttl, format="ttl")
                        except Exception:
                            if verbose >= 1:
                                print(f"WARNING 220: RDFLIB complains about")
                                print(ttl,"\n")
            self.rdf_graphs_ttl[graph_name] = self.rdf_graphs[graph_name].serialize()
                

    def build_gdntree_from_mm(self, xml_node, parent_gdn, depth, dict_of_all_gdn):
        """build tree of gdn (GenDifS nodes); store result in self.dict_of_all_gdn"""
        
        # allocate new instance of gdn (gdn = GenDifS Node)
        if parent_gdn == None: # we are called from __init__
            this_gdn = gdn_differentia(xml_node, depth=depth, parent_gdn=None )
        elif isinstance(parent_gdn, gdn_differentia):
            this_gdn = gdn_taxon(xml_node, depth=depth, parent_gdn=parent_gdn )
        elif isinstance(parent_gdn, gdn_taxon):  # NEW 2024-09-19
            if xml_node.get('TEXT', '').startswith('ATT'):
                this_gdn = gdn_attribute(xml_node, depth=depth, parent_gdn=parent_gdn ) # 2024-09-19: TBD
            else:
                this_gdn = gdn_differentia(xml_node, depth=depth, parent_gdn=parent_gdn )
        else:
            print("ERROR 294: 'else' should not be selected")
            
        # add current gdn to dict_of_all_gdn
        dict_of_all_gdn[this_gdn.id] = this_gdn
        
        for child_xml_node in xml_node.findall('node'):
            button_cancel = 'button_cancel' in [ icon.attrib['BUILTIN'] for icon in child_xml_node.findall("icon") ]
            taxonomy_node = child_xml_node.get('TEXT', '').startswith('TAXONOMY')

            if not ( button_cancel or taxonomy_node ):
                this_gdn.children_gdn_list.append( self.build_gdntree_from_mm(child_xml_node, this_gdn, depth+1, dict_of_all_gdn) )
        return this_gdn


    def augment(self, abox):

        
        def instances(graph, category):
            #q = ns_sparql + f"""SELECT ?i
            q = f"""SELECT ?i
            WHERE {{
                ?i a {category} .
            }}"""
            return { r[0].encode() for r in graph.query(q) }

        
        if not "RDFS_OWL" in self.rdf_graphs:
            print(f"249, {self=}: 'RDFS_OWL' not in {self.rdf_graphs=}")
            return None

        self.g_augment_before = rdflib.Graph()
        self.g_augment_before.parse( data=self.rdf_graphs["RDFS_OWL"].serialize() )
        try:
            self.g_augment_before.parse(data=abox)
        except Exception:
            if verbose >= 1:
                print("WARNING 258: RDFLIB complains about abox, augmentation failed")
            return
            
        if verbose >= 2:
            print(f"augment() 264: {len(self.g_augment_before)=}", end=", ")

        self.g_augment = rdflib.Graph()
        self.g_augment.parse( data=self.g_augment_before.serialize() )
        
        owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples = False).expand(self.g_augment)
        if verbose >= 2:
            print(f"{len(self.g_augment)=}")
        
        for gdn in self.dict_of_all_gdn.values():
            #print(f"261: {gdn.entity_context}")
            gdn.augment_genus_species(self.g_augment)
            
    def describe(self):
        print(f"{self.ID} {self.TEXT}: #{len(self.dict_of_all_gdn)} GenDifS nodes;\n{self.languages=};\n{self.rdf_graphs.keys()=}")

    def describe_contexts(self, indent=True):
        """walk tree and describe nodes"""

        def describe_contexts_walk(this_gdn, indent):  
            this_gdn.describe(indent=indent)
            for child_gdn in this_gdn.children_gdn_list:
                describe_contexts_walk(child_gdn,indent)

        describe_contexts_walk(self.tree, indent)
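
A hedged sketch of augment(): it needs the combined graph "RDFS_OWL", so compile with language_list_list=[["RDFS", "OWL"]]; the tiny A-Box below is invented for illustration.

# hypothetical sketch: classify a small hand-written A-Box against the taxonomy
gd = GenDifS(from_string="TAXONOMY animals\n animal\n  ISA\n   mammal\n   bird", verb=1)
gd.compile(language_list_list=[["RDFS", "OWL"]])
taxonomy = gd.taxonomies_by_name["animals"]

abox = """
@prefix :   <http://example.net/namespace/default#> .
@prefix ex: <http://example.net/namespace/ex#> .
ex:lassie a :mammal .
ex:nemo   a :animal .
"""
taxonomy.augment(abox)
for node in taxonomy.dict_of_all_gdn.values():
    print(node.id, node.augment_set)   # 'edge': genus instances not covered by any species
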
class gdn(): # gdn ... GenDifS Node
    def __init__(self, xml_node, *,
                 parent_gdn = None,
                 depth = None
                ):
        self.id = xml_node.get('ID')
        self.text = xml_node.get('TEXT')
        self.depth = depth
        self.parent_gdn = parent_gdn
        self.xml_node = xml_node
        self.children_gdn_list=[]
        self.python_type = self.__class__.__name__

        #self.dct = self.string_to_dict_of_entities(self.text)
        self.dct, self.entities = self.string_to_dict_of_entities(self.text)

        self.gendifs_type = self.gendifs_dct_type()
        
        self.context = {}
        self.context["id"] = self.id

        self.entity_context = {}
        self.entity_context["id"] = self.id
        
        self.molecule_per_language = None # pointer to instance of class GenDifS_molecule_per_language
        self.molecule_dict = {} # return value of  molecule_per_language.compile(), i.e. code generation

        self.augment_set = {}
        
    def __str__(self):
        """string representation of parsed XML Element node/@TEXT"""
        return f"{self.id} ({self.python_type}) {self.text}\n {self.molecule_dict}"

    def gendifs_dct_type(self):
        dl = list(self.dct.keys())
        if dl and dl[0] == "default": dl.pop(0)
        return dl[0] if len(dl) >= 1 else None

    def string_to_dict_of_entities(self, string): 
        """parse a string like `ONE text one TWO some other text`
        into a a dict like `{"ONE": "text_one", "TWO": "some_other_text" }`
        """
        currect_token = "default" # since we accept only isupper() tokens from user input, there is no conflict
        dct = string2dict( string )
        if len(dct["default"]) == 0:
            dct.pop("default")

        dct2 = { k: "_".join(v) for k,v in dct.items() }
        
        dct3 = { k: Entity( self, "_".join(v) ) for k,v in dct.items() }
        if verbose >= 3: print(f"318: {dct3=}")

        return dct2, dct3

    def generate_molecule_per_language(self):
        self.molecule_per_language = GenDifS_molecule_per_language(self.context, self.entity_context)
        self.molecule_dict = self.molecule_per_language.compile()
class Entity():

    def __init__(self, source_gdn, text ):

        self.source_gdn = source_gdn
        self.text = text
        self.it_is_a_uri = len(text) > 0 and text[0] == '<' and text[-1] == '>'

    def __str__(self):
        return self.text

    def __repr__(self):
        return f"""Entity(@id='{self.source_gdn.id}', @text='{self.text}')"""

    # create category curie
    def c(self, *, prefix = "", reverse = False):
        return self.text if self.it_is_a_uri else prefix + ":" +  self.uri_to_string(self.text)

    # create example, i.e. an instance 
    def e(self, id, *, prefix = "ex", reverse = False):
        myid = self.source_gdn.id if id == None else id
        if self.it_is_a_uri:
            r = prefix + ":" + self.uri_to_string(self.text[1:-1])
            if id != None: r += "_" + myid
        else:
            r = prefix + ":" + self.uri_to_string(self.text)
            if id != None: r += "_" + myid
        return r

    def uri_to_string(self, uri):
        return "".join([ c if c.isalnum() else "_" for c in self.text  ])
class gdn_differentia(gdn):
    
    def __init__(self, xml_node, *, parent_gdn=None, depth=None):
        super().__init__(xml_node, parent_gdn=parent_gdn, depth=depth)
    
    def describe(self, *, indent = False):
        d = self.depth if indent else 0
        print(f"{d*'  '}differentia {self.gendifs_type}: {self.dct} ")
        print(f"{d*'  '}{self.context=}")
        print(f"{d*'  '}{self.molecule_per_language=}")
        print(f"{d*'  '}{(20-d)*'-'}")

    def collect_context(self):

        #print("367:", self.gendifs_type)

        match self.gendifs_type:

            case 'TAXONOMY':
                self.context["codeclass"] = "TAXONOMY"
                self.context["taxonomy_name"]   = self.dct.get("TAXONOMY") if self.dct.get("TAXONOMY") != '' else self.id
                self.context["graph"] = f"graph_{self.id}"
                self.context["species_list"] = [ child.dct["default"] for child in self.children_gdn_list ] 

                self.entity_context["codeclass"] = "TAXONOMY"
                self.entity_context["taxonomy_name"]   = self.entities.get("TAXONOMY") if self.entities.get("TAXONOMY") != '' else Entity(self, self.id)
                self.entity_context["graph"] = Entity(self, f"graph_{self.id}")
                self.entity_context["species_list"] = [ child.entities["default"] for child in self.children_gdn_list ] 

            case 'ISA':
                self.context["codeclass"] = "ISA"
                self.context["genus"] = "anything" if self.parent_gdn == None else self.parent_gdn.dct["default"]
                self.context["species_list"] = [ child.dct["default"] for child in self.children_gdn_list ] 

                self.entity_context["codeclass"] = "ISA"
                self.entity_context["genus"] = Entity(self, "anything") if self.parent_gdn == None else self.parent_gdn.entities["default"]
                self.entity_context["species_list"] = [ child.entities["default"] for child in self.children_gdn_list ] 

            case 'BY':
                self.context["codeclass"] = "BY"
                self.context["genus"] = "anything" if self.parent_gdn == None else self.parent_gdn.dct["default"]
                self.context["by"]   = self.dct.get("BY", "anyproperty")
                self.context["frm"] = self.dct.get("FROM", "anything")
                self.context["species_list"] = [ child.dct["default"] for child in self.children_gdn_list ] 

                self.entity_context["codeclass"] = "BY"
                self.entity_context["genus"] = Entity(self, "anything") if self.parent_gdn == None else self.parent_gdn.entities["default"]
                self.entity_context["by"]   = self.entities.get("BY", "anyproperty")
                self.entity_context["frm"] = self.entities.get("FROM", "anything")
                self.entity_context["species_list"] = [ child.entities["default"] for child in self.children_gdn_list ] 

            case 'SUP':
                """SUP: species, list of genus1, genus2 (to generate intersection)"""
                self.context["codeclass"] = "SUP"
                self.context["species"] = "anything" if self.parent_gdn == None else self.parent_gdn.dct.get("default", "context_SUP_NONE_species")
                self.context["genus_list"] = [ child.dct["default"] for child in self.children_gdn_list ] 

                self.entity_context["codeclass"] = "SUP"
                self.entity_context["species"] = Entity(self, "anything") if self.parent_gdn == None else self.parent_gdn.entities["default"]
                self.entity_context["genus_list"] = [ child.entities["default"] for child in self.children_gdn_list ] 

                
            case 'SOME':
                try:
                    parent_differentia_gendifs_type = self.parent_gdn.parent_gdn.gendifs_type
                except:
                    parent_differentia_gendifs_type = None
                
                match parent_differentia_gendifs_type:
                    case 'BY':
                        self.context["codeclass"] = "BY_SOME"
                        self.context["genus"] = self.parent_gdn.parent_gdn.parent_gdn.dct.get("default", "anything")
                        self.context["by"]   = self.parent_gdn.parent_gdn.dct.get("BY", "anyproperty")
                        self.context["frm"] = self.parent_gdn.parent_gdn.dct.get("FROM", "anything")
                        self.context["some"] = self.dct.get("SOME", "anything")
                        self.context["species"] = self.parent_gdn.dct.get("default", "context_BY_SOME_NONE_species")

                        self.entity_context["codeclass"] = "BY_SOME"
                        self.entity_context["genus"] = self.parent_gdn.parent_gdn.parent_gdn.entities.get("default", Entity(self, "anything"))
                        self.entity_context["by"]   = self.parent_gdn.parent_gdn.entities.get("BY", Entity(self, "anyproperty"))
                        self.entity_context["frm"] = self.parent_gdn.parent_gdn.entities.get("FROM", Entity(self, "anything"))
                        self.entity_context["some"] = self.entities.get("SOME", Entity(self, "anything"))
                        self.entity_context["species"] = self.parent_gdn.entities.get("default", Entity(self, "context_BY_SOME_NONE_species"))

                    case 'ISA':
                        self.context["codeclass"] = "BY_SOME"
                        self.context["genus"] = self.parent_gdn.parent_gdn.parent_gdn.dct.get("default", "anything")
                        self.context["by"] = "anyproperty"
                        self.context["frm"] = "anything"
                        self.context["some"] = self.dct.get("SOME", "anything")
                        self.context["species"] = self.parent_gdn.dct.get("default", "context_ISA_SOME_NONE_species")

                        self.entity_context["codeclass"] = "BY_SOME"
                        self.entity_context["genus"] = self.parent_gdn.parent_gdn.parent_gdn.entities.get("default", Entity(self, "anything"))
                        self.entity_context["by"] = Entity(self, "anyproperty")
                        self.entity_context["frm"] = Entity(self, "anything")
                        self.entity_context["some"] = self.entities.get("SOME", Entity(self, "anything"))
                        self.entity_context["species"] = self.parent_gdn.entities.get("default", Entity(self, "context_ISA_SOME_NONE_species"))

                    case _:
                        self.context["codeclass"] = "UNCONDITIONAL_SOME"
                        self.context["some"] = self.dct.get("SOME", "anything")
                        self.context["species"] = self.parent_gdn.dct.get("default", "context_MINIMAL_SOME_NONE_species")

                        self.entity_context["codeclass"] = "UNCONDITIONAL_SOME"
                        self.entity_context["some"] = self.entities.get("SOME", Entity(self, "anything"))
                        self.entity_context["species"] = self.parent_gdn.entities.get("default", Entity(self, "context_MINIMAL_SOME_NONE_species"))

            case 'REL':
                ...

            case 'DEF':
                self.context["codeclass"] = "DEF"
                self.context["genus"] = self.parent_gdn.dct["default"]
                self.context["source"] = self.dct.get("DEF", "this_taxonomy")
                self.context["text_list"] = [ child.text for child in self.children_gdn_list ] 

                self.entity_context["codeclass"] = "DEF"
                self.entity_context["genus"] = self.parent_gdn.entities["default"]
                self.entity_context["source"] = self.entities.get("DEF", Entity(self, "this_taxonomy"))
                self.entity_context["text_list"] = [ child.text for child in self.children_gdn_list ] 

            case _:
                if verbose >= 1:
                    print(f'Error 297: collect_context(): unknown gendifs_type {self.gendifs_type} in node "{self.text}"') 


    # "edge": the borderline between the data lake and dry land, approached from the lake side
    def augment_genus_species(self, augment_graph):

        def instances(graph, category):
            q = f"""SELECT ?i WHERE {{ ?i a {category} . }}"""
            return { r[0].n3() for r in graph.query(q) }
        
        if not( "genus" in self.entity_context and "species_list" in self.entity_context):
            return None
        
        genus_instances = instances(augment_graph, self.entity_context["genus"].c())

        # for each species: all instances
        species_instances_dict = {}
        for s in self.entity_context["species_list"]:
            instance_set = instances( augment_graph, s.c() )
            if s.c() not in species_instances_dict:
                species_instances_dict[ s.c() ] = set()
            species_instances_dict[ s.c() ].update(instance_set)
        if verbose >= 3:
            print(f"557 {species_instances_dict=}")


        # edge
        union_of_species_instances = set()
        for s in species_instances_dict.values():
            #print("562", s)
            union_of_species_instances.update( s )
        edge = genus_instances - union_of_species_instances
        if verbose >= 2:
            print(f"546: {self.id=}, {edge=})")
        self.augment_set["edge"] = edge

        # DISJOINT
        non_disjoint = {}
        k = list( species_instances_dict.keys() )
        for i in range(len(k)):
            for j in range(i+1, len(k)):
                isct = species_instances_dict[ k[i] ].intersection(species_instances_dict[ k[j] ])
                if len(isct) > 0:
                    non_disjoint[ (k[i], k[j]) ] = isct
        self.augment_set["non_disjoint"] = non_disjoint

                
        
class gdn_taxon(gdn):
    
    def __init__(self, xml_node, *, parent_gdn=None, depth=None):
        super().__init__(xml_node, parent_gdn=parent_gdn, depth=depth)

    def describe(self, *, indent = False):
        d = self.depth if indent else 0
        print(f"{d*'  '}{self.dct.get('default')}")
        print(f"{d*'  '}{self.context=}")
        print(f"{d*'  '}{(20-d)*'='}")

    def collect_context(self):
        self.context["codeclass"] = "taxon"
        self.context["species"] = self.dct.get("default")

        self.entity_context["codeclass"] = "taxon"
        self.entity_context["species"] = self.entities.get("default")

    def augment_genus_species(self, augment_graph):
        ...
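
Each gdn ends up with a dict of generated ttl molecules, one per language; the generator class follows in the next section. A small sketch (the example taxonomy is invented) that inspects these per-node molecules:

# sketch: inspect the per-node, per-language ttl molecules
gd = GenDifS(from_string="TAXONOMY demo\n animal\n  ISA\n   mammal\n   bird", verb=1)
gd.compile()
for node in gd.taxonomies_by_name["demo"].dict_of_all_gdn.values():
    print(node.id, node.python_type, node.gendifs_type, "->", list(node.molecule_dict.keys()))
    print(node.molecule_dict.get("RDFS", ""), "\n")
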

generate ttl code

class GenDifS_molecule_per_language():
    """A set (technically implemented in a dict 'code') of related molecules.
    A molecule is a set of ttl statements, a SPARQL-query etc.
    We generate one molecule per language (like RDFS, OWL, SKOS, SPARQL) etc. """
    
    def __init__(self, context, entity_context, add_context=False):
        self.context = context
        self.entity_context = entity_context
        #print(f"486: {self.context=}")
        self.code = {} # the dict of molecules
        self.restklassen = {}
        self.ConjunctiveGraph = rdflib.ConjunctiveGraph()
        self.inspect_inferencing = None
        

    def compile(self):
        
        codeclass2fn = {
            'taxon': self.code_taxon,
            'TAXONOMY': self.code_TAXONOMY,
            'ISA': self.code_ISA,
            'BY': self.code_BY_FROM, 
            'BY_SOME': self.code_BY_FROM_SOME,
            'SUP': self.code_SUP,
##            'ISA_SOME': self.code_ISA_SOME,
            'UNCONDITIONAL_SOME': self.code_UNCONDITIONAL_SOME,
            'DEF': self.code_DEF,
#            'REL': self.code_REL
        }

        codeclass = self.context.get("codeclass") # we get a string
        if codeclass == None:
            if verbose >= 1:
                print(f"WARNING 510: {self.context=}")
        elif codeclass not in codeclass2fn:
            if verbose >= 1:
                print(f"WARNING 513: {codeclass=} not in codeclass2fn")
        else:
            # call the code generating function
            
            # OLD: codeclass2fn[codeclass](**self.context)
            codeclass2fn[codeclass](**self.entity_context)
        return self.code

    #
    # the "meat" of the library: the functions which actualle generate code
    #


    def code_taxon(self, *, id, codeclass, species):

        # these definitions here are redundant:
        # most other snippets also define the involved classes

        # -----------------
        lang = "RDFS"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"""{ species.c() } a owl:Class . """ ]
        self.code[lang] = "\n".join(ttl)
        
        # lang = "RDFS"
        # ttl = [ f"""# {id} {lang}""" ,
        #         f""":{species} a owl:Class . """ ]
        # # ttl += [ f""":{species} rdfs:label ... .""" ]
        # self.code[lang] = "\n".join(ttl)



        # -----------------
        lang = "RDFStest"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"""{ species.e(id) } a { species.c() } . """ ]
        self.code[lang] = "\n".join(ttl)

        # lang = "RDFStest"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f"""ex:{species}_{id} a :{species} . """ ]
        # self.code[lang] = "\n".join(ttl)


        
                
    def code_TAXONOMY(self, *, id, codeclass, taxonomy_name, graph, species_list):

        # lang = "DE"
        # ttl =  [ f"""# {id} {lang}""" ]
        # ttl += [ f""":{taxonomy_name} :explanation "Ein Spiel in einer Phantasiewelt, es heißt *{taxonomy_name}*. Die Dinge in der Phantasiewelt haben abstrakte Bezeichner, die wir nicht verstehen, nur der Spieleleiter kennt sie. Wir beschreiben Dinge durch Attribute und ihre Werte. Wir wollen eine Taxonomie rekonstruieren. Unser abstraktes Datenmodell ist OWL-RL in der Serialisierung Turtle." . """ ]
        # self.code[lang] = "\n".join(ttl)

        # TBD: more detailed ontology metadata etc.

        # lang = "OWL"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f""":{taxonomy_name} a owl:Ontology; rdfs:label "{taxonomy_name}" .""" ]
        # self.code[lang] = "\n".join(ttl)

        # lang = "RDFS"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f""":{taxonomy_name} rdfs:label "RDFS class tree {taxonomy_name}" .""" ]
        # self.code[lang] = "\n".join(ttl)


        
        # -----------------
        lang = "SKOS"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"{taxonomy_name.e(None, prefix='cpt')} a skos:ConceptScheme ." ]
        ttl += [ f"{ species.e(None, prefix='cpt') } a skos:Concept ;\n   skos:inScheme {taxonomy_name.e(None, prefix='cpt')} ." for species in species_list ]
        self.code[lang] = "\n".join(ttl)
        
        # lang = "SKOS"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f"cpt:{taxonomy_name} a skos:ConceptScheme ." ]
        # ttl += [ f"cpt:{species} a skos:Concept ;\n   skos:inScheme cpt:{taxonomy_name} ." for species in species_list ]
        # self.code[lang] = "\n".join(ttl)

        
    def code_DEF(self, *, id, codeclass, genus, source, text_list):
        ...
        
        # # class definition
        # lang = "DE"
        # ttl =  [ f"""# {id} {lang}""" ]
        # ttl += [ f""":{genus} :explanation "{text} ({source})" .""" for text in text_list ]
        # self.code[lang] = "\n".join(ttl)
        
        # lang = "SKOS"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f"""{ genus.e(None, prefix='cpt') } a skos:Concept .""" ]
        # ttl += [ f"""{ genus.e(None, prefix='cpt') } skos:scopeNote "{text}  ({source})" .""" for text in text_list ]
        # self.code[lang] = "\n".join(ttl)

        
    def code_ISA(self, *, id, codeclass, genus, species_list, augment = None ):

        # lang = "DE"
        # ttl =  [ f"""# {id} {lang}""" ]
        # ttl += [ f""":{species} :explanation "*{reverse(species)}* ist eine Subklasse von *{reverse(genus)}* ." .""" for species in species_list ]
        # self.code[lang] = "\n".join(ttl)


        # -----------------
        # class definitions
        lang = "RDFS"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"{ genus.c()   } a owl:Class ." ]
        ttl += [ f"{ species.c() } a owl:Class;\n   rdfs:subClassOf { genus.c() } ." for species in species_list ]
        self.code[lang] = "\n".join(ttl)

        # # --- SPARQL

        # self.edge_genus_species = [  f"{ genus.c() }" ,  [ f"{ species.c() }" for species in species_list ] ]
        # print(f"668: {self=}, {self.edge_genus_species=}")
        
        # if augment:
        #     if "disjoint" in augment.split():
        #         lang = "disjoint"
        #         ...

        


        

        
        # # class definitions
        # lang = "RDFS"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f":{genus} a owl:Class ." ]
        # ttl += [ f":{species} a owl:Class;\n   rdfs:subClassOf :{genus} ." for species in species_list ]
        # self.code[lang] = "\n".join(ttl)


        # -----------------
        # # allocate example instances plus test condition

        lang = "RDFStest"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"{ genus.e(id)   } a { genus.c() } ." ]
        ttl += [ f"{ species.e(id) } a { species.c() } ." for species in species_list ]
        ttl += [ f"{ species.e(id) } gendifs:classify_similar {genus.e(id)} ."  for species in species_list  ] # bottom up inferencing "ISA"
        self.code[lang] = "\n".join(ttl)


        # lang = "RDFStest"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f"ex:{genus}_{id} a :{genus} ." ]
        # ttl += [ f"ex:{species}_{id} a :{species} ." for species in species_list ]
        # ttl += [ f"ex:{species}_{id} gendifs:classify_similar ex:{genus}_{id} ."  for species in species_list  ] # bottom up inferencing "ISA"
        # self.code[lang] = "\n".join(ttl)

        # self.inspect_inferencing = Test_classify_similar(id, self.code["RDFStest"] + self.code["RDFS"] )



        # -----------------
        lang = "SKOS"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"{ genus.e(None, prefix='cpt') } a skos:Concept ." ]
        ttl += [ f"{ species.e(None, prefix='cpt') } a skos:Concept ;\n   skos:broader { genus.e(None, prefix='cpt') } ." for species in species_list ]
        self.code[lang] = "\n".join(ttl)

        # lang = "SKOS"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f"cpt:{genus} a skos:Concept ." ]
        # ttl += [ f"cpt:{species} a skos:Concept ;\n   skos:broader cpt:{genus} ." for species in species_list ]
        # self.code[lang] = "\n".join(ttl)


        
        
    def code_BY_FROM(self, *, id, codeclass, genus, by, frm, species_list):

        # -----------------
        ttl = [ f"{ species.c() } a owl:Class;\n   rdfs:subClassOf { genus.c() } ." for species in species_list ]
        self.code["RDFS"] = "\n".join(ttl)

        restriction_name_technically = f"BY_{id}_restriction"
        restriction_name_userfriendly = f"BY_{ by.e(None) }_FROM_{ frm.e(None) }"
                
        intersection_name_technically = f"BY_{id}_intersection"
        intersection_name_userfriendly = f"{restriction_name_userfriendly}_INTERSECT_{ genus.e(None) }"


        # gd06
                # # restriction name
                # restriction_name_technically = f"BY_{differentia_ID}_restriction"
                # restriction_name_userfriendly = f"BY_{BY}_SOME_{BYSOME}"

                # comment = f"all objects which have a property *{BY}* with a value from *{BYSOME}* ."
                # ttlCode = ( f""":{restriction_name_technically}\n   a owl:Class ;\n"""
                # f"""   rdfs:label "{restriction_name_userfriendly}" ;\n"""
                # f"""   owl:equivalentClass [ a owl:Restriction ;\n"""
                # f"""      owl:onProperty :{BY} ;\n"""
                # f"""      owl:someValuesFrom :{BYSOME} ] .\n""")


                
        lang = "OWL"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"""{ by.c() } rdf:type owl:ObjectProperty .""" ,
                 f""":{restriction_name_technically} a owl:class ;""" ,
                 f"""   a owl:Restriction ;""" ,
                 f"""   rdfs:label "{restriction_name_userfriendly}" ;""" ,
                 f"""   owl:onProperty { by.c() } ;""" ,
                 f"""   owl:someValuesFrom { frm.c() } .""" ]

        ttl += [ f""":{intersection_name_technically} a owl:class ;""" ,
                 f"""   rdfs:label "{intersection_name_userfriendly}" ;""" ,
                 f"""   rdfs:subClassOf { genus.c() } ;""" ,
                 f"""   owl:intersectionOf (:{restriction_name_technically} { genus.c() } ) .""" ]
        self.code[lang] = "\n".join(ttl)

        # allocate example instance plus test condition

        lang = "OWLtest"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"""ex:{restriction_name_technically}_example a :{restriction_name_technically} .""" ,
                 f"""{ genus.e(id) } a { genus.c() } ;""" ,
                 f"""   { by.c() } { frm.e(id) } ;""" ,
                 f"""   gendifs:classify_similar ex:{restriction_name_technically}_example .""" ,
                 f"""{frm.e(id) } a { frm.c() } .""" ]
        self.code[lang] = "\n".join(ttl)

        self.inspect_inferencing = Test_classify_similar(id, self.code["OWL"] + self.code["OWLtest"] )
        


        # ttl = [ f":{species} a owl:Class;\n   rdfs:subClassOf :{genus} ." for species in species_list ]
        # self.code["RDFS"] = "\n".join(ttl)

        # restriction_name_technically = f"BY_{id}_restriction"
        # restriction_name_userfriendly = f"BY_{by}_FROM_{frm}"
        
        # intersection_name_technically = f"BY_{id}_intersection"
        # intersection_name_userfriendly = f"{restriction_name_userfriendly}_INTERSECT_{genus}"

        # lang = "OWL"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f""":{by} rdf:type owl:ObjectProperty .""" ,
        #          f""":{restriction_name_technically} a owl:class ;""" ,
        #          f"""   a owl:Restriction ;""" ,
        #          f"""   rdfs:label "{restriction_name_userfriendly}" ;""" ,
        #          f"""   owl:onProperty :{by} ;""" ,
        #          f"""   owl:someValuesFrom :{frm} .""" ]

        # ttl += [ f""":{intersection_name_technically} a owl:class ;""" ,
        #          f"""   rdfs:label "{intersection_name_userfriendly}" ;""" ,
        #          f"""   rdfs:subClassOf :{genus} ;""" ,
        #          f"""   owl:intersectionOf (:{restriction_name_technically} :{genus} ) .""" ]
        # self.code[lang] = "\n".join(ttl)

        # # allocate example instance plus test condition

        # lang = "OWLtest"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f"""ex:{restriction_name_technically}_example a :{restriction_name_technically} .""" ,
        #          f"""ex:{genus}_{id} a :{genus} ;""" ,
        #          f"""   :{by} ex:{frm}_{id} ;""" ,
        #          f"""   gendifs:classify_similar ex:{restriction_name_technically}_example .""" ,
        #          f"""ex:{frm}_{id} a :{frm} .""" ]
        # self.code[lang] = "\n".join(ttl)

        # self.inspect_inferencing = Test_classify_similar(id, self.code["OWL"] + self.code["OWLtest"] )
        

        
        
    def code_BY_FROM_SOME(self, *, id, codeclass, genus, by, frm, some, species):

        # ----------
        lang = "DE"
        ttl =  [ f"""# {id} {lang}""" ]
        ttl += [ f"""{ species.c() } :explanation "*{ species.c(reverse = True) }* ist eine Subklasse der Klasse *{ genus.c(reverse = True) }* ." . """  ]
        # ttl += [ f"""{ genus.c() } :explanation "*{ genus.c(reverse = True) }* hat das Attribut *{reverse(by)}* ." .  """ ]
        ttl += [ f"""{ some.c() } :explanation "*{reverse(some)}* ist eine Subklasse der Klasse *{reverse(frm)}* ." .  """ ]
        # ttl += [ f"""{ genus.c() } :explanation "Wenn ein Exemplar der Klasse *{ genus.c(reverse = True) }* ein Attribut *{reverse(by)}* mit einem Wert aus der Klasse  *{reverse(some)}* hat: Dann ist dieses Exemplar aus *{ genus.c(reverse = True) }* auch ein Exemplar aus  *{{ species.c(reverse = True) }}*." .  """ ]
        # ttl += [ f"""{ genus.c() } :explanation "Ein *{ genus.c(reverse = True) }*, dessen Attribut *{reverse(by)}* auf ein Element aus der Klasse *{reverse(some)}* zeigt, ist ein *{{ species.c(reverse = True) }}*." .  """ ]
        ttl += [ f"""{ genus.c() } :explanation "Ein *{ genus.c(reverse = True) }*, das für das Attribut *{reverse(by)}* einen Wert aus  *{reverse(some)}* hat, ist ein *{{ species.c(reverse = True) }}*." .  """ ]
        self.code[lang] = "\n".join(ttl)


        lang = "RDFS"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"{ species.c() } a owl:Class;\n   rdfs:subClassOf { genus.c() } ." ]
        ttl += [ f"""{ some.c() } rdfs:subClassOf { frm.c() } .""" ] 
        self.code[lang] = "\n".join(ttl)

        restriction_name_technically = f"BY_SOME_{id}_restriction"
        restriction_name_userfriendly = f"BY_{ by.e(None) }_FROM_{ frm.e(None) }_SOME_{some.e(None) }"
                
        intersection_name_technically = f"BY_SOME_{id}_intersection"
        intersection_name_userfriendly = f"({restriction_name_userfriendly})_INTERSECT_{ genus.e(None) }"

        lang = "OWL"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"""{ by.c() } rdf:type owl:ObjectProperty .""" ,
                 f""":{restriction_name_technically} a owl:class ;""" ,
                 f"""   a owl:Restriction ;""" ,
                 f"""   rdfs:label "{restriction_name_userfriendly}" ;""" ,
                 f"""   owl:onProperty { by.c() } ;""" ,
                 f"""   owl:someValuesFrom { some.c() } .""" ]                                     # some

        ttl += [ f""":{intersection_name_technically} a owl:class ;""" ,
                 f"""   rdfs:label "{intersection_name_userfriendly}" ;""" ,
                 f"""   rdfs:subClassOf { species.c() } ;""" ,                                     
                 f"""   owl:intersectionOf (:{restriction_name_technically} { genus.c() } ) .""" ] # change e.g. to species to test Test_classify_similar

        self.code[lang] = "\n".join(ttl)

        # allocate example instance plus test condition

        lang = "OWLtest"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"""{ species.e(id) } a { species.c() } .""" ,
                 f"""{ genus.e(id) }   a { genus.c() } ;""" ,
                 f"""   { by.c() } { some.e(id) } ;""" ,
                 f"""   gendifs:classify_similar { species.e(id) } .""" ,
                 f"""{ some.e(id) } a { some.c() } .""" ]
        self.code[lang] = "\n".join(ttl)

        self.inspect_inferencing = Test_classify_similar(id, self.code["RDFS"] + self.code["OWL"] + self.code["OWLtest"] )




        # lang = "DE"
        # ttl =  [ f"""# {id} {lang}""" ]
        # ttl += [ f""":{species} :explanation "*{reverse(species)}* ist eine Subklasse der Klasse *{reverse(genus)}* ." . """  ]
        # # ttl += [ f""":{genus} :explanation "*{reverse(genus)}* hat das Attribut *{reverse(by)}* ." .  """ ]
        # ttl += [ f""":{some} :explanation "*{reverse(some)}* ist eine Subklasse der Klasse *{reverse(frm)}* ." .  """ ]
        # # ttl += [ f""":{genus} :explanation "Wenn ein Exemplar der Klasse *{reverse(genus)}* ein Attribut *{reverse(by)}* mit einem Wert aus der Klasse  *{reverse(some)}* hat: Dann ist dieses Exemplar aus *{reverse(genus)}* auch ein Exemplar aus  *{reverse(species)}*." .  """ ]
        # # ttl += [ f""":{genus} :explanation "Ein *{reverse(genus)}*, dessen Attribut *{reverse(by)}* auf ein Element aus der Klasse *{reverse(some)}* zeigt, ist ein *{reverse(species)}*." .  """ ]
        # ttl += [ f""":{genus} :explanation "Ein *{reverse(genus)}*, das für das Attribut *{reverse(by)}* einen Wert aus  *{reverse(some)}* hat, ist ein *{reverse(species)}*." .  """ ]
        # self.code[lang] = "\n".join(ttl)


        # lang = "RDFS"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f":{species} a owl:Class;\n   rdfs:subClassOf :{genus} ." ]
        # ttl += [ f""":{some} rdfs:subClassOf :{frm} .""" ] 
        # self.code[lang] = "\n".join(ttl)

        # restriction_name_technically = f"BY_SOME_{id}_restriction"
        # restriction_name_userfriendly = f"BY_{by}_FROM_{frm}_SOME_{some}"
        
        # intersection_name_technically = f"BY_SOME_{id}_intersection"
        # intersection_name_userfriendly = f"({restriction_name_userfriendly})_INTERSECT_{genus}"

        # lang = "OWL"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f""":{by} rdf:type owl:ObjectProperty .""" ,
        #          f""":{restriction_name_technically} a owl:class ;""" ,
        #          f"""   a owl:Restriction ;""" ,
        #          f"""   rdfs:label "{restriction_name_userfriendly}" ;""" ,
        #          f"""   owl:onProperty :{by} ;""" ,
        #          f"""   owl:someValuesFrom :{some} .""" ]                                     # some

        # ttl += [ f""":{intersection_name_technically} a owl:class ;""" ,
        #          f"""   rdfs:label "{intersection_name_userfriendly}" ;""" ,
        #          f"""   rdfs:subClassOf :{species} ;""" ,                                     
        #          f"""   owl:intersectionOf (:{restriction_name_technically} :{genus} ) .""" ] # change e.g. to species to test Test_classify_similar

        # self.code[lang] = "\n".join(ttl)

        # # allocate example instance plus test condition

        # lang = "OWLtest"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f"""ex:{species}_{id} a :{species} .""" ,
        #          f"""ex:{genus}_{id}   a :{genus} ;""" ,
        #          f"""   :{by} ex:{some}_{id} ;""" ,
        #          f"""   gendifs:classify_similar ex:{species}_{id} .""" ,
        #          f"""ex:{some}_{id} a :{some} .""" ]
        # self.code[lang] = "\n".join(ttl)

        # self.inspect_inferencing = Test_classify_similar(id, self.code["RDFS"] + self.code["OWL"] + self.code["OWLtest"] )



        
    def code_UNCONDITIONAL_SOME(self, *, id, codeclass, some, species):

        # ----------
        restriction_name_technically = f"SOME_{id}_restriction"
        restriction_name_userfriendly = f"SOME_{ some.e(None) }"

        lang = "DE"
        ttl =  [ f"""# {id} {lang}""" ]
        ttl += [ f"""{ species.c() } :explanation "Ein beliebiges Ding, bei dem ein beliebiges Attribut einen Wert aus *{ some.c(reverse = True) }* hat, ist ein *{ species.c(reverse = True) }*." .  """ ]
        self.code[lang] = "\n".join(ttl)

        lang = "RDFS"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"""{ species.c() } a owl:Class .""" ] 
        self.code[lang] = "\n".join(ttl)

        lang = "OWL"
        ttl =  [ f"""# {id} {lang}""" ,
                 f""":{restriction_name_technically} a owl:class ;""" ,
                 f"""   rdfs:subClassOf { species.c() } ;""" ,
                 f"""   a owl:Restriction ;""" ,
                 f"""   rdfs:label "{restriction_name_userfriendly}" ;""" ,
                 f"""   owl:onProperty owl:topObjectProperty ;""" ,
                 f"""   owl:someValuesFrom { some.c() } .""" ]                                     # some
 
        self.code[lang] = "\n".join(ttl)

        # allocate example instance plus test condition

        lang = "OWLtest"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"""{ species.e(id) } a { species.c() } .""" ,
                 f"""ex:thing_{id}  a owl:Thing ;""" ,
                 f"""   owl:topObjectProperty { some.e(id) } ;""" ,
                 f"""   gendifs:classify_similar { species.e(id) } .""" ,
                 f"""{ some.e(id) } a { some.c() } .""" ]
        self.code[lang] = "\n".join(ttl)

        self.inspect_inferencing = Test_classify_similar(id, self.code["RDFS"] + self.code["OWL"] + self.code["OWLtest"] )



        # restriction_name_technically = f"SOME_{id}_restriction"
        # restriction_name_userfriendly = f"SOME_{some}"

        # lang = "DE"
        # ttl =  [ f"""# {id} {lang}""" ]
        # ttl += [ f""":{species} :explanation "Ein beliebiges Ding, bei dem ein beliebiges Attribut einen Wert aus *{reverse(some)}* hat, ist ein *{reverse(species)}*." .  """ ]
        # self.code[lang] = "\n".join(ttl)

        # lang = "RDFS"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f""":{species} a owl:Class .""" ] 
        # self.code[lang] = "\n".join(ttl)

        # lang = "OWL"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f""":{restriction_name_technically} a owl:class ;""" ,
        #          f"""   rdfs:subClassOf :{species} ;""" ,
        #          f"""   a owl:Restriction ;""" ,
        #          f"""   rdfs:label "{restriction_name_userfriendly}" ;""" ,
        #          f"""   owl:onProperty owl:anyProperty ;""" ,
        #          f"""   owl:someValuesFrom :{some} .""" ]                                     # some
        
        # self.code[lang] = "\n".join(ttl)

        # # allocate example instance plus test condition

        # lang = "OWLtest"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f"""ex:{species}_{id} a :{species} .""" ,
        #          f"""ex:thing_{id}   a owl:thing ;""" ,
        #          f"""   owl:anyProperty ex:{some}_{id} ;""" ,
        #          f"""   gendifs:classify_similar ex:{species}_{id} .""" ,
        #          f"""ex:{some}_{id} a :{some} .""" ]
        # self.code[lang] = "\n".join(ttl)

        # self.inspect_inferencing = Test_classify_similar(id, self.code["RDFS"] + self.code["OWL"] + self.code["OWLtest"] )

        

    def code_SUP(self, *, id, codeclass, species, genus_list):
        """intersection of all genus in genus_list is subclass of secies"""

        """
        Stallion
           SUP
              Horse
              Male
        """
        # bottom up inferencing:
        # if x is a stallion, then x is a horse; and x is a male

        lang = "RDFS"
        ttl =  [ f"""# {id} {lang}""" ,
                 f"{ species.c() } a owl:Class ." ]
        ttl += [ f"{ genus.c() } a owl:Class.\n   { species.c() } rdfs:subClassOf { genus.c() } ." for genus in genus_list ]
        self.code[lang] = "\n".join(ttl)

        # top down inferencing:
        # if x is a horse and x is a male, then x is a stallion

        intersection_name_technically = f"SUP_{id}"
        sup_genus_join = " ".join([ f"{ g.c() }" for g in genus_list ])
        
        lang = "OWL"
        ttl =  [ f"""# {id} {lang}""" ,
                 f""":{intersection_name_technically} a owl:class ;""" ,
                 f"""   rdfs:subClassOf { species.c() } ;""" ,   # comment out to test self.inspect_inferencing
                 f"""   owl:intersectionOf ({ sup_genus_join } ) .""" ]
        self.code[lang] = "\n".join(ttl)

        # we have one single example instance of the species:
        # it is an element of all genus classes involved, but NOT an element of the intersection
        # (which in turn is a subclass of the species in question).

        lang = "OWLtest"
        ttl =  [ f"""# {id} {lang}""" ]
        ttl += [ f"""{ species.e(id) } a { genus.c() } .""" for genus in genus_list ]  # species genus is correct here
        ttl += [ f"""{ species.e(id) }_2 a { species.c() } .""" ]
        ttl += [ f"""{ species.e(id) } gendifs:classify_similar { species.e(id) }_2 .""" ]        
        self.code[lang] = "\n".join(ttl)

        self.inspect_inferencing = Test_classify_similar(id, self.code["OWL"] + self.code["OWLtest"] )





        # # bottom up inferencing:
        # # if x is a stallion, then x is a horse; and x is a male

        # lang = "RDFS"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f":{species} a owl:Class ." ]
        # ttl += [ f":{genus} a owl:Class.\n   :{species} rdfs:subClassOf :{genus} ." for genus in genus_list ]
        # self.code[lang] = "\n".join(ttl)

        # # top down inferencing:
        # # if x is a horse and x is a male, then x is a stallion"""

        # intersection_name_technically = f"SUP_{id}"
        # intersection_name_userfriendly = " AND ".join(genus_list)
        # sup_genus_join = " ".join([ f":{g}" for g in genus_list ])

        # lang = "OWL"
        # ttl =  [ f"""# {id} {lang}""" ,
        #          f""":{intersection_name_technically} a owl:class ;""" ,
        #          f"""   rdfs:label "{intersection_name_userfriendly}" ;""" ,
        #          f"""   rdfs:subClassOf :{species} ;""" ,   # comment out to test self.inspect_inferencing
        #          f"""   owl:intersectionOf ({sup_genus_join} ) .""" ]
        # self.code[lang] = "\n".join(ttl)

        # # we have one example of species:
        # # the species example is element of all genus classes involved, but NOT element of the intersection
        # # (which in turn is a sublass of the species in question).

        # lang = "OWLtest"
        # ttl =  [ f"""# {id} {lang}""" ]
        # ttl += [ f"""ex:{species}_{id} a :{genus} .""" for genus in genus_list ]  # species genus is correct here
        # ttl += [ f"""ex:{species}_{id} gendifs:classify_similar ex:{species}_{id}_2 .""" ]        
        # ttl += [ f"""ex:{species}_{id}_2 a :{species} .""" ]
        # self.code[lang] = "\n".join(ttl)

        # self.inspect_inferencing = Test_classify_similar(id, self.code["OWL"] + self.code["OWLtest"] )


    



    
    def code_ISA_SOME(self, *, id, codeclass, genus, by, some, species_list): ...

    def code_REL(self, *, id, codeclass, by, some, species_list): ...

        

    def describe(self):
        print(f"languages: {self.code.keys()}")
class Test_classify_similar:
        def __init__(self, id, ttl, *, source = None ):
        
            self.ns = """
            PREFIX : <urn:default#>
            PREFIX ex: <urn:ex#>
            PREFIX cpt: <urn:cpt#>
            PREFIX gendifs: <urn:gendifs#>

            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            """

            self.test_errors = None
            self.source = source
            self.ttl = ttl

            self.test_results = {}
            
            self.g1 = rdflib.Graph()
            #print(f"""684: =====\n{self.ns + self.ttl}""")
            self.g1.parse(data = self.ns + self.ttl)

            categories = {}
            
            # classes of example1 before inferencing

            q1 = """
            SELECT ?example1 ?example2 ?category1
            WHERE {
            ?example1 gendifs:classify_similar ?example2 .
            ?example1 a ?category1 .
            }"""

            c = "cat1_before_inferencing"

            for example1, example2, category1 in self.g1.query(q1):  # g1, q1, cat1
                ex_1 = example1.encode()
                ex_2 = example2.encode()
                cat_1 = category1.encode()

                if (ex_1, ex_2, c) not in categories:
                    categories[(ex_1, ex_2, c)] = set()
                categories[(ex_1, ex_2, c)].add(cat_1)

            # classes of example2 before inferencing
            
            q2 = """
            SELECT ?example1 ?example2 ?category2
            WHERE {
            ?example1 gendifs:classify_similar ?example2 .
            ?example2 a ?category2 .
            }"""

            c = "cat2_before_inferencing"
            for example1, example2, category2 in self.g1.query(q2):  # g1, q2, cat2
                ex_1 = example1.encode()
                ex_2 = example2.encode()
                cat_2 = category2.encode()

                if (ex_1, ex_2, c) not in categories:
                    categories[(ex_1, ex_2, c)] = set()
                categories[(ex_1, ex_2, c)].add(cat_2)

                
            self.g2 = rdflib.Graph()
            self.g2.parse(data = self.ns + self.ttl)
            owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples = False).expand(self.g2)
            
            # After inferencing, instance `ex_1` is an instance of potentially many classes `cat_1`.
            # (The same holds for `ex_2`, by the way.)
            # One of these many classes must also be the (ideally unique) class `cat_2`
            # in which `ex_2` was already contained *before* inferencing.

            c = "cat1_after_inferencing"
            for example1, example2, category1 in self.g2.query(q1):  # g2, q1, cat1
                ex_1 = example1.encode()
                ex_2 = example2.encode()
                cat_1 = category1.encode()

                if (ex_1, ex_2, c) not in categories:
                    categories[(ex_1, ex_2, c)] = set()
                categories[(ex_1, ex_2, c)].add(cat_1)

            d2 = {}
            for (x,y,z), v in categories.items():
                if (x,y) not in d2:
                    d2[(x,y)] = {}
                if z not in d2[(x,y)]:
                    d2[(x,y)][z] = {}
                d2[(x,y)][z] = v

            for tuple, sets in d2.items():
                #debug
                #print(f"\n{tuple}:")
                #for k,v in sets.items():
                #    print(f"   {k}: {v}")

                self.test_results[tuple] = {}
                # are cat1 and cat2 not empty, are they instances of some classes?
                self.test_results[tuple]["cat1 not empty"] = sets["cat1_before_inferencing"] != set()
                self.test_results[tuple]["cat2 not empty"] = sets["cat2_before_inferencing"] != set()

                # are cat1_before_inferencing and cat2_before_inferencing disjoint?
                self.test_results[tuple]["disjoint before"] = sets["cat1_before_inferencing"].intersection(sets["cat2_before_inferencing"]) == set() 
                
                # has cat2_before_inferencing become a subset of cat1_after_inferencing?
                self.test_results[tuple]["subset after"] = sets["cat2_before_inferencing"].intersection(sets["cat1_after_inferencing"]) == sets["cat2_before_inferencing"]

                self.test_results[tuple]["ok"] = all( [ boolean for boolean in self.test_results[tuple].values() ] )

                if self.test_results[tuple]["ok"]:
                    if verbose >= 3:
                        print(f"""766: Test_classify_similar OK, {id=}, {tuple=}.""")
                else:
                    if verbose >= 1:
                        print(f"""WARNING 767: Test_classify_similar FAILED, {id=},\n   {self.test_results=}""")
                        print("-- self.g1 --")
                        print(self.g1.serialize())
                        print("-- self.g2 --")
                        print(self.g2.serialize())
                        print("-------")

            self.test_ok = all( [ v["ok"] for v in self.test_results.values() ] )
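
A hedged, standalone check of this test harness with a hand-written snippet (class and instance names are invented): after OWL-RL closure ex:mammal_1 should also be typed as :animal, so the test passes.

# hypothetical standalone check of Test_classify_similar
demo_ttl = """
:mammal rdfs:subClassOf :animal .
ex:mammal_1 a :mammal ;
    gendifs:classify_similar ex:animal_1 .
ex:animal_1 a :animal .
"""
t = Test_classify_similar("demo", demo_ttl)
print(t.test_ok)       # expected: True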

 
                    

def reverse(x, *, rev = True) -> str:
    return x
    # if rev and x[0].isupper():
    #     ret = x[::-1].title()
    # elif rev:
    #     ret =  x[::-1].lower()
    # else:
    #     ret = x
    # return ret


def focus(focus_curie_list, ttl): # keep only the ttl paragraphs which mention one of the given curies
    result = "\n\n".join( [ paragraph for paragraph in ttl.split("\n\n") \
                         if any( [ focus_curie in paragraph for focus_curie in focus_curie_list ] ) ] )
    return result
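
A quick illustration of focus(): keep only the paragraphs of a ttl string (the snippet below is hypothetical) that mention one of the given curies.

ttl_example = (":animal a owl:Class .\n\n"
               ":mammal a owl:Class ;\n   rdfs:subClassOf :animal .\n\n"
               "cpt:demo a skos:ConceptScheme .")
print(focus([":mammal", "cpt:"], ttl_example))   # keeps the 2nd and 3rd paragraph
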


def string2dict(string):
    currect_token = "default" 
    dct = { currect_token: [] }
    for token in string.split():
        if token.isupper():
            currect_token = token
            if currect_token in dct:
                if not "warning" in dct:
                    dct["warning"] = []
                dct["warning"].append(f"warning from nodetext_to_dict: multiple facets {currect_token}")
            else:
                dct[currect_token] = []
        else:
            dct[currect_token].append(token)
    return dct
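
For illustration, string2dict splits a node text into facets keyed by the ALL-CAPS tokens (the example input is hypothetical):

print(string2dict("water bird BY habitat FROM environment"))
# -> {'default': ['water', 'bird'], 'BY': ['habitat'], 'FROM': ['environment']}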