From 8fbd1f16845ae58dea1415e280345b89576a92da Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Thu, 21 Jun 2018 13:35:25 +0200 Subject: [PATCH 01/27] Test for evaluate() and mutate_fix_var() --- tests/__init__.py | 0 tests/test_fv_eval.py | 116 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_fv_eval.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_fv_eval.py b/tests/test_fv_eval.py new file mode 100644 index 0000000..a924070 --- /dev/null +++ b/tests/test_fv_eval.py @@ -0,0 +1,116 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +"""test_mutate_fix_var und test_evaluate einmal davor und +einmal über die results aus mutate_fix_var +""" + +import logging +from collections import OrderedDict + +import SPARQLWrapper +import rdflib +from rdflib import URIRef +from rdflib import Variable + +from config import SPARQL_ENDPOINT +from gp_learner import evaluate +from gp_learner import mutate_fix_var +from gp_learner import update_individuals +from gp_query import calibrate_query_timeout +from gp_query import query_time_hard_exceeded +from gp_query import query_time_soft_exceeded +from graph_pattern import GraphPattern +from graph_pattern import SOURCE_VAR +from graph_pattern import TARGET_VAR +from ground_truth_tools import get_semantic_associations +from ground_truth_tools import split_training_test_set +from gtp_scores import GTPScores +from os import getenv + +logger = logging.getLogger(__name__) + +dbp = rdflib.Namespace('http://dbpedia.org/resource/') + + +v = Variable('v') + +gp = GraphPattern([ + (SOURCE_VAR, v, TARGET_VAR), + ]) + +ground_truth_pairs_ = [ + (dbp['Berlin'],dbp['Germany']), + (dbp['Hamburg'],dbp['Germany']), + (dbp['Kaiserslautern'],dbp['Germany']), + (dbp['Wien'],dbp['Austria']), + (dbp['Insbruck'],dbp['Austria']), + (dbp['Salzburg'],dbp['Austria']), + (dbp['Paris'],dbp['France']), + (dbp['Lyon'],dbp['France']), + (dbp['Amsterdam'],dbp['Netherlands']), + (dbp['Brussels'],dbp['Belgium']), + (dbp['Washington'],dbp['United_States']), + (dbp['Madrid'],dbp['Spain']), + (dbp['Prague'],dbp['Czech_Republic']), + (dbp['Bern'],dbp['Switzerland']), +] + +gtp_scores_ = GTPScores(ground_truth_pairs_) + +sparql = SPARQLWrapper.SPARQLWrapper(getenv('SPARQL_ENDPOINT','http://dbpedia.org/sparql')) +try: + timeout = max(5, calibrate_query_timeout(sparql)) # 5s for warmup +except IOError: + from nose import SkipTest + raise SkipTest( + "Can't establish connection to SPARQL_ENDPOINT:\n %s\n" + "Skipping tests in\n %s" % (SPARQL_ENDPOINT, __file__)) + +def test_eval(): + res, matching_node_pairs, gtp_precisions = evaluate(sparql, timeout, gtp_scores_, gp, run=0, gen=0) + logger.log( + logging.INFO, + 'Results are:\n' + 'remaining_gain: %d\n' + 'score: %d\n' + 'gain: %d\n' + 'fm: %d\n' + 'avg_res_length: %d\n' + 'sum_gt_matches: %d\n' + 'pattern_length: %d\n' + 'pattern_vars:: %d\n' + 'qtime_exceeded: %d\n' + 'query_time: %d\n' + % res + ) + +def test_mut_fv(): + res = mutate_fix_var(sparql,timeout,gtp_scores_,gp,rand_var=v) + for gp_ in res: + logger.info(gp_) + +def test_eval_list(): + list = mutate_fix_var(sparql,timeout,gtp_scores_,gp,rand_var=v) + for gp_ in list: + res, matching_node_pairs, gtp_precisions = evaluate(sparql, timeout, gtp_scores_, gp_, run=0, gen=0) + logger.log( + logging.INFO, + 'For %s\n' + '%s', gp_, + 'the results 
are:\n' + 'remaining_gain: %d\n' + 'score: %d\n' + 'gain: %d\n' + 'fm: %d\n' + 'avg_res_length: %d\n' + 'sum_gt_matches: %d\n' + 'pattern_length: %d\n' + 'pattern_vars:: %d\n' + 'qtime_exceeded: %d\n' + 'query_time: %d\n' + %res + ) + From db78db407ca26c718653e044eee0f9db7ae2eeb7 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Mon, 25 Jun 2018 14:23:17 +0200 Subject: [PATCH 02/27] modified test_fv_eval.py --- .gitignore | 3 + graph_pattern.py | 1 + requirements.txt | 4 +- tests/__init__.py | 0 tests/test_fv_eval.py | 171 ++++++++++++++++++++++++++---------------- 5 files changed, 113 insertions(+), 66 deletions(-) delete mode 100644 tests/__init__.py diff --git a/.gitignore b/.gitignore index 172c006..3b99ee0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ venv/ # ignore py compiled etc. files *.pyc *.pyo + +# ignore .idea +.idea/ diff --git a/graph_pattern.py b/graph_pattern.py index a483c88..0a23c68 100644 --- a/graph_pattern.py +++ b/graph_pattern.py @@ -846,6 +846,7 @@ def to_count_var_over_values_query(self, var, vars_, values, limit): 'triples': self._sparql_triples_part(' '), 'limit': limit, } + print(res) return self._sparql_prefix(textwrap.dedent(res)) def to_dict(self): diff --git a/requirements.txt b/requirements.txt index d61d2fd..4a02904 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,8 +11,8 @@ nose>=1.3.7 numpy>=1.12.1 objgraph>=3.1.0 requests>=2.16.5 -#rdflib>=4.2.1 -git+git://github.com/RDFLib/rdflib@master#egg=rdflib +rdflib>=4.2.1 +#git+git://github.com/RDFLib/rdflib@master#egg=rdflib scikit-learn>=0.18.1 scipy>=0.19.0 scoop>=0.7.1.1 diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_fv_eval.py b/tests/test_fv_eval.py index a924070..ed501c6 100644 --- a/tests/test_fv_eval.py +++ b/tests/test_fv_eval.py @@ -9,6 +9,7 @@ import logging from collections import OrderedDict +from os import getenv import SPARQLWrapper import rdflib @@ -28,39 +29,84 @@ from ground_truth_tools import get_semantic_associations from ground_truth_tools import split_training_test_set from gtp_scores import GTPScores -from os import getenv +from serialization import print_graph_pattern logger = logging.getLogger(__name__) dbp = rdflib.Namespace('http://dbpedia.org/resource/') +owl = rdflib.Namespace('http://www.w3.org/2002/07/owl#') + +a = Variable('a') +b = Variable('b') +c = Variable('c') +d = Variable('d') +e = Variable('e') +f = Variable('f') +v = Variable('v') +w = Variable('w') + +sameAs = owl['sameAs'] + +gp_1 = GraphPattern([ + (SOURCE_VAR, v, TARGET_VAR) +]) + +gp_2 = GraphPattern([ + (SOURCE_VAR, v, TARGET_VAR), + (TARGET_VAR, w, SOURCE_VAR) +]) + +gp_3 = GraphPattern([ + (SOURCE_VAR, a, b), + (b, c, d), + (d, e, TARGET_VAR) +]) + +gp_4 = GraphPattern([ + (SOURCE_VAR, a, b), + (b, c, d), + (TARGET_VAR, e, d) +]) + +ground_truth_pairs_1 = [ + (dbp['Berlin'], dbp['Germany']), + (dbp['Hamburg'], dbp['Germany']), + (dbp['Kaiserslautern'], dbp['Germany']), + (dbp['Wien'], dbp['Austria']), + (dbp['Insbruck'], dbp['Austria']), + (dbp['Salzburg'], dbp['Austria']), + (dbp['Paris'], dbp['France']), + (dbp['Lyon'], dbp['France']), + (dbp['Amsterdam'], dbp['Netherlands']), + (dbp['Brussels'], dbp['Belgium']), + (dbp['Washington'], dbp['United_States']), + (dbp['Madrid'], dbp['Spain']), + (dbp['Prague'], dbp['Czech_Republic']), + (dbp['Bern'], dbp['Switzerland']), +] +ground_truth_pairs_2 = get_semantic_associations() +ground_truth_pairs_2, _ = split_training_test_set(ground_truth_pairs_2) 
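+# NOTE: the [1:10] slice below keeps only 9 of the pairs and drops the first
+# one; presumably just a small subsample so the test queries stay fast.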
+ground_truth_pairs_2 = ground_truth_pairs_2[1:10] -v = Variable('v') +ground_truth_pairs_3 = [ + (dbp['Barrister'], dbp['Law']), + (dbp['Christ'], dbp['Jesus']), + (dbp['Pottage'], dbp['Soup']) + ] -gp = GraphPattern([ - (SOURCE_VAR, v, TARGET_VAR), - ]) - -ground_truth_pairs_ = [ - (dbp['Berlin'],dbp['Germany']), - (dbp['Hamburg'],dbp['Germany']), - (dbp['Kaiserslautern'],dbp['Germany']), - (dbp['Wien'],dbp['Austria']), - (dbp['Insbruck'],dbp['Austria']), - (dbp['Salzburg'],dbp['Austria']), - (dbp['Paris'],dbp['France']), - (dbp['Lyon'],dbp['France']), - (dbp['Amsterdam'],dbp['Netherlands']), - (dbp['Brussels'],dbp['Belgium']), - (dbp['Washington'],dbp['United_States']), - (dbp['Madrid'],dbp['Spain']), - (dbp['Prague'],dbp['Czech_Republic']), - (dbp['Bern'],dbp['Switzerland']), +ground_truth_pairs_4 = [ + (dbp['Motorrad_(disambiguation)'], dbp['Bmw_motorcycle']), + (dbp['Horse'], dbp['Saddle']) ] -gtp_scores_ = GTPScores(ground_truth_pairs_) +gtp_scores_1 = GTPScores(ground_truth_pairs_1) +gtp_scores_2 = GTPScores(ground_truth_pairs_2) +gtp_scores_3 = GTPScores(ground_truth_pairs_3) +gtp_scores_4 = GTPScores(ground_truth_pairs_4) -sparql = SPARQLWrapper.SPARQLWrapper(getenv('SPARQL_ENDPOINT','http://dbpedia.org/sparql')) +sparql = SPARQLWrapper.SPARQLWrapper( + getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql')) try: timeout = max(5, calibrate_query_timeout(sparql)) # 5s for warmup except IOError: @@ -69,48 +115,45 @@ "Can't establish connection to SPARQL_ENDPOINT:\n %s\n" "Skipping tests in\n %s" % (SPARQL_ENDPOINT, __file__)) -def test_eval(): - res, matching_node_pairs, gtp_precisions = evaluate(sparql, timeout, gtp_scores_, gp, run=0, gen=0) - logger.log( - logging.INFO, - 'Results are:\n' - 'remaining_gain: %d\n' - 'score: %d\n' - 'gain: %d\n' - 'fm: %d\n' - 'avg_res_length: %d\n' - 'sum_gt_matches: %d\n' - 'pattern_length: %d\n' - 'pattern_vars:: %d\n' - 'qtime_exceeded: %d\n' - 'query_time: %d\n' - % res - ) - -def test_mut_fv(): - res = mutate_fix_var(sparql,timeout,gtp_scores_,gp,rand_var=v) + +def test_eval(gtp_scores, gp): + res, matching_node_pairs, gtp_precisions = evaluate( + sparql, timeout, gtp_scores, gp, run=0, gen=0) + update_individuals([gp], [(res, matching_node_pairs, gtp_precisions)]) + logger.info(gp.fitness) + + +def test_mut_fv(gtp_scores, gp, r=None): + res = mutate_fix_var(sparql, timeout, gtp_scores, gp, rand_var=r) for gp_ in res: logger.info(gp_) -def test_eval_list(): - list = mutate_fix_var(sparql,timeout,gtp_scores_,gp,rand_var=v) - for gp_ in list: - res, matching_node_pairs, gtp_precisions = evaluate(sparql, timeout, gtp_scores_, gp_, run=0, gen=0) - logger.log( - logging.INFO, - 'For %s\n' - '%s', gp_, - 'the results are:\n' - 'remaining_gain: %d\n' - 'score: %d\n' - 'gain: %d\n' - 'fm: %d\n' - 'avg_res_length: %d\n' - 'sum_gt_matches: %d\n' - 'pattern_length: %d\n' - 'pattern_vars:: %d\n' - 'qtime_exceeded: %d\n' - 'query_time: %d\n' - %res - ) +def test_eval_list(gtp_scores, gp, r=None): + mfv_res = mutate_fix_var(sparql, timeout, gtp_scores, gp, rand_var=r) + for gp_ in mfv_res: + res, matching_node_pairs, gtp_precisions = evaluate( + sparql, timeout, gtp_scores, gp_, run=0, gen=0) + update_individuals([gp_], [(res, matching_node_pairs, gtp_precisions)]) + print_graph_pattern(gp_, print_matching_node_pairs=0) + return mfv_res + + +def test_eval_list_double(gtp_scores, gp, r_1=None, r_2=None): + # testing double execution of mutate_fix_var() on gp + res = test_eval_list(gtp_scores, gp, r_1) + gtp_scores.update_with_gps(res) + res_list = 
list(res) + for gp in res: + res_ = test_eval_list(gtp_scores, gp, r_2) + for gp_ in res_: + res_list.append(gp_) + gtp_scores.update_with_gps(res_list) + for gp in res_list: + print_graph_pattern(gp, print_matching_node_pairs=0) + + +if __name__ == '__main__': + #test_eval_list_double(gtp_scores_1, gp_2) + + test_eval_list_double(gtp_scores_4, gp_4, a, e) From 07d1f39277f3e0217956aa9e1a594109cb99a41b Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Tue, 10 Jul 2018 14:54:07 +0200 Subject: [PATCH 03/27] Test to find one hop patterns with SAMPLE-Queries --- graph_pattern.py | 47 +++++++- tests/test_fv_eval.py | 25 +++- tests/test_sampling.py | 263 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 329 insertions(+), 6 deletions(-) create mode 100644 tests/test_sampling.py diff --git a/graph_pattern.py b/graph_pattern.py index 0a23c68..635fec1 100644 --- a/graph_pattern.py +++ b/graph_pattern.py @@ -636,6 +636,52 @@ def to_sparql_select_query( res = textwrap.dedent(res) return self._sparql_prefix(res) + def to_sparql_select_sample_query( + self, + values, + projection=None, + limit=None, + sample_var=None + ): + """Generates a SPARQL select sample query from the graph pattern. + + Examples: + TODO + + Args: + values: a dict mapping a variable tuple to a list of binding tuples, + e.g. {(v1, v2): [(uri1, uri2), (uri3, uri4), ...]} + projection: which variables to select on, by default all vars. + limit: integer to limit the result size + sample_var: the variable to sample over + """ + assert self.vars_in_graph, \ + "tried to get sparql for pattern without vars: %s" % (self,) + + if projection is None: + projection = sorted([v for v in self.vars_in_graph]) + + if sample_var is None: + sample_var = random.choice(projection) + logger.info(sample_var) + + projection.remove(sample_var) + + res = "SELECT %(samp)s %(proj)s WHERE {\n%(qpp)s}\n%(lim)s" % { + 'samp': (' SAMPLE(%s) as %s' % ( + ''.join(sample_var.n3()), + ''.join(sample_var.n3()) + )), + 'proj': ' '.join([v.n3() for v in projection]), + 'qpp': self._sparql_query_pattern_part( + values=values, + indent=' ', + ), + 'lim': ('LIMIT %d\n' % limit) if limit is not None else '', + } + res = textwrap.dedent(res) + return self._sparql_prefix(res) + def to_sparql_ask_query( self, bind=None, @@ -846,7 +892,6 @@ def to_count_var_over_values_query(self, var, vars_, values, limit): 'triples': self._sparql_triples_part(' '), 'limit': limit, } - print(res) return self._sparql_prefix(textwrap.dedent(res)) def to_dict(self): diff --git a/tests/test_fv_eval.py b/tests/test_fv_eval.py index ed501c6..3c847b6 100644 --- a/tests/test_fv_eval.py +++ b/tests/test_fv_eval.py @@ -8,11 +8,17 @@ """ import logging +from collections import defaultdict from collections import OrderedDict from os import getenv import SPARQLWrapper +from splendid import get_path +from splendid import time_func +import socket import rdflib +from rdflib import BNode +from rdflib import Literal from rdflib import URIRef from rdflib import Variable @@ -30,6 +36,7 @@ from ground_truth_tools import split_training_test_set from gtp_scores import GTPScores from serialization import print_graph_pattern +from utils import sparql_json_result_bindings_to_rdflib logger = logging.getLogger(__name__) @@ -68,6 +75,14 @@ (TARGET_VAR, e, d) ]) +gp_5 = GraphPattern([ + (SOURCE_VAR, a, c), + (TARGET_VAR, URIRef('http://dbpedia.org/ontology/thumbnail'), d), + (TARGET_VAR, URIRef('http://dbpedia.org/property/image'), b), + (c, URIRef('http://dbpedia.org/ontology/wikiPageWikiLink'), SOURCE_VAR), + 
(c, URIRef('http://purl.org/linguistics/gold/hypernym'), TARGET_VAR) +]) + ground_truth_pairs_1 = [ (dbp['Berlin'], dbp['Germany']), (dbp['Hamburg'], dbp['Germany']), @@ -87,7 +102,7 @@ ground_truth_pairs_2 = get_semantic_associations() ground_truth_pairs_2, _ = split_training_test_set(ground_truth_pairs_2) -ground_truth_pairs_2 = ground_truth_pairs_2[1:10] +ground_truth_pairs_2 = ground_truth_pairs_2[1:100] ground_truth_pairs_3 = [ (dbp['Barrister'], dbp['Law']), @@ -107,6 +122,7 @@ sparql = SPARQLWrapper.SPARQLWrapper( getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql')) +#sparql = SPARQLWrapper.SPARQLWrapper(SPARQL_ENDPOINT) try: timeout = max(5, calibrate_query_timeout(sparql)) # 5s for warmup except IOError: @@ -152,8 +168,7 @@ def test_eval_list_double(gtp_scores, gp, r_1=None, r_2=None): for gp in res_list: print_graph_pattern(gp, print_matching_node_pairs=0) - if __name__ == '__main__': - #test_eval_list_double(gtp_scores_1, gp_2) - - test_eval_list_double(gtp_scores_4, gp_4, a, e) + test_steps(ground_truth_pairs_2) + #values = {(SOURCE_VAR, TARGET_VAR): ground_truth_pairs_1} + #print(gp_1.to_sparql_select_sample_query(values)) diff --git a/tests/test_sampling.py b/tests/test_sampling.py new file mode 100644 index 0000000..c0afe08 --- /dev/null +++ b/tests/test_sampling.py @@ -0,0 +1,263 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +"""Tested das bauen von graph_pattern per gesampeltem finden von 1-hop wegen +und fix-var-mutation +""" + +import logging +from collections import defaultdict +from collections import OrderedDict +from os import getenv + +import SPARQLWrapper +from splendid import get_path +from splendid import time_func +import socket +import rdflib +from rdflib import BNode +from rdflib import Literal +from rdflib import URIRef +from rdflib import Variable + +from config import SPARQL_ENDPOINT +from gp_learner import evaluate +from gp_learner import mutate_fix_var +from gp_learner import update_individuals +from gp_query import calibrate_query_timeout +from gp_query import query_time_hard_exceeded +from gp_query import query_time_soft_exceeded +from graph_pattern import GraphPattern +from graph_pattern import SOURCE_VAR +from graph_pattern import TARGET_VAR +from ground_truth_tools import get_semantic_associations +from ground_truth_tools import split_training_test_set +from gtp_scores import GTPScores +from serialization import print_graph_pattern +from utils import sparql_json_result_bindings_to_rdflib + +logger = logging.getLogger(__name__) + +sparql = SPARQLWrapper.SPARQLWrapper(SPARQL_ENDPOINT) +#sparql = SPARQLWrapper.SPARQLWrapper( +# getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql')) +try: + timeout = max(5, calibrate_query_timeout(sparql)) # 5s for warmup +except IOError: + from nose import SkipTest + raise SkipTest( + "Can't establish connection to SPARQL_ENDPOINT:\n %s\n" + "Skipping tests in\n %s" % (sparql.endpoint, __file__)) + +dbp = rdflib.Namespace('http://dbpedia.org/resource/') +owl = rdflib.Namespace('http://www.w3.org/2002/07/owl#') + +a = Variable('a') +b = Variable('b') +c = Variable('c') +d = Variable('d') +e = Variable('e') +f = Variable('f') +v = Variable('v') +w = Variable('w') + +sameAs = owl['sameAs'] + +gp_1 = GraphPattern([ + (SOURCE_VAR, v, TARGET_VAR) +]) + +gp_2 = GraphPattern([ + (SOURCE_VAR, v, TARGET_VAR), + (TARGET_VAR, w, SOURCE_VAR) +]) + +gp_3 = GraphPattern([ + (SOURCE_VAR, a, b), + (b, c, d), + (d, e, TARGET_VAR) +]) + +gp_4 = 
GraphPattern([ + (SOURCE_VAR, a, b), + (b, c, d), + (TARGET_VAR, e, d) +]) + +gp_5 = GraphPattern([ + (SOURCE_VAR, a, c), + (TARGET_VAR, URIRef('http://dbpedia.org/ontology/thumbnail'), d), + (TARGET_VAR, URIRef('http://dbpedia.org/property/image'), b), + (c, URIRef('http://dbpedia.org/ontology/wikiPageWikiLink'), SOURCE_VAR), + (c, URIRef('http://purl.org/linguistics/gold/hypernym'), TARGET_VAR) +]) + +ground_truth_pairs_1 = [ + (dbp['Berlin'], dbp['Germany']), + (dbp['Hamburg'], dbp['Germany']), + (dbp['Kaiserslautern'], dbp['Germany']), + (dbp['Wien'], dbp['Austria']), + (dbp['Insbruck'], dbp['Austria']), + (dbp['Salzburg'], dbp['Austria']), + (dbp['Paris'], dbp['France']), + (dbp['Lyon'], dbp['France']), + (dbp['Amsterdam'], dbp['Netherlands']), + (dbp['Brussels'], dbp['Belgium']), + (dbp['Washington'], dbp['United_States']), + (dbp['Madrid'], dbp['Spain']), + (dbp['Prague'], dbp['Czech_Republic']), + (dbp['Bern'], dbp['Switzerland']), +] + +ground_truth_pairs_2 = get_semantic_associations() +ground_truth_pairs_2, _ = split_training_test_set(ground_truth_pairs_2) +ground_truth_pairs_2 = ground_truth_pairs_2[1:100] + +ground_truth_pairs_3 = [ + (dbp['Barrister'], dbp['Law']), + (dbp['Christ'], dbp['Jesus']), + (dbp['Pottage'], dbp['Soup']) + ] + +ground_truth_pairs_4 = [ + (dbp['Motorrad_(disambiguation)'], dbp['Bmw_motorcycle']), + (dbp['Horse'], dbp['Saddle']) +] + +gtp_scores_1 = GTPScores(ground_truth_pairs_1) +gtp_scores_2 = GTPScores(ground_truth_pairs_2) +gtp_scores_3 = GTPScores(ground_truth_pairs_3) +gtp_scores_4 = GTPScores(ground_truth_pairs_4) + + +def test_steps(gtps): + values = {(SOURCE_VAR, TARGET_VAR): gtps} + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, TARGET_VAR)]) + # SPARQL-Query die über eine Var aus gp1 random samplet. 
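+    # (i.e. a query that draws a random sample of bindings for b via gp1;
+    # the sampled b values then seed the gp2 query further below)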
+    # TODO: change the query so that it filters by count (see log.txt)
+    q = gp1.to_sparql_select_sample_query(values=values, limit=100)
+    logger.info(q)
+    t, q_res = run_query(q)
+    logger.info(q_res)
+    # Create b_list, in which the results for b are "stored"
+    # TODO: store everything, so the path can be retraced later
+    res_rows_path = ['results', 'bindings']
+    bind = sparql_json_result_bindings_to_rdflib(
+        get_path(q_res, res_rows_path, default=[])
+    )
+    b_list = []
+    for row in bind:
+        x = get_path(row, [b])
+        y = (x, )
+        b_list.append(y)
+    logger.info('orig query took %.4f s, result:\n%s\n', t, b_list)
+    b_list[:] = [b_l for b_l in b_list if not list_remove_bool(b_l[0])]
+    # Values for the next query: b_list
+    values = {(b, ): b_list}
+    # Query that randomly samples over a var from gp2, with values from b_list
+    q = gp2.to_sparql_select_sample_query(values=values, limit=5000)
+    logger.info(q)
+    t, q_res = run_query(q)
+    # Create target_list, in which the "found" targets are recorded
+    res_rows_path = ['results', 'bindings']
+    bind = sparql_json_result_bindings_to_rdflib(
+        get_path(q_res, res_rows_path, default=[])
+    )
+    target_list = []
+    for row in bind:
+        target_list.append(get_path(row, [TARGET_VAR]))
+    logger.info('orig query took %.4f s, result:\n%s\n', t, q_res)
+    # Create gtps_2, in which all gtps whose targets are contained in
+    # target_list are "stored"
+    gtps_2 = []
+    for target in target_list:
+        for gtp in gtps:
+            if target == gtp[1]:
+                gtps_2.append(gtp)
+    logger.info(gtps_2)
+
+    gp3 = GraphPattern([
+        (SOURCE_VAR, a, b),
+        (b, c, TARGET_VAR)
+    ])
+    gtp_scores = GTPScores(gtps)
+    gtp_scores2 = GTPScores(gtps_2)
+
+    # Fix the pattern over the found gtps
+    mfv2 = []
+    if len(gtps_2) > 1:
+        mfv2 = mutate_fix_var(sparql, timeout, gtp_scores2, gp3)
+
+    # run the found patterns through mutate_fix_var once more
+    mfv = []
+    for gp_mfv2 in mfv2:
+        mfv_res = mutate_fix_var(sparql, timeout, gtp_scores, gp_mfv2)
+        for gp_res in mfv_res:
+            mfv.append(gp_res)
+
+    # evaluate the patterns found this way
+    res_eval = eval_gp_list(gtp_scores, mfv)
+    return res_eval
+
+
+# Runs a given query (passed as a string) against the SPARQL endpoint
+def run_query(q):
+    try:
+        q_short = ' '.join((line.strip() for line in q.split('\n')))
+        sparql.setQuery(q_short)
+        cal = time_func(sparql.queryAndConvert)
+    except socket.timeout:
+        cal = (timeout, {})
+    except ValueError:
+        # e.g.
if the endpoint gives us bad JSON for some unicode chars + logger.info( + 'Could not parse result for query, assuming empty result...\n' + 'Query:\n%s\nException:', q, + exc_info=1, # appends exception to message + ) + cal = (timeout, {}) + return cal + + +# Checks if an found RDF-Term can be used as value in a new query +# (without conflicts) +def list_remove_bool(var): + if isinstance(var, Literal): + i_n3 = var.n3() + if len(i_n3) > 60: + return True + elif isinstance(var, BNode): + return True + # echt hässlich, aber die einzige Möglichkeit, die ich gesehen habe um + # keine Probleme mit dem Category:Cigarettes-Beispiel zu bekommen + # (siehe docs) + # TODO: Möglicherweise dafür sorgen, dass die nicht rausgeschmissen, + # sondern nur nicht mit prefix gekürzt werden + elif isinstance(var, URIRef): + return ':' in var[7:] + return False + + +# evaluates a given graph-pattern-list +def eval_gp_list(gtp_scores, gp_list): + for gp_l in gp_list: + res_ev = evaluate( + sparql, timeout, gtp_scores, gp_l, run=0, gen=0) + update_individuals([gp_l], [res_ev]) + #print_graph_pattern(gp_, print_matching_node_pairs=0) + return gp_list + + +if __name__ == '__main__': + res = [] + for i in range(20): + res_ts = test_steps(ground_truth_pairs_2) + for gp_ts in res_ts: + res.append(gp_ts) + + res = sorted(res, key=lambda gp_: -gp_.fitness.values.score) + for i in range(10): + print_graph_pattern(res[i]) From 0837c104e3161b3f076d66e3c7ce239a0a48fd52 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Thu, 30 Aug 2018 14:21:27 +0200 Subject: [PATCH 04/27] test finished, alg.not yet in learner --- graph_pattern.py | 326 ++- tests/SPARQL-query.py | 75 + tests/test_mutate_deep_narrow.py | 3442 ++++++++++++++++++++++++++++++ tests/test_sampling.py | 227 +- 4 files changed, 3992 insertions(+), 78 deletions(-) create mode 100644 tests/SPARQL-query.py create mode 100644 tests/test_mutate_deep_narrow.py diff --git a/graph_pattern.py b/graph_pattern.py index 635fec1..e1468ad 100644 --- a/graph_pattern.py +++ b/graph_pattern.py @@ -34,7 +34,6 @@ logger = logging.getLogger(__name__) - RANDOM_VAR_LEN = 5 # so in total we have 62**5=916132832 different random vars RANDOM_VAR_PREFIX = 'vr' SOURCE_VAR = Variable('source') @@ -241,10 +240,10 @@ def canonicalize(gp, shorten_varnames=True): cgp = GraphPattern(cbgp, mapping=mapping) if not ( - len(gp) == len(cbgp) == len(cgp) - and len(gp.nodes) == len(cgp.nodes) - and len(gp.edges) == len(cgp.edges) - and sorted(gp.identifier_counts().values()) == + len(gp) == len(cbgp) == len(cgp) + and len(gp.nodes) == len(cgp.nodes) + and len(gp.edges) == len(cgp.edges) + and sorted(gp.identifier_counts().values()) == sorted(cgp.identifier_counts().values()) ): # canonicalization should never change any of the features above, but it @@ -432,8 +431,8 @@ def exclude(self, identifiers): [(s, p, o) for s, p, o in self if p not in identifiers and - s not in identifiers and - o not in identifiers + s not in identifiers and + o not in identifiers ] ) @@ -448,7 +447,7 @@ def identifier_counts(self, exclude_vars=False, vars_only=False): :param vars_only: Only return counts for vars. :return: Counter of all identifiers in this graph pattern. 
""" - assert not(exclude_vars and vars_only) + assert not (exclude_vars and vars_only) ids = Counter([i for t in self for i in t]) if exclude_vars: for i in self.vars_in_graph: @@ -639,6 +638,7 @@ def to_sparql_select_query( def to_sparql_select_sample_query( self, values, + values_s_t=None, projection=None, limit=None, sample_var=None @@ -651,6 +651,7 @@ def to_sparql_select_sample_query( Args: values: a dict mapping a variable tuple to a list of binding tuples, e.g. {(v1, v2): [(uri1, uri2), (uri3, uri4), ...]} + values_s_t: TODO projection: which variables to select on, by default all vars. limit: integer to limit the result size sample_var: the variable to sample over @@ -661,24 +662,286 @@ def to_sparql_select_sample_query( if projection is None: projection = sorted([v for v in self.vars_in_graph]) - if sample_var is None: - sample_var = random.choice(projection) - logger.info(sample_var) + # if sample_var is None: + # sample_var = random.choice(projection) + # logger.info(sample_var) + + if sample_var: + projection.remove(sample_var) + + res = "SELECT %(samp)s %(proj)s WHERE {\n" \ + "%(valst)s\n" \ + "%(qpp)s}\n" \ + "%(lim)s" % { + 'samp': (' SAMPLE(%s) as %s' % ( + ''.join(sample_var.n3()), + ''.join(sample_var.n3()) + )) if sample_var else '', + 'proj': ' '.join([v.n3() for v in projection]), + 'valst': self._sparql_values_part(values=values_s_t, indent=' ') + if values_s_t is not None else '', + 'qpp': self._sparql_query_pattern_part( + values=values, + indent=' ', + ), + 'lim': ('LIMIT %d\n' % limit) if limit is not None else '', + } + res = textwrap.dedent(res) + return self._sparql_prefix(res) - projection.remove(sample_var) + def to_sparql_filter_by_count_in_out_query( + self, + values, + count_node, + in_out=None, + max_in=None, + max_out=None, + projection=None, + gp=None, + limit=None, + sample_var=None + ): + # TODO: Möglicherweise noch die Pfade aus dem gp_in rausfiltern, man + # will ja eher selten einen zusatzhop über einen schon vorhandenen + # Pfad finden - res = "SELECT %(samp)s %(proj)s WHERE {\n%(qpp)s}\n%(lim)s" % { - 'samp': (' SAMPLE(%s) as %s' % ( - ''.join(sample_var.n3()), - ''.join(sample_var.n3()) - )), - 'proj': ' '.join([v.n3() for v in projection]), - 'qpp': self._sparql_query_pattern_part( - values=values, - indent=' ', - ), - 'lim': ('LIMIT %d\n' % limit) if limit is not None else '', - } + """Generates a SPARQL select query from the graph pattern. + + Examples: + TODO + + Args: TODO + values: a dict mapping a variable tuple to a list of binding tuples, + e.g. {(v1, v2): [(uri1, uri2), (uri3, uri4), ...]} + count_node: Node to filter over outgoing arcs. + in_out: + max_in: + max_out: max outgoing arcs + projection: which variables to select on, by default all vars. 
+ gp: + limit: integer to limit the result size + sample_var: the variable to sample over + """ + assert self.vars_in_graph, \ + "tried to get sparql for pattern without vars: %s" % (self,) + + if projection is None: + projection = sorted([v for v in self.vars_in_graph]) + if sample_var: + projection.remove(sample_var) + + if max_out is None: + max_out = 20 + if max_in is None: + max_in = 20 + + if in_out not in ['in', 'out', 'inout']: + in_out = random.choice(['in', 'out', 'inout']) + logger.info('in_out was set on %s' % in_out) + count_out = Variable('cout') + count_in = Variable('cin') + rand_var_out = gen_random_var() + rand_var_in = gen_random_var() + if gp: + if in_out == 'out': + gp_ = GraphPattern(chain(self, + GraphPattern([ + (count_node, count_out, rand_var_out) + ]), + gp)) + elif in_out == 'in': + gp_ = GraphPattern(chain(self, + GraphPattern([ + (rand_var_in, count_in, count_node) + ]), + gp)) + else: # TODO: Testen ob inout überhaupt passt + gp_ = GraphPattern(chain(self, + GraphPattern([ + (rand_var_in, count_in, count_node), + (count_node, count_out, rand_var_out) + ]), + gp)) + else: + if in_out == 'out': + gp_ = GraphPattern(chain(self, + GraphPattern([ + (count_node, count_out, rand_var_out) + ]) + )) + elif in_out == 'in': + gp_ = GraphPattern(chain(self, + GraphPattern([ + (rand_var_in, count_in, count_node) + ]) + )) + else: # TODO: Testen ob inout überhaupt passt + gp_ = GraphPattern(chain(self, + GraphPattern([ + (rand_var_in, count_in, count_node), + (count_node, count_out, rand_var_out) + ]) + )) + + res = "SELECT %(samp)s %(proj)s %(count)s WHERE " \ + "{\n%(qpp)s}\n%(gb)s\n%(hv)s\n%(lim)s" % { + 'samp': (' SAMPLE(%s) as %s' % ( + ''.join(sample_var.n3()), + ''.join(sample_var.n3()) + )) if sample_var else '', + 'proj': ' '.join([v.n3() for v in projection]), + 'count': (' COUNT(%s) as %s' % ( + ''.join(count_out.n3()), + ''.join(count_out.n3()))) if in_out == 'out' else + (' COUNT(%s) as %s' % ( + ''.join(count_in.n3()), + ''.join(count_in.n3()))) if in_out == 'in' else + (' COUNT(%s) as %s COUNT(%s) as %s' % ( + ''.join(count_out.n3()), + ''.join(count_out.n3()), + ''.join(count_in.n3()), + ''.join(count_in.n3()) + )), + 'qpp': gp_._sparql_query_pattern_part( + values=values, + indent=' ', + ), + 'gb': ('GROUP BY ' + ' '.join([v.n3() for v in projection])), + 'hv': ('HAVING (COUNT(%s)<%s)' % ( + ''.join(count_out.n3()), + str(max_out))) if in_out == 'out' else + ('HAVING (COUNT(%s)<%s)' % ( + ''.join(count_in.n3()), + str(max_in))) if in_out == 'in' else + ('HAVING (COUNT(%s)<%s&&COUNT(%s)<%s)' % ( + ''.join(count_out.n3()), + str(max_out), + ''.join(count_in.n3()), + str(max_in) + )), + 'lim': ('LIMIT %d\n' % limit) if limit is not None else '', + } + res = textwrap.dedent(res) + return gp_._sparql_prefix(res) + + def to_sparql_useful_path_query( + self, + var_to_fix, + var_to_count, + valueblocks, + steps, + startvar=None, + avglimit=10, + gp_in=False + ): + count_var_to_count = Variable('c' + ''.join(var_to_count)) + avg_var_to_count = Variable('avgc' + ''.join(var_to_count)) + if startvar is None: + startvar = SOURCE_VAR + res = "SELECT %(vtf)s (AVG(%(cvtc)s) as %(avtc)s) {\n" \ + "SELECT %(stv)s %(vtf)s (COUNT (%(vtc)s) as %(cvtc)s) {\n" \ + "%(val)s\n" \ + "%(trip)s }\n" \ + "GROUP BY %(stv)s %(vtf)s }\n" \ + "GROUP BY %(vtf)s\n" \ + "HAVING (AVG (%(cvtc)s) < %(avgl)s)" % { + 'vtf': ''.join(var_to_fix.n3()), + 'cvtc': ''.join(count_var_to_count.n3()), + 'avtc': ''.join(avg_var_to_count.n3()), + 'stv': ''.join(startvar.n3()), + 'vtc': 
''.join(var_to_count.n3()), + 'val': ''.join([ + self._sparql_values_part( + values=valueblocks[key], indent=' ' + ) for key in valueblocks + ]), + 'trip': ''.join([ + step._sparql_triples_part(indent=' ') for step in steps + # TODO: nicht auf private Methode zugreifen + ]) + ''.join([ + self._sparql_triples_part(indent=' ') if gp_in else '' + ]), + 'avgl': str(avglimit), + } + res = textwrap.dedent(res) + return self._sparql_prefix(res) + + def to_sparql_inst_query( + self, + hop, + valueblocks, + gp_help, + gp_in=False + ): + res = "SELECT %(vtf)s (COUNT (?source) as ?cst) {\n" \ + "%(val)s\n" \ + "%(trip)s }\n" \ + "GROUP BY %(vtf)s\n" \ + "HAVING (COUNT (?source) > 0)" % { + 'vtf': ' '.join([var.n3() for var in hop]), + 'val': ''.join([ + self._sparql_values_part( + values=valueblocks[key], indent=' ' + ) for key in valueblocks + ]), + 'trip': ''.join(gp_help._sparql_triples_part()) + + # TODO: nicht auf private Methode zugreifen + ''.join([ + self._sparql_triples_part( + indent=' ' + ) if gp_in else '' + ]), + } + res = textwrap.dedent(res) + return self._sparql_prefix(res) + + # TODO: die normale inst durch diese hier ersetzen (sollte überall gehen) + def to_sparql_useful_path_inst_query( + self, + hop, + valueblocks, + steps, + gp_in=False + ): + res = "SELECT %(vtf)s (COUNT (?source) as ?cst) {\n" \ + "%(val)s\n" \ + "%(trip)s }\n" \ + "GROUP BY %(vtf)s\n" \ + "HAVING (COUNT (?source) > 0)" % { + 'vtf': ' '.join([var.n3() for var in hop]), + 'val': ''.join([ + self._sparql_values_part( + values=valueblocks[key], indent=' ' + ) for key in valueblocks + ]), + 'trip': ''.join([ + step._sparql_triples_part() for step in steps + # TODO: nicht auf private Methode zugreifen + ]) + ''.join([ + self._sparql_triples_part(indent=' ') if gp_in else '' + ]), + } + res = textwrap.dedent(res) + return self._sparql_prefix(res) + + def to_sparql_precheck_query( + self, + values, + gp_in=False + ): + res = "SELECT * {\n" \ + "%(val)s\n" \ + "%(trip)s\n" \ + "}\n" \ + "LIMIT 1" % { + 'val': ''.join( + self._sparql_values_part(values=values, indent=' ') + ), + 'trip': ''.join(self._sparql_triples_part(indent=' ')) + + ''.join([ + self._sparql_triples_part(indent=' ') if gp_in else '' + ]), + } res = textwrap.dedent(res) return self._sparql_prefix(res) @@ -702,9 +965,9 @@ def _sparql_query_pattern_part( ): assert bind is None or isinstance(bind, dict) assert values is None or ( - isinstance(values, dict) and - isinstance(next(six.iterkeys(values)), Iterable) and - isinstance(next(six.itervalues(values)), Iterable) + isinstance(values, dict) and + isinstance(next(six.iterkeys(values)), Iterable) and + isinstance(next(six.itervalues(values)), Iterable) ) res = '' @@ -1088,7 +1351,6 @@ def rate_graph_pattern(self, gp): ] return res - def prune_counts(self, below=2): lns = len(self.identifier_gt_node_sum) ln = len(self.identifier_gt_node_count) @@ -1115,7 +1377,7 @@ def prune_counts(self, below=2): def __str__(self): return '%s: pairs: %d, nodes: %d, Identifier counts:\n' \ - 'Pairs: %s\nNodes: %s' % ( - self.__class__.__name__, len(self.gt_pairs), len(self.nodes), - self.identifier_gt_pair_count, self.identifier_gt_node_count - ) + 'Pairs: %s\nNodes: %s' % ( + self.__class__.__name__, len(self.gt_pairs), len(self.nodes), + self.identifier_gt_pair_count, self.identifier_gt_node_count + ) diff --git a/tests/SPARQL-query.py b/tests/SPARQL-query.py new file mode 100644 index 0000000..4bbb7e0 --- /dev/null +++ b/tests/SPARQL-query.py @@ -0,0 +1,75 @@ +# coding=utf-8 +from __future__ import absolute_import 
+from __future__ import division
+from __future__ import print_function
+
+"""A file simply for firing off SPARQL queries, instead of doing it
+online in the browser.
+"""
+
+import logging
+from collections import OrderedDict
+from os import getenv
+
+import SPARQLWrapper
+from splendid import time_func
+import socket
+import rdflib
+from rdflib import URIRef
+from rdflib import Variable
+
+from config import SPARQL_ENDPOINT
+from gp_learner import evaluate
+from gp_learner import mutate_fix_var
+from gp_learner import update_individuals
+from gp_query import calibrate_query_timeout
+from gp_query import query_time_hard_exceeded
+from gp_query import query_time_soft_exceeded
+from graph_pattern import GraphPattern
+from graph_pattern import SOURCE_VAR
+from graph_pattern import TARGET_VAR
+from ground_truth_tools import get_semantic_associations
+from ground_truth_tools import split_training_test_set
+from gtp_scores import GTPScores
+from serialization import print_graph_pattern
+
+
+sparql = SPARQLWrapper.SPARQLWrapper(
+    getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql'))
+try:
+    timeout = max(5, calibrate_query_timeout(sparql))  # 5s for warmup
+except IOError:
+    from nose import SkipTest
+    raise SkipTest(
+        "Can't establish connection to SPARQL_ENDPOINT:\n    %s\n"
+        "Skipping tests in\n    %s" % (SPARQL_ENDPOINT, __file__))
+
+sparql.resetQuery()
+sparql.setTimeout(timeout)
+sparql.setReturnFormat(SPARQLWrapper.JSON)
+
+q = 'SELECT ?source ?target ?vcb0 ?vcb1 ?vcb2 ?vcb3 WHERE {' \
+    '?source ?vcb0 ?vcb2 .' \
+    '?target ?vcb3 .' \
+    '?target ?vcb1 .' \
+    '?vcb2 ?source .' \
+    '?vcb2 ?target ' \
+    '}'
+
+try:
+    q_short = ' '.join((line.strip() for line in q.split('\n')))
+    sparql.setQuery(q_short)
+    c = time_func(sparql.queryAndConvert)
+except socket.timeout:
+    c = (timeout, {})
+except ValueError:
+    # e.g. if the endpoint gives us bad JSON for some unicode chars
+    print(
+        'Could not parse result for query, assuming empty result...\n'
+        'Query:\n%s' % q
+    )
+    c = (timeout, {})
+
+t, res = c
+print('orig query took %.4f s, result:\n%s\n' % (t, res))
\ No newline at end of file
diff --git a/tests/test_mutate_deep_narrow.py b/tests/test_mutate_deep_narrow.py
new file mode 100644
index 0000000..bbcbdca
--- /dev/null
+++ b/tests/test_mutate_deep_narrow.py
@@ -0,0 +1,3442 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+"""Tests the different versions of mutate_deep_narrow.
+"""
+
+import logging
+import numpy as np
+import pickle
+import random
+from collections import defaultdict
+from collections import OrderedDict
+from os import getenv
+
+import SPARQLWrapper
+from itertools import chain
+from splendid import get_path
+from splendid import time_func
+import socket
+import rdflib
+from rdflib import BNode
+from rdflib import Literal
+from rdflib import URIRef
+from rdflib import Variable
+
+from config import SPARQL_ENDPOINT
+from gp_learner import evaluate
+from gp_learner import mutate_fix_var
+from gp_learner import update_individuals
+from gp_query import calibrate_query_timeout
+from gp_query import query_time_hard_exceeded
+from gp_query import query_time_soft_exceeded
+from graph_pattern import gen_random_var
+from graph_pattern import GraphPattern
+from graph_pattern import SOURCE_VAR
+from graph_pattern import TARGET_VAR
+from ground_truth_tools import get_semantic_associations
+from ground_truth_tools import split_training_test_set
+from gtp_scores import GTPScores
+from serialization import print_graph_pattern
+from utils import sparql_json_result_bindings_to_rdflib
+
+logger = logging.getLogger(__name__)
+
+sparql = SPARQLWrapper.SPARQLWrapper(SPARQL_ENDPOINT)
+# sparql = SPARQLWrapper.SPARQLWrapper(
+#     getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql'))
+try:
+    timeout = max(5, calibrate_query_timeout(sparql))  # 5s for warmup
+except IOError:
+    from nose import SkipTest
+    raise SkipTest(
+        "Can't establish connection to SPARQL_ENDPOINT:\n    %s\n"
+        "Skipping tests in\n    %s" % (sparql.endpoint, __file__))
+
+dbr = rdflib.Namespace('http://dbpedia.org/resource/')
+owl = rdflib.Namespace('http://www.w3.org/2002/07/owl#')
+dbo = rdflib.Namespace('http://dbpedia.org/ontology/')
+# NB: the trailing slash is needed so that gold['hypernym'] yields
+# http://purl.org/linguistics/gold/hypernym, as used elsewhere
+gold = rdflib.Namespace('http://purl.org/linguistics/gold/')
+dbt = rdflib.Namespace('http://dbpedia.org/resource/Template:')
+dbp = rdflib.Namespace('http://dbpedia.org/property/')
+
+v = [gen_random_var() for i in range(100)]
+
+sameAs = owl['sameAs']
+pwl = dbo['wikiPageWikiLink']
+hypernym = gold['hypernym']
+wpUseTemp = dbp['wikiPageUsesTemplate']
+
+gp_found = {}
+gp_found['1'] = GraphPattern([
+    (SOURCE_VAR, pwl, TARGET_VAR),
+    (SOURCE_VAR, v[0], v[1]),
+    (v[1], hypernym, TARGET_VAR)
+])
+gp_found['2'] = GraphPattern([
+    (SOURCE_VAR, pwl, TARGET_VAR),
+    (TARGET_VAR, v[0], SOURCE_VAR),
+    (TARGET_VAR, v[1], URIRef('http://dbpedia.org/dbtax/Page'))
+])
+gp_found['3'] = GraphPattern([
+    (SOURCE_VAR, pwl, TARGET_VAR),
+    (TARGET_VAR, v[0], SOURCE_VAR),
+    (TARGET_VAR, v[1], dbt['Sister_project_links'])
+])
+gp_found['4'] = GraphPattern([
+    (SOURCE_VAR, pwl, TARGET_VAR),
+    (TARGET_VAR, wpUseTemp, dbt['Pp-semi-indef'])
+])
+gp_found['5'] = GraphPattern([
+    (SOURCE_VAR, pwl, TARGET_VAR),
+    (TARGET_VAR, v[0], dbt['Pp-semi-indef'])
+])
+gp_found['6'] = GraphPattern([
(SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Cite_book']) +]) +gp_found['7'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Redirect']) +]) +gp_found['8'] = GraphPattern([ + (SOURCE_VAR, hypernym, TARGET_VAR) +]) +gp_found['50'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Use_dmy_dates']) +]) +gp_found['51'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Refend']) +]) +gp_found['52'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), + URIRef('http://dbpedia.org/dbtax/Page')) +]) +gp_found['54'] = GraphPattern([ + (SOURCE_VAR, hypernym, TARGET_VAR), + (v[0], sameAs, SOURCE_VAR) +]) +gp_found['55'] = GraphPattern([ + (SOURCE_VAR, hypernym, TARGET_VAR), + (TARGET_VAR, pwl, SOURCE_VAR) +]) +gp_found['67'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Portal']) +]) +gp_found['68'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (TARGET_VAR, v[0], SOURCE_VAR), + (TARGET_VAR, v[1], dbt['Convert']) +]) +gp_found['69'] = GraphPattern([ + (SOURCE_VAR, hypernym, TARGET_VAR), + (v[0], hypernym, SOURCE_VAR) +]) +gp_found['72'] = GraphPattern([ + (SOURCE_VAR, URIRef('http://purl.org/dc/terms/subject'), v[1]), + (TARGET_VAR, pwl, SOURCE_VAR), + (v[0], sameAs, v[1]), + (v[1], URIRef('http://www.w3.org/2004/02/skos/core#subject'), TARGET_VAR) +]) +gp_found['94'] = GraphPattern([ + (SOURCE_VAR, URIRef('http://purl.org/dc/terms/subject'), v[1]), + (TARGET_VAR, v[0], SOURCE_VAR), + (v[1], URIRef('http://www.w3.org/2004/02/skos/core#subject'), TARGET_VAR) +]) +gp_found['131'] = GraphPattern([ + (SOURCE_VAR, v[0], v[2]), + (TARGET_VAR, pwl, v[1]), + (v[2], URIRef('http://www.w3.org/2004/02/skos/core#subject'), TARGET_VAR), +]) +gp_found['140'] = GraphPattern([ + (TARGET_VAR, pwl, SOURCE_VAR), + (TARGET_VAR, wpUseTemp, dbt['Other_uses']), + (TARGET_VAR, wpUseTemp, dbt['Pp-move-indef']), + (v[0], URIRef('http://www.w3.org/2000/01/rdf-schema#seeAlso'), TARGET_VAR), +]) +# Bis hier jedes mit neuem Fingerprint, jetzt noch 3 vom Rest +gp_found['231'] = GraphPattern([ + (SOURCE_VAR, dbo['class'], TARGET_VAR), + (TARGET_VAR, dbp['subdivisionRanks'], v[0]) +]) +gp_found['323'] = GraphPattern([ + (SOURCE_VAR, pwl, TARGET_VAR), + (v[0], dbp['species'], TARGET_VAR), + (v[1], dbo['wikiPageDisambiguates'], TARGET_VAR) +]) +gp_found['516'] = GraphPattern([ + (SOURCE_VAR, pwl, v[1]), + (TARGET_VAR, dbp['image'], v[0]), + (v[1], hypernym, TARGET_VAR), + (v[2], dbo['wikiPageRedirects'], SOURCE_VAR) +]) + +# Verschiedene Limits festlegen: +# Limit: search object-list => subject-values in next query +limit_next = 500 +# limt: search an object list from two diferrent subjects and get hits through +# comparing them +limit_endpoint_two_sided = 1000 +# limit: search object-list => compare with sources/targets from gtp +limit_choose_endpoint = 5000 +# limit: search subject-list from two diferrent objects and get hits through +# comparing them +limit_startpoint_two_sided = 200 +# limit: search subject-list => subject-values in next query +limit_subject_next = 350 +# limit: search subject list => compare with sources/targets from gtp +limit_choose_subject_endpoint = 3000 +# limits: hit-list => on side subject, one side object: +limit_subj_to_obj = 350 
+limit_obj_to_subj = 1500 + + +# einen ein-hop-weg von source zu target zum pattern hinzufügen +# TODO Varianten (von gefundenen b aus Variante der zweiten query +# 1.(default) mit (b, c, d) Liste von d suchen und mit Target-Liste vergleichen +# 2. mit (b, c, target). VALUES(target) suchen => +# Ergebnisse direkt an existente Targets gebunden +# 3. mit (b, c, target).urspurngs_gp +def mutate_deep_narrow_one_hop_s_t_without_direction( + gp_, gtps, max_out=None, max_in=None, in_out=None +): + vars_ = gp_.vars_in_graph + if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): + logger.info('SOURCE or TARGET are not in gp: %s' % gp_) + return [] + # Erstelle pattern für den ersten Schritt + a = Variable('a') + b = Variable('b') + c = Variable('c') + values_s_t = {(SOURCE_VAR, TARGET_VAR): gtps} + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s_t, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, gp=gp_, limit=200) + logger.info(q) + t, q_res1 = run_query(q) + if not q_res1['results']['bindings']: + return [] + # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res1) + # Erstelle values aus den Ergebnissen für b + values = get_values([b], q_res1) + gp2 = GraphPattern([(b, c, TARGET_VAR)]) + # Query die über eine var aus gp2 random samplet mit values aus b_list + q = gp2.to_sparql_select_sample_query(values=values, limit=5000) + logger.info(q) + try: + t, q_res2 = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + # Kreiere target_list, in der die "gefundenen" Targets vermerkt sind + target_list = get_values_list(TARGET_VAR, q_res2) + # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) + # Kreiere gtps_hit in der alle gtps, deren targets in target_list enthalten + # sind, "gespeichert" werden + stp_hit = get_stp_hit(target_list, gtps, 1) + gp_list = get_fixed_path_gp_one_hop( + q_res1, q_res2, gp_, stp_hit, [], a, b, c + ) + return gp_list + + +# einen ein-hop-weg von source zu target zum pattern hinzufügen +# (gp in query 2 eingefügt) +def mutate_deep_narrow_one_hop_s_t_2(gp_, gtps, max_in_out=None, in_out=None): + vars_ = gp_.vars_in_graph + if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): + logger.info('SOURCE or TARGET are not in gp: %s' % gp_) + return [] + # Erstelle pattern für den ersten Schritt + a = Variable('a') + b = Variable('b') + c = Variable('c') + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + values_s_t = {(SOURCE_VAR, TARGET_VAR): gtps} + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s_t, count_node=b, in_out=in_out, + max_out=max_in_out, gp=gp_, limit=200) + logger.info(q) + t, q_res1 = run_query(q) + if not q_res1['results']['bindings']: + return [] + # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res1) + gp2 = GraphPattern([(b, c, TARGET_VAR)]) + # Erstelle values aus den Ergebnissen für b + values = get_values([b], q_res1) + # Query die über eine var aus gp2 random samplet mit values aus b_list + q = gp2.to_sparql_select_sample_query( + values=values, values_s_t=values_s_t, limit=5000 + ) + logger.info(q) + try: + t, q_res2 = run_query(q) + except: + logger.info('Die Query (s.o.) 
hat nicht geklappt') + return [] + # Kreiere target_list, in der die "gefundenen" Targets vermerkt sind + target_list = get_values_list(TARGET_VAR, q_res2) + # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) + # Kreiere gtps_hit in der alle gtps, deren targets in target_list enthalten + # sind, "gespeichert" werden + stp_hit = get_stp_hit(target_list, gtps, 1) + gp_list = get_fixed_path_gp_one_hop(q_res1, q_res2, gp_, stp_hit, a, b, c) + return gp_list + + +# eine one-hop verbindung zwischen source und target finden (Richtungen random) +def mutate_deep_narrow_one_random_hop_s_t(): + ich_darf_nich_leer_sein = [] + return ich_darf_nich_leer_sein + + +# einen direkten weg um einen hop erweitern (Weg löschen und stattdessen +# ein-hop weg einfügen) + + +# zu einem direkten weg noch einen ein-hop weg hinzufügen (weg behalten, +# ein-hop weg dazu) + + +# Runs a given (as String) query against the Sparql-endpoint +def run_query(q): + try: + q_short = ' '.join((line.strip() for line in q.split('\n'))) + sparql.setQuery(q_short) + cal = time_func(sparql.queryAndConvert) + except socket.timeout: + cal = (timeout, {}) + except ValueError: + # e.g. if the endpoint gives us bad JSON for some unicode chars + logger.info( + 'Could not parse result for query, assuming empty result...\n' + 'Query:\n%s\nException:', q, + exc_info=1, # appends exception to message + ) + cal = (timeout, {}) + return cal + + +# returns a list of value-tupels for the given variables, out of an +# query-result +def get_values(varlist, q_res): + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(q_res, res_rows_path, default=[]) + ) + vallist = [] + for row in bind: + tup = () + for var in varlist: + tup = tup + (get_path(row, [var]), ) + vallist.append(tup) + # ausfiltern von vallist (leider notwendig vor allem wegen dbr:Template + vallist[:] = [valtup for valtup in vallist if not list_remove_bool(valtup)] + # dopppelte noch herausfiltern + vallist = list(set(vallist)) + vartup = () + for var in varlist: + vartup = vartup + (var, ) + values = {vartup: vallist} + return values + + +# returns a list of found values for a given variable and query-result +def get_values_list(var, q_res): + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(q_res, res_rows_path, default=[]) + ) + vallist = [get_path(row, [var]) for row in bind] + return vallist + + +# gibt ein sample nach der Gewichtung der counts zurück, +# Gewichtung ist hier innerhalb angesetzt +def get_weighted_sample(var, count, q_res): + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(q_res, res_rows_path, default=[]) + ) + val = [] + weight = [] + for row in bind: + val.append(get_path(row, [var])) + # Davon ausgehend, dass x besonders gut ist + if float(get_path(row, [count])) == 1.0: + weight.append(10000) + else: + weight.append(1/(abs(1-float(get_path(row, [count]))))) + # Davon ausgehend, dass x besonders schlecht ist + # weight.append(abs(7-float(get_path(row, [count])))) + # weight.append(get_path(row, [count])) + s = sum(weight) + for i in range(len(weight)): + weight[i] = weight[i] / s + cum_weights = [0] + list(np.cumsum(weight)) + res = [] + while len(res) < min(10, len(list(set(val)))): + x = np.random.random() + i = 0 + while x > cum_weights[i]: + i = i + 1 + index = i - 1 + if val[index] not in res: + res.append((val[index],)) + sample = {(var,): res} + return sample + + +# gibt zu einer gegebenen Liste 
von Variablen die stp aus gtps zurück, +# bei denen Target(st=1)/Source(st=0) in der Variablen Liste ist. +def get_stp_hit(varlist, gtps, st): + stp = [] + for t in varlist: + for gtp in gtps: + if t == gtp[st]: + stp.append(gtp) + return stp + + +# Checks if an found RDF-Term can be used as value in a new query +# (without conflicts) +def list_remove_bool(tup): + for var in tup: + if isinstance(var, Literal): + i_n3 = var.n3() + if len(i_n3) > 60: + return True + elif isinstance(var, BNode): + return True + elif isinstance(var, URIRef): + return '%' in var + # TODO: nochmal schauen das % rauswerfen war kuzfristig, + # weil sparql mir bei einer query nen Fehler geschmissen hat + return False + + +# evaluates a given graph-pattern-list +def eval_gp_list(gtp_scores, gp_list): + for gp_l in gp_list: + eval_gp(gtp_scores, gp_l) + return gp_list + + +# evaluate a given graph-pattern +def eval_gp(gtp_scores, gp): + res = evaluate( + sparql, timeout, gtp_scores, gp, run=0, gen=0) + update_individuals([gp], [res]) + + +# helper to get target-hits and the corresponding stp +def target_hit(stps, t_lis): + res = [] + for stp in stps: + for t in t_lis: + if t == stp[1]: + res.append( + (t, stp) + ) + return res + + +# add one hop with the given direction. +def mutate_deep_narrow_one_hop( + gp_, max_out=None, max_in=None, in_out=None, richtung=None +): + vars_ = gp_.vars_in_graph + if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): + logger.info('SOURCE or TARGET are not in gp: %s' % gp_) + return [] + if not gp_.matching_node_pairs: + logger.info( + 'No matching node pairs, cant get better through adding constraints' + ) + return [] + # Erstelle pattern für den ersten Schritt + a = Variable('a') + b = Variable('b') + c = Variable('c') + if richtung not in [1, 2, 3, 4]: + richtung = random.choice([1, 2, 3, 4]) + logger.info('Richtung %s wurde gewaehlt' % richtung) + if richtung == 1: + values_s_t = {(SOURCE_VAR, TARGET_VAR): gp_.matching_node_pairs} + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s_t, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=200) + logger.info(q) + t, q_res1 = run_query(q) + if not q_res1: + return [] + # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res1) + # Erstelle values aus den Ergebnissen für b + values = get_values([b], q_res1) + gp2 = GraphPattern([(b, c, TARGET_VAR)]) + # Query die über eine var aus gp2 random samplet mit values aus b_list + q = gp2.to_sparql_select_sample_query(values=values, limit=5000) + logger.info(q) + try: + t, q_res2 = run_query(q) + except: + logger.info('Die Query (s.o.) 
hat nicht geklappt') + return [] + # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) + gp_list = get_fixed_path_gp_one_hop( + q_res1, q_res2, gp_, richtung, gp_.matching_node_pairs, a, b, c + ) + elif richtung == 2: + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + values_t = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(TARGET_VAR, c, b)]) + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=1000) + logger.info(q) + t, q_res1 = run_query(q) + if not q_res1['results']['bindings']: + return [] + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=1000) + logger.info(q) + t, q_res2 = run_query(q) + if not q_res2['results']['bindings']: + return [] + gp_list = get_fixed_path_gp_one_hop( + q_res1, q_res2, gp_, richtung, gp_.matching_node_pairs, a, b, c + ) + elif richtung == 3: + values_s_t = {(SOURCE_VAR, TARGET_VAR): gp_.matching_node_pairs} + gp2 = GraphPattern([(TARGET_VAR, c, b)]) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_s_t, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=200) + logger.info(q) + t, q_res2 = run_query(q) + if not q_res2['results']['bindings']: + return [] + # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res1) + gp1 = GraphPattern([(b, a, SOURCE_VAR)]) + # Erstelle values aus den Ergebnissen für b + values = get_values([b], q_res2) + # Query die über eine var aus gp2 random samplet mit values aus b_list + q = gp1.to_sparql_select_sample_query(values=values, limit=5000) + logger.info(q) + try: + t, q_res1 = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + gp_list = get_fixed_path_gp_one_hop( + q_res1, q_res2, gp_, richtung, gp_.matching_node_pairs, a, b, c + ) + else: + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + values_t = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + gp1 = GraphPattern([(b, a, SOURCE_VAR)]) + gp2 = GraphPattern([(b, c, TARGET_VAR)]) + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=200) + logger.info(q) + t, q_res1 = run_query(q) + if not q_res1['results']['bindings']: + return [] + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=200) + logger.info(q) + t, q_res2 = run_query(q) + if not q_res2['results']['bindings']: + return [] + gp_list = get_fixed_path_gp_one_hop( + q_res1, q_res2, gp_, richtung, gp_.matching_node_pairs, a, b, c + ) + return gp_list + + +# fixed den ein-hop-pfad zwischen Source und Target, fügt ihn dem Pattern hinzu +# und gibt die Liste der resultierenden Pattern zurück +# TODO nicht so sehr auf source a b. b c Target fokussieren. 
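+# (It binds the found one-hop path to concrete predicates and merges it into
+# gp_main. A sketch with hypothetical bindings: for richtung == 1, a result
+# row that binds a and c to predicates p1 and p2 for a matching
+# (source, target) pair adds
+#     ?source p1 ?b . ?b p2 ?target
+# to the pattern.)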
+def get_fixed_path_gp_one_hop(q_res1, q_res2, gp_main, richtung, stp, a, b, c):
+    gp_list = []
+    res_rows_path = ['results', 'bindings']
+    bind1 = sparql_json_result_bindings_to_rdflib(
+        get_path(q_res1, res_rows_path, default=[])
+    )
+    bind2 = sparql_json_result_bindings_to_rdflib(
+        get_path(q_res2, res_rows_path, default=[])
+    )
+    for row2 in bind2:
+        for gtp in stp:
+            if gtp[1] == get_path(row2, [TARGET_VAR]):
+                for row1 in bind1:
+                    if (get_path(row1, [b]) == get_path(row2, [b])) and \
+                            (get_path(row1, [SOURCE_VAR]) == gtp[0]):
+                        if richtung == 1:
+                            gp_ = GraphPattern([
+                                (SOURCE_VAR, get_path(row1, [a]), b),
+                                (b, get_path(row2, [c]), TARGET_VAR)
+                            ])
+                        elif richtung == 2:
+                            gp_ = GraphPattern([
+                                (SOURCE_VAR, get_path(row1, [a]), b),
+                                (TARGET_VAR, get_path(row2, [c]), b)
+                            ])
+                        elif richtung == 3:
+                            gp_ = GraphPattern([
+                                (b, get_path(row1, [a]), SOURCE_VAR),
+                                (TARGET_VAR, get_path(row2, [c]), b)
+                            ])
+                        else:
+                            gp_ = GraphPattern([
+                                (b, get_path(row1, [a]), SOURCE_VAR),
+                                (b, get_path(row2, [c]), TARGET_VAR)
+                            ])
+
+                        gp_ = GraphPattern(chain(gp_, gp_main))
+                        if gp_ not in gp_list:
+                            gp_list.append(gp_)
+                            logger.info(gtp)
+    return gp_list
+
+
+# Fixes the two-hop path between source and target, adds it to the pattern,
+# and returns the list of resulting patterns.
+# TODO: do not focus so much on the source-a-b . b-c-target shape.
+def get_fixed_path_gp_two_hops(
+        q_res1, q_res2, q_res3, gp_main, richtung, stp, a, b, c, d, e
+):
+    # TODO: consider not only building different patterns for the different
+    # directions, but also starting from the different results (one idea
+    # would be to replace a through e with numbered random vars and then
+    # work out how to pass them along, i.e. whether one can always make it
+    # along the pattern or rather not).
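+    # The eight richtung values enumerate the 2**3 possible orientations of
+    # the three hop triples (source-b, b-d, d-target), as built in the
+    # branches below.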
+ gp_list = [] + res_rows_path = ['results', 'bindings'] + bind1 = sparql_json_result_bindings_to_rdflib( + get_path(q_res1, res_rows_path, default=[]) + ) + bind2 = sparql_json_result_bindings_to_rdflib( + get_path(q_res2, res_rows_path, default=[]) + ) + bind3 = sparql_json_result_bindings_to_rdflib( + get_path(q_res3, res_rows_path, default=[]) + ) + for gtp in stp: + for row3 in bind3: + if gtp[1] == get_path(row3, [TARGET_VAR]): + for row2 in bind2: + if get_path(row2, [d]) == get_path(row3, [d]): + for row1 in bind1: + if get_path(row1, [b]) == \ + get_path(row2, [b]) and \ + get_path(row1, [SOURCE_VAR]) == \ + gtp[0]: + if richtung == 1: + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (b, get_path(row2, [c]), d), + (d, get_path(row3, [e]), TARGET_VAR) + ]) + elif richtung == 2: + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (b, get_path(row2, [c]), d), + (TARGET_VAR, get_path(row3, [e]), d) + ]) + elif richtung == 3: + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (d, get_path(row2, [c]), b), + (d, get_path(row3, [e]), TARGET_VAR) + ]) + elif richtung == 4: + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (d, get_path(row2, [c]), b), + (TARGET_VAR, get_path(row3, [e]), d) + ]) + elif richtung == 5: + gp_ = GraphPattern([ + (b, get_path(row1, [a]), SOURCE_VAR), + (b, get_path(row2, [c]), d), + (d, get_path(row3, [e]), TARGET_VAR) + ]) + elif richtung == 6: + gp_ = GraphPattern([ + (b, get_path(row1, [a]), SOURCE_VAR), + (b, get_path(row2, [c]), d), + (TARGET_VAR, get_path(row3, [e]), d) + ]) + elif richtung == 7: + gp_ = GraphPattern([ + (b, get_path(row1, [a]), SOURCE_VAR), + (d, get_path(row2, [c]), b), + (d, get_path(row3, [e]), TARGET_VAR) + ]) + else: + gp_ = GraphPattern([ + (b, get_path(row1, [a]), SOURCE_VAR), + (d, get_path(row2, [c]), b), + (TARGET_VAR, get_path(row3, [e]), d) + ]) + gp_ = GraphPattern(chain(gp_, gp_main)) + if gp_ not in gp_list: + gp_list.append(gp_) + logger.debug(gtp) + return gp_list + + +# add two hops. +def mutate_deep_narrow_two_hops( + gp_, max_out=None, max_in=None, in_out=None, richtung=None +): + vars_ = gp_.vars_in_graph + if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): + logger.debug('SOURCE or TARGET are not in gp: %s' % gp_) + return [] + if not gp_.matching_node_pairs: + logger.debug( + 'No matching node pairs, cant get better through adding constraints' + ) + return [] + a = Variable('a') + b = Variable('b') + c = Variable('c') + d = Variable('d') + e = Variable('e') + gp_list = [] + if richtung not in range(1, 9): + richtung = random.choice(range(1, 9)) + logger.debug('Richtung %s wurde gewaehlt' % richtung) + if richtung == 1: + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, d)]) + gp3 = GraphPattern([(d, e, TARGET_VAR)]) + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res1) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_b, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res2) + q = gp3.to_sparql_select_sample_query( + values=values_d, limit=limit_choose_endpoint + ) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_two_hops( + q_res1, + q_res2, + q_res3, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e + ) + if richtung == 2: + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, d)]) + gp3 = GraphPattern([(TARGET_VAR, e, d)]) + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res1) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_b, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_endpoint_two_sided) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_t = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_endpoint_two_sided) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_two_hops( + q_res1, + q_res2, + q_res3, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e + ) + if richtung == 3: + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(d, c, b)]) + gp3 = GraphPattern([(d, e, TARGET_VAR)]) + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res1) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_b, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_startpoint_two_sided) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_t = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_startpoint_two_sided) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_two_hops( + q_res1, + q_res2, + q_res3, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e + ) + if richtung == 4: + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(d, c, b)]) + gp3 = GraphPattern([(TARGET_VAR, e, d)]) + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_endpoint_two_sided) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_t = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res3) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_d, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_endpoint_two_sided) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_two_hops( + q_res1, + q_res2, + q_res3, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e + ) + if richtung == 5: + gp1 = GraphPattern([(b, a, SOURCE_VAR)]) + gp2 = GraphPattern([(b, c, d)]) + gp3 = GraphPattern([(d, e, TARGET_VAR)]) + + values_s = {(SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs]} + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_subject_next) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res1) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_b, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res2) + q = gp3.to_sparql_select_sample_query( + values=values_d, limit=limit_choose_endpoint + ) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_two_hops( + q_res1, + q_res2, + q_res3, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e + ) + if richtung == 6: + gp1 = GraphPattern([(b, a, SOURCE_VAR)]) + gp2 = GraphPattern([(b, c, d)]) + gp3 = GraphPattern([(TARGET_VAR, e, d)]) + + values_t = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_startpoint_two_sided) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res3) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_d, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_startpoint_two_sided) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_two_hops( + q_res1, + q_res2, + q_res3, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e + ) + if richtung == 7: + gp1 = GraphPattern([(b, a, SOURCE_VAR)]) + gp2 = GraphPattern([(d, c, b)]) + gp3 = GraphPattern([(d, e, TARGET_VAR)]) + + values_t = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_subject_next) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res3) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_d, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res2) + q = gp1.to_sparql_select_sample_query( + values=values_b, limit=limit_choose_endpoint) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_two_hops( + q_res1, + q_res2, + q_res3, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e + ) + if richtung == 8: + gp1 = GraphPattern([(b, a, SOURCE_VAR)]) + gp2 = GraphPattern([(d, c, b)]) + gp3 = GraphPattern([(TARGET_VAR, e, d)]) + + values_t = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res3) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_d, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res2) + q = gp1.to_sparql_select_sample_query( + values=values_b, limit=limit_choose_endpoint) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_two_hops( + q_res1, + q_res2, + q_res3, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e + ) + + return gp_list + + +# fixed den ein-hop-pfad zwischen Source und Target, fügt ihn dem Pattern hinzu +# und gibt die Liste der resultierenden Pattern zurück +# TODO nicht so sehr auf source a b. b c Target fokussieren. +def get_fixed_path_gp_three_hops( + q_res1, + q_res2, + q_res3, + q_res4, + gp_main, + richtung, + stp, + a, + b, + c, + d, + e, + f, + g +): + # TODO: überlegen nicht nur verschieden Pattern für verschiedene Richtungen + # zu machen, sondern auch in den Unterschiedlichen Ergebnissn anfangen + # (Idee wäre z.B. die a bis e durch nummerierte random vars zu ersetzen und + # sich dann zu überlegen wie man das übergibt, ob mans iwie immer entlang + # des patterns schafft oder eher nicht. 
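+ """Stitch the three-hop paths between source and target into the pattern.
+
+ richtung numbers the 16 possible orientation combinations of the four
+ triples, but only richtung 1 (all forward) and 2 (last triple reversed)
+ are stitched below; every other value currently falls through to the
+ empty dummy pattern.
+ """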
+ gp_list = [] + res_rows_path = ['results', 'bindings'] + bind1 = sparql_json_result_bindings_to_rdflib( + get_path(q_res1, res_rows_path, default=[]) + ) + bind2 = sparql_json_result_bindings_to_rdflib( + get_path(q_res2, res_rows_path, default=[]) + ) + bind3 = sparql_json_result_bindings_to_rdflib( + get_path(q_res3, res_rows_path, default=[]) + ) + bind4 = sparql_json_result_bindings_to_rdflib( + get_path(q_res4, res_rows_path, default=[]) + ) + for gtp in stp: + for row4 in bind4: + if gtp[1] == get_path(row4, [TARGET_VAR]): + for row3 in bind3: + if get_path(row3, [f]) == get_path(row4, [f]): + for row2 in bind2: + if get_path(row2, [d]) == get_path(row3, [d]): + for row1 in bind1: + if get_path(row1, [b]) == \ + get_path(row2, [b]) and \ + get_path(row1, [SOURCE_VAR]) == \ + gtp[0]: + if richtung == 1: + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (b, get_path(row2, [c]), d), + (d, get_path(row3, [e]), f), + (f, get_path(row4, [g]), TARGET_VAR) + ]) + elif richtung == 2: + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (b, get_path(row2, [c]), d), + (d, get_path(row3, [e]), f), + (TARGET_VAR, get_path(row4, [g]), f) + ]) + else: # dummy else, damit gp_ zugewiesen + gp_ = GraphPattern([]) + gp_ = GraphPattern(chain(gp_, gp_main)) + if gp_ not in gp_list: + gp_list.append(gp_) + logger.debug(gtp) + return gp_list + + +# add two hops. +def mutate_deep_narrow_three_hops( + gp_, max_out=None, max_in=None, in_out=None, richtung=None +): + vars_ = gp_.vars_in_graph + if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): + logger.debug('SOURCE or TARGET are not in gp: %s' % gp_) + return [] + if not gp_.matching_node_pairs: + logger.debug( + 'No matching node pairs, cant get better through adding constraints' + ) + return [] + a = Variable('a') + b = Variable('b') + c = Variable('c') + d = Variable('d') + e = Variable('e') + f = Variable('f') + g = Variable('g') + if richtung not in range(1, 17): + richtung = random.choice(range(1, 17)) + logger.debug('Richtung %s wurde gewaehlt' % richtung) + if richtung == 1: + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, d)]) + gp3 = GraphPattern([(d, e, f)]) + gp4 = GraphPattern([(f, g, TARGET_VAR)]) + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res1) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_b, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res2) + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_d, count_node=f, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_f = get_values([f], q_res3) + q = gp4.to_sparql_select_sample_query( + values=values_f, limit=limit_choose_endpoint + ) + logger.debug(q) + try: + t, q_res4 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res4: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res4['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_three_hops( + q_res1, + q_res2, + q_res3, + q_res4, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e, + f, + g + ) + elif richtung == 2: + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, d)]) + gp3 = GraphPattern([(d, e, f)]) + gp4 = GraphPattern([(TARGET_VAR, g, f)]) + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res1) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_b, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res2) + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_d, count_node=f, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_endpoint_two_sided) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_t = { + (TARGET_VAR,): [(tup[1],) for tup in gp_.matching_node_pairs] + } + q = gp4.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=f, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_endpoint_two_sided) + logger.debug(q) + try: + t, q_res4 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res4: + logger.debug('Die Query (s.o.) 
hat kein Ergebnis geliefert') + return [] + elif not q_res4['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_three_hops( + q_res1, + q_res2, + q_res3, + q_res4, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e, + f, + g + ) + elif richtung == 3: + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, d)]) + gp3 = GraphPattern([(f, e, d)]) + gp4 = GraphPattern([(f, g, TARGET_VAR)]) + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res1['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_b = get_values([b], q_res1) + q = gp2.to_sparql_filter_by_count_in_out_query( + values=values_b, count_node=d, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res2 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res2: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res2['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_d = get_values([d], q_res2) + q = gp3.to_sparql_filter_by_count_in_out_query( + values=values_d, count_node=f, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_startpoint_two_sided) + logger.debug(q) + try: + t, q_res3 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res3: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res3['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + values_t = { + (TARGET_VAR,): [(tup[1],) for tup in gp_.matching_node_pairs] + } + q = gp4.to_sparql_filter_by_count_in_out_query( + values=values_t, count_node=f, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_startpoint_two_sided) + logger.debug(q) + try: + t, q_res4 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res4: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not q_res4['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_three_hops( + q_res1, + q_res2, + q_res3, + q_res4, + gp_, + richtung, + gp_.matching_node_pairs, + a, + b, + c, + d, + e, + f, + g + ) + elif richtung == 4: + gp1 = GraphPattern([(SOURCE_VAR, a, b)]) + gp2 = GraphPattern([(b, c, d)]) + gp3 = GraphPattern([(f, e, d)]) + gp4 = GraphPattern([(TARGET_VAR, g, f)]) + + values_s = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + q = gp1.to_sparql_filter_by_count_in_out_query( + values=values_s, count_node=b, in_out=in_out, max_out=max_out, + max_in=max_in, limit=limit_next) + logger.debug(q) + try: + t, q_res1 = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not q_res1: + logger.debug('Die Query (s.o.) 
hat kein Ergebnis geliefert')
+ return []
+ elif not q_res1['results']['bindings']:
+ logger.debug('The query (see above) returned no bound variables')
+ return []
+
+ values_b = get_values([b], q_res1)
+ q = gp2.to_sparql_filter_by_count_in_out_query(
+ values=values_b, count_node=d, in_out=in_out, max_out=max_out,
+ max_in=max_in, limit=limit_endpoint_two_sided)
+ logger.debug(q)
+ try:
+ t, q_res2 = run_query(q)
+ except Exception:
+ logger.debug('The query (see above) failed')
+ return []
+ if not q_res2:
+ logger.debug('The query (see above) returned no result')
+ return []
+ elif not q_res2['results']['bindings']:
+ logger.debug('The query (see above) returned no bound variables')
+ return []
+
+ values_t = {
+ (TARGET_VAR,): [(tup[1],) for tup in gp_.matching_node_pairs]
+ }
+ q = gp4.to_sparql_filter_by_count_in_out_query(
+ values=values_t, count_node=f, in_out=in_out, max_out=max_out,
+ max_in=max_in, limit=limit_next)
+ logger.debug(q)
+ try:
+ t, q_res4 = run_query(q)
+ except Exception:
+ logger.debug('The query (see above) failed')
+ return []
+ if not q_res4:
+ logger.debug('The query (see above) returned no result')
+ return []
+ elif not q_res4['results']['bindings']:
+ logger.debug('The query (see above) returned no bound variables')
+ return []
+
+ values_f = get_values([f], q_res4)
+ q = gp3.to_sparql_filter_by_count_in_out_query(
+ values=values_f, count_node=d, in_out=in_out, max_out=max_out,
+ max_in=max_in, limit=limit_endpoint_two_sided)
+ logger.debug(q)
+ try:
+ t, q_res3 = run_query(q)
+ except Exception:
+ logger.debug('The query (see above) failed')
+ return []
+ if not q_res3:
+ logger.debug('The query (see above) returned no result')
+ return []
+ elif not q_res3['results']['bindings']:
+ logger.debug('The query (see above) returned no bound variables')
+ return []
+
+ gp_list = get_fixed_path_gp_three_hops(
+ q_res1,
+ q_res2,
+ q_res3,
+ q_res4,
+ gp_,
+ richtung,
+ gp_.matching_node_pairs,
+ a,
+ b,
+ c,
+ d,
+ e,
+ f,
+ g
+ )
+
+ return gp_list
+
+
+def get_fixed_path_gp_n_hops(
+ res_q, gp_, n, direct, stp, node, hn_ind, hop
+):
+ gp_list = []
+ res_rows_path = ['results', 'bindings']
+ bind = []
+ for res_q_i in res_q:
+ bind.append(sparql_json_result_bindings_to_rdflib(
+ get_path(res_q_i, res_rows_path, default=[]))
+ )
+ hit_paths = []
+ hit_paths_help = []
+
+ if hn_ind == 0:
+ # hit node is the source: start from the source hits and walk
+ # forward, extending every partial path whose last node matches
+ for row in bind[0]:
+ for mnp in stp:
+ if mnp[0] == (get_path(row, [node[0]])):
+ hit_paths.append([[
+ mnp[0],
+ get_path(row, [hop[0]]),
+ get_path(row, [node[1]])
+ ]])
+ for i in range(1, n+1):
+ for path in hit_paths:
+ for row in bind[i]:
+ if path[i-1][2] == get_path(row, [node[i]]):
+ path_h = path + [[
+ path[i-1][2],
+ get_path(row, [hop[i]]),
+ get_path(row, [node[i+1]])
+ ]]
+ hit_paths_help.append(path_h)
+ hit_paths = hit_paths_help
+ hit_paths_help = []
+
+ elif hn_ind == n+1:
+ # hit node is the target: start from the target hits and walk
+ # backwards towards the source, then reverse the collected paths
+ for row in bind[n]:
+ for mnp in stp:
+ if mnp[1] == (get_path(row, [node[n+1]])):
+ hit_paths.append([[
+ get_path(row, [node[n]]),
+ get_path(row, [hop[n]]),
+ mnp[1]
+ ]])
+ for i in range(n-1, -1, -1):
+ for path in hit_paths:
+ for row in bind[i]:
+ if path[(n-1)-i][0] == get_path(row, [node[i+1]]):
+ path_h = path + [[
+ get_path(row, [node[i]]),
+ get_path(row, [hop[i]]),
+ path[(n-1)-i][0]
+ ]]
+ hit_paths_help.append(path_h)
+ hit_paths = hit_paths_help
+ hit_paths_help = []
+ for path in hit_paths:
+ path.reverse()
+
+ else:
+ # hit node is in the middle: grow paths from both sides and join
+ hit_paths_l = []
+ hit_paths_r = []
+ # get the hits of hit_node to start from
+ for row_l in bind[hn_ind-1]:
+ for row_r in bind[hn_ind]:
+ if \
get_path(row_l, [node[hn_ind]]) == \ + get_path(row_r, [node[hn_ind]]): + hit_paths_l.append([[ + get_path(row_l, [node[hn_ind-1]]), + get_path(row_l, [hop[hn_ind-1]]), + get_path(row_l, [node[hn_ind]]) + ]]) + hit_paths_r.append([[ + get_path(row_r, [node[hn_ind]]), + get_path(row_r, [hop[hn_ind]]), + get_path(row_r, [node[hn_ind+1]]) + ]]) + # get the path from hit node to targets + for i in range(hn_ind+1, n+1): + for path in hit_paths_r: + for row in bind[i]: + if path[i-(hn_ind+1)][2] == get_path(row, [node[i]]): + path_h = path + [[ + path[i-(hn_ind+1)][2], + get_path(row, [hop[i]]), + get_path(row, [node[i+1]]) + ]] + hit_paths_help.append(path_h) + hit_paths_r = hit_paths_help + hit_paths_help = [] + # get the path from hit node to sources + for i in range(hn_ind, -1, -1): + for path in hit_paths_l: + for row in bind[i]: + if path[hn_ind-i][0] == get_path(row, [node[i+1]]): + path_h = path + [[ + get_path(row, [node[i]]), + get_path(row, [hop[i]]), + path[hn_ind-i][0] + ]] + hit_paths_help.append(path_h) + hit_paths_l = hit_paths_help + hit_paths_help = [] + # get the full path from source to target + for path_l in hit_paths_l: + path_l.reverse() + for path_r in hit_paths_r: + if path_l[hn_ind][2] == path_r[0][0]: + hit_paths.append(path_l + path_r) + # filter the paths, over stp-hits + + hit_paths = filter_stp_hits(hit_paths, stp) + + # Make Graph_Pattern_with fixed hops out of the found paths + for path in hit_paths: + gp_list.append( + GraphPattern( + chain( + GraphPattern([ + (node[i], path[i][1], node[i+1]) if direct(i) == 1 + else (node[i+1], path[i][1], node[i]) + for i in range(n+1) + ]), + gp_ + ) + ) + ) + + return gp_list + + +def filter_stp_hits( + hit_paths, stp +): + res = [] + for hit in hit_paths: + for mnp in stp: + if (mnp[0] == hit[0][0]) and (mnp[1] == hit[len(hit)-1][2]): + res.append(hit) + return res + + +def mutate_deep_narrow_n_hops( + gp_, n, max_out=None, max_in=None, in_out=None, direct=None +): + vars_ = gp_.vars_in_graph + if SOURCE_VAR not in vars_ and TARGET_VAR not in vars_: + logger.info('SOURCE or TARGET are not in gp: %s' % gp_) + return [] + if not gp_.matching_node_pairs: + logger.info( + 'No matching node pairs, cant get better through adding constraints' + ) + return [] + if n < 1: + logger.info('Cannot add less than one hop') + return [] + # setting up lists for nodes, hops, values, gp_helpers, query-results + node = [SOURCE_VAR] + for i in range(n): + node.append(gen_random_var()) + node.append(TARGET_VAR) + hop = [] + for i in range(n+1): + hop.append(gen_random_var()) + if direct is None or len(direct) != n+1: + logger.info('No direction chosen, or direction tuple with false length') + direct = [] + for i in range(n+1): + direct.append(0) + gp_helper = [] + for i in range(n+1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + values = [] + for i in range(n+2): + values.append({}) + values[0] = { + (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] + } + values[n+1] = { + (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] + } + res_q = [] + for i in range(n+1): + res_q.append({}) + + # selecting an random "hit_node" => Node to check the random hits + hit_node = random.choice(node) + hn_ind = node.index(hit_node) + + # TODO: use direct for cases in queriing + # Querieing + # From source to target if hit_node is target: + if hit_node 
== TARGET_VAR: + # Firing the queries for the first n-2 steps + for i in range(0, n): + if gp_helper[i][0][0] == node[i]: + q = gp_helper[i].to_sparql_filter_by_count_in_out_query( + values=values[i], count_node=node[i+1], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_next) + else: + q = gp_helper[i].to_sparql_filter_by_count_in_out_query( + values=values[i], count_node=node[i+1], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_subject_next) + logger.info(q) + try: + t, res_q[i] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[i+1] = get_values([node[i+1]], res_q[i]) + # Firing the last query for the target hits: + if gp_helper[n][0][0] == node[n-1]: + q = gp_helper[n].to_sparql_select_sample_query( + values=values[n], limit=limit_choose_endpoint) + else: + q = gp_helper[n].to_sparql_select_sample_query( + values=values[n], limit=limit_choose_subject_endpoint) + logger.info(q) + try: + t, res_q[n] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[n]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[n]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + # From target to source if hit_node is source: + elif hit_node == SOURCE_VAR: + # Firing the queries for the first n-2 steps + for i in range(n, 0, -1): + if gp_helper[i][0][0] == node[i+1]: + q = gp_helper[i].to_sparql_filter_by_count_in_out_query( + values=values[i+1], count_node=node[i], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_next) + else: + q = gp_helper[i].to_sparql_filter_by_count_in_out_query( + values=values[i+1], count_node=node[i], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_subject_next) + logger.info(q) + try: + t, res_q[i] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[i] = get_values([node[i]], res_q[i]) + # Firing the last query for the target hits: + if gp_helper[0][0][0] == node[1]: + q = gp_helper[0].to_sparql_select_sample_query( + values=values[1], limit=limit_choose_endpoint) + else: + q = gp_helper[0].to_sparql_select_sample_query( + values=values[1], limit=limit_choose_subject_endpoint) + logger.info(q) + try: + t, res_q[0] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[0]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[0]['results']['bindings']: + logger.info('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + + # From both sides to the hit_node: + else: + # firing the queries from source to the last node before hit_node + for i in range(0, hn_ind-1): + if gp_helper[i][0][0] == node[i]: + q = gp_helper[i].to_sparql_filter_by_count_in_out_query( + values=values[i], count_node=node[i+1], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_next) + else: + q = gp_helper[i].to_sparql_filter_by_count_in_out_query( + values=values[i], count_node=node[i+1], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_subject_next) + logger.info(q) + try: + t, res_q[i] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[i+1] = get_values([node[i+1]], res_q[i]) + # Firing the queries from target to the last node before hit node + for i in range(n, hn_ind, -1): + if gp_helper[i][0][0] == node[i+1]: + q = gp_helper[i].to_sparql_filter_by_count_in_out_query( + values=values[i+1], count_node=node[i], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_next) + else: + q = gp_helper[i].to_sparql_filter_by_count_in_out_query( + values=values[i+1], count_node=node[i], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_subject_next) + logger.info(q) + try: + t, res_q[i] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[i] = get_values([node[i]], res_q[i]) + # feuere die letzten beiden queries richtung hit_node ab. + # Dabei unterscheide nach Richtungen beider queries. + if ((gp_helper[hn_ind-1][0][0] == node[hn_ind-1]) and # hit is Object + (gp_helper[hn_ind][0][0] == node[hn_ind+1])): # hit is Object + q = gp_helper[hn_ind-1].to_sparql_filter_by_count_in_out_query( + values=values[hn_ind-1], count_node=node[hn_ind], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_endpoint_two_sided) + logger.info(q) + try: + t, res_q[hn_ind-1] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[hn_ind-1]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[hn_ind-1]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + q = gp_helper[hn_ind].to_sparql_filter_by_count_in_out_query( + values=values[hn_ind+1], count_node=node[hn_ind], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_endpoint_two_sided) + logger.info(q) + try: + t, res_q[hn_ind] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[hn_ind]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[hn_ind]['results']['bindings']: + logger.info('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + elif ((gp_helper[hn_ind-1][0][0] == node[hn_ind]) and # hit is Subject + (gp_helper[hn_ind][0][0] == node[hn_ind])): # hit is Subject + q = gp_helper[hn_ind-1].to_sparql_filter_by_count_in_out_query( + values=values[hn_ind-1], count_node=node[hn_ind], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_startpoint_two_sided) + logger.info(q) + try: + t, res_q[hn_ind-1] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[hn_ind-1]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[hn_ind-1]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + q = gp_helper[hn_ind].to_sparql_filter_by_count_in_out_query( + values=values[hn_ind+1], count_node=node[hn_ind], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_startpoint_two_sided) + logger.info(q) + try: + t, res_q[hn_ind] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[hn_ind]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[hn_ind]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + elif ((gp_helper[hn_ind-1][0][0] == node[hn_ind-1]) and # hit is Object + (gp_helper[hn_ind][0][0] == node[hn_ind])): # hit is Subject + q = gp_helper[hn_ind-1].to_sparql_filter_by_count_in_out_query( + values=values[hn_ind-1], count_node=node[hn_ind], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_obj_to_subj) + logger.info(q) + try: + t, res_q[hn_ind-1] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[hn_ind-1]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[hn_ind-1]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + q = gp_helper[hn_ind].to_sparql_filter_by_count_in_out_query( + values=values[hn_ind+1], count_node=node[hn_ind], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_subj_to_obj) + logger.info(q) + try: + t, res_q[hn_ind] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[hn_ind]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[hn_ind]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + elif ((gp_helper[hn_ind-1][0][0] == node[hn_ind]) and # hit is Subject + (gp_helper[hn_ind][0][0] == node[hn_ind+1])): # hit is Object + q = gp_helper[hn_ind-1].to_sparql_filter_by_count_in_out_query( + values=values[hn_ind-1], count_node=node[hn_ind], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_subj_to_obj) + logger.info(q) + try: + t, res_q[hn_ind-1] = run_query(q) + except: + logger.info('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[hn_ind-1]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[hn_ind-1]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + q = gp_helper[hn_ind].to_sparql_filter_by_count_in_out_query( + values=values[hn_ind+1], count_node=node[hn_ind], in_out=in_out, + max_out=max_out, max_in=max_in, limit=limit_obj_to_subj) + logger.info(q) + try: + t, res_q[hn_ind] = run_query(q) + except: + logger.info('Die Query (s.o.) 
hat nicht geklappt') + return [] + if not res_q[hn_ind]: + logger.info('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[hn_ind]['results']['bindings']: + logger.info('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + + gp_list = get_fixed_path_gp_n_hops( + res_q, gp_, n, direct, gp_.matching_node_pairs, node, hn_ind, hop + ) + + return gp_list + + +# erste Version, komplett straight forward +def mutate_deep_narrow( + gp_, gtps, n, direct=None, gp_in=False +): + node = [SOURCE_VAR] + for i in range(n): + node.append(Variable('n%i' % i)) + node.append(TARGET_VAR) + hop = [] + for i in range(n + 1): + hop.append(Variable('p%i' % i)) + if direct is None or len(direct) != n + 1: + logger.debug( + 'No direction chosen, or direction tuple with false length' + ) + direct = [] + for i in range(n + 1): + direct.append(0) + gp_helper = [] + for i in range(n + 1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + values = {} + values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]} + values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]} + values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps} + res_q = [] + for i in range(n + 1): + res_q.append({}) + + # Queries für die Schritte + valueblocks = {} + valueblocks[SOURCE_VAR] = values[SOURCE_VAR] + for i in range(n+1): + q = gp_.to_sparql_useful_path_query( + hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[hop[i]] = get_values([hop[i]], res_q[i]) + valueblocks[hop[i]] = { + (hop[i],): random.sample( + values[hop[i]][(hop[i],)], + min(10, len(values[hop[i]][(hop[i],)])) + ) + } + + # Query fürs Ergebnis + gp_help = GraphPattern([ + (node[i], hop[i], node[i+1]) if direct[i] == 1 + else (node[i+1], hop[i], node[i]) + for i in range(n+1) + ]) + # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden + # werden + del valueblocks[SOURCE_VAR] + valueblocks['st'] = values['st'] + q = gp_.to_sparql_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + logger.debug(q) + try: + t, res_q_inst = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q_inst: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q_inst['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + res = [] + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(res_q_inst, res_rows_path, default=[]) + ) + for row in bind: + gp_res = GraphPattern([ + (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1 + else (node[i + 1], get_path(row, [hop[i]]), node[i]) + for i in range(n + 1) + ]) + res.append(gp_res) + + return res + + +# zweite Version: Query für letzten step bekommt schon die Targets +def mutate_deep_narrow_2( + gp_, gtps, n, direct=None, gp_in=False +): + node = [SOURCE_VAR] + for i in range(n): + node.append(Variable('n%i' % i)) + node.append(TARGET_VAR) + hop = [] + for i in range(n + 1): + hop.append(Variable('p%i' % i)) + if direct is None or len(direct) != n + 1: + logger.debug( + 'No direction chosen, or direction tuple with false length' + ) + direct = [] + for i in range(n + 1): + direct.append(0) + gp_helper = [] + for i in range(n + 1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + values = {} + values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]} + values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]} + values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps} + res_q = [] + for i in range(n + 1): + res_q.append({}) + + # Queries für die Schritte + valueblocks = {} + valueblocks[SOURCE_VAR] = values[SOURCE_VAR] + for i in range(n): + q = gp_.to_sparql_useful_path_query( + hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[hop[i]] = get_values([hop[i]], res_q[i]) + valueblocks[hop[i]] = { + (hop[i],): random.sample( + values[hop[i]][(hop[i],)], + min(10, len(values[hop[i]][(hop[i],)])) + ) + } + + # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden + # werden + del valueblocks[SOURCE_VAR] + valueblocks['st'] = values['st'] + q = gp_.to_sparql_useful_path_inst_query( + hop, valueblocks, gp_helper, gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q_inst = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q_inst: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q_inst['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + res = [] + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(res_q_inst, res_rows_path, default=[]) + ) + for row in bind: + gp_res = GraphPattern([ + (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1 + else (node[i + 1], get_path(row, [hop[i]]), node[i]) + for i in range(n + 1) + ]) + res.append(gp_res) + + return res + + +# dritte Version: BIDI straight forward +def mutate_deep_narrow_3( + gp_, gtps, n, direct=None, gp_in=False +): + node = [SOURCE_VAR] + for i in range(n): + node.append(Variable('n%i' % i)) + node.append(TARGET_VAR) + hop = [] + for i in range(n + 1): + hop.append(Variable('p%i' % i)) + if direct is None or len(direct) != n + 1: + logger.debug( + 'No direction chosen, or direction tuple with false length' + ) + direct = [] + for i in range(n + 1): + direct.append(0) + gp_helper = [] + for i in range(n + 1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + values = {} + values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]} + values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]} + values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps} + res_q = [] + for i in range(n+1): + res_q.append({}) + + # Queries für die Schritte + valueblocks_s = {} + valueblocks_s[SOURCE_VAR] = values[SOURCE_VAR] + valueblocks_t = {} + valueblocks_t[TARGET_VAR] = values[TARGET_VAR] + for i in range(int((n / 2) + 1)): + q = gp_.to_sparql_useful_path_query( + hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[hop[i]] = get_values([hop[i]], res_q[i]) + valueblocks_s[hop[i]] = { + (hop[i],): random.sample( + values[hop[i]][(hop[i],)], + min(10, len(values[hop[i]][(hop[i],)])) + ) + } + if n-i != i: + q = gp_.to_sparql_useful_path_query( + hop[n-i], + node[n-i], + valueblocks_t, + gp_helper[n-i:], + startvar=TARGET_VAR, + gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[n-i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[n-i]: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[n-i]['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + values[hop[n-i]] = get_values([hop[n-i]], res_q[n-i]) + valueblocks_t[hop[n-i]] = { + (hop[n-i],): random.sample( + values[hop[n-i]][(hop[n-i],)], + min(10, len(values[hop[n-i]][(hop[n-i],)])) + ) + } + + # Query fürs Ergebnis + gp_help = GraphPattern([ + (node[i], hop[i], node[i+1]) if direct[i] == 1 + else (node[i+1], hop[i], node[i]) + for i in range(n+1) + ]) + # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden + # werden + valueblocks = {} + for key in valueblocks_s: + if key is not SOURCE_VAR: + valueblocks[key] = valueblocks_s[key] + for key in valueblocks_t: + if key is not TARGET_VAR: + valueblocks[key] = valueblocks_t[key] + valueblocks['st'] = values['st'] + q = gp_.to_sparql_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + logger.debug(q) + try: + t, res_q_inst = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q_inst: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q_inst['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + res = [] + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(res_q_inst, res_rows_path, default=[]) + ) + for row in bind: + gp_res = GraphPattern([ + (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1 + else (node[i + 1], get_path(row, [hop[i]]), node[i]) + for i in range(n + 1) + ]) + res.append(gp_res) + + return res + + +# vierte Version: BIDI with instantiation in last step +def mutate_deep_narrow_4( + gp_, gtps, n, direct=None, gp_in=False +): + node = [SOURCE_VAR] + for i in range(n): + node.append(Variable('n%i' % i)) + node.append(TARGET_VAR) + hop = [] + for i in range(n + 1): + hop.append(Variable('p%i' % i)) + if direct is None or len(direct) != n + 1: + logger.debug( + 'No direction chosen, or direction tuple with false length' + ) + direct = [] + for i in range(n + 1): + direct.append(0) + gp_helper = [] + for i in range(n + 1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + values = {} + values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]} + values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]} + values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps} + res_q = [] + for i in range(n+1): + res_q.append({}) + + # Queries für die Schritte + valueblocks_s = {} + valueblocks_s[SOURCE_VAR] = values[SOURCE_VAR] + valueblocks_t = {} + valueblocks_t[TARGET_VAR] = values[TARGET_VAR] + for i in range(int((n / 2) + 1)): + if i < int(n/2): + q = gp_.to_sparql_useful_path_query( + hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.debug('Die Query (s.o.) 
hat keine gebundenen Variablen') + return [] + values[hop[i]] = get_values([hop[i]], res_q[i]) + valueblocks_s[hop[i]] = { + (hop[i],): random.sample( + values[hop[i]][(hop[i],)], + min(10, len(values[hop[i]][(hop[i],)])) + ) + } + if n-i > i: + q = gp_.to_sparql_useful_path_query( + hop[n-i], + node[n-i], + valueblocks_t, + gp_helper[n-i:], + startvar=TARGET_VAR, + gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[n-i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[n-i]: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[n-i]['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[hop[n-i]] = get_values([hop[n-i]], res_q[n-i]) + valueblocks_t[hop[n-i]] = { + (hop[n-i],): random.sample( + values[hop[n-i]][(hop[n-i],)], + min(10, len(values[hop[n-i]][(hop[n-i],)])) + ) + } + + # Query fürs Ergebnis + # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden + # werden + valueblocks = {} + for key in valueblocks_s: + if key is not SOURCE_VAR: + valueblocks[key] = valueblocks_s[key] + for key in valueblocks_t: + if key is not TARGET_VAR: + valueblocks[key] = valueblocks_t[key] + valueblocks['st'] = values['st'] + q = gp_.to_sparql_useful_path_inst_query( + hop, valueblocks, gp_helper, gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q_inst = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q_inst: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q_inst['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + res = [] + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(res_q_inst, res_rows_path, default=[]) + ) + for row in bind: + gp_res = GraphPattern([ + (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1 + else (node[i + 1], get_path(row, [hop[i]]), node[i]) + for i in range(n + 1) + ]) + res.append(gp_res) + + return res + + +# fünfte Version: filtern nach Count +def mutate_deep_narrow_5( + gp_, gtps, n, direct=None, gp_in=False +): + node = [SOURCE_VAR] + for i in range(n): + node.append(Variable('n%i' % i)) + node.append(TARGET_VAR) + hop = [] + for i in range(n + 1): + hop.append(Variable('p%i' % i)) + if direct is None or len(direct) != n + 1: + logger.debug( + 'No direction chosen, or direction tuple with false length' + ) + direct = [] + for i in range(n + 1): + direct.append(0) + gp_helper = [] + for i in range(n + 1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + values = {} + values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]} + values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]} + values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps} + res_q = [] + for i in range(n + 1): + res_q.append({}) + + # Queries für die Schritte + valueblocks = {} + valueblocks[SOURCE_VAR] = values[SOURCE_VAR] + for i in range(n+1): + q = gp_.to_sparql_useful_path_query( + hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.debug('Die Query (s.o.) 
hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[hop[i]] = get_values([hop[i]], res_q[i]) + valueblocks[hop[i]] = get_weighted_sample( + hop[i], Variable('avgc'+''.join(node[i+1])), res_q[i] + ) + + # Query fürs Ergebnis + gp_help = GraphPattern([ + (node[i], hop[i], node[i+1]) if direct[i] == 1 + else (node[i+1], hop[i], node[i]) + for i in range(n+1) + ]) + # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden + # werden + del valueblocks[SOURCE_VAR] + valueblocks['st'] = values['st'] + q = gp_.to_sparql_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + logger.debug(q) + try: + t, res_q_inst = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q_inst: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q_inst['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + res = [] + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(res_q_inst, res_rows_path, default=[]) + ) + for row in bind: + gp_res = GraphPattern([ + (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1 + else (node[i + 1], get_path(row, [hop[i]]), node[i]) + for i in range(n + 1) + ]) + res.append(gp_res) + + return res + + +# sechste Version: Query für letzten step bekommt schon die Targets +# => Precheck feasible? +def mutate_deep_narrow_6( + gp_, gtps, n, direct=None, gp_in=False +): + node = [SOURCE_VAR] + for i in range(n): + node.append(Variable('n%i' % i)) + node.append(TARGET_VAR) + hop = [] + for i in range(n + 1): + hop.append(Variable('p%i' % i)) + if direct is None or len(direct) != n + 1: + logger.debug( + 'No direction chosen, or direction tuple with false length' + ) + direct = [] + for i in range(n + 1): + direct.append(0) + gp_helper = [] + for i in range(n + 1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + values = {} + values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]} + values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]} + values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps} + res_q = [] + for i in range(n + 1): + res_q.append({}) + + # Pre-check: + gp_help = GraphPattern([ + (node[i], hop[i], node[i+1]) if direct[i] == 1 + else (node[i+1], hop[i], node[i]) + for i in range(n+1) + ]) + q = gp_help.to_sparql_precheck_query(values['st'], gp_in=gp_in) + logger.debug(q) + try: + t, res_q = run_query(q) + except: + logger.info('Pre-Check hat nicht geklappt') + if not res_q: + logger.info('Pre-Check hat kein Ergebnis') + elif not res_q['results']['bindings']: + logger.info('Pre-Check hat keine gebundenen Variablen') + else: + logger.info('Pre-Check hat einen Treffer') + + # Queries für die Schritte + valueblocks = {} + valueblocks[SOURCE_VAR] = values[SOURCE_VAR] + for i in range(n): + q = gp_.to_sparql_useful_path_query( + hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.debug('Die Query (s.o.) 
hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[hop[i]] = get_values([hop[i]], res_q[i]) + valueblocks[hop[i]] = { + (hop[i],): random.sample( + values[hop[i]][(hop[i],)], + min(10, len(values[hop[i]][(hop[i],)])) + ) + } + + # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden + # werden + del valueblocks[SOURCE_VAR] + valueblocks['st'] = values['st'] + q = gp_.to_sparql_useful_path_inst_query( + hop, valueblocks, gp_helper, gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q_inst = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q_inst: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q_inst['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + res = [] + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(res_q_inst, res_rows_path, default=[]) + ) + for row in bind: + gp_res = GraphPattern([ + (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1 + else (node[i + 1], get_path(row, [hop[i]]), node[i]) + for i in range(n + 1) + ]) + res.append(gp_res) + + return res + + +# siebte Version: BIDI with instantiation in last step + ws-sampling +def mutate_deep_narrow_7( + gp_, gtps, n, direct=None, gp_in=False +): + node = [SOURCE_VAR] + for i in range(n): + node.append(Variable('n%i' % i)) + node.append(TARGET_VAR) + hop = [] + for i in range(n + 1): + hop.append(Variable('p%i' % i)) + if direct is None or len(direct) != n + 1: + logger.debug( + 'No direction chosen, or direction tuple with false length' + ) + direct = [] + for i in range(n + 1): + direct.append(0) + gp_helper = [] + for i in range(n + 1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + values = {} + values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]} + values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]} + values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps} + res_q = [] + for i in range(n+1): + res_q.append({}) + + # Queries für die Schritte + valueblocks_s = {} + valueblocks_s[SOURCE_VAR] = values[SOURCE_VAR] + valueblocks_t = {} + valueblocks_t[TARGET_VAR] = values[TARGET_VAR] + for i in range(int((n / 2) + 1)): + if i < int(n/2): + q = gp_.to_sparql_useful_path_query( + hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[i]: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q[i]['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[hop[i]] = get_values([hop[i]], res_q[i]) + valueblocks_s[hop[i]] = get_weighted_sample( + hop[i], Variable('avgc' + ''.join(node[i + 1])), res_q[i] + ) + if n-i > i: + q = gp_.to_sparql_useful_path_query( + hop[n-i], + node[n-i], + valueblocks_t, + gp_helper[n-i:], + startvar=TARGET_VAR, + gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q[n-i] = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q[n-i]: + logger.debug('Die Query (s.o.) 
hat kein Ergebnis geliefert') + return [] + elif not res_q[n-i]['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + values[hop[n-i]] = get_values([hop[n-i]], res_q[n-i]) + valueblocks_t[hop[n-i]] = get_weighted_sample( + hop[n-i], Variable('avgc' + ''.join(node[n-i])), res_q[n-i] + ) + + # Query fürs Ergebnis + # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden + # werden + valueblocks = {} + for key in valueblocks_s: + if key is not SOURCE_VAR: + valueblocks[key] = valueblocks_s[key] + for key in valueblocks_t: + if key is not TARGET_VAR: + valueblocks[key] = valueblocks_t[key] + valueblocks['st'] = values['st'] + q = gp_.to_sparql_useful_path_inst_query( + hop, valueblocks, gp_helper, gp_in=gp_in + ) + logger.debug(q) + try: + t, res_q_inst = run_query(q) + except: + logger.debug('Die Query (s.o.) hat nicht geklappt') + return [] + if not res_q_inst: + logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') + return [] + elif not res_q_inst['results']['bindings']: + logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') + return [] + res = [] + res_rows_path = ['results', 'bindings'] + bind = sparql_json_result_bindings_to_rdflib( + get_path(res_q_inst, res_rows_path, default=[]) + ) + for row in bind: + gp_res = GraphPattern([ + (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1 + else (node[i + 1], get_path(row, [hop[i]]), node[i]) + for i in range(n + 1) + ]) + res.append(gp_res) + + return res + + +def main(): + ground_truth_pairs = get_semantic_associations() + ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs) + # ground_truth_pairs = ground_truth_pairs[:100] + gtp_scores = GTPScores(ground_truth_pairs) + res = [] + # key = random.choice(gp_found.keys()) + # for i in range(100): + # # ground_truth_pairs = random.sample(ground_truth_pairs, 200) + # gp_ = GraphPattern([]) + # # gp_ = gp_found[key] + # res_= mutate_deep_narrow_5(gp_, ground_truth_pairs, 2, gp_in=False) + # res.append(res_) + # logger.info(i) + # if res_: + # logger.info(res_) + # + # logger.info(res) + for key in gp_found.keys(): + gp_ = gp_found[key] + eval_gp(gtp_scores, gp_) + for i in range(100): + res_ = mutate_deep_narrow_4( + gp_, gp_.matching_node_pairs, 6, gp_in=False + ) + res.append(res_) + logger.info((i, key)) + if res_: + logger.info(res_) + + # res_eval=[] + # res = [] + # + # max_out = 65 + # max_in = 40 + # in_out = 'out' + # richtung = 2 + # ground_truth_pairs = get_semantic_associations() + # ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs) + # # ground_truth_pairs = ground_truth_pairs[0:200] + # gtp_scores = GTPScores(ground_truth_pairs) + # gp = gp_found['140'] + # eval_gp(gtp_scores, gp) + # + # for i in range(20): + # res.append(mutate_deep_narrow_n_hops(gp, 2, max_out=max_out, in_out=in_out)) + # + # logger.info(res) + # + # durchgaenge = [] + # + # for richtung in range(1, 9): + # for max_out in [5, 10, 20, 30, 40, 50, 65, 75, 85, 100, 200]: + # for key in gp_found.keys(): + # durchgaenge.append((richtung, max_out, key)) + # + # random.shuffle(durchgaenge) + # + # for (richtung, max_out, key) in durchgaenge: + # logger.info('Durchgang: richtung = %s, max_out = %s, gp.key = %s' % + # (richtung, max_out, key) + # ) + # ground_truth_pairs = get_semantic_associations() + # ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs) + # # ground_truth_pairs = random.sample(ground_truth_pairs, 100) + # gtp_scores = GTPScores(ground_truth_pairs) + # gp = 
gp_found[key] + # eval_gp(gtp_scores, gp) + # + # res_gp = mutate_deep_narrow_two_hops( + # gp, + # max_out=max_out, + # max_in=max_in, + # in_out=in_out, + # richtung=richtung + # ) + # res_gp.append(gp) + # res_eval = eval_gp_list(gtp_scores, res_gp) + # gp_eval = res_eval[-1] + # res_eval = sorted( + # res_eval[:-1], key=lambda gp_: -gp_.fitness.values.score + # ) + # if res_eval: + # logger.info(max_out) + # print_graph_pattern(gp) + # for gp_ in res_eval: + # print_graph_pattern(gp_) + # res.append((richtung, key, max_out, gp_eval, res_eval)) + + # f = open('store.pckl', 'wb') + # pickle.dump(res, f) + # f.close() + + # in der Konsole das res nochmal anschauen: + # import pickle + # f = open('tests/store.pckl', 'rb') + # res = pickle.load(f) + # f.close() + + # print('HERE STARTS THE RES_PRINTING:') + # for r in res: + # print('richtung %s, key %s, max_out %s\n' % r[0:3]) + # print('Original GP:\n') + # print_graph_pattern(r[3], print_matching_node_pairs=0) + # print('Top 3 found (if 3 where found, else all found) GP:\n') + # for i in range(min(3, len(r[4]))): + # print_graph_pattern(r[4][i], print_matching_node_pairs=0) + + # ground_truth_pairs = get_semantic_associations() + # ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs) + # ground_truth_pairs = random.sample(ground_truth_pairs, 100) + # gtp_scores = GTPScores(ground_truth_pairs) + # gp = gp_found[random.choice(gp_found.keys())] + # + # max_out = 50 + # max_in = 40 + # in_out = 'out' + # + # res = mutate_deep_narrow_one_hop_s_t_without_direction( + # gp, + # ground_truth_pairs, + # max_out=max_out, + # max_in=max_in, + # in_out=in_out + # ) + # res.append(gp) + # res_eval = eval_gp_list(gtp_scores, res) + # gp_eval = res_eval[-1] + # res_eval = sorted(res_eval[:-1], key=lambda gp_: -gp_.fitness.values.score) + # + # print_graph_pattern(gp_eval) + # for gp_ in res_eval: + # print_graph_pattern(gp_) + + # # Zählfelder für die Statistik (Zugriff über max_in_out) + # # durchschnittliche Anzahl der zurückgegebenen pattern + # avg_num_pat = {} + # # maximal zurückgegebene pattern + # max_num_pat = {} + # # durchschnittlicher Score aller zurückgegebenen pattern + # avg_score_all_pat = {} + # # durchschnittlicher Score des besten zurückgegegebenen pattern + # # (wenn vorhanden) + # avg_score_best_pat = {} + # # druchschnittlicher Score des besten zurückgegebenen patterns + # # (0 wenn keins vorhanden) + # avg_score_best_pat_pun = {} + # # maximaler Score eines zurückgegebenen patterns + # max_score_ovrall = {} + # # Wie oft wurde kein pattern zurückgegeben + # num_no_pattern = {} + # # durchschnittliche abweichung des besten patterns vom Score des + # # Ausgangspatterns, wenn vorhanden + # avg_diff_all_pat = {} + # # durchschnittliche Abweichung vom Score des Ausgangspatterns, + # # wenn vorhanden + # avg_diff_best_pat = {} + # # aufaddierter score von Durchgängen ohne pattern + # punish_avg_diff_best_pat = {} + # # aufaddierter score von Durchgängen ohne pattern mal der durchschnittlichen + # # Anzahl zurückgegebener pattern + # punish_avg_diff_all_pat = {} + # # durchschnittliche Abweichung des besten patterns vom score des + # # Ausgangspatterns mit Strafe für gar kein pattern + # avg_diff_all_pat_punished = {} + # # durchschnittliche Abweichung vom Score des Ausgangspatterns, mit Strafe + # # für gar kein pattern + # avg_diff_best_pat_punished = {} + # # die fünf besten (am stärksten verbessernden) pattern + # five_best_pattern = {} + # + # max_out_steps = [10, 15, 20, 25, 30, 40, 50, 75, 100] + # + # for j in 
max_out_steps: + # avg_num_pat[j] = 0 + # max_num_pat[j] = 0 + # avg_score_all_pat[j] = 0 + # avg_score_best_pat[j] = 0 + # avg_score_best_pat_pun[j] = 0 + # max_score_ovrall[j] = 0 + # num_no_pattern[j] = 0 + # avg_diff_all_pat[j] = 0 + # avg_diff_best_pat[j] = 0 + # punish_avg_diff_best_pat[j] = 0 + # punish_avg_diff_all_pat[j] = 0 + # avg_diff_all_pat_punished[j] = 0 + # avg_diff_best_pat_punished[j] = 0 + # five_best_pattern[j] = [] + # + # reps = 50 + # + # for i in range(reps): + # ground_truth_pairs = get_semantic_associations() + # ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs) + # ground_truth_pairs = random.sample(ground_truth_pairs, 100) + # gtp_scores = GTPScores(ground_truth_pairs) + # gp = gp_found[random.choice(gp_found.keys())] + # for j in max_out_steps: + # res = mutate_deep_narrow_one_hop_s_t_without_direction( + # gp, ground_truth_pairs, max_out=j, in_out='out' + # ) # TODO: warum kommt oben None rein??? + # res.append(gp) + # res_eval = eval_gp_list(gtp_scores, res) + # gp_eval = res_eval[-1] + # res_eval = sorted( + # res_eval[:-1], key=lambda gp_: -gp_.fitness.values.score + # ) + # + # # Statistik: + # avg_num_pat[j] = avg_num_pat[j] + len(res_eval) / reps + # if len(res_eval) > max_num_pat[j]: + # max_num_pat[j] = len(res_eval) + # for gp_ in res_eval: + # avg_score_all_pat[j] = avg_score_all_pat[j] + \ + # gp_.fitness.values.score / \ + # (len(res_eval) * reps) + # if res_eval: + # avg_score_best_pat[j] = avg_score_best_pat[j] + \ + # res_eval[0].fitness.values.score + # if res_eval: + # if res_eval[0].fitness.values.score > max_score_ovrall[j]: + # max_score_ovrall[j] = res_eval[0].fitness.values.score + # if len(res_eval) == 0: + # num_no_pattern[j] = num_no_pattern[j] + 1 + # if res_eval: + # avg_diff_all_pat[j] = avg_diff_all_pat[j] + \ + # (res_eval[0].fitness.values.score - + # gp_eval.fitness.values.score) / \ + # reps + # for gp_ in res_eval: + # avg_diff_best_pat[j] = avg_diff_best_pat[j] + \ + # (gp_.fitness.values.score - + # gp_eval.fitness.values.score) / \ + # (len(res_eval) * reps) + # if not res_eval: + # punish_avg_diff_best_pat[j] = punish_avg_diff_best_pat[j] + \ + # gp_eval.fitness.values.score + # if res_eval: + # if len(five_best_pattern[j]) < 5: + # five_best_pattern[j].append(( + # res_eval[0].fitness.values.score - + # gp_eval.fitness.values.score, + # res_eval[0], + # gp_eval + # )) + # five_best_pattern[j] = sorted( + # five_best_pattern[j], + # key=lambda tup_: -tup_[0] + # ) + # else: + # five_best_pattern[j][4] = ( + # res_eval[0].fitness.values.score - + # gp_eval.fitness.values.score, + # res_eval[0], + # gp_eval + # ) + # five_best_pattern[j] = sorted( + # five_best_pattern[j], + # key=lambda tup_: -tup_[0] + # ) + # logger.info('Runde %s, min_max = %s' % (i, j)) + # print_graph_pattern(gp) + # if res_eval: + # print_graph_pattern(res_eval[0]) + # + # # print out the five best patterns per min_max: + # logger.info(' The five best new patterns (per min_max): ') + # for j in max_out_steps: + # for i in range(len(five_best_pattern[j])): + # print('min_max: %s\n' % j) + # print('Differenz: %s\n' % five_best_pattern[j][i][0]) + # print_graph_pattern(five_best_pattern[j][i][1]) + # print_graph_pattern(five_best_pattern[j][i][2]) + # + # # more statistics + # for j in max_out_steps: + # avg_score_best_pat_pun[j] = avg_score_best_pat[j] / reps + # if reps - num_no_pattern[j]: + # avg_score_best_pat[j] = avg_score_best_pat[j] / \ + # (reps - num_no_pattern[j]) + # else: + # avg_score_best_pat = -1 + # 
punish_avg_diff_all_pat[j] = punish_avg_diff_best_pat[j] * \
+    #                                     avg_num_pat[j]
+    #         avg_diff_all_pat_punished[j] = avg_diff_all_pat[j] - \
+    #                                        punish_avg_diff_best_pat[j]
+    #         avg_diff_best_pat_punished[j] = avg_diff_best_pat[j] - \
+    #                                         punish_avg_diff_all_pat[j]
+    #
+    #     # print the statistics
+    #     logger.info('min_max: %s\n'
+    #                 'avg_num_pat: %s\n'
+    #                 'max_num_pat: %s\n'
+    #                 'avg_score_all_pat: %s\n'
+    #                 'avg_score_best_pat: %s\n'
+    #                 'avg_score_best_pat_pun: %s\n'
+    #                 'max_score_ovrall: %s\n'
+    #                 'num_no_pattern: %s\n'
+    #                 'avg_diff_all_pat: %s\n'
+    #                 'avg_diff_best_pat: %s\n'
+    #                 'punish_avg_diff_best_pat: %s\n'
+    #                 'punish_avg_diff_all_pat: %s\n'
+    #                 'avg_diff_all_pat_punished: %s\n'
+    #                 'avg_diff_best_pat_punished: %s\n' % (
+    #                     ' '.join([str(x) for x in max_out_steps]),
+    #                     ' '.join([str(avg_num_pat[x]) for x in max_out_steps]),
+    #                     ' '.join([str(max_num_pat[x]) for x in max_out_steps]),
+    #                     ' '.join([str(avg_score_all_pat[x]) for x in max_out_steps]),
+    #                     ' '.join([str(avg_score_best_pat[x]) for x in max_out_steps]),
+    #                     ' '.join(
+    #                         [str(avg_score_best_pat_pun[x]) for x in max_out_steps]
+    #                     ),
+    #                     ' '.join([str(max_score_ovrall[x]) for x in max_out_steps]),
+    #                     ' '.join([str(num_no_pattern[x]) for x in max_out_steps]),
+    #                     ' '.join([str(avg_diff_all_pat[x]) for x in max_out_steps]),
+    #                     ' '.join([str(avg_diff_best_pat[x]) for x in max_out_steps]),
+    #                     ' '.join(
+    #                         [str(punish_avg_diff_best_pat[x]) for x in max_out_steps]
+    #                     ),
+    #                     ' '.join(
+    #                         [str(punish_avg_diff_all_pat[x]) for x in max_out_steps]
+    #                     ),
+    #                     ' '.join(
+    #                         [str(avg_diff_all_pat_punished[x]) for x in max_out_steps]
+    #                     ),
+    #                     ' '.join(
+    #                         [str(avg_diff_best_pat_punished[x]) for x in max_out_steps]
+    #                     )
+    #                 ))
+    #
+    #     # TODO: find the bug that makes the difference of the gp scores in
+    #     # five_best_patterns wrong
+    #
+    #     res = res[0:100]
+    #     for res_ in res:
+    #         # print('max_out:' + str(res_[1]))
+    #         print_graph_pattern(res_)
+    #
+    #     # TODO: also bind the second query to SOURCE and TARGET and include
+    #     # gp in the query; then searching for the hits can be skipped ?!
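+    # A minimal, hypothetical driver sketch for re-running the experiment
+    # above by hand (only names already used in this module: gp_found,
+    # eval_gp, print_graph_pattern, mutate_deep_narrow_4):
+    #
+    #   gtps = get_semantic_associations()
+    #   gtps, _ = split_training_test_set(gtps)
+    #   gtp_scores = GTPScores(gtps)
+    #   gp_ = gp_found[random.choice(gp_found.keys())]
+    #   eval_gp(gtp_scores, gp_)
+    #   for gp_res in mutate_deep_narrow_4(
+    #           gp_, gp_.matching_node_pairs, 2, gp_in=False):
+    #       print_graph_pattern(gp_res)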
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/test_sampling.py b/tests/test_sampling.py
index c0afe08..044449d 100644
--- a/tests/test_sampling.py
+++ b/tests/test_sampling.py
@@ -8,6 +8,7 @@
 """
 
 import logging
+import random
 from collections import defaultdict
 from collections import OrderedDict
 from os import getenv
@@ -41,8 +42,8 @@
 logger = logging.getLogger(__name__)
 
 sparql = SPARQLWrapper.SPARQLWrapper(SPARQL_ENDPOINT)
-#sparql = SPARQLWrapper.SPARQLWrapper(
-#    getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql'))
+# sparql = SPARQLWrapper.SPARQLWrapper(
+#     getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql'))
 try:
     timeout = max(5, calibrate_query_timeout(sparql))  # 5s for warmup
 except IOError:
@@ -113,7 +114,7 @@ ground_truth_pairs_2 = get_semantic_associations()
 ground_truth_pairs_2, _ = split_training_test_set(ground_truth_pairs_2)
-ground_truth_pairs_2 = ground_truth_pairs_2[1:100]
+ground_truth_pairs_2 = random.sample(ground_truth_pairs_2, 100)
 
 ground_truth_pairs_3 = [
     (dbp['Barrister'], dbp['Law']),
@@ -132,7 +133,104 @@
 gtp_scores_4 = GTPScores(ground_truth_pairs_4)
 
 
-def test_steps(gtps):
+def test_count(gtps, max_out):
+    # values = {(SOURCE_VAR, TARGET_VAR): gtps}  better: only the sources here
+    source_list = [(stp[0], ) for stp in gtps]
+    values = {(SOURCE_VAR, ): source_list}
+    gp1 = GraphPattern([(SOURCE_VAR, a, b)])
+    gp2 = GraphPattern([(b, c, TARGET_VAR)])
+    # SPARQL query that randomly samples over a var from gp1
+    q = gp1.to_sparql_filter_by_count_out_query(
+        values=values, count_node=b, max_out=max_out, limit=200)
+    logger.info(q)
+    t, q_res1 = run_query(q)
+    logger.info(q_res1)
+    # create b_list, in which the results for b are "stored"
+    # TODO: turn this into a method that returns the found bindings of the
+    # desired variables as a list of tuples.
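+    # One possible shape for that helper, as a rough sketch (the name
+    # bindings_to_tuple_list and its exact signature are assumptions, it
+    # does not exist in this module yet):
+    #
+    #   def bindings_to_tuple_list(q_res, variables):
+    #       rows = sparql_json_result_bindings_to_rdflib(
+    #           get_path(q_res, ['results', 'bindings'], default=[]))
+    #       return [tuple(get_path(row, [v]) for v in variables)
+    #               for row in rows]
+    #
+    # b_list below could then simply be bindings_to_tuple_list(q_res1, [b]).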
+ res_rows_path = ['results', 'bindings'] + bind1 = sparql_json_result_bindings_to_rdflib( + get_path(q_res1, res_rows_path, default=[]) + ) + b_list = [] + for row in bind1: + x = get_path(row, [b]) + y = (x, ) + b_list.append(y) + logger.info('orig query took %.4f s, result:\n%s\n', t, b_list) + b_list[:] = [b_l for b_l in b_list if not list_remove_bool(b_l[0])] + b_list = list(set(b_list)) + # Values für die nächste query: b_list + values = {(b, ): b_list} + # Query die über eine var aus gp2 random samplet mit values aus b_list + q = gp2.to_sparql_select_sample_query(values=values, limit=5000) + logger.info(q) + try: + t, q_res2 = run_query(q) + except: + return [] + # Kreiere target_list, in der die "gefundenen" Targets vermerkt sind + bind2 = sparql_json_result_bindings_to_rdflib( + get_path(q_res2, res_rows_path, default=[]) + ) + target_list = [] + for row in bind2: + target_list.append(get_path(row, [TARGET_VAR])) + logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) + # Kreire gtps_2 in der alle gtps, deren targets in target_list enthalten + # sind, "gespeichert" werden + gtps_2 = [] + for t in target_list: + for gtp in gtps: + if t == gtp[1]: + gtps_2.append(gtp) + logger.info(gtps_2) + + # GraphPattern mit gefixten Pfaden aus den gefundenen gtp kreieren: + # TODO: Das ganze als Methode aus einem graph-pattern, den results und + # den stp + gp_list = [] + for row2 in bind2: + for gtp in gtps: + if gtp[1] == get_path(row2, [TARGET_VAR]): + for row1 in bind1: + if get_path(row1, [b]) == get_path(row2, [b]): + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (b, get_path(row2, [c]), TARGET_VAR) + ]) + if gp_ not in gp_list: + gp_list.append(gp_) + + # gp3 = GraphPattern([ + # (SOURCE_VAR, a, b), + # (b, c, TARGET_VAR) + # ]) + gtp_scores = GTPScores(gtps) + # gtp_scores2 = GTPScores(gtps_2) + + # # Fixe das pattern über die gefundenen gtps + # mfv2 = [] + # if len(gtps_2) > 1: + # mfv2 = mutate_fix_var(sparql, timeout, gtp_scores2, gp3) + # + # # lasse die gefundenen Pattern einmal durch die fix_var laufen + # mfv = [] + # for gp_mfv2 in mfv2: + # mfv_res = mutate_fix_var(sparql, timeout, gtp_scores, gp_mfv2) + # for gp_res in mfv_res: + # mfv.append(gp_res) + # + # # evaluiere die so gefundenen Pattern + # res_eval = eval_gp_list(gtp_scores, mfv) + # return res_eval + + # evaluiere die gefixten pattern + res_eval = eval_gp_list(gtp_scores, gp_list) + return res_eval + + +def test_sample(gtps): values = {(SOURCE_VAR, TARGET_VAR): gtps} gp1 = GraphPattern([(SOURCE_VAR, a, b)]) gp2 = GraphPattern([(b, c, TARGET_VAR)]) @@ -140,16 +238,15 @@ def test_steps(gtps): # TODO: Query so verändern, dass nach count gefiltert wird (siehe log.txt) q = gp1.to_sparql_select_sample_query(values=values, limit=100) logger.info(q) - t, q_res = run_query(q) - logger.info(q_res) + t, q_res1 = run_query(q) + logger.info(q_res1) # Kreiere b_list in der die Ergebnisse für b "gespeichert" sind - # TODO speichere alles um später den Weg nachzuvollziehen res_rows_path = ['results', 'bindings'] - bind = sparql_json_result_bindings_to_rdflib( - get_path(q_res, res_rows_path, default=[]) + bind1 = sparql_json_result_bindings_to_rdflib( + get_path(q_res1, res_rows_path, default=[]) ) b_list = [] - for row in bind: + for row in bind1: x = get_path(row, [b]) y = (x, ) b_list.append(y) @@ -160,16 +257,15 @@ def test_steps(gtps): # Query die über eine var aus gp2 random samplet mit values aus b_list q = gp2.to_sparql_select_sample_query(values=values, limit=5000) logger.info(q) - t, q_res = 
run_query(q) + t, q_res2 = run_query(q) # Kreiere target_list, in der die "gefundenen" Targets vermerkt sind - res_rows_path = ['results', 'bindings'] - bind = sparql_json_result_bindings_to_rdflib( - get_path(q_res, res_rows_path, default=[]) + bind2 = sparql_json_result_bindings_to_rdflib( + get_path(q_res2, res_rows_path, default=[]) ) target_list = [] - for row in bind: + for row in bind2: target_list.append(get_path(row, [TARGET_VAR])) - logger.info('orig query took %.4f s, result:\n%s\n', t, q_res) + logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) # Kreire gtps_2 in der alle gtps, deren targets in target_list enthalten # sind, "gespeichert" werden gtps_2 = [] @@ -179,27 +275,47 @@ def test_steps(gtps): gtps_2.append(gtp) logger.info(gtps_2) - gp3 = GraphPattern([ - (SOURCE_VAR, a, b), - (b, c, TARGET_VAR) - ]) + # GraphPattern mit gefixten Pfaden aus den gefundenen gtp kreieren: + # TODO: Das ganze als Methode aus einem graph-pattern, den results und + # den stp + gp_list = [] + for row2 in bind2: + for gtp in gtps: + if gtp[1] == get_path(row2, [TARGET_VAR]): + for row1 in bind1: + if get_path(row1, [b]) == get_path(row2, [b]): + gp_ = GraphPattern([ + (SOURCE_VAR, get_path(row1, [a]), b), + (b, get_path(row2, [c]), TARGET_VAR) + ]) + if gp_ not in gp_list: + gp_list.append(gp_) + + # gp3 = GraphPattern([ + # (SOURCE_VAR, a, b), + # (b, c, TARGET_VAR) + # ]) gtp_scores = GTPScores(gtps) - gtp_scores2 = GTPScores(gtps_2) - - # Fixe das pattern über die gefundenen gtps - mfv2 = [] - if len(gtps_2) > 1: - mfv2 = mutate_fix_var(sparql, timeout, gtp_scores2, gp3) - - # lasse die gefundenen Pattern einmal durch die fix_var laufen - mfv = [] - for gp_mfv2 in mfv2: - mfv_res = mutate_fix_var(sparql, timeout, gtp_scores, gp_mfv2) - for gp_res in mfv_res: - mfv.append(gp_res) - - # evaluiere die so gefundenen Pattern - res_eval = eval_gp_list(gtp_scores, mfv) + # gtp_scores2 = GTPScores(gtps_2) + + # # Fixe das pattern über die gefundenen gtps + # mfv2 = [] + # if len(gtps_2) > 1: + # mfv2 = mutate_fix_var(sparql, timeout, gtp_scores2, gp3) + # + # # lasse die gefundenen Pattern einmal durch die fix_var laufen + # mfv = [] + # for gp_mfv2 in mfv2: + # mfv_res = mutate_fix_var(sparql, timeout, gtp_scores, gp_mfv2) + # for gp_res in mfv_res: + # mfv.append(gp_res) + # + # # evaluiere die so gefundenen Pattern + # res_eval = eval_gp_list(gtp_scores, mfv) + # return res_eval + + # evaluiere die gefixten pattern + res_eval = eval_gp_list(gtp_scores, gp_list) return res_eval @@ -235,7 +351,8 @@ def list_remove_bool(var): # keine Probleme mit dem Category:Cigarettes-Beispiel zu bekommen # (siehe docs) # TODO: Möglicherweise dafür sorgen, dass die nicht rausgeschmissen, - # sondern nur nicht mit prefix gekürzt werden + # sondern nur nicht mit prefix gekürzt werden, also einfach mal schauen, + # dass die curify das tut was sie soll elif isinstance(var, URIRef): return ':' in var[7:] return False @@ -247,17 +364,35 @@ def eval_gp_list(gtp_scores, gp_list): res_ev = evaluate( sparql, timeout, gtp_scores, gp_l, run=0, gen=0) update_individuals([gp_l], [res_ev]) - #print_graph_pattern(gp_, print_matching_node_pairs=0) + # print_graph_pattern(gp_, print_matching_node_pairs=0) return gp_list if __name__ == '__main__': + # # test_sample: + # res = [] + # for i in range(10): + # res_ts = test_sample(ground_truth_pairs_2) + # for gp_ts in res_ts: + # res.append(gp_ts) + # + # res = sorted(res, key=lambda gp_: -gp_.fitness.values.score) + # for res_ in res: + # print_graph_pattern(res_) + + # 
test_count
+    res = []
+    for i in range(1):
+        ground_truth_pairs_5 = get_semantic_associations()
+        ground_truth_pairs_5 = random.sample(ground_truth_pairs_5, 200)
+        max_out_steps = [10, 15, 20, 25, 30, 40, 50, 75, 100]
+        for j in max_out_steps:
+            res_ts = test_count(ground_truth_pairs_5, j)
+            for gp_ts in res_ts:
+                res.append((gp_ts, j))
+
+    res = sorted(res, key=lambda gp_: -gp_[0].fitness.values.score)
+    res = res[0:100]
+    for res_ in res:
+        print('max_out:'+str(res_[1]))
+        print_graph_pattern(res_[0])
From f67c7309605bcdde33f1155beeda0d346c6bf9ab Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Fri, 31 Aug 2018 13:58:28 +0200
Subject: [PATCH 05/27] Deep-and-Narrow-Path-Mutation should be runnable

---
 config/defaults.py | 3 +
 gp_learner.py | 128 +++++++++++++++++++++++++++-
 gp_query.py | 141 +++++++++++++++++++++++++++++++
 graph_pattern.py | 5 +-
 tests/test_mutate_deep_narrow.py | 47 +++++------
 5 files changed, 294 insertions(+), 30 deletions(-)

diff --git a/config/defaults.py b/config/defaults.py
index cf153d9..cee401d 100644
--- a/config/defaults.py
+++ b/config/defaults.py
@@ -89,6 +89,9 @@
 MUTPB_FV_SAMPLE_MAXN = 32  # max n of instantiations to sample from top k
 MUTPB_FV_QUERY_LIMIT = 256  # SPARQL query limit for the top k instantiations
 MUTPB_SP = 0.05  # prob to simplify pattern (warning: can restrict exploration)
+MUTPB_DN = 0.5  # prob to try adding a deep and narrow path to a pattern
+MUTPB_DN_PS_MAX_N = 10  # Max steps in the deep narrow path
+MUTPB_DN_AVG_LIMIT = 10  # Max avg. reachable Nodes
 
 # fusion of target candidates:
 FUSION_SAMPLES_PER_CLASS = 500  # only use up to n training samples per class
diff --git a/gp_learner.py b/gp_learner.py
index 008310f..c9462e9 100644
--- a/gp_learner.py
+++ b/gp_learner.py
@@ -54,6 +54,8 @@
 from gp_query import query_stats
 from gp_query import query_time_hard_exceeded
 from gp_query import query_time_soft_exceeded
+from gp_query import useful_path_query
+from gp_query import useful_path_inst_query
 from gp_query import variable_substitution_query
 from graph_pattern import canonicalize
 from graph_pattern import gen_random_var
@@ -684,6 +686,121 @@ def mutate_fix_var(
     ]
     return res
 
+def mutate_deep_narrow(
+        sparql,
+        timeout,
+        child,
+        gtp_scores,
+        dn_path_steps_max_n=config.MUTPB_DN_PS_MAX_N,
+        direct=None,
+        childin=False,
+        limit=config.MUTPB_FV_QUERY_LIMIT,  # TODO: actually use the limit?
+):
+    if not child.matching_node_pairs:
+        ev = evaluate(
+            sparql, timeout, gtp_scores, child)  # TODO: do run/gen need to be passed in here?
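+        # Note: evaluate() returns the full evaluation result for child and
+        # update_individuals() below writes it back onto the pattern, which
+        # is what (re)fills child.matching_node_pairs used just below.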
+ update_individuals([child], [ev]) + gtps = child.matching_node_pairs + if not gtps: + return [child] + #TODO: testen, wie die Verteilung gut ist + n = random.choice(range(dn_path_steps_max_n))+1 + n = 2 + node = [SOURCE_VAR] + for i in range(n): + node.append(Variable('n%i' % i)) + node.append(TARGET_VAR) + hop = [Variable('p%i' % i) for i in range(n + 1)] + # TODO: Entfernern, wenn direct einfach immer random gewählt werden soll + if direct is None or len(direct) != n + 1: + logger.debug( + 'No direction chosen, or direction tuple with false length' + ) + direct = [0 for _ in range(n + 1)] + gp_helper = [] + for i in range(n + 1): + if direct[i] == 0: + direct[i] = random.choice([-1, 1]) + if direct[i] == 1: + gp_helper.append( + GraphPattern([(node[i], hop[i], node[i + 1])]) + ) + else: + gp_helper.append( + GraphPattern([(node[i + 1], hop[i], node[i])]) + ) + # Queries für die Schritte + valueblocks_s = {} + valueblocks_t = {} + for i in range(int((n / 2) + 1)): + if i < int(n/2): + t, q_res = useful_path_query( + sparql, + timeout, + child, + hop[i], + node[i+1], + valueblocks_s, + gp_helper[:i + 1], + SOURCE_VAR, + gp_in=childin, + ) + if not q_res: + return [child] + valueblocks_s[hop[i]] = { + (hop[i],): random.sample( + [(q_r,) for q_r in q_res], + min(10, len(q_res)) + ) + } + if n-i > i: + t, q_res = useful_path_query( + sparql, + timeout, + child, + hop[n-i], + node[n-i], + valueblocks_t, + gp_helper[n - i:], + TARGET_VAR, + gp_in=childin, + ) + if not q_res: + return [child] + valueblocks_t[hop[n-i]] = { + (hop[n-i],): random.sample( + [(q_r,) for q_r in q_res], + min(10, len(q_res)) + ) + } + + # Query fürs Ergebnis + # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden + # werden + valueblocks = {} + valueblocks.update(valueblocks_s) + valueblocks.update(valueblocks_t) + t, q_res = useful_path_inst_query( + sparql, + timeout, + child, + hop, + valueblocks, + gp_helper, + gp_in=childin + ) + if not q_res: + return [child] + res = [] + for inst in q_res: + child_inst = GraphPattern([ + (node[i], inst[i], node[i + 1]) if direct[i] == 1 + else (node[i + 1], inst[i], node[i]) + for i in range(n + 1) + ]) + res.append(GraphPattern(child + child_inst)) + return res + def mutate_simplify_pattern(gp): if len(gp) < 2: @@ -797,6 +914,7 @@ def mutate( pb_mv=config.MUTPB_MV, pb_sp=config.MUTPB_SP, pb_sv=config.MUTPB_SV, + pb_dn=config.MUTPB_DN, ): # mutate patterns: # grow: select random identifier and convert them into a var (local) @@ -837,8 +955,14 @@ def mutate( else: children = [child] - - # TODO: deep & narrow paths mutation + helper = [] + for child in children: + if random.random() < pb_dn: + res = mutate_deep_narrow(sparql, timeout, gtp_scores, child) + helper += res + else: + helper.append(child) + children = helper children = { c if fit_to_live(c) else orig_child diff --git a/gp_query.py b/gp_query.py index 0a4618d..eae1c4f 100644 --- a/gp_query.py +++ b/gp_query.py @@ -62,6 +62,8 @@ def __init__(self): self.ask_multi_query_count = 0 self.combined_ask_count_multi_query_count = 0 self.variable_substitution_query_count = 0 + self.useful_path_query_count = 0 + self.useful_path_inst_query_count = 0 self.predict_query_count = 0 self.count_query_count = 0 @@ -695,6 +697,145 @@ def _var_subst_chunk_result_ext(q_res, _sel_var_and_vars, _, **kwds): def _var_subst_res_update(res, update, **_): res += update + + +def useful_path_query( + sparql, + timeout, + graph_pattern, + var_to_fix, + var_to_count, + valueblocks, + steps, + startvar, + 
avglimit=config.MUTPB_DN_AVG_LIMIT, + gp_in=False, + batch_size=None +): + _query_stats.useful_path_query_count += 1 + # TODO: evtl. je 10 pro 'gefixter' Variable von batch-size abziehen + # (weil der Block ja mit rein geht) + _values = graph_pattern.matching_node_pairs + # TODO: evtl. Schnitt mit noch nicht abgedeckten + _ret_val_mapping = {stp: [stp] for stp in graph_pattern.matching_node_pairs} + _vars_steps_and_stuff = ( + var_to_fix, var_to_count, startvar, valueblocks, steps, avglimit, gp_in + ) + return _multi_query( + sparql, timeout, graph_pattern, graph_pattern.matching_node_pairs, + batch_size, _vars_steps_and_stuff, _values, _ret_val_mapping, + _usef_path_res_init, _usef_path_chunk_q, _usef_path_chunk_result_ext, + _usef_path_res_update + ) + + +# noinspection PyUnusedLocal +def _usef_path_res_init(_, **kwds): + return [] + + +def _usef_path_chunk_q(gp, _vars_steps_and_stuff, values_chunk): + var_to_fix, var_to_count, startvar, _valueblocks, steps, avglimit, gp_in \ + = _vars_steps_and_stuff + valueblocks = { + startvar: { + (startvar,): + [(tup[0],) for tup in values_chunk] if startvar == SOURCE_VAR + else [(tup[1],) for tup in values_chunk] + } + } + valueblocks.update(_valueblocks) + return gp.to_sparql_useful_path_query( + var_to_fix, + var_to_count, + valueblocks, + steps, + startvar, + avglimit=avglimit, + gp_in=gp_in + ) + + +# noinspection PyUnusedLocal +def _usef_path_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds): + var_to_fix, var_to_count, startvar, _valueblocks, steps, avglimit, gp_in \ + = _vars_steps_and_stuff + chunk_res = [] + res_rows_path = ['results', 'bindings'] + bindings = sparql_json_result_bindings_to_rdflib( + get_path(q_res, res_rows_path, default=[]) + ) + + for row in bindings: + # TODO: Drüber nachdenken, ob iwie die avg-outgoing auch mit + # zurückgegeben werden sollen + chunk_res.append(get_path(row, [var_to_fix])) + return chunk_res + + +def _usef_path_res_update(res, update, **_): + res += update + + +def useful_path_inst_query( + sparql, + timeout, + graph_pattern, + hop, + valueblocks, + steps, + gp_in=False, + batch_size=None +): + _query_stats.useful_path_inst_query_count += 1 + # TODO: evtl. je 10 pro 'gefixter' Variable von batch-size abziehen + # (weil der Block ja mit rein geht) + _values = graph_pattern.matching_node_pairs + # evtl. 
Schnitt mit noch nicht abgedeckten + _ret_val_mapping = {stp: [stp] for stp in graph_pattern.matching_node_pairs} + _vars_steps_and_stuff = (hop, valueblocks, steps, gp_in) + return _multi_query( + sparql, timeout, graph_pattern, graph_pattern.matching_node_pairs, + batch_size, _vars_steps_and_stuff, _values, _ret_val_mapping, + _usef_path_inst_res_init, _usef_path_inst_chunk_q, + _usef_path_inst_chunk_result_ext, _usef_path_inst_res_update + ) + + +# noinspection PyUnusedLocal +def _usef_path_inst_res_init(_, **kwds): + return [] + + +def _usef_path_inst_chunk_q(gp, _vars_steps_and_stuff, values_chunk): + hop, _valueblocks, steps, gp_in = _vars_steps_and_stuff + valueblocks = { + 'st': { + (SOURCE_VAR, TARGET_VAR): values_chunk + } + } + valueblocks.update(_valueblocks) + return gp.to_sparql_useful_path_inst_query( + hop, valueblocks, steps, gp_in=gp_in + ) + + +# noinspection PyUnusedLocal +def _usef_path_inst_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds): + hop, _valueblocks, steps, gp_in = _vars_steps_and_stuff + chunk_res = [] + res_rows_path = ['results', 'bindings'] + bindings = sparql_json_result_bindings_to_rdflib( + get_path(q_res, res_rows_path, default=[]) + ) + + for row in bindings: + chunk_res.append([get_path(row, [h]) for h in hop]) + return chunk_res + + +def _usef_path_inst_res_update(res, update, **_): + res += update def generate_stps_from_gp(sparql, gp): diff --git a/graph_pattern.py b/graph_pattern.py index e1468ad..62c6a2c 100644 --- a/graph_pattern.py +++ b/graph_pattern.py @@ -830,14 +830,13 @@ def to_sparql_useful_path_query( var_to_count, valueblocks, steps, - startvar=None, + startvar, avglimit=10, gp_in=False ): + # TODO: evtl. Limit zufügen count_var_to_count = Variable('c' + ''.join(var_to_count)) avg_var_to_count = Variable('avgc' + ''.join(var_to_count)) - if startvar is None: - startvar = SOURCE_VAR res = "SELECT %(vtf)s (AVG(%(cvtc)s) as %(avtc)s) {\n" \ "SELECT %(stv)s %(vtf)s (COUNT (%(vtc)s) as %(cvtc)s) {\n" \ "%(val)s\n" \ diff --git a/tests/test_mutate_deep_narrow.py b/tests/test_mutate_deep_narrow.py index bbcbdca..1369f18 100644 --- a/tests/test_mutate_deep_narrow.py +++ b/tests/test_mutate_deep_narrow.py @@ -27,6 +27,7 @@ from config import SPARQL_ENDPOINT from gp_learner import evaluate +from gp_learner import mutate_deep_narrow from gp_learner import mutate_fix_var from gp_learner import update_individuals from gp_query import calibrate_query_timeout @@ -2262,7 +2263,7 @@ def mutate_deep_narrow_n_hops( # erste Version, komplett straight forward -def mutate_deep_narrow( +def mutate_deep_narrow_1( gp_, gtps, n, direct=None, gp_in=False ): node = [SOURCE_VAR] @@ -2651,7 +2652,7 @@ def mutate_deep_narrow_4( for i in range(int((n / 2) + 1)): if i < int(n/2): q = gp_.to_sparql_useful_path_query( - hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], gp_in=gp_in + hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], SOURCE_VAR, gp_in=gp_in ) logger.debug(q) try: @@ -2678,7 +2679,7 @@ def mutate_deep_narrow_4( node[n-i], valueblocks_t, gp_helper[n-i:], - startvar=TARGET_VAR, + TARGET_VAR, gp_in=gp_in ) logger.debug(q) @@ -3102,29 +3103,25 @@ def main(): # ground_truth_pairs = ground_truth_pairs[:100] gtp_scores = GTPScores(ground_truth_pairs) res = [] - # key = random.choice(gp_found.keys()) - # for i in range(100): - # # ground_truth_pairs = random.sample(ground_truth_pairs, 200) - # gp_ = GraphPattern([]) - # # gp_ = gp_found[key] - # res_= mutate_deep_narrow_5(gp_, ground_truth_pairs, 2, gp_in=False) - # res.append(res_) - # logger.info(i) 
-    #     if res_:
-    #         logger.info(res_)
-    #
-    # logger.info(res)
-    for key in gp_found.keys():
+    for i in range(100):
+        key = random.choice(gp_found.keys())
         gp_ = gp_found[key]
-        eval_gp(gtp_scores, gp_)
-        for i in range(100):
-            res_ = mutate_deep_narrow_4(
-                gp_, gp_.matching_node_pairs, 6, gp_in=False
-            )
-            res.append(res_)
-            logger.info((i, key))
-            if res_:
-                logger.info(res_)
+        # eval_gp(gtp_scores, gp_)
+        r = mutate_deep_narrow(sparql, timeout, gp_, gtp_scores)
+        logger.info(i)
+        logger.info(r)
+        res.append(r)
+    # for key in gp_found.keys():
+    #     gp_ = gp_found[key]
+    #     eval_gp(gtp_scores, gp_)
+    #     for i in range(100):
+    #         res_ = mutate_deep_narrow_4(
+    #             gp_, gp_.matching_node_pairs, 6, gp_in=False
+    #         )
+    #         res.append(res_)
+    #         logger.info((i, key))
+    #         if res_:
+    #             logger.info(res_)
 
     # res_eval=[]
     # res = []
From c908cafd24e736b75192dfb2ba38f767180a949f Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Mon, 3 Sep 2018 11:32:37 +0200
Subject: [PATCH 06/27] Small change and bug fix in gp_learner.py

---
 gp_learner.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gp_learner.py b/gp_learner.py
index c9462e9..108a59c 100644
--- a/gp_learner.py
+++ b/gp_learner.py
@@ -686,26 +686,26 @@ def mutate_fix_var(
     ]
     return res
 
+
 def mutate_deep_narrow(
         sparql,
         timeout,
-        child,
         gtp_scores,
+        child,
         dn_path_steps_max_n=config.MUTPB_DN_PS_MAX_N,
-        direct=None,
+        direct=None,
         childin=False,
         limit=config.MUTPB_FV_QUERY_LIMIT,  # TODO: actually use the limit?
 ):
-    if not child.matching_node_pairs:
+    if not child.fitness.valid:
         ev = evaluate(
-            sparql, timeout, gtp_scores, child)  # TODO: do run/gen need to be passed in here?
+            sparql, timeout, gtp_scores, child, run=-1, gen=-1)  # TODO: do run/gen need to be passed in here?
         update_individuals([child], [ev])
     gtps = child.matching_node_pairs
     if not gtps:
         return [child]
-    #TODO: testen, wie die Verteilung gut ist
-    n = random.choice(range(dn_path_steps_max_n))+1
-    n = 2
+    # TODO: testen, wie die Verteilung gut ist
+    n = random.choice(range(dn_path_steps_max_n)) + 1
From adbc215cc52c965a9fd2d7040d109d6d21b4a02a Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Tue, 4 Sep 2018 13:24:14 +0200
Subject: [PATCH 07/27] Undid unrelated modifications

---
 graph_pattern.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/graph_pattern.py b/graph_pattern.py
index 62c6a2c..bdb1d57 100644
--- a/graph_pattern.py
+++ b/graph_pattern.py
@@ -34,6 +34,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 RANDOM_VAR_LEN = 5  # so in total we have 62**5=916132832 different random vars
 RANDOM_VAR_PREFIX = 'vr'
 SOURCE_VAR = Variable('source')
@@ -240,11 +241,11 @@ def canonicalize(gp, shorten_varnames=True):
     cgp = GraphPattern(cbgp, mapping=mapping)
 
     if not (
-        len(gp) == len(cbgp) == len(cgp)
-        and len(gp.nodes) == len(cgp.nodes)
-        and len(gp.edges) == len(cgp.edges)
-        and sorted(gp.identifier_counts().values()) ==
-        sorted(cgp.identifier_counts().values())
+            len(gp) == len(cbgp) == len(cgp)
+            and len(gp.nodes) == len(cgp.nodes)
+            and len(gp.edges) == len(cgp.edges)
+            and sorted(gp.identifier_counts().values()) ==
+            sorted(cgp.identifier_counts().values())
     ):
         # canonicalization should never change any of the features above, but it
         # did before (e.g., https://github.com/RDFLib/rdflib/issues/494 ).
@@ -431,8 +432,8 @@ def exclude(self, identifiers):
             [(s, p, o)
              for s, p, o in self
              if p not in identifiers and
-             s not in identifiers and
-             o not in identifiers
+                s not in identifiers and
+                o not in identifiers
              ]
         )
 
@@ -447,7 +448,7 @@ def identifier_counts(self, exclude_vars=False, vars_only=False):
         :param vars_only: Only return counts for vars.
         :return: Counter of all identifiers in this graph pattern.
         """
-        assert not (exclude_vars and vars_only)
+        assert not(exclude_vars and vars_only)
         ids = Counter([i for t in self for i in t])
         if exclude_vars:
             for i in self.vars_in_graph:
From 20e5b343307dbdafd7c01669077106a3ba6b2889 Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Tue, 4 Sep 2018 13:40:32 +0200
Subject: [PATCH 08/27] Renamed two values and added alpha/beta values for the
 path length of deep_and_narrow_path_mutation

---
 config/defaults.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/config/defaults.py b/config/defaults.py
index cee401d..9a95607 100644
--- a/config/defaults.py
+++ b/config/defaults.py
@@ -89,9 +89,12 @@
 MUTPB_FV_SAMPLE_MAXN = 32  # max n of instantiations to sample from top k
 MUTPB_FV_QUERY_LIMIT = 256  # SPARQL query limit for the top k instantiations
 MUTPB_SP = 0.05  # prob to simplify pattern (warning: can restrict exploration)
+# TODO: Lower the MUTPB_DN
 MUTPB_DN = 0.5  # prob to try adding a deep and narrow path to a pattern
-MUTPB_DN_PS_MAX_N = 10  # Max steps in the deep narrow path
-MUTPB_DN_AVG_LIMIT = 10  # Max avg. reachable Nodes
+MUTPB_DN_MAX_HOPS = 10  # Max number of hops in the deep narrow path
+MUTPB_DN_MAX_HOPS_ALPHA = 5.  # alpha value in a length beta distribution
+MUTPB_DN_MAX_HOPS_BETA = 30.  # beta value in a length beta distribution
+MUTPB_DN_AVG_DEG_LIMIT = 10  # Max avg. reachable Nodes
 
 # fusion of target candidates:
 FUSION_SAMPLES_PER_CLASS = 500  # only use up to n training samples per class
From 22a786e6c5ddf231b1aa487600f794ab87b07e8d Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Tue, 4 Sep 2018 13:58:41 +0200
Subject: [PATCH 09/27] Changed values MUTPB_DN_MAX_HOPS_ALPHA / BETA

---
 config/defaults.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/defaults.py b/config/defaults.py
index 9a95607..bd10c50 100644
--- a/config/defaults.py
+++ b/config/defaults.py
@@ -92,8 +92,8 @@
 # TODO: Lower the MUTPB_DN
 MUTPB_DN = 0.5  # prob to try adding a deep and narrow path to a pattern
 MUTPB_DN_MAX_HOPS = 10  # Max number of hops in the deep narrow path
-MUTPB_DN_MAX_HOPS_ALPHA = 5.  # alpha value in a length beta distribution
-MUTPB_DN_MAX_HOPS_BETA = 30.  # beta value in a length beta distribution
+MUTPB_DN_MAX_HOPS_ALPHA = 2.  # alpha value in a length beta distribution
+MUTPB_DN_MAX_HOPS_BETA = 5.  # beta value in a length beta distribution
 MUTPB_DN_AVG_DEG_LIMIT = 10  # Max avg. 
reachable Nodes
 
 # fusion of target candidates:
From 91cbde0ddaa7987a276919b3ead051cb956f5037 Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Wed, 5 Sep 2018 11:59:05 +0200
Subject: [PATCH 10/27] Changed argument order in mutate_deep_narrow() call

---
 tests/test_mutate_deep_narrow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_mutate_deep_narrow.py b/tests/test_mutate_deep_narrow.py
index 1369f18..3f62984 100644
--- a/tests/test_mutate_deep_narrow.py
+++ b/tests/test_mutate_deep_narrow.py
@@ -3107,7 +3107,7 @@ def main():
         key = random.choice(gp_found.keys())
         gp_ = gp_found[key]
         # eval_gp(gtp_scores, gp_)
-        r = mutate_deep_narrow(sparql, timeout, gp_, gtp_scores)
+        r = mutate_deep_narrow(sparql, timeout, gtp_scores, gp_)
         logger.info(i)
         logger.info(r)
         res.append(r)
From 9c3238a9d0e2e6df5dd18918e2fd333a1d580d50 Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Wed, 5 Sep 2018 11:59:58 +0200
Subject: [PATCH 11/27] Renamed MUTPB_DN_AVG_LIMIT

---
 gp_query.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gp_query.py b/gp_query.py
index eae1c4f..a7444b3 100644
--- a/gp_query.py
+++ b/gp_query.py
@@ -708,7 +708,7 @@ def useful_path_query(
         valueblocks,
         steps,
         startvar,
-        avglimit=config.MUTPB_DN_AVG_LIMIT,
+        avglimit=config.MUTPB_DN_AVG_DEG_LIMIT,
         gp_in=False,
         batch_size=None
 ):
From 6362dc8c14278f093b55eb795efe7ed703c22834 Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Wed, 5 Sep 2018 12:17:45 +0200
Subject: [PATCH 12/27] Added beta distribution for mutation length; run
 deep-narrow-path mutation only if fix-var did not run

---
 gp_learner.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/gp_learner.py b/gp_learner.py
index 108a59c..6cc3335 100644
--- a/gp_learner.py
+++ b/gp_learner.py
@@ -692,20 +692,25 @@ def mutate_deep_narrow(
         timeout,
         gtp_scores,
         child,
-        dn_path_steps_max_n=config.MUTPB_DN_PS_MAX_N,
-        direct=None,
+        direct=None,
         childin=False,
         limit=config.MUTPB_FV_QUERY_LIMIT,  # TODO: actually use the limit?
 ):
     if not child.fitness.valid:
         ev = evaluate(
-            sparql, timeout, gtp_scores, child, run=-1, gen=-1)  # TODO: do run/gen need to be passed in here?
+            sparql, timeout, gtp_scores, child, run=-1, gen=-1)
         update_individuals([child], [ev])
     gtps = child.matching_node_pairs
     if not gtps:
         return [child]
-    # TODO: testen, wie die Verteilung gut ist
-    n = random.choice(range(dn_path_steps_max_n)) + 1
+    alpha = config.MUTPB_DN_MAX_HOPS_ALPHA
+    beta = config.MUTPB_DN_MAX_HOPS_BETA
+    max_hops = config.MUTPB_DN_MAX_HOPS
+    # more likely to create shorter paths
+    # with default values the distribution is as follows:
+    # PDF: 1: 14 %, 2: 27 %, 3: 25 %, 4: 17 %, 5: 10 %, 6: 5 %, 7: 1.5 %, ...
+    # CDF: 1: 14 %, 2: 40 %, 3: 66 %, 4: 83 %, 5: 93 %, 6: 98 %, 7: 99,6 %, ... 
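+    # A worked sketch of the mapping below (values are illustrative only,
+    # assuming the defaults alpha=2., beta=5., max_hops=10):
+    # random.betavariate(alpha, beta) draws x in (0, 1) with mean
+    # alpha / (alpha + beta) = 2/7 ~ 0.29, and int(x * (max_hops-1) + 1)
+    # turns that draw into a hop count, e.g.:
+    #   x = 0.05 -> n = 1,   x = 0.30 -> n = 3,   x = 0.90 -> n = 9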
+ n = int(random.betavariate(alpha, beta) * (max_hops-1) + 1) node = [SOURCE_VAR] for i in range(n): node.append(Variable('n%i' % i)) @@ -953,16 +958,10 @@ def mutate( child = canonicalize(child) children = mutate_fix_var(sparql, timeout, gtp_scores, child) else: - children = [child] - - helper = [] - for child in children: if random.random() < pb_dn: - res = mutate_deep_narrow(sparql, timeout, gtp_scores, child) - helper += res + children = mutate_deep_narrow(sparql, timeout, gtp_scores, child) else: - helper.append(child) - children = helper + children = [child] children = { c if fit_to_live(c) else orig_child From 1130da4349a5b1b985566ec64875810fb185992f Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 12:43:30 +0200 Subject: [PATCH 13/27] Code-Style changes and renamed mutate_deep_narrow to mutate_deep_narrow_path --- gp_learner.py | 41 +++++++++++++++----------------- tests/test_mutate_deep_narrow.py | 4 ++-- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/gp_learner.py b/gp_learner.py index 6cc3335..ce7b1b4 100644 --- a/gp_learner.py +++ b/gp_learner.py @@ -687,7 +687,7 @@ def mutate_fix_var( return res -def mutate_deep_narrow( +def mutate_deep_narrow_path( sparql, timeout, gtp_scores, @@ -711,11 +711,8 @@ def mutate_deep_narrow( # PDF: 1: 14 %, 2: 27 %, 3: 25 %, 4: 17 %, 5: 10 %, 6: 5 %, 7: 1.5 %, ... # CDF: 1: 14 %, 2: 40 %, 3: 66 %, 4: 83 %, 5: 93 %, 6: 98 %, 7: 99,6 %, ... n = int(random.betavariate(alpha, beta) * (max_hops-1) + 1) - node = [SOURCE_VAR] - for i in range(n): - node.append(Variable('n%i' % i)) - node.append(TARGET_VAR) - hop = [Variable('p%i' % i) for i in range(n + 1)] + nodes = [SOURCE_VAR] + [Variable('n%d' % i) for i in range(n)] + [TARGET_VAR] + hops = [Variable('p%d' % i) for i in range(n + 1)] # TODO: Entfernern, wenn direct einfach immer random gewählt werden soll if direct is None or len(direct) != n + 1: logger.debug( @@ -728,23 +725,23 @@ def mutate_deep_narrow( direct[i] = random.choice([-1, 1]) if direct[i] == 1: gp_helper.append( - GraphPattern([(node[i], hop[i], node[i + 1])]) + GraphPattern([(nodes[i], hops[i], nodes[i + 1])]) ) else: gp_helper.append( - GraphPattern([(node[i + 1], hop[i], node[i])]) + GraphPattern([(nodes[i + 1], hops[i], nodes[i])]) ) # Queries für die Schritte valueblocks_s = {} valueblocks_t = {} - for i in range(int((n / 2) + 1)): + for i in range(n // 2 + 1): if i < int(n/2): t, q_res = useful_path_query( sparql, timeout, child, - hop[i], - node[i+1], + hops[i], + nodes[i+1], valueblocks_s, gp_helper[:i + 1], SOURCE_VAR, @@ -752,8 +749,8 @@ def mutate_deep_narrow( ) if not q_res: return [child] - valueblocks_s[hop[i]] = { - (hop[i],): random.sample( + valueblocks_s[hops[i]] = { + (hops[i],): random.sample( [(q_r,) for q_r in q_res], min(10, len(q_res)) ) @@ -763,8 +760,8 @@ def mutate_deep_narrow( sparql, timeout, child, - hop[n-i], - node[n-i], + hops[n-i], + nodes[n-i], valueblocks_t, gp_helper[n - i:], TARGET_VAR, @@ -772,10 +769,10 @@ def mutate_deep_narrow( ) if not q_res: return [child] - valueblocks_t[hop[n-i]] = { - (hop[n-i],): random.sample( + valueblocks_t[hops[n-i]] = { + (hops[n-i],): random.sample( [(q_r,) for q_r in q_res], - min(10, len(q_res)) + min(config.MUTPB_DN_AVG_DEG_LIMIT, len(q_res)) ) } @@ -789,7 +786,7 @@ def mutate_deep_narrow( sparql, timeout, child, - hop, + hops, valueblocks, gp_helper, gp_in=childin @@ -799,8 +796,8 @@ def mutate_deep_narrow( res = [] for inst in q_res: child_inst = GraphPattern([ - (node[i], inst[i], node[i + 1]) if direct[i] == 1 - else 
(node[i + 1], inst[i], node[i]) + (nodes[i], inst[i], nodes[i + 1]) if direct[i] == 1 + else (nodes[i + 1], inst[i], nodes[i]) for i in range(n + 1) ]) res.append(GraphPattern(child + child_inst)) @@ -959,7 +956,7 @@ def mutate( children = mutate_fix_var(sparql, timeout, gtp_scores, child) else: if random.random() < pb_dn: - children = mutate_deep_narrow(sparql, timeout, gtp_scores, child) + children = mutate_deep_narrow_path(sparql, timeout, gtp_scores, child) else: children = [child] diff --git a/tests/test_mutate_deep_narrow.py b/tests/test_mutate_deep_narrow.py index 3f62984..de0e443 100644 --- a/tests/test_mutate_deep_narrow.py +++ b/tests/test_mutate_deep_narrow.py @@ -27,7 +27,7 @@ from config import SPARQL_ENDPOINT from gp_learner import evaluate -from gp_learner import mutate_deep_narrow +from gp_learner import mutate_deep_narrow_path from gp_learner import mutate_fix_var from gp_learner import update_individuals from gp_query import calibrate_query_timeout @@ -3107,7 +3107,7 @@ def main(): key = random.choice(gp_found.keys()) gp_ = gp_found[key] # eval_gp(gtp_scores, gp_) - r = mutate_deep_narrow(sparql, timeout, gtp_scores, gp_) + r = mutate_deep_narrow_path(sparql, timeout, gtp_scores, gp_) logger.info(i) logger.info(r) res.append(r) From 12a95ae1e3507e015c381e616622d5cc5d6fa12b Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 12:44:21 +0200 Subject: [PATCH 14/27] Renamed useful_path_(inst_)query to deep_narrow_path_(inst_)query --- gp_learner.py | 10 +++++----- gp_query.py | 31 +++++++++++++++++-------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/gp_learner.py b/gp_learner.py index ce7b1b4..099409c 100644 --- a/gp_learner.py +++ b/gp_learner.py @@ -54,8 +54,8 @@ from gp_query import query_stats from gp_query import query_time_hard_exceeded from gp_query import query_time_soft_exceeded -from gp_query import useful_path_query -from gp_query import useful_path_inst_query +from gp_query import deep_narrow_path_query +from gp_query import deep_narrow_path_inst_query from gp_query import variable_substitution_query from graph_pattern import canonicalize from graph_pattern import gen_random_var @@ -736,7 +736,7 @@ def mutate_deep_narrow_path( valueblocks_t = {} for i in range(n // 2 + 1): if i < int(n/2): - t, q_res = useful_path_query( + t, q_res = deep_narrow_path_query( sparql, timeout, child, @@ -756,7 +756,7 @@ def mutate_deep_narrow_path( ) } if n-i > i: - t, q_res = useful_path_query( + t, q_res = deep_narrow_path_query( sparql, timeout, child, @@ -782,7 +782,7 @@ def mutate_deep_narrow_path( valueblocks = {} valueblocks.update(valueblocks_s) valueblocks.update(valueblocks_t) - t, q_res = useful_path_inst_query( + t, q_res = deep_narrow_path_inst_query( sparql, timeout, child, diff --git a/gp_query.py b/gp_query.py index a7444b3..c0a8bea 100644 --- a/gp_query.py +++ b/gp_query.py @@ -699,7 +699,7 @@ def _var_subst_res_update(res, update, **_): res += update -def useful_path_query( +def deep_narrow_path_query( sparql, timeout, graph_pattern, @@ -724,17 +724,17 @@ def useful_path_query( return _multi_query( sparql, timeout, graph_pattern, graph_pattern.matching_node_pairs, batch_size, _vars_steps_and_stuff, _values, _ret_val_mapping, - _usef_path_res_init, _usef_path_chunk_q, _usef_path_chunk_result_ext, - _usef_path_res_update + _deep_narrow_path_res_init, _deep_narrow_path_chunk_q, + _deep_narrow_path_chunk_result_ext, _deep_narrow_path_res_update ) # noinspection PyUnusedLocal -def _usef_path_res_init(_, **kwds): +def 
_deep_narrow_path_res_init(_, **kwds): return [] -def _usef_path_chunk_q(gp, _vars_steps_and_stuff, values_chunk): +def _deep_narrow_path_chunk_q(gp, _vars_steps_and_stuff, values_chunk): var_to_fix, var_to_count, startvar, _valueblocks, steps, avglimit, gp_in \ = _vars_steps_and_stuff valueblocks = { @@ -757,7 +757,7 @@ def _usef_path_chunk_q(gp, _vars_steps_and_stuff, values_chunk): # noinspection PyUnusedLocal -def _usef_path_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds): +def _deep_narrow_path_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds): var_to_fix, var_to_count, startvar, _valueblocks, steps, avglimit, gp_in \ = _vars_steps_and_stuff chunk_res = [] @@ -773,11 +773,11 @@ def _usef_path_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds): return chunk_res -def _usef_path_res_update(res, update, **_): +def _deep_narrow_path_res_update(res, update, **_): res += update -def useful_path_inst_query( +def deep_narrow_path_inst_query( sparql, timeout, graph_pattern, @@ -797,17 +797,18 @@ def useful_path_inst_query( return _multi_query( sparql, timeout, graph_pattern, graph_pattern.matching_node_pairs, batch_size, _vars_steps_and_stuff, _values, _ret_val_mapping, - _usef_path_inst_res_init, _usef_path_inst_chunk_q, - _usef_path_inst_chunk_result_ext, _usef_path_inst_res_update + _deep_narrow_path_inst_res_init, _deep_narrow_path_inst_chunk_q, + _deep_narrow_path_inst_chunk_result_ext, + _deep_narrow_path_inst_res_update ) # noinspection PyUnusedLocal -def _usef_path_inst_res_init(_, **kwds): +def _deep_narrow_path_inst_res_init(_, **kwds): return [] -def _usef_path_inst_chunk_q(gp, _vars_steps_and_stuff, values_chunk): +def _deep_narrow_path_inst_chunk_q(gp, _vars_steps_and_stuff, values_chunk): hop, _valueblocks, steps, gp_in = _vars_steps_and_stuff valueblocks = { 'st': { @@ -821,7 +822,9 @@ def _usef_path_inst_chunk_q(gp, _vars_steps_and_stuff, values_chunk): # noinspection PyUnusedLocal -def _usef_path_inst_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds): +def _deep_narrow_path_inst_chunk_result_ext( + q_res, _vars_steps_and_stuff, _, **kwds +): hop, _valueblocks, steps, gp_in = _vars_steps_and_stuff chunk_res = [] res_rows_path = ['results', 'bindings'] @@ -834,7 +837,7 @@ def _usef_path_inst_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds): return chunk_res -def _usef_path_inst_res_update(res, update, **_): +def _deep_narrow_path_inst_res_update(res, update, **_): res += update From 22ca6aac182c47f970ade142698c44925c32c857 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 12:49:00 +0200 Subject: [PATCH 15/27] Renamed to_sparql_useful_path/_inst_query() to to_sparql_deep_narrow_path_(inst_)query --- gp_query.py | 2 +- graph_pattern.py | 4 ++-- tests/test_mutate_deep_narrow.py | 26 +++++++++++++------------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/gp_query.py b/gp_query.py index c0a8bea..5f1327d 100644 --- a/gp_query.py +++ b/gp_query.py @@ -745,7 +745,7 @@ def _deep_narrow_path_chunk_q(gp, _vars_steps_and_stuff, values_chunk): } } valueblocks.update(_valueblocks) - return gp.to_sparql_useful_path_query( + return gp.to_sparql_deep_narrow_path_query( var_to_fix, var_to_count, valueblocks, diff --git a/graph_pattern.py b/graph_pattern.py index bdb1d57..859584b 100644 --- a/graph_pattern.py +++ b/graph_pattern.py @@ -825,7 +825,7 @@ def to_sparql_filter_by_count_in_out_query( res = textwrap.dedent(res) return gp_._sparql_prefix(res) - def to_sparql_useful_path_query( + def 
to_sparql_deep_narrow_path_query( self, var_to_fix, var_to_count, @@ -866,7 +866,7 @@ def to_sparql_useful_path_query( res = textwrap.dedent(res) return self._sparql_prefix(res) - def to_sparql_inst_query( + def to_sparql_deep_narrow_path_inst_query( self, hop, valueblocks, diff --git a/tests/test_mutate_deep_narrow.py b/tests/test_mutate_deep_narrow.py index de0e443..a99a94a 100644 --- a/tests/test_mutate_deep_narrow.py +++ b/tests/test_mutate_deep_narrow.py @@ -2304,7 +2304,7 @@ def mutate_deep_narrow_1( valueblocks = {} valueblocks[SOURCE_VAR] = values[SOURCE_VAR] for i in range(n+1): - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in ) logger.debug(q) @@ -2337,7 +2337,7 @@ def mutate_deep_narrow_1( # werden del valueblocks[SOURCE_VAR] valueblocks['st'] = values['st'] - q = gp_.to_sparql_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + q = gp_.to_sparql_deep_narrow_path_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) logger.debug(q) try: t, res_q_inst = run_query(q) @@ -2408,7 +2408,7 @@ def mutate_deep_narrow_2( valueblocks = {} valueblocks[SOURCE_VAR] = values[SOURCE_VAR] for i in range(n): - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in ) logger.debug(q) @@ -2510,7 +2510,7 @@ def mutate_deep_narrow_3( valueblocks_t = {} valueblocks_t[TARGET_VAR] = values[TARGET_VAR] for i in range(int((n / 2) + 1)): - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], gp_in=gp_in ) logger.debug(q) @@ -2533,7 +2533,7 @@ def mutate_deep_narrow_3( ) } if n-i != i: - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[n-i], node[n-i], valueblocks_t, @@ -2577,7 +2577,7 @@ def mutate_deep_narrow_3( if key is not TARGET_VAR: valueblocks[key] = valueblocks_t[key] valueblocks['st'] = values['st'] - q = gp_.to_sparql_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + q = gp_.to_sparql_deep_narrow_path_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) logger.debug(q) try: t, res_q_inst = run_query(q) @@ -2651,7 +2651,7 @@ def mutate_deep_narrow_4( valueblocks_t[TARGET_VAR] = values[TARGET_VAR] for i in range(int((n / 2) + 1)): if i < int(n/2): - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], SOURCE_VAR, gp_in=gp_in ) logger.debug(q) @@ -2674,7 +2674,7 @@ def mutate_deep_narrow_4( ) } if n-i > i: - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[n-i], node[n-i], valueblocks_t, @@ -2786,7 +2786,7 @@ def mutate_deep_narrow_5( valueblocks = {} valueblocks[SOURCE_VAR] = values[SOURCE_VAR] for i in range(n+1): - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in ) logger.debug(q) @@ -2816,7 +2816,7 @@ def mutate_deep_narrow_5( # werden del valueblocks[SOURCE_VAR] valueblocks['st'] = values['st'] - q = gp_.to_sparql_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + q = gp_.to_sparql_deep_narrow_path_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) logger.debug(q) try: t, res_q_inst = run_query(q) @@ -2907,7 +2907,7 @@ def mutate_deep_narrow_6( valueblocks = {} valueblocks[SOURCE_VAR] = values[SOURCE_VAR] for i in range(n): - q = gp_.to_sparql_useful_path_query( + q = 
gp_.to_sparql_deep_narrow_path_query( hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in ) logger.debug(q) @@ -3010,7 +3010,7 @@ def mutate_deep_narrow_7( valueblocks_t[TARGET_VAR] = values[TARGET_VAR] for i in range(int((n / 2) + 1)): if i < int(n/2): - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], gp_in=gp_in ) logger.debug(q) @@ -3030,7 +3030,7 @@ def mutate_deep_narrow_7( hop[i], Variable('avgc' + ''.join(node[i + 1])), res_q[i] ) if n-i > i: - q = gp_.to_sparql_useful_path_query( + q = gp_.to_sparql_deep_narrow_path_query( hop[n-i], node[n-i], valueblocks_t, From 126e84db8abc0d35bdba914175f4fc420ef68429 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 12:55:36 +0200 Subject: [PATCH 16/27] Undone the changes in requirements.txt --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4a02904..d61d2fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,8 +11,8 @@ nose>=1.3.7 numpy>=1.12.1 objgraph>=3.1.0 requests>=2.16.5 -rdflib>=4.2.1 -#git+git://github.com/RDFLib/rdflib@master#egg=rdflib +#rdflib>=4.2.1 +git+git://github.com/RDFLib/rdflib@master#egg=rdflib scikit-learn>=0.18.1 scipy>=0.19.0 scoop>=0.7.1.1 From 331e06f0d5003ec914f9725bd20838fad4df2224 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 14:25:24 +0200 Subject: [PATCH 17/27] Added default-value for max instances of hops --- config/defaults.py | 1 + 1 file changed, 1 insertion(+) diff --git a/config/defaults.py b/config/defaults.py index bd10c50..75d61e9 100644 --- a/config/defaults.py +++ b/config/defaults.py @@ -95,6 +95,7 @@ MUTPB_DN_MAX_HOPS_ALPHA = 2. # alpha value in a length beta distribution MUTPB_DN_MAX_HOPS_BETA = 5. # beta value in a length beta distribution MUTPB_DN_AVG_DEG_LIMIT = 10 # Max avg. 
reachable Nodes +MUTPB_DN_MAX_HOP_INST = 10 # Max number of hop instances for the next query/ies # fusion of target candidates: FUSION_SAMPLES_PER_CLASS = 500 # only use up to n training samples per class From 49b5c4dfa6d5ced99284ee5c499f0e676dfc16a6 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 14:29:10 +0200 Subject: [PATCH 18/27] Renamed the correct to_sparql_deep_narrow_path_inst_query() --- graph_pattern.py | 4 ++-- tests/test_mutate_deep_narrow.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/graph_pattern.py b/graph_pattern.py index 859584b..ce7b45a 100644 --- a/graph_pattern.py +++ b/graph_pattern.py @@ -866,7 +866,7 @@ def to_sparql_deep_narrow_path_query( res = textwrap.dedent(res) return self._sparql_prefix(res) - def to_sparql_deep_narrow_path_inst_query( + def to_sparql_deep_narrow_path_inst_query_old( self, hop, valueblocks, @@ -896,7 +896,7 @@ def to_sparql_deep_narrow_path_inst_query( return self._sparql_prefix(res) # TODO: die normale inst durch diese hier ersetzen (sollte überall gehen) - def to_sparql_useful_path_inst_query( + def to_sparql_deep_narrow_path_inst_query( self, hop, valueblocks, diff --git a/tests/test_mutate_deep_narrow.py b/tests/test_mutate_deep_narrow.py index a99a94a..d564f6f 100644 --- a/tests/test_mutate_deep_narrow.py +++ b/tests/test_mutate_deep_narrow.py @@ -2337,7 +2337,7 @@ def mutate_deep_narrow_1( # werden del valueblocks[SOURCE_VAR] valueblocks['st'] = values['st'] - q = gp_.to_sparql_deep_narrow_path_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + q = gp_.to_sparql_deep_narrow_path_inst_query_old(hop, valueblocks, gp_help, gp_in=gp_in) logger.debug(q) try: t, res_q_inst = run_query(q) @@ -2435,7 +2435,7 @@ def mutate_deep_narrow_2( # werden del valueblocks[SOURCE_VAR] valueblocks['st'] = values['st'] - q = gp_.to_sparql_useful_path_inst_query( + q = gp_.to_sparql_deep_narrow_path_inst_query( hop, valueblocks, gp_helper, gp_in=gp_in ) logger.debug(q) @@ -2577,7 +2577,7 @@ def mutate_deep_narrow_3( if key is not TARGET_VAR: valueblocks[key] = valueblocks_t[key] valueblocks['st'] = values['st'] - q = gp_.to_sparql_deep_narrow_path_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + q = gp_.to_sparql_deep_narrow_path_inst_query_old(hop, valueblocks, gp_help, gp_in=gp_in) logger.debug(q) try: t, res_q_inst = run_query(q) @@ -2713,7 +2713,7 @@ def mutate_deep_narrow_4( if key is not TARGET_VAR: valueblocks[key] = valueblocks_t[key] valueblocks['st'] = values['st'] - q = gp_.to_sparql_useful_path_inst_query( + q = gp_.to_sparql_deep_narrow_path_inst_query( hop, valueblocks, gp_helper, gp_in=gp_in ) logger.debug(q) @@ -2816,7 +2816,7 @@ def mutate_deep_narrow_5( # werden del valueblocks[SOURCE_VAR] valueblocks['st'] = values['st'] - q = gp_.to_sparql_deep_narrow_path_inst_query(hop, valueblocks, gp_help, gp_in=gp_in) + q = gp_.to_sparql_deep_narrow_path_inst_query_old(hop, valueblocks, gp_help, gp_in=gp_in) logger.debug(q) try: t, res_q_inst = run_query(q) @@ -2934,7 +2934,7 @@ def mutate_deep_narrow_6( # werden del valueblocks[SOURCE_VAR] valueblocks['st'] = values['st'] - q = gp_.to_sparql_useful_path_inst_query( + q = gp_.to_sparql_deep_narrow_path_inst_query( hop, valueblocks, gp_helper, gp_in=gp_in ) logger.debug(q) @@ -3066,7 +3066,7 @@ def mutate_deep_narrow_7( if key is not TARGET_VAR: valueblocks[key] = valueblocks_t[key] valueblocks['st'] = values['st'] - q = gp_.to_sparql_useful_path_inst_query( + q = gp_.to_sparql_deep_narrow_path_inst_query( hop, valueblocks, gp_helper, 
gp_in=gp_in
         )
         logger.debug(q)

From c0617ea505d456b1e48a5a74b86503fdeacb346d Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Wed, 5 Sep 2018 15:00:11 +0200
Subject: [PATCH 19/27] Added docstring for mutate_deep_narrow_path() AND
 Renamed direct and childin AND added the possibility to choose n => length
 of directions AND some code-style changes AND comments => english

---
 gp_learner.py | 115 +++++++++++++++++++++++++++++---------------------
 1 file changed, 67 insertions(+), 48 deletions(-)

diff --git a/gp_learner.py b/gp_learner.py
index 099409c..3765dad 100644
--- a/gp_learner.py
+++ b/gp_learner.py
@@ -692,10 +692,34 @@ def mutate_deep_narrow_path(
     timeout,
     gtp_scores,
     child,
-    direct=None,
-    childin=False,
-    limit=config.MUTPB_FV_QUERY_LIMIT,  # TODO: Limit benutzen?
+    directions=None,
+    child_in_queries=False,
+    limit=None,  # TODO: Use a limit for the queries?
 ):
+    """Finds n-hop connections from source to target and adds them to a
+    given graph pattern.
+
+    The outline of the mutation is as follows:
+    - If not evaluated yet, evaluates the given GP to work on its matching
+      node pairs.
+    - If not passed in, randomly selects the path length and the directions
+      of the single hops.
+    - Issues SPARQL queries to find hops (from source and target) that don't
+      have a big fan-out (i.e., stay below the default limit). Uses a
+      default maximum number of found hops to search for the next hop.
+    When there is only one hop left to find, it tries to instantiate paths
+    that fit an STP. If such a path is found, its hops are added to the GP.
+    As there can be more than one such path, the mutation returns a list of
+    the resulting patterns.
+
+    :param directions: list of directions to use for the hops
+        (1: Source -> Target, -1: Target -> Source,
+        0 (or anything else): choose randomly)
+    :param child_in_queries: if True, add the triples of the given pattern
+        to the queries
+    :param limit: SPARQL limit
+    :return: list of children in which a deep narrow path was added
+    """
     if not child.fitness.valid:
         ev = evaluate(
             sparql, timeout, gtp_scores, child, run=-1, gen=-1)
@@ -703,35 +727,32 @@ def mutate_deep_narrow_path(
     gtps = child.matching_node_pairs
     if not gtps:
         return [child]
-    alpha = config.MUTPB_DN_MAX_HOPS_ALPHA
-    beta = config.MUTPB_DN_MAX_HOPS_BETA
-    max_hops = config.MUTPB_DN_MAX_HOPS
-    # more likely to create shorter paths
-    # with default values the distribution is as follows:
-    # PDF: 1: 14 %, 2: 27 %, 3: 25 %, 4: 17 %, 5: 10 %, 6: 5 %, 7: 1.5 %, ...
-    # CDF: 1: 14 %, 2: 40 %, 3: 66 %, 4: 83 %, 5: 93 %, 6: 98 %, 7: 99,6 %, ...
-    n = int(random.betavariate(alpha, beta) * (max_hops-1) + 1)
+    if directions:
+        n = len(directions) - 1
+    else:
+        alpha = config.MUTPB_DN_MAX_HOPS_ALPHA
+        beta = config.MUTPB_DN_MAX_HOPS_BETA
+        max_hops = config.MUTPB_DN_MAX_HOPS
+        # more likely to create shorter paths
+        # with default values the distribution is as follows:
+        # PDF: 1: 14 %, 2: 27 %, 3: 25 %, 4: 17 %, 5: 10 %, 6: 5 %, 7: 1.5 %, ...
+        # CDF: 1: 14 %, 2: 40 %, 3: 66 %, 4: 83 %, 5: 93 %, 6: 98 %, 7: 99.6 %, ...
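+        # e.g. (illustrative values, assuming max_hops were 7): a
+        # betavariate() draw of 0.3 would give n = int(0.3 * (7-1) + 1) = 2
+        # helper nodes (?n0, ?n1) and n + 1 = 3 hop variables (?p0..?p2)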
+ n = int(random.betavariate(alpha, beta) * (max_hops-1) + 1) nodes = [SOURCE_VAR] + [Variable('n%d' % i) for i in range(n)] + [TARGET_VAR] hops = [Variable('p%d' % i) for i in range(n + 1)] - # TODO: Entfernern, wenn direct einfach immer random gewählt werden soll - if direct is None or len(direct) != n + 1: - logger.debug( - 'No direction chosen, or direction tuple with false length' - ) - direct = [0 for _ in range(n + 1)] - gp_helper = [] - for i in range(n + 1): - if direct[i] == 0: - direct[i] = random.choice([-1, 1]) - if direct[i] == 1: - gp_helper.append( - GraphPattern([(nodes[i], hops[i], nodes[i + 1])]) - ) - else: - gp_helper.append( - GraphPattern([(nodes[i + 1], hops[i], nodes[i])]) - ) - # Queries für die Schritte + if not directions: + directions = [0 for _ in range(n + 1)] + directions = [ + random.choice([-1, 1]) if d not in [-1, 1] else d for d in directions + ] + gp_hops = [ + # directions[i] == 1 => hop in the direction source -> target + GraphPattern([(nodes[i], hops[i], nodes[i + 1])]) if directions[i] == 1 + # directions[i] == -1 => hop in the direction target -> source + else GraphPattern([(nodes[i + 1], hops[i], nodes[i])]) + for i in range(n+1) + ] + # queries to get the first n hops: valueblocks_s = {} valueblocks_t = {} for i in range(n // 2 + 1): @@ -743,16 +764,16 @@ def mutate_deep_narrow_path( hops[i], nodes[i+1], valueblocks_s, - gp_helper[:i + 1], + gp_hops[:i + 1], SOURCE_VAR, - gp_in=childin, + gp_in=child_in_queries, ) if not q_res: return [child] valueblocks_s[hops[i]] = { (hops[i],): random.sample( [(q_r,) for q_r in q_res], - min(10, len(q_res)) + min(config.MUTPB_DN_MAX_HOP_INST, len(q_res)) ) } if n-i > i: @@ -763,22 +784,21 @@ def mutate_deep_narrow_path( hops[n-i], nodes[n-i], valueblocks_t, - gp_helper[n - i:], + gp_hops[n - i:], TARGET_VAR, - gp_in=childin, + gp_in=child_in_queries, ) if not q_res: return [child] valueblocks_t[hops[n-i]] = { (hops[n-i],): random.sample( [(q_r,) for q_r in q_res], - min(config.MUTPB_DN_AVG_DEG_LIMIT, len(q_res)) + min(config.MUTPB_DN_MAX_HOP_INST, len(q_res)) ) } - # Query fürs Ergebnis - # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden - # werden + # query to get the last hop and instantiations, that connect source and + # target valueblocks = {} valueblocks.update(valueblocks_s) valueblocks.update(valueblocks_t) @@ -788,19 +808,18 @@ def mutate_deep_narrow_path( child, hops, valueblocks, - gp_helper, - gp_in=childin + gp_hops, + gp_in=child_in_queries ) if not q_res: return [child] - res = [] - for inst in q_res: - child_inst = GraphPattern([ - (nodes[i], inst[i], nodes[i + 1]) if direct[i] == 1 - else (nodes[i + 1], inst[i], nodes[i]) + res = [ + child + GraphPattern([ + (nodes[i], qr[i], nodes[i + 1]) if directions[i] == 1 + else (nodes[i + 1], qr[i], nodes[i]) for i in range(n + 1) - ]) - res.append(GraphPattern(child + child_inst)) + ]) for qr in q_res + ] return res From 82cdacfb84addf1ef760c8ffcb9de324ec94cc59 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 15:02:35 +0200 Subject: [PATCH 20/27] Renamed the correct to_sparql_depp_narrow_path_inst_query() --- gp_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gp_query.py b/gp_query.py index 5f1327d..60caee0 100644 --- a/gp_query.py +++ b/gp_query.py @@ -816,7 +816,7 @@ def _deep_narrow_path_inst_chunk_q(gp, _vars_steps_and_stuff, values_chunk): } } valueblocks.update(_valueblocks) - return gp.to_sparql_useful_path_inst_query( + return gp.to_sparql_deep_narrow_path_inst_query( hop, 
valueblocks, steps, gp_in=gp_in
     )

From 9117d059a678023e8478e1446721d524a72ef2bd Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Wed, 5 Sep 2018 15:12:40 +0200
Subject: [PATCH 21/27] Comments -> english

---
 gp_query.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/gp_query.py b/gp_query.py
index 60caee0..1763ece 100644
--- a/gp_query.py
+++ b/gp_query.py
@@ -713,10 +713,9 @@ def deep_narrow_path_query(
     batch_size=None
 ):
     _query_stats.useful_path_query_count += 1
-    # TODO: evtl. je 10 pro 'gefixter' Variable von batch-size abziehen
-    # (weil der Block ja mit rein geht)
+    # TODO: maybe batch_size = batch_size - 10 * number of valueblocks for hops
     _values = graph_pattern.matching_node_pairs
-    # TODO: evtl. Schnitt mit noch nicht abgedeckten
+    # TODO: maybe restrict to STPs that are not yet well covered
     _ret_val_mapping = {stp: [stp] for stp in graph_pattern.matching_node_pairs}
     _vars_steps_and_stuff = (
         var_to_fix, var_to_count, startvar, valueblocks, steps, avglimit, gp_in
@@ -765,10 +764,8 @@ def _deep_narrow_path_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds):
     bindings = sparql_json_result_bindings_to_rdflib(
         get_path(q_res, res_rows_path, default=[])
     )
-
     for row in bindings:
-        # TODO: Drüber nachdenken, ob iwie die avg-outgoing auch mit
-        # zurückgegeben werden sollen
+        # TODO: Maybe return the avg-degree too
        chunk_res.append(get_path(row, [var_to_fix]))
     return chunk_res

@@ -788,10 +785,9 @@ def deep_narrow_path_inst_query(
     batch_size=None
 ):
     _query_stats.useful_path_inst_query_count += 1
-    # TODO: evtl. je 10 pro 'gefixter' Variable von batch-size abziehen
-    # (weil der Block ja mit rein geht)
+    # TODO: maybe batch_size = batch_size - 10 * number of valueblocks for hops
     _values = graph_pattern.matching_node_pairs
-    # evtl. Schnitt mit noch nicht abgedeckten
+    # TODO: maybe restrict to STPs that are not yet well covered
     _ret_val_mapping = {stp: [stp] for stp in graph_pattern.matching_node_pairs}
     _vars_steps_and_stuff = (hop, valueblocks, steps, gp_in)
     return _multi_query(

From d792d10d9467e0b0538cc69d63f9384a83457317 Mon Sep 17 00:00:00 2001
From: "philipp.neuer"
Date: Wed, 5 Sep 2018 15:16:59 +0200
Subject: [PATCH 22/27] Erased all unused to_sparql_*_query()

---
 graph_pattern.py | 217 -----------------------------------------------
 1 file changed, 217 deletions(-)

diff --git a/graph_pattern.py b/graph_pattern.py
index ce7b45a..5b31917 100644
--- a/graph_pattern.py
+++ b/graph_pattern.py
@@ -636,194 +636,6 @@ def to_sparql_select_query(
         res = textwrap.dedent(res)
         return self._sparql_prefix(res)
 
-    def to_sparql_select_sample_query(
-        self,
-        values,
-        values_s_t=None,
-        projection=None,
-        limit=None,
-        sample_var=None
-    ):
-        """Generates a SPARQL select sample query from the graph pattern.
-
-        Examples:
-            TODO
-
-        Args:
-            values: a dict mapping a variable tuple to a list of binding tuples,
-                e.g. {(v1, v2): [(uri1, uri2), (uri3, uri4), ...]}
-            values_s_t: TODO
-            projection: which variables to select on, by default all vars.
- limit: integer to limit the result size - sample_var: the variable to sample over - """ - assert self.vars_in_graph, \ - "tried to get sparql for pattern without vars: %s" % (self,) - - if projection is None: - projection = sorted([v for v in self.vars_in_graph]) - - # if sample_var is None: - # sample_var = random.choice(projection) - # logger.info(sample_var) - - if sample_var: - projection.remove(sample_var) - - res = "SELECT %(samp)s %(proj)s WHERE {\n" \ - "%(valst)s\n" \ - "%(qpp)s}\n" \ - "%(lim)s" % { - 'samp': (' SAMPLE(%s) as %s' % ( - ''.join(sample_var.n3()), - ''.join(sample_var.n3()) - )) if sample_var else '', - 'proj': ' '.join([v.n3() for v in projection]), - 'valst': self._sparql_values_part(values=values_s_t, indent=' ') - if values_s_t is not None else '', - 'qpp': self._sparql_query_pattern_part( - values=values, - indent=' ', - ), - 'lim': ('LIMIT %d\n' % limit) if limit is not None else '', - } - res = textwrap.dedent(res) - return self._sparql_prefix(res) - - def to_sparql_filter_by_count_in_out_query( - self, - values, - count_node, - in_out=None, - max_in=None, - max_out=None, - projection=None, - gp=None, - limit=None, - sample_var=None - ): - # TODO: Möglicherweise noch die Pfade aus dem gp_in rausfiltern, man - # will ja eher selten einen zusatzhop über einen schon vorhandenen - # Pfad finden - - """Generates a SPARQL select query from the graph pattern. - - Examples: - TODO - - Args: TODO - values: a dict mapping a variable tuple to a list of binding tuples, - e.g. {(v1, v2): [(uri1, uri2), (uri3, uri4), ...]} - count_node: Node to filter over outgoing arcs. - in_out: - max_in: - max_out: max outgoing arcs - projection: which variables to select on, by default all vars. - gp: - limit: integer to limit the result size - sample_var: the variable to sample over - """ - assert self.vars_in_graph, \ - "tried to get sparql for pattern without vars: %s" % (self,) - - if projection is None: - projection = sorted([v for v in self.vars_in_graph]) - if sample_var: - projection.remove(sample_var) - - if max_out is None: - max_out = 20 - if max_in is None: - max_in = 20 - - if in_out not in ['in', 'out', 'inout']: - in_out = random.choice(['in', 'out', 'inout']) - logger.info('in_out was set on %s' % in_out) - count_out = Variable('cout') - count_in = Variable('cin') - rand_var_out = gen_random_var() - rand_var_in = gen_random_var() - if gp: - if in_out == 'out': - gp_ = GraphPattern(chain(self, - GraphPattern([ - (count_node, count_out, rand_var_out) - ]), - gp)) - elif in_out == 'in': - gp_ = GraphPattern(chain(self, - GraphPattern([ - (rand_var_in, count_in, count_node) - ]), - gp)) - else: # TODO: Testen ob inout überhaupt passt - gp_ = GraphPattern(chain(self, - GraphPattern([ - (rand_var_in, count_in, count_node), - (count_node, count_out, rand_var_out) - ]), - gp)) - else: - if in_out == 'out': - gp_ = GraphPattern(chain(self, - GraphPattern([ - (count_node, count_out, rand_var_out) - ]) - )) - elif in_out == 'in': - gp_ = GraphPattern(chain(self, - GraphPattern([ - (rand_var_in, count_in, count_node) - ]) - )) - else: # TODO: Testen ob inout überhaupt passt - gp_ = GraphPattern(chain(self, - GraphPattern([ - (rand_var_in, count_in, count_node), - (count_node, count_out, rand_var_out) - ]) - )) - - res = "SELECT %(samp)s %(proj)s %(count)s WHERE " \ - "{\n%(qpp)s}\n%(gb)s\n%(hv)s\n%(lim)s" % { - 'samp': (' SAMPLE(%s) as %s' % ( - ''.join(sample_var.n3()), - ''.join(sample_var.n3()) - )) if sample_var else '', - 'proj': ' '.join([v.n3() for v in projection]), - 
'count': (' COUNT(%s) as %s' % ( - ''.join(count_out.n3()), - ''.join(count_out.n3()))) if in_out == 'out' else - (' COUNT(%s) as %s' % ( - ''.join(count_in.n3()), - ''.join(count_in.n3()))) if in_out == 'in' else - (' COUNT(%s) as %s COUNT(%s) as %s' % ( - ''.join(count_out.n3()), - ''.join(count_out.n3()), - ''.join(count_in.n3()), - ''.join(count_in.n3()) - )), - 'qpp': gp_._sparql_query_pattern_part( - values=values, - indent=' ', - ), - 'gb': ('GROUP BY ' + ' '.join([v.n3() for v in projection])), - 'hv': ('HAVING (COUNT(%s)<%s)' % ( - ''.join(count_out.n3()), - str(max_out))) if in_out == 'out' else - ('HAVING (COUNT(%s)<%s)' % ( - ''.join(count_in.n3()), - str(max_in))) if in_out == 'in' else - ('HAVING (COUNT(%s)<%s&&COUNT(%s)<%s)' % ( - ''.join(count_out.n3()), - str(max_out), - ''.join(count_in.n3()), - str(max_in) - )), - 'lim': ('LIMIT %d\n' % limit) if limit is not None else '', - } - res = textwrap.dedent(res) - return gp_._sparql_prefix(res) def to_sparql_deep_narrow_path_query( self, @@ -866,36 +678,7 @@ def to_sparql_deep_narrow_path_query( res = textwrap.dedent(res) return self._sparql_prefix(res) - def to_sparql_deep_narrow_path_inst_query_old( - self, - hop, - valueblocks, - gp_help, - gp_in=False - ): - res = "SELECT %(vtf)s (COUNT (?source) as ?cst) {\n" \ - "%(val)s\n" \ - "%(trip)s }\n" \ - "GROUP BY %(vtf)s\n" \ - "HAVING (COUNT (?source) > 0)" % { - 'vtf': ' '.join([var.n3() for var in hop]), - 'val': ''.join([ - self._sparql_values_part( - values=valueblocks[key], indent=' ' - ) for key in valueblocks - ]), - 'trip': ''.join(gp_help._sparql_triples_part()) + - # TODO: nicht auf private Methode zugreifen - ''.join([ - self._sparql_triples_part( - indent=' ' - ) if gp_in else '' - ]), - } - res = textwrap.dedent(res) - return self._sparql_prefix(res) - # TODO: die normale inst durch diese hier ersetzen (sollte überall gehen) def to_sparql_deep_narrow_path_inst_query( self, hop, From 75bd1ea4af6ff6fa1b3b9eb983ac38b39bfec7ca Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 15:24:40 +0200 Subject: [PATCH 23/27] Comments -> english --- graph_pattern.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/graph_pattern.py b/graph_pattern.py index 5b31917..3cef58b 100644 --- a/graph_pattern.py +++ b/graph_pattern.py @@ -647,7 +647,7 @@ def to_sparql_deep_narrow_path_query( avglimit=10, gp_in=False ): - # TODO: evtl. 
Limit zufügen + # TODO: Maybe use a limit count_var_to_count = Variable('c' + ''.join(var_to_count)) avg_var_to_count = Variable('avgc' + ''.join(var_to_count)) res = "SELECT %(vtf)s (AVG(%(cvtc)s) as %(avtc)s) {\n" \ @@ -669,7 +669,7 @@ def to_sparql_deep_narrow_path_query( ]), 'trip': ''.join([ step._sparql_triples_part(indent=' ') for step in steps - # TODO: nicht auf private Methode zugreifen + # TODO: don't use private method ]) + ''.join([ self._sparql_triples_part(indent=' ') if gp_in else '' ]), @@ -686,6 +686,7 @@ def to_sparql_deep_narrow_path_inst_query( steps, gp_in=False ): + # TODO: Maybe use a limit res = "SELECT %(vtf)s (COUNT (?source) as ?cst) {\n" \ "%(val)s\n" \ "%(trip)s }\n" \ @@ -699,7 +700,7 @@ def to_sparql_deep_narrow_path_inst_query( ]), 'trip': ''.join([ step._sparql_triples_part() for step in steps - # TODO: nicht auf private Methode zugreifen + # TODO: don't use private method ]) + ''.join([ self._sparql_triples_part(indent=' ') if gp_in else '' ]), From 72f2fee9e3078bd7b21bf30e6d5998cedd3cb956 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 15:58:45 +0200 Subject: [PATCH 24/27] deleted test_fv_eval.py and SPARQL-query.py --- tests/SPARQL-query.py | 75 ------------------ tests/test_fv_eval.py | 174 ------------------------------------------ 2 files changed, 249 deletions(-) delete mode 100644 tests/SPARQL-query.py delete mode 100644 tests/test_fv_eval.py diff --git a/tests/SPARQL-query.py b/tests/SPARQL-query.py deleted file mode 100644 index 4bbb7e0..0000000 --- a/tests/SPARQL-query.py +++ /dev/null @@ -1,75 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""ein File einfach um SPARQL-queries abzufeuern, statt es online im Browser -zu machen. -""" - -import logging -from collections import OrderedDict -from os import getenv - -import SPARQLWrapper -from splendid import time_func -import socket -import rdflib -from rdflib import URIRef -from rdflib import Variable - -from config import SPARQL_ENDPOINT -from gp_learner import evaluate -from gp_learner import mutate_fix_var -from gp_learner import update_individuals -from gp_query import calibrate_query_timeout -from gp_query import query_time_hard_exceeded -from gp_query import query_time_soft_exceeded -from graph_pattern import GraphPattern -from graph_pattern import SOURCE_VAR -from graph_pattern import TARGET_VAR -from ground_truth_tools import get_semantic_associations -from ground_truth_tools import split_training_test_set -from gtp_scores import GTPScores -from serialization import print_graph_pattern - - -sparql = SPARQLWrapper.SPARQLWrapper( - getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql')) -try: - timeout = max(5, calibrate_query_timeout(sparql)) # 5s for warmup -except IOError: - from nose import SkipTest - raise SkipTest( - "Can't establish connection to SPARQL_ENDPOINT:\n %s\n" - "Skipping tests in\n %s" % (SPARQL_ENDPOINT, __file__)) - -sparql.resetQuery() -sparql.setTimeout(timeout) -sparql.setReturnFormat(SPARQLWrapper.JSON) - -q = 'SELECT ?source ?target ?vcb0 ?vcb1 ?vcb2 ?vcb3 WHERE {' \ - '?source ?vcb0 ?vcb2 .' \ - '?target ?vcb3 .' \ - '?target ?vcb1 .' \ - '?vcb2 ?source .' \ - '?vcb2 ?target ' \ - '}' - -try: - q_short = ' '.join((line.strip() for line in q.split('\n'))) - sparql.setQuery(q_short) - c = time_func(sparql.queryAndConvert) -except socket.timeout: - c = (timeout, {}) -except ValueError: - # e.g. 
if the endpoint gives us bad JSON for some unicode chars - print( - 'Could not parse result for query, assuming empty result...\n' - 'Query:\n%s\nException:', q, - exc_info=1, # appends exception to message - ) - c = (timeout, {}) - -t, res = c -print('orig query took %.4f s, result:\n%s\n', t, res) \ No newline at end of file diff --git a/tests/test_fv_eval.py b/tests/test_fv_eval.py deleted file mode 100644 index 3c847b6..0000000 --- a/tests/test_fv_eval.py +++ /dev/null @@ -1,174 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""test_mutate_fix_var und test_evaluate einmal davor und -einmal über die results aus mutate_fix_var -""" - -import logging -from collections import defaultdict -from collections import OrderedDict -from os import getenv - -import SPARQLWrapper -from splendid import get_path -from splendid import time_func -import socket -import rdflib -from rdflib import BNode -from rdflib import Literal -from rdflib import URIRef -from rdflib import Variable - -from config import SPARQL_ENDPOINT -from gp_learner import evaluate -from gp_learner import mutate_fix_var -from gp_learner import update_individuals -from gp_query import calibrate_query_timeout -from gp_query import query_time_hard_exceeded -from gp_query import query_time_soft_exceeded -from graph_pattern import GraphPattern -from graph_pattern import SOURCE_VAR -from graph_pattern import TARGET_VAR -from ground_truth_tools import get_semantic_associations -from ground_truth_tools import split_training_test_set -from gtp_scores import GTPScores -from serialization import print_graph_pattern -from utils import sparql_json_result_bindings_to_rdflib - -logger = logging.getLogger(__name__) - -dbp = rdflib.Namespace('http://dbpedia.org/resource/') -owl = rdflib.Namespace('http://www.w3.org/2002/07/owl#') - -a = Variable('a') -b = Variable('b') -c = Variable('c') -d = Variable('d') -e = Variable('e') -f = Variable('f') -v = Variable('v') -w = Variable('w') - -sameAs = owl['sameAs'] - -gp_1 = GraphPattern([ - (SOURCE_VAR, v, TARGET_VAR) -]) - -gp_2 = GraphPattern([ - (SOURCE_VAR, v, TARGET_VAR), - (TARGET_VAR, w, SOURCE_VAR) -]) - -gp_3 = GraphPattern([ - (SOURCE_VAR, a, b), - (b, c, d), - (d, e, TARGET_VAR) -]) - -gp_4 = GraphPattern([ - (SOURCE_VAR, a, b), - (b, c, d), - (TARGET_VAR, e, d) -]) - -gp_5 = GraphPattern([ - (SOURCE_VAR, a, c), - (TARGET_VAR, URIRef('http://dbpedia.org/ontology/thumbnail'), d), - (TARGET_VAR, URIRef('http://dbpedia.org/property/image'), b), - (c, URIRef('http://dbpedia.org/ontology/wikiPageWikiLink'), SOURCE_VAR), - (c, URIRef('http://purl.org/linguistics/gold/hypernym'), TARGET_VAR) -]) - -ground_truth_pairs_1 = [ - (dbp['Berlin'], dbp['Germany']), - (dbp['Hamburg'], dbp['Germany']), - (dbp['Kaiserslautern'], dbp['Germany']), - (dbp['Wien'], dbp['Austria']), - (dbp['Insbruck'], dbp['Austria']), - (dbp['Salzburg'], dbp['Austria']), - (dbp['Paris'], dbp['France']), - (dbp['Lyon'], dbp['France']), - (dbp['Amsterdam'], dbp['Netherlands']), - (dbp['Brussels'], dbp['Belgium']), - (dbp['Washington'], dbp['United_States']), - (dbp['Madrid'], dbp['Spain']), - (dbp['Prague'], dbp['Czech_Republic']), - (dbp['Bern'], dbp['Switzerland']), -] - -ground_truth_pairs_2 = get_semantic_associations() -ground_truth_pairs_2, _ = split_training_test_set(ground_truth_pairs_2) -ground_truth_pairs_2 = ground_truth_pairs_2[1:100] - -ground_truth_pairs_3 = [ - (dbp['Barrister'], dbp['Law']), - (dbp['Christ'], dbp['Jesus']), - 
(dbp['Pottage'], dbp['Soup']) - ] - -ground_truth_pairs_4 = [ - (dbp['Motorrad_(disambiguation)'], dbp['Bmw_motorcycle']), - (dbp['Horse'], dbp['Saddle']) -] - -gtp_scores_1 = GTPScores(ground_truth_pairs_1) -gtp_scores_2 = GTPScores(ground_truth_pairs_2) -gtp_scores_3 = GTPScores(ground_truth_pairs_3) -gtp_scores_4 = GTPScores(ground_truth_pairs_4) - -sparql = SPARQLWrapper.SPARQLWrapper( - getenv('SPARQL_ENDPOINT', 'http://dbpedia.org/sparql')) -#sparql = SPARQLWrapper.SPARQLWrapper(SPARQL_ENDPOINT) -try: - timeout = max(5, calibrate_query_timeout(sparql)) # 5s for warmup -except IOError: - from nose import SkipTest - raise SkipTest( - "Can't establish connection to SPARQL_ENDPOINT:\n %s\n" - "Skipping tests in\n %s" % (SPARQL_ENDPOINT, __file__)) - - -def test_eval(gtp_scores, gp): - res, matching_node_pairs, gtp_precisions = evaluate( - sparql, timeout, gtp_scores, gp, run=0, gen=0) - update_individuals([gp], [(res, matching_node_pairs, gtp_precisions)]) - logger.info(gp.fitness) - - -def test_mut_fv(gtp_scores, gp, r=None): - res = mutate_fix_var(sparql, timeout, gtp_scores, gp, rand_var=r) - for gp_ in res: - logger.info(gp_) - - -def test_eval_list(gtp_scores, gp, r=None): - mfv_res = mutate_fix_var(sparql, timeout, gtp_scores, gp, rand_var=r) - for gp_ in mfv_res: - res, matching_node_pairs, gtp_precisions = evaluate( - sparql, timeout, gtp_scores, gp_, run=0, gen=0) - update_individuals([gp_], [(res, matching_node_pairs, gtp_precisions)]) - print_graph_pattern(gp_, print_matching_node_pairs=0) - return mfv_res - - -def test_eval_list_double(gtp_scores, gp, r_1=None, r_2=None): - # testing double execution of mutate_fix_var() on gp - res = test_eval_list(gtp_scores, gp, r_1) - gtp_scores.update_with_gps(res) - res_list = list(res) - for gp in res: - res_ = test_eval_list(gtp_scores, gp, r_2) - for gp_ in res_: - res_list.append(gp_) - gtp_scores.update_with_gps(res_list) - for gp in res_list: - print_graph_pattern(gp, print_matching_node_pairs=0) - -if __name__ == '__main__': - test_steps(ground_truth_pairs_2) - #values = {(SOURCE_VAR, TARGET_VAR): ground_truth_pairs_1} - #print(gp_1.to_sparql_select_sample_query(values)) From e2e09a4203dc88ab0668037f255c37ace6d7cd81 Mon Sep 17 00:00:00 2001 From: "philipp.neuer" Date: Wed, 5 Sep 2018 16:03:41 +0200 Subject: [PATCH 25/27] Erased everything except the test for the mutation in the learner --- tests/test_mutate_deep_narrow.py | 3235 ------------------------------ 1 file changed, 3235 deletions(-) diff --git a/tests/test_mutate_deep_narrow.py b/tests/test_mutate_deep_narrow.py index d564f6f..b636735 100644 --- a/tests/test_mutate_deep_narrow.py +++ b/tests/test_mutate_deep_narrow.py @@ -183,2919 +183,6 @@ (v[2], dbo['wikiPageRedirects'], SOURCE_VAR) ]) -# Verschiedene Limits festlegen: -# Limit: search object-list => subject-values in next query -limit_next = 500 -# limt: search an object list from two diferrent subjects and get hits through -# comparing them -limit_endpoint_two_sided = 1000 -# limit: search object-list => compare with sources/targets from gtp -limit_choose_endpoint = 5000 -# limit: search subject-list from two diferrent objects and get hits through -# comparing them -limit_startpoint_two_sided = 200 -# limit: search subject-list => subject-values in next query -limit_subject_next = 350 -# limit: search subject list => compare with sources/targets from gtp -limit_choose_subject_endpoint = 3000 -# limits: hit-list => on side subject, one side object: -limit_subj_to_obj = 350 -limit_obj_to_subj = 1500 - - -# einen 
ein-hop-weg von source zu target zum pattern hinzufügen -# TODO Varianten (von gefundenen b aus Variante der zweiten query -# 1.(default) mit (b, c, d) Liste von d suchen und mit Target-Liste vergleichen -# 2. mit (b, c, target). VALUES(target) suchen => -# Ergebnisse direkt an existente Targets gebunden -# 3. mit (b, c, target).urspurngs_gp -def mutate_deep_narrow_one_hop_s_t_without_direction( - gp_, gtps, max_out=None, max_in=None, in_out=None -): - vars_ = gp_.vars_in_graph - if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): - logger.info('SOURCE or TARGET are not in gp: %s' % gp_) - return [] - # Erstelle pattern für den ersten Schritt - a = Variable('a') - b = Variable('b') - c = Variable('c') - values_s_t = {(SOURCE_VAR, TARGET_VAR): gtps} - gp1 = GraphPattern([(SOURCE_VAR, a, b)]) - q = gp1.to_sparql_filter_by_count_in_out_query( - values=values_s_t, count_node=b, in_out=in_out, max_out=max_out, - max_in=max_in, gp=gp_, limit=200) - logger.info(q) - t, q_res1 = run_query(q) - if not q_res1['results']['bindings']: - return [] - # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res1) - # Erstelle values aus den Ergebnissen für b - values = get_values([b], q_res1) - gp2 = GraphPattern([(b, c, TARGET_VAR)]) - # Query die über eine var aus gp2 random samplet mit values aus b_list - q = gp2.to_sparql_select_sample_query(values=values, limit=5000) - logger.info(q) - try: - t, q_res2 = run_query(q) - except: - logger.info('Die Query (s.o.) hat nicht geklappt') - return [] - # Kreiere target_list, in der die "gefundenen" Targets vermerkt sind - target_list = get_values_list(TARGET_VAR, q_res2) - # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) - # Kreiere gtps_hit in der alle gtps, deren targets in target_list enthalten - # sind, "gespeichert" werden - stp_hit = get_stp_hit(target_list, gtps, 1) - gp_list = get_fixed_path_gp_one_hop( - q_res1, q_res2, gp_, stp_hit, [], a, b, c - ) - return gp_list - - -# einen ein-hop-weg von source zu target zum pattern hinzufügen -# (gp in query 2 eingefügt) -def mutate_deep_narrow_one_hop_s_t_2(gp_, gtps, max_in_out=None, in_out=None): - vars_ = gp_.vars_in_graph - if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): - logger.info('SOURCE or TARGET are not in gp: %s' % gp_) - return [] - # Erstelle pattern für den ersten Schritt - a = Variable('a') - b = Variable('b') - c = Variable('c') - gp1 = GraphPattern([(SOURCE_VAR, a, b)]) - values_s_t = {(SOURCE_VAR, TARGET_VAR): gtps} - q = gp1.to_sparql_filter_by_count_in_out_query( - values=values_s_t, count_node=b, in_out=in_out, - max_out=max_in_out, gp=gp_, limit=200) - logger.info(q) - t, q_res1 = run_query(q) - if not q_res1['results']['bindings']: - return [] - # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res1) - gp2 = GraphPattern([(b, c, TARGET_VAR)]) - # Erstelle values aus den Ergebnissen für b - values = get_values([b], q_res1) - # Query die über eine var aus gp2 random samplet mit values aus b_list - q = gp2.to_sparql_select_sample_query( - values=values, values_s_t=values_s_t, limit=5000 - ) - logger.info(q) - try: - t, q_res2 = run_query(q) - except: - logger.info('Die Query (s.o.) 
hat nicht geklappt') - return [] - # Kreiere target_list, in der die "gefundenen" Targets vermerkt sind - target_list = get_values_list(TARGET_VAR, q_res2) - # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) - # Kreiere gtps_hit in der alle gtps, deren targets in target_list enthalten - # sind, "gespeichert" werden - stp_hit = get_stp_hit(target_list, gtps, 1) - gp_list = get_fixed_path_gp_one_hop(q_res1, q_res2, gp_, stp_hit, a, b, c) - return gp_list - - -# eine one-hop verbindung zwischen source und target finden (Richtungen random) -def mutate_deep_narrow_one_random_hop_s_t(): - ich_darf_nich_leer_sein = [] - return ich_darf_nich_leer_sein - - -# einen direkten weg um einen hop erweitern (Weg löschen und stattdessen -# ein-hop weg einfügen) - - -# zu einem direkten weg noch einen ein-hop weg hinzufügen (weg behalten, -# ein-hop weg dazu) - - -# Runs a given (as String) query against the Sparql-endpoint -def run_query(q): - try: - q_short = ' '.join((line.strip() for line in q.split('\n'))) - sparql.setQuery(q_short) - cal = time_func(sparql.queryAndConvert) - except socket.timeout: - cal = (timeout, {}) - except ValueError: - # e.g. if the endpoint gives us bad JSON for some unicode chars - logger.info( - 'Could not parse result for query, assuming empty result...\n' - 'Query:\n%s\nException:', q, - exc_info=1, # appends exception to message - ) - cal = (timeout, {}) - return cal - - -# returns a list of value-tupels for the given variables, out of an -# query-result -def get_values(varlist, q_res): - res_rows_path = ['results', 'bindings'] - bind = sparql_json_result_bindings_to_rdflib( - get_path(q_res, res_rows_path, default=[]) - ) - vallist = [] - for row in bind: - tup = () - for var in varlist: - tup = tup + (get_path(row, [var]), ) - vallist.append(tup) - # ausfiltern von vallist (leider notwendig vor allem wegen dbr:Template - vallist[:] = [valtup for valtup in vallist if not list_remove_bool(valtup)] - # dopppelte noch herausfiltern - vallist = list(set(vallist)) - vartup = () - for var in varlist: - vartup = vartup + (var, ) - values = {vartup: vallist} - return values - - -# returns a list of found values for a given variable and query-result -def get_values_list(var, q_res): - res_rows_path = ['results', 'bindings'] - bind = sparql_json_result_bindings_to_rdflib( - get_path(q_res, res_rows_path, default=[]) - ) - vallist = [get_path(row, [var]) for row in bind] - return vallist - - -# gibt ein sample nach der Gewichtung der counts zurück, -# Gewichtung ist hier innerhalb angesetzt -def get_weighted_sample(var, count, q_res): - res_rows_path = ['results', 'bindings'] - bind = sparql_json_result_bindings_to_rdflib( - get_path(q_res, res_rows_path, default=[]) - ) - val = [] - weight = [] - for row in bind: - val.append(get_path(row, [var])) - # Davon ausgehend, dass x besonders gut ist - if float(get_path(row, [count])) == 1.0: - weight.append(10000) - else: - weight.append(1/(abs(1-float(get_path(row, [count]))))) - # Davon ausgehend, dass x besonders schlecht ist - # weight.append(abs(7-float(get_path(row, [count])))) - # weight.append(get_path(row, [count])) - s = sum(weight) - for i in range(len(weight)): - weight[i] = weight[i] / s - cum_weights = [0] + list(np.cumsum(weight)) - res = [] - while len(res) < min(10, len(list(set(val)))): - x = np.random.random() - i = 0 - while x > cum_weights[i]: - i = i + 1 - index = i - 1 - if val[index] not in res: - res.append((val[index],)) - sample = {(var,): res} - return sample - - -# gibt zu einer gegebenen Liste 
von Variablen die stp aus gtps zurück, -# bei denen Target(st=1)/Source(st=0) in der Variablen Liste ist. -def get_stp_hit(varlist, gtps, st): - stp = [] - for t in varlist: - for gtp in gtps: - if t == gtp[st]: - stp.append(gtp) - return stp - - -# Checks if an found RDF-Term can be used as value in a new query -# (without conflicts) -def list_remove_bool(tup): - for var in tup: - if isinstance(var, Literal): - i_n3 = var.n3() - if len(i_n3) > 60: - return True - elif isinstance(var, BNode): - return True - elif isinstance(var, URIRef): - return '%' in var - # TODO: nochmal schauen das % rauswerfen war kuzfristig, - # weil sparql mir bei einer query nen Fehler geschmissen hat - return False - - -# evaluates a given graph-pattern-list -def eval_gp_list(gtp_scores, gp_list): - for gp_l in gp_list: - eval_gp(gtp_scores, gp_l) - return gp_list - - -# evaluate a given graph-pattern -def eval_gp(gtp_scores, gp): - res = evaluate( - sparql, timeout, gtp_scores, gp, run=0, gen=0) - update_individuals([gp], [res]) - - -# helper to get target-hits and the corresponding stp -def target_hit(stps, t_lis): - res = [] - for stp in stps: - for t in t_lis: - if t == stp[1]: - res.append( - (t, stp) - ) - return res - - -# add one hop with the given direction. -def mutate_deep_narrow_one_hop( - gp_, max_out=None, max_in=None, in_out=None, richtung=None -): - vars_ = gp_.vars_in_graph - if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): - logger.info('SOURCE or TARGET are not in gp: %s' % gp_) - return [] - if not gp_.matching_node_pairs: - logger.info( - 'No matching node pairs, cant get better through adding constraints' - ) - return [] - # Erstelle pattern für den ersten Schritt - a = Variable('a') - b = Variable('b') - c = Variable('c') - if richtung not in [1, 2, 3, 4]: - richtung = random.choice([1, 2, 3, 4]) - logger.info('Richtung %s wurde gewaehlt' % richtung) - if richtung == 1: - values_s_t = {(SOURCE_VAR, TARGET_VAR): gp_.matching_node_pairs} - gp1 = GraphPattern([(SOURCE_VAR, a, b)]) - q = gp1.to_sparql_filter_by_count_in_out_query( - values=values_s_t, count_node=b, in_out=in_out, max_out=max_out, - max_in=max_in, limit=200) - logger.info(q) - t, q_res1 = run_query(q) - if not q_res1: - return [] - # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res1) - # Erstelle values aus den Ergebnissen für b - values = get_values([b], q_res1) - gp2 = GraphPattern([(b, c, TARGET_VAR)]) - # Query die über eine var aus gp2 random samplet mit values aus b_list - q = gp2.to_sparql_select_sample_query(values=values, limit=5000) - logger.info(q) - try: - t, q_res2 = run_query(q) - except: - logger.info('Die Query (s.o.) 
hat nicht geklappt') - return [] - # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res2) - gp_list = get_fixed_path_gp_one_hop( - q_res1, q_res2, gp_, richtung, gp_.matching_node_pairs, a, b, c - ) - elif richtung == 2: - values_s = { - (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] - } - values_t = { - (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] - } - gp1 = GraphPattern([(SOURCE_VAR, a, b)]) - gp2 = GraphPattern([(TARGET_VAR, c, b)]) - q = gp1.to_sparql_filter_by_count_in_out_query( - values=values_s, count_node=b, in_out=in_out, max_out=max_out, - max_in=max_in, limit=1000) - logger.info(q) - t, q_res1 = run_query(q) - if not q_res1['results']['bindings']: - return [] - q = gp2.to_sparql_filter_by_count_in_out_query( - values=values_t, count_node=b, in_out=in_out, max_out=max_out, - max_in=max_in, limit=1000) - logger.info(q) - t, q_res2 = run_query(q) - if not q_res2['results']['bindings']: - return [] - gp_list = get_fixed_path_gp_one_hop( - q_res1, q_res2, gp_, richtung, gp_.matching_node_pairs, a, b, c - ) - elif richtung == 3: - values_s_t = {(SOURCE_VAR, TARGET_VAR): gp_.matching_node_pairs} - gp2 = GraphPattern([(TARGET_VAR, c, b)]) - q = gp2.to_sparql_filter_by_count_in_out_query( - values=values_s_t, count_node=b, in_out=in_out, max_out=max_out, - max_in=max_in, limit=200) - logger.info(q) - t, q_res2 = run_query(q) - if not q_res2['results']['bindings']: - return [] - # logger.info('orig query took %.4f s, result:\n%s\n', t, q_res1) - gp1 = GraphPattern([(b, a, SOURCE_VAR)]) - # Erstelle values aus den Ergebnissen für b - values = get_values([b], q_res2) - # Query die über eine var aus gp2 random samplet mit values aus b_list - q = gp1.to_sparql_select_sample_query(values=values, limit=5000) - logger.info(q) - try: - t, q_res1 = run_query(q) - except: - logger.info('Die Query (s.o.) hat nicht geklappt') - return [] - gp_list = get_fixed_path_gp_one_hop( - q_res1, q_res2, gp_, richtung, gp_.matching_node_pairs, a, b, c - ) - else: - values_s = { - (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] - } - values_t = { - (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs] - } - gp1 = GraphPattern([(b, a, SOURCE_VAR)]) - gp2 = GraphPattern([(b, c, TARGET_VAR)]) - q = gp1.to_sparql_filter_by_count_in_out_query( - values=values_s, count_node=b, in_out=in_out, max_out=max_out, - max_in=max_in, limit=200) - logger.info(q) - t, q_res1 = run_query(q) - if not q_res1['results']['bindings']: - return [] - q = gp2.to_sparql_filter_by_count_in_out_query( - values=values_t, count_node=b, in_out=in_out, max_out=max_out, - max_in=max_in, limit=200) - logger.info(q) - t, q_res2 = run_query(q) - if not q_res2['results']['bindings']: - return [] - gp_list = get_fixed_path_gp_one_hop( - q_res1, q_res2, gp_, richtung, gp_.matching_node_pairs, a, b, c - ) - return gp_list - - -# fixed den ein-hop-pfad zwischen Source und Target, fügt ihn dem Pattern hinzu -# und gibt die Liste der resultierenden Pattern zurück -# TODO nicht so sehr auf source a b. b c Target fokussieren. 
-def get_fixed_path_gp_one_hop(q_res1, q_res2, gp_main, richtung, stp, a, b, c): - gp_list = [] - res_rows_path = ['results', 'bindings'] - bind1 = sparql_json_result_bindings_to_rdflib( - get_path(q_res1, res_rows_path, default=[]) - ) - bind2 = sparql_json_result_bindings_to_rdflib( - get_path(q_res2, res_rows_path, default=[]) - ) - for row2 in bind2: - for gtp in stp: - if gtp[1] == get_path(row2, [TARGET_VAR]): - for row1 in bind1: - if (get_path(row1, [b]) == get_path(row2, [b])) and \ - (get_path(row1, [SOURCE_VAR]) == gtp[0]): - if richtung == 1: - gp_ = GraphPattern([ - (SOURCE_VAR, get_path(row1, [a]), b), - (b, get_path(row2, [c]), TARGET_VAR) - ]) - elif richtung == 2: - gp_ = GraphPattern([ - (SOURCE_VAR, get_path(row1, [a]), b), - (TARGET_VAR, get_path(row2, [c]), b) - ]) - elif richtung == 3: - gp_ = GraphPattern([ - (b, get_path(row1, [a]), SOURCE_VAR), - (TARGET_VAR, get_path(row2, [c]), b) - ]) - else: - gp_ = GraphPattern([ - (b, get_path(row1, [a]), SOURCE_VAR), - (b, get_path(row2, [c]), TARGET_VAR) - ]) - - gp_ = GraphPattern(chain(gp_, gp_main)) - if gp_ not in gp_list: - gp_list.append(gp_) - logger.info(gtp) - return gp_list - - -# fixed den ein-hop-pfad zwischen Source und Target, fügt ihn dem Pattern hinzu -# und gibt die Liste der resultierenden Pattern zurück -# TODO nicht so sehr auf source a b. b c Target fokussieren. -def get_fixed_path_gp_two_hops( - q_res1, q_res2, q_res3, gp_main, richtung, stp, a, b, c, d, e -): - # TODO: überlegen nicht nur verschieden Pattern für verschiedene Richtungen - # zu machen, sondern auch in den Unterschiedlichen Ergebnissn anfangen - # (Idee wäre z.B. die a bis e durch nummerierte random vars zu ersetzen und - # sich dann zu überlegen wie man das übergibt, ob mans iwie immer entlang - # des patterns schafft oder eher nicht. 
- gp_list = [] - res_rows_path = ['results', 'bindings'] - bind1 = sparql_json_result_bindings_to_rdflib( - get_path(q_res1, res_rows_path, default=[]) - ) - bind2 = sparql_json_result_bindings_to_rdflib( - get_path(q_res2, res_rows_path, default=[]) - ) - bind3 = sparql_json_result_bindings_to_rdflib( - get_path(q_res3, res_rows_path, default=[]) - ) - for gtp in stp: - for row3 in bind3: - if gtp[1] == get_path(row3, [TARGET_VAR]): - for row2 in bind2: - if get_path(row2, [d]) == get_path(row3, [d]): - for row1 in bind1: - if get_path(row1, [b]) == \ - get_path(row2, [b]) and \ - get_path(row1, [SOURCE_VAR]) == \ - gtp[0]: - if richtung == 1: - gp_ = GraphPattern([ - (SOURCE_VAR, get_path(row1, [a]), b), - (b, get_path(row2, [c]), d), - (d, get_path(row3, [e]), TARGET_VAR) - ]) - elif richtung == 2: - gp_ = GraphPattern([ - (SOURCE_VAR, get_path(row1, [a]), b), - (b, get_path(row2, [c]), d), - (TARGET_VAR, get_path(row3, [e]), d) - ]) - elif richtung == 3: - gp_ = GraphPattern([ - (SOURCE_VAR, get_path(row1, [a]), b), - (d, get_path(row2, [c]), b), - (d, get_path(row3, [e]), TARGET_VAR) - ]) - elif richtung == 4: - gp_ = GraphPattern([ - (SOURCE_VAR, get_path(row1, [a]), b), - (d, get_path(row2, [c]), b), - (TARGET_VAR, get_path(row3, [e]), d) - ]) - elif richtung == 5: - gp_ = GraphPattern([ - (b, get_path(row1, [a]), SOURCE_VAR), - (b, get_path(row2, [c]), d), - (d, get_path(row3, [e]), TARGET_VAR) - ]) - elif richtung == 6: - gp_ = GraphPattern([ - (b, get_path(row1, [a]), SOURCE_VAR), - (b, get_path(row2, [c]), d), - (TARGET_VAR, get_path(row3, [e]), d) - ]) - elif richtung == 7: - gp_ = GraphPattern([ - (b, get_path(row1, [a]), SOURCE_VAR), - (d, get_path(row2, [c]), b), - (d, get_path(row3, [e]), TARGET_VAR) - ]) - else: - gp_ = GraphPattern([ - (b, get_path(row1, [a]), SOURCE_VAR), - (d, get_path(row2, [c]), b), - (TARGET_VAR, get_path(row3, [e]), d) - ]) - gp_ = GraphPattern(chain(gp_, gp_main)) - if gp_ not in gp_list: - gp_list.append(gp_) - logger.debug(gtp) - return gp_list - - -# add two hops. -def mutate_deep_narrow_two_hops( - gp_, max_out=None, max_in=None, in_out=None, richtung=None -): - vars_ = gp_.vars_in_graph - if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_): - logger.debug('SOURCE or TARGET are not in gp: %s' % gp_) - return [] - if not gp_.matching_node_pairs: - logger.debug( - 'No matching node pairs, cant get better through adding constraints' - ) - return [] - a = Variable('a') - b = Variable('b') - c = Variable('c') - d = Variable('d') - e = Variable('e') - gp_list = [] - if richtung not in range(1, 9): - richtung = random.choice(range(1, 9)) - logger.debug('Richtung %s wurde gewaehlt' % richtung) - if richtung == 1: - gp1 = GraphPattern([(SOURCE_VAR, a, b)]) - gp2 = GraphPattern([(b, c, d)]) - gp3 = GraphPattern([(d, e, TARGET_VAR)]) - - values_s = { - (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs] - } - q = gp1.to_sparql_filter_by_count_in_out_query( - values=values_s, count_node=b, in_out=in_out, max_out=max_out, - max_in=max_in, limit=limit_next) - logger.debug(q) - try: - t, q_res1 = run_query(q) - except: - logger.debug('Die Query (s.o.) hat nicht geklappt') - return [] - if not q_res1: - logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') - return [] - elif not q_res1['results']['bindings']: - logger.debug('Die Query (s.o.) 
-
-
-# add two hops to the pattern
-def mutate_deep_narrow_two_hops(
-    gp_, max_out=None, max_in=None, in_out=None, richtung=None
-):
-    vars_ = gp_.vars_in_graph
-    if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_):
-        logger.debug('SOURCE or TARGET are not in gp: %s' % gp_)
-        return []
-    if not gp_.matching_node_pairs:
-        logger.debug(
-            "no matching node pairs, can't get better by adding constraints"
-        )
-        return []
-    a = Variable('a')
-    b = Variable('b')
-    c = Variable('c')
-    d = Variable('d')
-    e = Variable('e')
-    if richtung not in range(1, 9):
-        richtung = random.choice(range(1, 9))
-    logger.debug('direction %s was chosen' % richtung)
-    gps = [
-        GraphPattern([tr])
-        for tr in _two_hop_shapes(a, b, c, d, e)[richtung]
-    ]
-    values_s = {
-        (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs]
-    }
-    values_t = {
-        (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs]
-    }
-    # per-direction query plans: order of the three sub-queries with their
-    # (sub-pattern index, kind, VALUES seed, extracted var, limit); the
-    # limits are the ones the eight original copy-pasted blocks used
-    ln, lsn = limit_next, limit_subject_next
-    le2, ls2 = limit_endpoint_two_sided, limit_startpoint_two_sided
-    lce = limit_choose_endpoint
-    plans = {
-        1: [(0, 'count', 'S', b, ln), (1, 'count', b, d, ln),
-            (2, 'sample', d, None, lce)],
-        2: [(0, 'count', 'S', b, ln), (1, 'count', b, d, le2),
-            (2, 'count', 'T', d, le2)],
-        3: [(0, 'count', 'S', b, ln), (1, 'count', b, d, ls2),
-            (2, 'count', 'T', d, ls2)],
-        4: [(0, 'count', 'S', b, le2), (2, 'count', 'T', d, ln),
-            (1, 'count', d, b, le2)],
-        5: [(0, 'count', 'S', b, lsn), (1, 'count', b, d, ln),
-            (2, 'sample', d, None, lce)],
-        6: [(2, 'count', 'T', d, ln), (0, 'count', 'S', b, ls2),
-            (1, 'count', d, b, ls2)],
-        7: [(2, 'count', 'T', d, lsn), (1, 'count', d, b, ln),
-            (0, 'sample', b, None, lce)],
-        8: [(2, 'count', 'T', d, ln), (1, 'count', d, b, ln),
-            (0, 'sample', b, None, lce)],
-    }
-    q_res = _run_hop_plan(
-        plans[richtung], gps, values_s, values_t, in_out, max_out, max_in
-    )
-    if q_res is None:
-        return []
-    return get_fixed_path_gp_two_hops(
-        q_res[0], q_res[1], q_res[2], gp_, richtung,
-        gp_.matching_node_pairs, a, b, c, d, e
-    )
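-
-
-# Editorial usage sketch (not from the original commit), mirroring the
-# parameters of the commented-out experiment at the bottom of this file:
-#
-#     gp = gp_found['140']
-#     eval_gp(gtp_scores, gp)
-#     candidates = mutate_deep_narrow_two_hops(
-#         gp, max_out=65, max_in=40, in_out='out', richtung=2)
-#     for cand in candidates:
-#         print_graph_pattern(cand)
-#
-# print_graph_pattern is only an assumption here (it is used elsewhere in
-# this repo for pretty-printing); any logging of the candidates would do.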
-
-
-# fixes the three-hop path between source and target, adds it to the
-# pattern and returns the list of resulting patterns
-# TODO: don't focus so much on source a b . b c target
-def get_fixed_path_gp_three_hops(
-    q_res1, q_res2, q_res3, q_res4, gp_main, richtung, stp,
-    a, b, c, d, e, f, g
-):
-    # TODO: consider not only different patterns for the different
-    # directions, but also starting from the different results (one idea:
-    # replace a..e by numbered random vars and then think about how to
-    # pass them on, and whether that always works out along the pattern)
-    gp_list = []
-    res_rows_path = ['results', 'bindings']
-    bind1 = sparql_json_result_bindings_to_rdflib(
-        get_path(q_res1, res_rows_path, default=[])
-    )
-    bind2 = sparql_json_result_bindings_to_rdflib(
-        get_path(q_res2, res_rows_path, default=[])
-    )
-    bind3 = sparql_json_result_bindings_to_rdflib(
-        get_path(q_res3, res_rows_path, default=[])
-    )
-    bind4 = sparql_json_result_bindings_to_rdflib(
-        get_path(q_res4, res_rows_path, default=[])
-    )
-    for gtp in stp:
-        for row4 in bind4:
-            if gtp[1] != get_path(row4, [TARGET_VAR]):
-                continue
-            for row3 in bind3:
-                if get_path(row3, [f]) != get_path(row4, [f]):
-                    continue
-                for row2 in bind2:
-                    if get_path(row2, [d]) != get_path(row3, [d]):
-                        continue
-                    for row1 in bind1:
-                        if get_path(row1, [b]) != get_path(row2, [b]) \
-                                or get_path(row1, [SOURCE_VAR]) != gtp[0]:
-                            continue
-                        if richtung == 1:
-                            gp_ = GraphPattern([
-                                (SOURCE_VAR, get_path(row1, [a]), b),
-                                (b, get_path(row2, [c]), d),
-                                (d, get_path(row3, [e]), f),
-                                (f, get_path(row4, [g]), TARGET_VAR)
-                            ])
-                        elif richtung == 2:
-                            gp_ = GraphPattern([
-                                (SOURCE_VAR, get_path(row1, [a]), b),
-                                (b, get_path(row2, [c]), d),
-                                (d, get_path(row3, [e]), f),
-                                (TARGET_VAR, get_path(row4, [g]), f)
-                            ])
-                        else:
-                            # dummy else so that gp_ is always assigned;
-                            # directions 3/4 are not reconstructed here yet
-                            gp_ = GraphPattern([])
-                        gp_ = GraphPattern(chain(gp_, gp_main))
-                        if gp_ not in gp_list:
-                            gp_list.append(gp_)
-                        logger.debug(gtp)
-    return gp_list
-
-
-# add three hops to the pattern
-def mutate_deep_narrow_three_hops(
-    gp_, max_out=None, max_in=None, in_out=None, richtung=None
-):
-    vars_ = gp_.vars_in_graph
-    if not (SOURCE_VAR in vars_ and TARGET_VAR in vars_):
-        logger.debug('SOURCE or TARGET are not in gp: %s' % gp_)
-        return []
-    if not gp_.matching_node_pairs:
-        logger.debug(
-            "no matching node pairs, can't get better by adding constraints"
-        )
-        return []
-    a = Variable('a')
-    b = Variable('b')
-    c = Variable('c')
-    d = Variable('d')
-    e = Variable('e')
-    f = Variable('f')
-    g = Variable('g')
-    # only directions 1-4 are implemented below (the original drew from
-    # range(1, 17), which left gp_list unbound for directions > 4)
-    if richtung not in range(1, 5):
-        richtung = random.choice(range(1, 5))
-    logger.debug('direction %s was chosen' % richtung)
-    shapes = {
-        1: [(SOURCE_VAR, a, b), (b, c, d), (d, e, f), (f, g, TARGET_VAR)],
-        2: [(SOURCE_VAR, a, b), (b, c, d), (d, e, f), (TARGET_VAR, g, f)],
-        3: [(SOURCE_VAR, a, b), (b, c, d), (f, e, d), (f, g, TARGET_VAR)],
-        4: [(SOURCE_VAR, a, b), (b, c, d), (f, e, d), (TARGET_VAR, g, f)],
-    }
-    gps = [GraphPattern([tr]) for tr in shapes[richtung]]
-    values_s = {
-        (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs]
-    }
-    values_t = {
-        (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs]
-    }
-    ln = limit_next
-    le2, ls2 = limit_endpoint_two_sided, limit_startpoint_two_sided
-    lce = limit_choose_endpoint
-    plans = {
-        1: [(0, 'count', 'S', b, ln), (1, 'count', b, d, ln),
-            (2, 'count', d, f, ln), (3, 'sample', f, None, lce)],
-        2: [(0, 'count', 'S', b, ln), (1, 'count', b, d, ln),
-            (2, 'count', d, f, le2), (3, 'count', 'T', f, le2)],
-        3: [(0, 'count', 'S', b, ln), (1, 'count', b, d, ln),
-            (2, 'count', d, f, ls2), (3, 'count', 'T', f, ls2)],
-        4: [(0, 'count', 'S', b, ln), (1, 'count', b, d, le2),
-            (3, 'count', 'T', f, ln), (2, 'count', f, d, le2)],
-    }
-    q_res = _run_hop_plan(
-        plans[richtung], gps, values_s, values_t, in_out, max_out, max_in
-    )
-    if q_res is None:
-        return []
-    return get_fixed_path_gp_three_hops(
-        q_res[0], q_res[1], q_res[2], q_res[3], gp_, richtung,
-        gp_.matching_node_pairs, a, b, c, d, e, f, g
-    )
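-
-
-# Editorial note: a plan row such as (3, 'count', 'T', f, limit_next)
-# reads: run sub-pattern gps[3] as a filter-by-count query, seed it with
-# the target VALUES block, extract the bindings of ?f, and cap the result
-# at limit_next rows; 'sample' rows use to_sparql_select_sample_query
-# instead. The row order reproduces the query order of the original
-# per-direction code blocks.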
-
-
-def get_fixed_path_gp_n_hops(
-    res_q, gp_, n, direct, stp, node, hn_ind, hop
-):
-    gp_list = []
-    res_rows_path = ['results', 'bindings']
-    bind = []
-    for res_q_i in res_q:
-        bind.append(sparql_json_result_bindings_to_rdflib(
-            get_path(res_q_i, res_rows_path, default=[])
-        ))
-    hit_paths = []
-    hit_paths_help = []
-
-    if hn_ind == 0:
-        for row in bind[0]:
-            for mnp in stp:
-                if mnp[0] == get_path(row, [node[0]]):
-                    hit_paths.append([[
-                        mnp[0],
-                        get_path(row, [hop[0]]),
-                        get_path(row, [node[1]])
-                    ]])
-        for i in range(1, n+1):
-            for path in hit_paths:
-                for row in bind[i]:
-                    if path[i-1][2] == get_path(row, [node[i]]):
-                        path_h = path + [[
-                            path[i-1][2],
-                            get_path(row, [hop[i]]),
-                            get_path(row, [node[i+1]])
-                        ]]
-                        hit_paths_help.append(path_h)
-            hit_paths = hit_paths_help
-            hit_paths_help = []
-
-    elif hn_ind == n+1:
-        for row in bind[n]:
-            for mnp in stp:
-                if mnp[1] == get_path(row, [node[n+1]]):
-                    hit_paths.append([[
-                        get_path(row, [node[n]]),
-                        get_path(row, [hop[n]]),
-                        mnp[1]
-                    ]])
-        for i in range(n-1, -1, -1):
-            for path in hit_paths:
-                for row in bind[i]:
-                    if path[(n-1)-i][0] == get_path(row, [node[i+1]]):
-                        # fixed: the original used path.append(...), which
-                        # returns None, with misplaced parentheses
-                        path_h = path + [[
-                            get_path(row, [node[i]]),
-                            get_path(row, [hop[i]]),
-                            path[(n-1)-i][0]
-                        ]]
-                        hit_paths_help.append(path_h)
-            hit_paths = hit_paths_help
-            hit_paths_help = []
-        for path in hit_paths:
-            path.reverse()
-
-    else:
-        hit_paths_l = []
-        hit_paths_r = []
-        # get the hits of hit_node to start from
-        for row_l in bind[hn_ind-1]:
-            for row_r in bind[hn_ind]:
-                if get_path(row_l, [node[hn_ind]]) == \
-                        get_path(row_r, [node[hn_ind]]):
-                    hit_paths_l.append([[
-                        get_path(row_l, [node[hn_ind-1]]),
-                        get_path(row_l, [hop[hn_ind-1]]),
-                        get_path(row_l, [node[hn_ind]])
-                    ]])
-                    hit_paths_r.append([[
-                        get_path(row_r, [node[hn_ind]]),
-                        get_path(row_r, [hop[hn_ind]]),
-                        get_path(row_r, [node[hn_ind+1]])
-                    ]])
-        # get the path from hit node to targets
-        for i in range(hn_ind+1, n+1):
-            for path in hit_paths_r:
-                for row in bind[i]:
-                    if path[i-(hn_ind+1)][2] == get_path(row, [node[i]]):
-                        path_h = path + [[
-                            path[i-(hn_ind+1)][2],
-                            get_path(row, [hop[i]]),
-                            get_path(row, [node[i+1]])
-                        ]]
-                        hit_paths_help.append(path_h)
-            hit_paths_r = hit_paths_help
-            hit_paths_help = []
-        # get the path from hit node to sources
-        for i in range(hn_ind, -1, -1):
-            for path in hit_paths_l:
-                for row in bind[i]:
-                    if path[hn_ind-i][0] == get_path(row, [node[i+1]]):
-                        path_h = path + [[
-                            get_path(row, [node[i]]),
-                            get_path(row, [hop[i]]),
-                            path[hn_ind-i][0]
-                        ]]
-                        hit_paths_help.append(path_h)
-            hit_paths_l = hit_paths_help
-            hit_paths_help = []
-        # get the full path from source to target
-        for path_l in hit_paths_l:
-            path_l.reverse()
-            for path_r in hit_paths_r:
-                if path_l[hn_ind][2] == path_r[0][0]:
-                    hit_paths.append(path_l + path_r)
-
-    # filter the paths over stp hits
-    hit_paths = filter_stp_hits(hit_paths, stp)
-
-    # make graph patterns with fixed hops out of the found paths
-    for path in hit_paths:
-        gp_list.append(
-            GraphPattern(
-                chain(
-                    GraphPattern([
-                        # fixed: the original called direct(i), but direct
-                        # is a list, not a callable
-                        (node[i], path[i][1], node[i+1]) if direct[i] == 1
-                        else (node[i+1], path[i][1], node[i])
-                        for i in range(n+1)
-                    ]),
-                    gp_
-                )
-            )
-        )
-
-    return gp_list
-
-
-def filter_stp_hits(hit_paths, stp):
-    # keep only paths whose endpoints form an actual source/target pair
-    res = []
-    for hit in hit_paths:
-        for mnp in stp:
-            if mnp[0] == hit[0][0] and mnp[1] == hit[-1][2]:
-                res.append(hit)
-    return res
-
-
-def mutate_deep_narrow_n_hops(
-    gp_, n, max_out=None, max_in=None, in_out=None, direct=None
-):
-    vars_ = gp_.vars_in_graph
-    # fixed: the original tested "not in ... and ... not in", which let
-    # patterns containing only one of the two variables slip through
-    if SOURCE_VAR not in vars_ or TARGET_VAR not in vars_:
-        logger.info('SOURCE or TARGET are not in gp: %s' % gp_)
-        return []
-    if not gp_.matching_node_pairs:
-        logger.info(
-            "no matching node pairs, can't get better by adding constraints"
-        )
-        return []
-    if n < 1:
-        logger.info('cannot add less than one hop')
-        return []
-    # set up lists for nodes, hops, values, gp helpers and query results
-    node = [SOURCE_VAR]
-    for i in range(n):
-        node.append(gen_random_var())
-    node.append(TARGET_VAR)
-    hop = [gen_random_var() for _ in range(n+1)]
-    if direct is None or len(direct) != n+1:
-        logger.info('no direction chosen, or direction tuple of wrong length')
-        direct = [0] * (n+1)
-    gp_helper = []
-    for i in range(n+1):
-        if direct[i] == 0:
-            direct[i] = random.choice([-1, 1])
-        if direct[i] == 1:
-            gp_helper.append(GraphPattern([(node[i], hop[i], node[i+1])]))
-        else:
-            gp_helper.append(GraphPattern([(node[i+1], hop[i], node[i])]))
-    values = [{} for _ in range(n+2)]
-    values[0] = {
-        (SOURCE_VAR, ): [(tup[0], ) for tup in gp_.matching_node_pairs]
-    }
-    values[n+1] = {
-        (TARGET_VAR, ): [(tup[1], ) for tup in gp_.matching_node_pairs]
-    }
-    res_q = [{} for _ in range(n+1)]
-
-    # select a random "hit node" => the node at which the hits are checked
-    hit_node = random.choice(node)
-    hn_ind = node.index(hit_node)
-
-    # TODO: use direct for the cases in querying
-    # querying
-    # from source to target if hit_node is the target:
-    if hit_node == TARGET_VAR:
-        # fire the queries for the first n steps
-        for i in range(0, n):
-            if gp_helper[i][0][0] == node[i]:
-                limit = limit_next
-            else:
-                limit = limit_subject_next
-            q = gp_helper[i].to_sparql_filter_by_count_in_out_query(
-                values=values[i], count_node=node[i+1], in_out=in_out,
-                max_out=max_out, max_in=max_in, limit=limit)
-            res_q[i] = run_query_checked(q, log=logger.info)
-            if res_q[i] is None:
-                return []
-            values[i+1] = get_values([node[i+1]], res_q[i])
-        # fire the last query for the target hits (fixed: the original
-        # compared against node[n-1], which can never be the subject of
-        # gp_helper[n])
-        if gp_helper[n][0][0] == node[n]:
-            limit = limit_choose_endpoint
-        else:
-            limit = limit_choose_subject_endpoint
-        q = gp_helper[n].to_sparql_select_sample_query(
-            values=values[n], limit=limit)
-        res_q[n] = run_query_checked(q, log=logger.info)
-        if res_q[n] is None:
-            return []
-
-    # from target to source if hit_node is the source:
-    elif hit_node == SOURCE_VAR:
-        # fire the queries for the first n steps
-        for i in range(n, 0, -1):
-            if gp_helper[i][0][0] == node[i+1]:
-                limit = limit_next
-            else:
-                limit = limit_subject_next
-            q = gp_helper[i].to_sparql_filter_by_count_in_out_query(
-                values=values[i+1], count_node=node[i], in_out=in_out,
-                max_out=max_out, max_in=max_in, limit=limit)
-            res_q[i] = run_query_checked(q, log=logger.info)
-            if res_q[i] is None:
-                return []
-            values[i] = get_values([node[i]], res_q[i])
-        # fire the last query for the source hits
-        if gp_helper[0][0][0] == node[1]:
-            limit = limit_choose_endpoint
-        else:
-            limit = limit_choose_subject_endpoint
-        q = gp_helper[0].to_sparql_select_sample_query(
-            values=values[1], limit=limit)
-        res_q[0] = run_query_checked(q, log=logger.info)
-        if res_q[0] is None:
-            return []
-
-    # from both sides towards the hit node:
-    else:
-        # fire the queries from the source up to the last node before
-        # hit_node
-        for i in range(0, hn_ind-1):
-            if gp_helper[i][0][0] == node[i]:
-                limit = limit_next
-            else:
-                limit = limit_subject_next
-            q = gp_helper[i].to_sparql_filter_by_count_in_out_query(
-                values=values[i], count_node=node[i+1], in_out=in_out,
-                max_out=max_out, max_in=max_in, limit=limit)
-            res_q[i] = run_query_checked(q, log=logger.info)
-            if res_q[i] is None:
-                return []
-            values[i+1] = get_values([node[i+1]], res_q[i])
-        # fire the queries from the target down to the last node behind
-        # hit_node
-        for i in range(n, hn_ind, -1):
-            if gp_helper[i][0][0] == node[i+1]:
-                limit = limit_next
-            else:
-                limit = limit_subject_next
-            q = gp_helper[i].to_sparql_filter_by_count_in_out_query(
-                values=values[i+1], count_node=node[i], in_out=in_out,
-                max_out=max_out, max_in=max_in, limit=limit)
-            res_q[i] = run_query_checked(q, log=logger.info)
-            if res_q[i] is None:
-                return []
-            values[i] = get_values([node[i]], res_q[i])
-        # fire the last two queries towards hit_node; the limits depend on
-        # whether hit_node is subject or object of the two adjacent triples
-        left_obj = gp_helper[hn_ind-1][0][0] == node[hn_ind-1]
-        right_obj = gp_helper[hn_ind][0][0] == node[hn_ind+1]
-        if left_obj and right_obj:
-            limit_l = limit_r = limit_endpoint_two_sided
-        elif not left_obj and not right_obj:
-            limit_l = limit_r = limit_startpoint_two_sided
-        elif left_obj:
-            limit_l, limit_r = limit_obj_to_subj, limit_subj_to_obj
-        else:
-            limit_l, limit_r = limit_subj_to_obj, limit_obj_to_subj
-        q = gp_helper[hn_ind-1].to_sparql_filter_by_count_in_out_query(
-            values=values[hn_ind-1], count_node=node[hn_ind],
-            in_out=in_out, max_out=max_out, max_in=max_in, limit=limit_l)
-        res_q[hn_ind-1] = run_query_checked(q, log=logger.info)
-        if res_q[hn_ind-1] is None:
-            return []
-        q = gp_helper[hn_ind].to_sparql_filter_by_count_in_out_query(
-            values=values[hn_ind+1], count_node=node[hn_ind],
-            in_out=in_out, max_out=max_out, max_in=max_in, limit=limit_r)
-        res_q[hn_ind] = run_query_checked(q, log=logger.info)
-        if res_q[hn_ind] is None:
-            return []
-
-    gp_list = get_fixed_path_gp_n_hops(
-        res_q, gp_, n, direct, gp_.matching_node_pairs, node, hn_ind, hop
-    )
-
-    return gp_list
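-
-
-# Editorial usage sketch (not from the original commit), matching the
-# commented-out experiment at the bottom of this file:
-#
-#     for i in range(20):
-#         res.append(mutate_deep_narrow_n_hops(
-#             gp, 2, max_out=65, in_out='out'))
-#
-# i.e. try to add a two-hop bridge between source and target, keeping only
-# intermediate nodes whose fan-out stays below max_out (as the
-# max_out/in_out parameter names suggest).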
-
-
-def _init_hop_search(gtps, n, direct):
-    # shared setup of the node/hop variable lists, edge directions,
-    # one-triple helper patterns and VALUES blocks (extracted from the
-    # identical preamble of the seven variants below)
-    node = [SOURCE_VAR]
-    for i in range(n):
-        node.append(Variable('n%i' % i))
-    node.append(TARGET_VAR)
-    hop = [Variable('p%i' % i) for i in range(n + 1)]
-    if direct is None or len(direct) != n + 1:
-        logger.debug('no direction chosen, or direction tuple of wrong length')
-        direct = [0] * (n + 1)
-    gp_helper = []
-    for i in range(n + 1):
-        if direct[i] == 0:
-            direct[i] = random.choice([-1, 1])
-        if direct[i] == 1:
-            gp_helper.append(GraphPattern([(node[i], hop[i], node[i + 1])]))
-        else:
-            gp_helper.append(GraphPattern([(node[i + 1], hop[i], node[i])]))
-    values = {
-        SOURCE_VAR: {(SOURCE_VAR,): [(tup[0],) for tup in gtps]},
-        TARGET_VAR: {(TARGET_VAR,): [(tup[1],) for tup in gtps]},
-        'st': {(SOURCE_VAR, TARGET_VAR): gtps},
-    }
-    res_q = [{} for _ in range(n + 1)]
-    return node, hop, direct, gp_helper, values, res_q
-
-
-def _sample_hop_block(hop_var, hop_values):
-    # VALUES block with a uniform sample of at most 10 of the found hops
-    rows = hop_values[(hop_var,)]
-    return {(hop_var,): random.sample(rows, min(10, len(rows)))}
-
-
-def _paths_from_inst_result(res_q_inst, node, hop, direct, n):
-    # turn the rows of the instantiation query into graph patterns along
-    # the chosen edge directions
-    bind = sparql_json_result_bindings_to_rdflib(
-        get_path(res_q_inst, ['results', 'bindings'], default=[])
-    )
-    res = []
-    for row in bind:
-        res.append(GraphPattern([
-            (node[i], get_path(row, [hop[i]]), node[i + 1])
-            if direct[i] == 1
-            else (node[i + 1], get_path(row, [hop[i]]), node[i])
-            for i in range(n + 1)
-        ]))
-    return res
-
-
-# first version, completely straightforward
-def mutate_deep_narrow_1(gp_, gtps, n, direct=None, gp_in=False):
-    node, hop, direct, gp_helper, values, res_q = _init_hop_search(
-        gtps, n, direct)
-    # queries for the single steps
-    valueblocks = {SOURCE_VAR: values[SOURCE_VAR]}
-    for i in range(n + 1):
-        q = gp_.to_sparql_deep_narrow_path_query(
-            hop[i], node[i + 1], valueblocks, gp_helper[:i + 1], gp_in=gp_in
-        )
-        res_q[i] = run_query_checked(q)
-        if res_q[i] is None:
-            return []
-        values[hop[i]] = get_values([hop[i]], res_q[i])
-        valueblocks[hop[i]] = _sample_hop_block(hop[i], values[hop[i]])
-
-    # query for the result
-    gp_help = GraphPattern([
-        (node[i], hop[i], node[i + 1]) if direct[i] == 1
-        else (node[i + 1], hop[i], node[i])
-        for i in range(n + 1)
-    ])
-    # shared source/target block so that only "real" paths are found
-    del valueblocks[SOURCE_VAR]
-    valueblocks['st'] = values['st']
-    q = gp_.to_sparql_deep_narrow_path_inst_query_old(
-        hop, valueblocks, gp_help, gp_in=gp_in
-    )
-    res_q_inst = run_query_checked(q)
-    if res_q_inst is None:
-        return []
-    return _paths_from_inst_result(res_q_inst, node, hop, direct, n)
-
-
-# second version: the query for the last step already gets the targets
-def mutate_deep_narrow_2(gp_, gtps, n, direct=None, gp_in=False):
-    node, hop, direct, gp_helper, values, res_q = _init_hop_search(
-        gtps, n, direct)
-    # queries for the single steps (the last hop is left to the
-    # instantiation query)
-    valueblocks = {SOURCE_VAR: values[SOURCE_VAR]}
-    for i in range(n):
-        q = gp_.to_sparql_deep_narrow_path_query(
-            hop[i], node[i + 1], valueblocks, gp_helper[:i + 1], gp_in=gp_in
-        )
-        res_q[i] = run_query_checked(q)
-        if res_q[i] is None:
-            return []
-        values[hop[i]] = get_values([hop[i]], res_q[i])
-        valueblocks[hop[i]] = _sample_hop_block(hop[i], values[hop[i]])
-
-    # shared source/target block so that only "real" paths are found
-    del valueblocks[SOURCE_VAR]
-    valueblocks['st'] = values['st']
-    q = gp_.to_sparql_deep_narrow_path_inst_query(
-        hop, valueblocks, gp_helper, gp_in=gp_in
-    )
-    res_q_inst = run_query_checked(q)
-    if res_q_inst is None:
-        return []
-    return _paths_from_inst_result(res_q_inst, node, hop, direct, n)
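-
-
-# Editorial note: versions 1 and 2 differ only in the last step. Version 2
-# stops pre-fetching hop candidates one step early and lets the
-# instantiation query (the non-_old variant, which receives gp_helper)
-# bind the final hop directly against the source/target VALUES block.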
-
-
-def _merge_valueblocks(vb_s, vb_t, st_values):
-    # merge the hop blocks of both search directions and use one shared
-    # source/target block so that only "real" paths are found
-    vb = dict(
-        (k, v) for k, v in chain(vb_s.items(), vb_t.items())
-        if k not in (SOURCE_VAR, TARGET_VAR)
-    )
-    vb['st'] = st_values
-    return vb
-
-
-# third version: bidirectional, straightforward
-def mutate_deep_narrow_3(gp_, gtps, n, direct=None, gp_in=False):
-    node, hop, direct, gp_helper, values, res_q = _init_hop_search(
-        gtps, n, direct)
-    # queries for the single steps, alternating from both ends
-    valueblocks_s = {SOURCE_VAR: values[SOURCE_VAR]}
-    valueblocks_t = {TARGET_VAR: values[TARGET_VAR]}
-    for i in range(int((n / 2) + 1)):
-        q = gp_.to_sparql_deep_narrow_path_query(
-            hop[i], node[i + 1], valueblocks_s, gp_helper[:i + 1],
-            gp_in=gp_in
-        )
-        res_q[i] = run_query_checked(q)
-        if res_q[i] is None:
-            return []
-        values[hop[i]] = get_values([hop[i]], res_q[i])
-        valueblocks_s[hop[i]] = _sample_hop_block(hop[i], values[hop[i]])
-        if n - i != i:
-            q = gp_.to_sparql_deep_narrow_path_query(
-                hop[n - i], node[n - i], valueblocks_t, gp_helper[n - i:],
-                startvar=TARGET_VAR, gp_in=gp_in
-            )
-            res_q[n - i] = run_query_checked(q)
-            if res_q[n - i] is None:
-                return []
-            values[hop[n - i]] = get_values([hop[n - i]], res_q[n - i])
-            valueblocks_t[hop[n - i]] = _sample_hop_block(
-                hop[n - i], values[hop[n - i]])
-
-    # query for the result
-    gp_help = GraphPattern([
-        (node[i], hop[i], node[i + 1]) if direct[i] == 1
-        else (node[i + 1], hop[i], node[i])
-        for i in range(n + 1)
-    ])
-    valueblocks = _merge_valueblocks(
-        valueblocks_s, valueblocks_t, values['st'])
-    q = gp_.to_sparql_deep_narrow_path_inst_query_old(
-        hop, valueblocks, gp_help, gp_in=gp_in
-    )
-    res_q_inst = run_query_checked(q)
-    if res_q_inst is None:
-        return []
-    return _paths_from_inst_result(res_q_inst, node, hop, direct, n)
-
-
-# fourth version: bidirectional with instantiation in the last step
-def mutate_deep_narrow_4(gp_, gtps, n, direct=None, gp_in=False):
-    node, hop, direct, gp_helper, values, res_q = _init_hop_search(
-        gtps, n, direct)
-    valueblocks_s = {SOURCE_VAR: values[SOURCE_VAR]}
-    valueblocks_t = {TARGET_VAR: values[TARGET_VAR]}
-    for i in range(int((n / 2) + 1)):
-        if i < int(n / 2):
-            q = gp_.to_sparql_deep_narrow_path_query(
-                hop[i], node[i + 1], valueblocks_s, gp_helper[:i + 1],
-                SOURCE_VAR, gp_in=gp_in
-            )
-            res_q[i] = run_query_checked(q)
-            if res_q[i] is None:
-                return []
-            values[hop[i]] = get_values([hop[i]], res_q[i])
-            valueblocks_s[hop[i]] = _sample_hop_block(
-                hop[i], values[hop[i]])
-        if n - i > i:
-            q = gp_.to_sparql_deep_narrow_path_query(
-                hop[n - i], node[n - i], valueblocks_t, gp_helper[n - i:],
-                TARGET_VAR, gp_in=gp_in
-            )
-            res_q[n - i] = run_query_checked(q)
-            if res_q[n - i] is None:
-                return []
-            values[hop[n - i]] = get_values([hop[n - i]], res_q[n - i])
-            valueblocks_t[hop[n - i]] = _sample_hop_block(
-                hop[n - i], values[hop[n - i]])
-
-    # the middle hop is left to the instantiation query
-    valueblocks = _merge_valueblocks(
-        valueblocks_s, valueblocks_t, values['st'])
-    q = gp_.to_sparql_deep_narrow_path_inst_query(
-        hop, valueblocks, gp_helper, gp_in=gp_in
-    )
-    res_q_inst = run_query_checked(q)
-    if res_q_inst is None:
-        return []
-    return _paths_from_inst_result(res_q_inst, node, hop, direct, n)
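-
-
-# Editorial worked example for the bidirectional split: for n = 4 (hops
-# 0..4) the loop runs i = 0, 1, 2. Version 4 pre-fetches hops 0 and 1
-# from the source side (i < int(n / 2)) and hops 4 and 3 from the target
-# side (n - i > i), leaving the middle hop 2 entirely to the
-# instantiation query.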
hat kein Ergebnis geliefert') - return [] - elif not res_q[i]['results']['bindings']: - logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') - return [] - values[hop[i]] = get_values([hop[i]], res_q[i]) - valueblocks[hop[i]] = get_weighted_sample( - hop[i], Variable('avgc'+''.join(node[i+1])), res_q[i] - ) - - # Query fürs Ergebnis - gp_help = GraphPattern([ - (node[i], hop[i], node[i+1]) if direct[i] == 1 - else (node[i+1], hop[i], node[i]) - for i in range(n+1) - ]) - # gemeinsamer source/target-block, damit nur "richtige" Pfade gefunden - # werden - del valueblocks[SOURCE_VAR] - valueblocks['st'] = values['st'] - q = gp_.to_sparql_deep_narrow_path_inst_query_old(hop, valueblocks, gp_help, gp_in=gp_in) - logger.debug(q) - try: - t, res_q_inst = run_query(q) - except: - logger.debug('Die Query (s.o.) hat nicht geklappt') - return [] - if not res_q_inst: - logger.debug('Die Query (s.o.) hat kein Ergebnis geliefert') - return [] - elif not res_q_inst['results']['bindings']: - logger.debug('Die Query (s.o.) hat keine gebundenen Variablen') - return [] - res = [] - res_rows_path = ['results', 'bindings'] - bind = sparql_json_result_bindings_to_rdflib( - get_path(res_q_inst, res_rows_path, default=[]) - ) - for row in bind: - gp_res = GraphPattern([ - (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1 - else (node[i + 1], get_path(row, [hop[i]]), node[i]) - for i in range(n + 1) - ]) - res.append(gp_res) - - return res - - -# sechste Version: Query für letzten step bekommt schon die Targets -# => Precheck feasible? -def mutate_deep_narrow_6( - gp_, gtps, n, direct=None, gp_in=False -): - node = [SOURCE_VAR] - for i in range(n): - node.append(Variable('n%i' % i)) - node.append(TARGET_VAR) - hop = [] - for i in range(n + 1): - hop.append(Variable('p%i' % i)) - if direct is None or len(direct) != n + 1: - logger.debug( - 'No direction chosen, or direction tuple with false length' - ) - direct = [] - for i in range(n + 1): - direct.append(0) - gp_helper = [] - for i in range(n + 1): - if direct[i] == 0: - direct[i] = random.choice([-1, 1]) - if direct[i] == 1: - gp_helper.append( - GraphPattern([(node[i], hop[i], node[i + 1])]) - ) - else: - gp_helper.append( - GraphPattern([(node[i + 1], hop[i], node[i])]) - ) - values = {} - values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]} - values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]} - values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps} - res_q = [] - for i in range(n + 1): - res_q.append({}) - - # Pre-check: - gp_help = GraphPattern([ - (node[i], hop[i], node[i+1]) if direct[i] == 1 - else (node[i+1], hop[i], node[i]) - for i in range(n+1) - ]) - q = gp_help.to_sparql_precheck_query(values['st'], gp_in=gp_in) - logger.debug(q) - try: - t, res_q = run_query(q) - except: - logger.info('Pre-Check hat nicht geklappt') - if not res_q: - logger.info('Pre-Check hat kein Ergebnis') - elif not res_q['results']['bindings']: - logger.info('Pre-Check hat keine gebundenen Variablen') - else: - logger.info('Pre-Check hat einen Treffer') - - # Queries für die Schritte - valueblocks = {} - valueblocks[SOURCE_VAR] = values[SOURCE_VAR] - for i in range(n): - q = gp_.to_sparql_deep_narrow_path_query( - hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in - ) - logger.debug(q) - try: - t, res_q[i] = run_query(q) - except: - logger.debug('Die Query (s.o.) hat nicht geklappt') - return [] - if not res_q[i]: - logger.debug('Die Query (s.o.) 
-# sixth version: the query for the last step already gets the targets
-# => precheck feasible?
-def mutate_deep_narrow_6(
-        gp_, gtps, n, direct=None, gp_in=False
-):
-    node = [SOURCE_VAR]
-    for i in range(n):
-        node.append(Variable('n%i' % i))
-    node.append(TARGET_VAR)
-    hop = []
-    for i in range(n + 1):
-        hop.append(Variable('p%i' % i))
-    if direct is None or len(direct) != n + 1:
-        logger.debug(
-            'No direction chosen, or direction tuple with false length'
-        )
-        direct = []
-        for i in range(n + 1):
-            direct.append(0)
-    gp_helper = []
-    for i in range(n + 1):
-        if direct[i] == 0:
-            direct[i] = random.choice([-1, 1])
-        if direct[i] == 1:
-            gp_helper.append(
-                GraphPattern([(node[i], hop[i], node[i + 1])])
-            )
-        else:
-            gp_helper.append(
-                GraphPattern([(node[i + 1], hop[i], node[i])])
-            )
-    values = {}
-    values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]}
-    values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]}
-    values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps}
-    res_q = []
-    for i in range(n + 1):
-        res_q.append({})
-
-    # pre-check:
-    gp_help = GraphPattern([
-        (node[i], hop[i], node[i+1]) if direct[i] == 1
-        else (node[i+1], hop[i], node[i])
-        for i in range(n+1)
-    ])
-    q = gp_help.to_sparql_precheck_query(values['st'], gp_in=gp_in)
-    logger.debug(q)
-    try:
-        t, res_q = run_query(q)
-    except:
-        logger.info('Pre-check failed')
-    if not res_q:
-        logger.info('Pre-check returned no result')
-    elif not res_q['results']['bindings']:
-        logger.info('Pre-check has no bound variables')
-    else:
-        logger.info('Pre-check got a hit')
-
-    # queries for the single steps
-    valueblocks = {}
-    valueblocks[SOURCE_VAR] = values[SOURCE_VAR]
-    for i in range(n):
-        q = gp_.to_sparql_deep_narrow_path_query(
-            hop[i], node[i+1], valueblocks, gp_helper[:i+1], gp_in=gp_in
-        )
-        logger.debug(q)
-        try:
-            t, res_q[i] = run_query(q)
-        except:
-            logger.debug('The query (see above) failed')
-            return []
-        if not res_q[i]:
-            logger.debug('The query (see above) returned no result')
-            return []
-        elif not res_q[i]['results']['bindings']:
-            logger.debug('The query (see above) has no bound variables')
-            return []
-        values[hop[i]] = get_values([hop[i]], res_q[i])
-        valueblocks[hop[i]] = {
-            (hop[i],): random.sample(
-                values[hop[i]][(hop[i],)],
-                min(10, len(values[hop[i]][(hop[i],)]))
-            )
-        }
-
-    # shared source/target block, so that only "real" paths are found
-    del valueblocks[SOURCE_VAR]
-    valueblocks['st'] = values['st']
-    q = gp_.to_sparql_deep_narrow_path_inst_query(
-        hop, valueblocks, gp_helper, gp_in=gp_in
-    )
-    logger.debug(q)
-    try:
-        t, res_q_inst = run_query(q)
-    except:
-        logger.debug('The query (see above) failed')
-        return []
-    if not res_q_inst:
-        logger.debug('The query (see above) returned no result')
-        return []
-    elif not res_q_inst['results']['bindings']:
-        logger.debug('The query (see above) has no bound variables')
-        return []
-    res = []
-    res_rows_path = ['results', 'bindings']
-    bind = sparql_json_result_bindings_to_rdflib(
-        get_path(res_q_inst, res_rows_path, default=[])
-    )
-    for row in bind:
-        gp_res = GraphPattern([
-            (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1
-            else (node[i + 1], get_path(row, [hop[i]]), node[i])
-            for i in range(n + 1)
-        ])
-        res.append(gp_res)
-
-    return res
-
-
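# --- editor's illustration (not part of the patch) ----------------------
# to_sparql_precheck_query() is not shown in this diff. Conceptually the
# pre-check binds all (source, target) pairs at once and asks whether any
# complete path of the chosen shape exists at all, before paying for the
# step-wise queries. A hedged sketch for a fixed two-hop shape (the
# function name precheck_query is made up):
def precheck_query(gtps):
    """gtps: iterable of (source, target) rdflib URIRef pairs."""
    values = ''.join(
        '(%s %s)\n' % (s.n3(), t.n3()) for s, t in gtps
    )
    return (
        'SELECT * {\n'
        'VALUES (?source ?target) {\n%s}\n'
        '?source ?p0 ?n0 .\n'
        '?n0 ?p1 ?target .\n'
        '}\nLIMIT 1'
    ) % values
# -------------------------------------------------------------------------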
-# seventh version: BIDI with instantiation in the last step + ws-sampling
-def mutate_deep_narrow_7(
-        gp_, gtps, n, direct=None, gp_in=False
-):
-    node = [SOURCE_VAR]
-    for i in range(n):
-        node.append(Variable('n%i' % i))
-    node.append(TARGET_VAR)
-    hop = []
-    for i in range(n + 1):
-        hop.append(Variable('p%i' % i))
-    if direct is None or len(direct) != n + 1:
-        logger.debug(
-            'No direction chosen, or direction tuple with false length'
-        )
-        direct = []
-        for i in range(n + 1):
-            direct.append(0)
-    gp_helper = []
-    for i in range(n + 1):
-        if direct[i] == 0:
-            direct[i] = random.choice([-1, 1])
-        if direct[i] == 1:
-            gp_helper.append(
-                GraphPattern([(node[i], hop[i], node[i + 1])])
-            )
-        else:
-            gp_helper.append(
-                GraphPattern([(node[i + 1], hop[i], node[i])])
-            )
-    values = {}
-    values[SOURCE_VAR] = {(SOURCE_VAR,): [(tup[0],) for tup in gtps]}
-    values[TARGET_VAR] = {(TARGET_VAR,): [(tup[1],) for tup in gtps]}
-    values['st'] = {(SOURCE_VAR, TARGET_VAR): gtps}
-    res_q = []
-    for i in range(n+1):
-        res_q.append({})
-
-    # queries for the single steps
-    valueblocks_s = {}
-    valueblocks_s[SOURCE_VAR] = values[SOURCE_VAR]
-    valueblocks_t = {}
-    valueblocks_t[TARGET_VAR] = values[TARGET_VAR]
-    for i in range(int((n / 2) + 1)):
-        if i < int(n/2):
-            q = gp_.to_sparql_deep_narrow_path_query(
-                hop[i], node[i+1], valueblocks_s, gp_helper[:i+1], gp_in=gp_in
-            )
-            logger.debug(q)
-            try:
-                t, res_q[i] = run_query(q)
-            except:
-                logger.debug('The query (see above) failed')
-                return []
-            if not res_q[i]:
-                logger.debug('The query (see above) returned no result')
-                return []
-            elif not res_q[i]['results']['bindings']:
-                logger.debug('The query (see above) has no bound variables')
-                return []
-            values[hop[i]] = get_values([hop[i]], res_q[i])
-            valueblocks_s[hop[i]] = get_weighted_sample(
-                hop[i], Variable('avgc' + ''.join(node[i + 1])), res_q[i]
-            )
-        if n-i > i:
-            q = gp_.to_sparql_deep_narrow_path_query(
-                hop[n-i],
-                node[n-i],
-                valueblocks_t,
-                gp_helper[n-i:],
-                startvar=TARGET_VAR,
-                gp_in=gp_in
-            )
-            logger.debug(q)
-            try:
-                t, res_q[n-i] = run_query(q)
-            except:
-                logger.debug('The query (see above) failed')
-                return []
-            if not res_q[n-i]:
-                logger.debug('The query (see above) returned no result')
-                return []
-            elif not res_q[n-i]['results']['bindings']:
-                logger.debug('The query (see above) has no bound variables')
-                return []
-            values[hop[n-i]] = get_values([hop[n-i]], res_q[n-i])
-            valueblocks_t[hop[n-i]] = get_weighted_sample(
-                hop[n-i], Variable('avgc' + ''.join(node[n-i])), res_q[n-i]
-            )
-
-    # query for the final result
-    # shared source/target block, so that only "real" paths are found
-    valueblocks = {}
-    for key in valueblocks_s:
-        if key is not SOURCE_VAR:
-            valueblocks[key] = valueblocks_s[key]
-    for key in valueblocks_t:
-        if key is not TARGET_VAR:
-            valueblocks[key] = valueblocks_t[key]
-    valueblocks['st'] = values['st']
-    q = gp_.to_sparql_deep_narrow_path_inst_query(
-        hop, valueblocks, gp_helper, gp_in=gp_in
-    )
-    logger.debug(q)
-    try:
-        t, res_q_inst = run_query(q)
-    except:
-        logger.debug('The query (see above) failed')
-        return []
-    if not res_q_inst:
-        logger.debug('The query (see above) returned no result')
-        return []
-    elif not res_q_inst['results']['bindings']:
-        logger.debug('The query (see above) has no bound variables')
-        return []
-    res = []
-    res_rows_path = ['results', 'bindings']
-    bind = sparql_json_result_bindings_to_rdflib(
-        get_path(res_q_inst, res_rows_path, default=[])
-    )
-    for row in bind:
-        gp_res = GraphPattern([
-            (node[i], get_path(row, [hop[i]]), node[i + 1]) if direct[i] == 1
-            else (node[i + 1], get_path(row, [hop[i]]), node[i])
-            for i in range(n + 1)
-        ])
-        res.append(gp_res)
-
-    return res
-

 def main():
     ground_truth_pairs = get_semantic_associations()
@@ -3111,328 +198,6 @@ def main():
             logger.info(i)
             logger.info(r)
             res.append(r)
-    # for key in gp_found.keys():
-    #     gp_ = gp_found[key]
-    #     eval_gp(gtp_scores, gp_)
-    #     for i in range(100):
-    #         res_ = mutate_deep_narrow_4(
-    #             gp_, gp_.matching_node_pairs, 6, gp_in=False
-    #         )
-    #         res.append(res_)
-    #         logger.info((i, key))
-    #         if res_:
-    #             logger.info(res_)
-
-    # res_eval = []
-    # res = []
-    #
-    # max_out = 65
-    # max_in = 40
-    # in_out = 'out'
-    # richtung = 2
-    # ground_truth_pairs = get_semantic_associations()
-    # ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs)
-    # # ground_truth_pairs = ground_truth_pairs[0:200]
-    # gtp_scores = GTPScores(ground_truth_pairs)
-    # gp = gp_found['140']
-    # eval_gp(gtp_scores, gp)
-    #
-    # for i in range(20):
-    #     res.append(mutate_deep_narrow_n_hops(gp, 2, max_out=max_out, in_out=in_out))
-    #
-    # logger.info(res)
-    #
-    # durchgaenge = []
-    #
-    # for richtung in range(1, 9):
-    #     for max_out in [5, 10, 20, 30, 40, 50, 65, 75, 85, 100, 200]:
-    #         for key in gp_found.keys():
-    #             durchgaenge.append((richtung, max_out, key))
-    #
-    # random.shuffle(durchgaenge)
-    #
-    # for (richtung, max_out, key) in durchgaenge:
-    #     logger.info('Run: richtung = %s, max_out = %s, gp.key = %s' %
-    #                 (richtung, max_out, key)
-    #                 )
-    #     ground_truth_pairs = get_semantic_associations()
-    #     ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs)
-    #     # ground_truth_pairs = random.sample(ground_truth_pairs, 100)
-    #     gtp_scores = GTPScores(ground_truth_pairs)
-    #     gp = gp_found[key]
-    #     eval_gp(gtp_scores, gp)
-    #
-    #     res_gp = mutate_deep_narrow_two_hops(
-    #         gp,
-    #         max_out=max_out,
-    #         max_in=max_in,
-    #         in_out=in_out,
-    #         richtung=richtung
-    #     )
-    #     res_gp.append(gp)
-    #     res_eval = eval_gp_list(gtp_scores, res_gp)
-    #     gp_eval = res_eval[-1]
-    #     res_eval = sorted(
-    #         res_eval[:-1], key=lambda gp_: -gp_.fitness.values.score
-    #     )
-    #     if res_eval:
-    #         logger.info(max_out)
-    #         print_graph_pattern(gp)
-    #         for gp_ in res_eval:
-    #             print_graph_pattern(gp_)
-    #     res.append((richtung, key, max_out, gp_eval, res_eval))
-
-    # f = open('store.pckl', 'wb')
-    # pickle.dump(res, f)
-    # f.close()
-
-    # to inspect res again in the console:
-    # import pickle
-    # f = open('tests/store.pckl', 'rb')
-    # res = pickle.load(f)
-    # f.close()
-
-    # print('HERE STARTS THE RES_PRINTING:')
-    # for r in res:
-    #     print('richtung %s, key %s, max_out %s\n' % r[0:3])
-    #     print('Original GP:\n')
-    #     print_graph_pattern(r[3], print_matching_node_pairs=0)
-    #     print('Top 3 found (if 3 were found, else all found) GP:\n')
-    #     for i in range(min(3, len(r[4]))):
-    #         print_graph_pattern(r[4][i], print_matching_node_pairs=0)
-
-    # ground_truth_pairs = get_semantic_associations()
-    # ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs)
-    # ground_truth_pairs = random.sample(ground_truth_pairs, 100)
-    # gtp_scores = GTPScores(ground_truth_pairs)
-    # gp = gp_found[random.choice(gp_found.keys())]
-    #
-    # max_out = 50
-    # max_in = 40
-    # in_out = 'out'
-    #
-    # res = mutate_deep_narrow_one_hop_s_t_without_direction(
-    #     gp,
-    #     ground_truth_pairs,
-    #     max_out=max_out,
-    #     max_in=max_in,
-    #     in_out=in_out
-    # )
-    # res.append(gp)
-    # res_eval = eval_gp_list(gtp_scores, res)
-    # gp_eval = res_eval[-1]
-    # res_eval = sorted(res_eval[:-1], key=lambda gp_: -gp_.fitness.values.score)
-    #
-    # print_graph_pattern(gp_eval)
-    # for gp_ in res_eval:
-    #     print_graph_pattern(gp_)
-
-    # # counter fields for the statistics (accessed via max_in_out)
-    # # average number of returned patterns
-    # avg_num_pat = {}
-    # # maximum number of returned patterns
-    # max_num_pat = {}
-    # # average score of all returned patterns
-    # avg_score_all_pat = {}
-    # # average score of the best returned pattern (if any)
-    # avg_score_best_pat = {}
-    # # average score of the best returned pattern (0 if there is none)
-    # avg_score_best_pat_pun = {}
-    # # maximum score of any returned pattern
-    # max_score_ovrall = {}
-    # # how often no pattern was returned
-    # num_no_pattern = {}
-    # # average deviation of the best pattern from the score of the
-    # # original pattern, if any
-    # avg_diff_all_pat = {}
-    # # average deviation from the score of the original pattern, if any
-    # avg_diff_best_pat = {}
-    # # summed-up score of runs without any pattern
-    # punish_avg_diff_best_pat = {}
-    # # summed-up score of runs without any pattern times the average
-    # # number of returned patterns
-    # punish_avg_diff_all_pat = {}
-    # # average deviation of the best pattern from the score of the
-    # # original pattern, with a penalty for no pattern at all
-    # avg_diff_all_pat_punished = {}
-    # # average deviation from the score of the original pattern, with a
-    # # penalty for no pattern at all
-    # avg_diff_best_pat_punished = {}
-    # # the five best (most improving) patterns
-    # five_best_pattern = {}
-    #
-    # max_out_steps = [10, 15, 20, 25, 30, 40, 50, 75, 100]
-    #
-    # for j in max_out_steps:
-    #     avg_num_pat[j] = 0
-    #     max_num_pat[j] = 0
-    #     avg_score_all_pat[j] = 0
-    #     avg_score_best_pat[j] = 0
-    #     avg_score_best_pat_pun[j] = 0
-    #     max_score_ovrall[j] = 0
-    #     num_no_pattern[j] = 0
-    #     avg_diff_all_pat[j] = 0
-    #     avg_diff_best_pat[j] = 0
-    #     punish_avg_diff_best_pat[j] = 0
-    #     punish_avg_diff_all_pat[j] = 0
-    #     avg_diff_all_pat_punished[j] = 0
-    #     avg_diff_best_pat_punished[j] = 0
-    #     five_best_pattern[j] = []
-    #
-    # reps = 50
-    #
-    # for i in range(reps):
-    #     ground_truth_pairs = get_semantic_associations()
-    #     ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs)
-    #     ground_truth_pairs = random.sample(ground_truth_pairs, 100)
-    #     gtp_scores = GTPScores(ground_truth_pairs)
-    #     gp = gp_found[random.choice(gp_found.keys())]
-    #     for j in max_out_steps:
-    #         res = mutate_deep_narrow_one_hop_s_t_without_direction(
-    #             gp, ground_truth_pairs, max_out=j, in_out='out'
-    #         )  # TODO: why does None come in above???
-    #         res.append(gp)
-    #         res_eval = eval_gp_list(gtp_scores, res)
-    #         gp_eval = res_eval[-1]
-    #         res_eval = sorted(
-    #             res_eval[:-1], key=lambda gp_: -gp_.fitness.values.score
-    #         )
-    #
-    #         # statistics:
-    #         avg_num_pat[j] = avg_num_pat[j] + len(res_eval) / reps
-    #         if len(res_eval) > max_num_pat[j]:
-    #             max_num_pat[j] = len(res_eval)
-    #         for gp_ in res_eval:
-    #             avg_score_all_pat[j] = avg_score_all_pat[j] + \
-    #                 gp_.fitness.values.score / \
-    #                 (len(res_eval) * reps)
-    #         if res_eval:
-    #             avg_score_best_pat[j] = avg_score_best_pat[j] + \
-    #                 res_eval[0].fitness.values.score
-    #         if res_eval:
-    #             if res_eval[0].fitness.values.score > max_score_ovrall[j]:
-    #                 max_score_ovrall[j] = res_eval[0].fitness.values.score
-    #         if len(res_eval) == 0:
-    #             num_no_pattern[j] = num_no_pattern[j] + 1
-    #         if res_eval:
-    #             avg_diff_all_pat[j] = avg_diff_all_pat[j] + \
-    #                 (res_eval[0].fitness.values.score -
-    #                  gp_eval.fitness.values.score) / \
-    #                 reps
-    #         for gp_ in res_eval:
-    #             avg_diff_best_pat[j] = avg_diff_best_pat[j] + \
-    #                 (gp_.fitness.values.score -
-    #                  gp_eval.fitness.values.score) / \
-    #                 (len(res_eval) * reps)
-    #         if not res_eval:
-    #             punish_avg_diff_best_pat[j] = punish_avg_diff_best_pat[j] + \
-    #                 gp_eval.fitness.values.score
-    #         if res_eval:
-    #             if len(five_best_pattern[j]) < 5:
-    #                 five_best_pattern[j].append((
-    #                     res_eval[0].fitness.values.score -
-    #                     gp_eval.fitness.values.score,
-    #                     res_eval[0],
-    #                     gp_eval
-    #                 ))
-    #                 five_best_pattern[j] = sorted(
-    #                     five_best_pattern[j],
-    #                     key=lambda tup_: -tup_[0]
-    #                 )
-    #             else:
-    #                 five_best_pattern[j][4] = (
-    #                     res_eval[0].fitness.values.score -
-    #                     gp_eval.fitness.values.score,
-    #                     res_eval[0],
-    #                     gp_eval
-    #                 )
-    #                 five_best_pattern[j] = sorted(
-    #                     five_best_pattern[j],
-    #                     key=lambda tup_: -tup_[0]
-    #                 )
-    #     logger.info('Round %s, min_max = %s' % (i, j))
-    #     print_graph_pattern(gp)
-    #     if res_eval:
-    #         print_graph_pattern(res_eval[0])
-    #
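# --- editor's note (not part of the patch): the commented-out bookkeeping
# above maintains a top-5 list via manual append/replace plus re-sorting;
# collecting (score_diff, new_gp, original_gp) tuples and picking the best
# five afterwards does the same in one standard-library call:
import heapq

def top_k(candidates, k=5):
    """candidates: iterable of (score_diff, new_gp, original_gp) tuples."""
    return heapq.nlargest(k, candidates, key=lambda tup_: tup_[0])
# -------------------------------------------------------------------------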
-    # # print out the five best patterns per min_max:
-    # logger.info(' The five best new patterns (per min_max): ')
-    # for j in max_out_steps:
-    #     for i in range(len(five_best_pattern[j])):
-    #         print('min_max: %s\n' % j)
-    #         print('Difference: %s\n' % five_best_pattern[j][i][0])
-    #         print_graph_pattern(five_best_pattern[j][i][1])
-    #         print_graph_pattern(five_best_pattern[j][i][2])
-    #
-    # # more statistics
-    # for j in max_out_steps:
-    #     avg_score_best_pat_pun[j] = avg_score_best_pat[j] / reps
-    #     if reps - num_no_pattern[j]:
-    #         avg_score_best_pat[j] = avg_score_best_pat[j] / \
-    #             (reps - num_no_pattern[j])
-    #     else:
-    #         avg_score_best_pat = -1
-    #     punish_avg_diff_all_pat[j] = punish_avg_diff_best_pat[j] * \
-    #         avg_num_pat[j]
-    #     avg_diff_all_pat_punished[j] = avg_diff_all_pat[j] - \
-    #         punish_avg_diff_best_pat[j]
-    #     avg_diff_best_pat_punished[j] = avg_diff_best_pat[j] - \
-    #         punish_avg_diff_all_pat[j]
-    #
-    # # print the statistics
-    # logger.info('min_max: %s\n'
-    #             'avg_num_pat: %s\n'
-    #             'max_num_pat: %s\n'
-    #             'avg_score_all_pat: %s\n'
-    #             'avg_score_best_pat: %s\n'
-    #             'avg_score_best_pat_pun: %s\n'
-    #             'max_score_ovrall: %s\n'
-    #             'num_no_pattern: %s\n'
-    #             'avg_diff_all_pat: %s\n'
-    #             'avg_diff_best_pat: %s\n'
-    #             'punish_avg_diff_best_pat: %s\n'
-    #             'punish_avg_diff_all_pat: %s\n'
-    #             'avg_diff_all_pat_punished: %s\n'
-    #             'avg_diff_best_pat_punished: %s\n' % (
-    #                 ' '.join([str(x) for x in max_out_steps]),
-    #                 ' '.join([str(avg_num_pat[x]) for x in max_out_steps]),
-    #                 ' '.join([str(max_num_pat[x]) for x in max_out_steps]),
-    #                 ' '.join([str(avg_score_all_pat[x]) for x in max_out_steps]),
-    #                 ' '.join([str(avg_score_best_pat[x]) for x in max_out_steps]),
-    #                 ' '.join(
-    #                     [str(avg_score_best_pat_pun[x]) for x in max_out_steps]
-    #                 ),
-    #                 ' '.join([str(max_score_ovrall[x]) for x in max_out_steps]),
-    #                 ' '.join([str(num_no_pattern[x]) for x in max_out_steps]),
-    #                 ' '.join([str(avg_diff_all_pat[x]) for x in max_out_steps]),
-    #                 ' '.join([str(avg_diff_best_pat[x]) for x in max_out_steps]),
-    #                 ' '.join(
-    #                     [str(punish_avg_diff_best_pat[x]) for x in max_out_steps]
-    #                 ),
-    #                 ' '.join(
-    #                     [str(punish_avg_diff_all_pat[x]) for x in max_out_steps]
-    #                 ),
-    #                 ' '.join(
-    #                     [str(avg_diff_all_pat_punished[x]) for x in max_out_steps]
-    #                 ),
-    #                 ' '.join(
-    #                     [str(avg_diff_best_pat_punished[x]) for x in max_out_steps]
-    #                 )
-    #             ))
-    #
-    # # TODO: find out why the difference of the gp scores in
-    # # five_best_patterns is wrong
-    #
-    # res = res[0:100]
-    # for res_ in res:
-    #     # print('max_out:' + str(res_[1]))
-    #     print_graph_pattern(res_)
-    #
-    # # TODO: also bind the second query with SOURCE and TARGET and add gp
-    # # to the query, then searching for hits can be skipped as well ?!


 if __name__ == '__main__':

From 6deb0ad754840fba0cb887561111fc5c43b9104c Mon Sep 17 00:00:00 2001
From: "philipp.neuer" 
Date: Thu, 6 Sep 2018 10:55:54 +0200
Subject: [PATCH 26/27] Removed the use of private methods in
 to_sparql_deep_narrow_path_(inst_)query()

---
 graph_pattern.py | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/graph_pattern.py b/graph_pattern.py
index 3cef58b..d46f654 100644
--- a/graph_pattern.py
+++ b/graph_pattern.py
@@ -652,7 +652,7 @@ def to_sparql_deep_narrow_path_query(
         avg_var_to_count = Variable('avgc' + ''.join(var_to_count))
         res = "SELECT %(vtf)s (AVG(%(cvtc)s) as %(avtc)s) {\n" \
               "SELECT %(stv)s %(vtf)s (COUNT (%(vtc)s) as %(cvtc)s) {\n" \
-              "%(val)s\n" \
+              "%(val)s" \
              "%(trip)s }\n" \
              "GROUP BY %(stv)s %(vtf)s }\n" \
              "GROUP BY %(vtf)s\n" \
@@ -663,13 +663,17 @@ def to_sparql_deep_narrow_path_query(
            'stv': ''.join(startvar.n3()),
            'vtc': ''.join(var_to_count.n3()),
            'val': ''.join([
-                self._sparql_values_part(
-                    values=valueblocks[key], indent=' '
-                ) for key in valueblocks
+                'VALUES (%s) {\n%s }\n' % (
+                    ' '.join(var.n3() for var in valueblocks[key].keys()[0]),
+                    ''.join(['(%s)\n' %
+                             ' '.join(self.curify(v) for v in vt)
+                             for vt in valueblocks[key][(key,)]])
+                ) for key in valueblocks.keys()
            ]),
            'trip': ''.join([
-                step._sparql_triples_part(indent=' ') for step in steps
-                # TODO: don't use private method
+                '%s %s %s .\n' % (s.n3(), p.n3(), o.n3())
+                for step in steps
+                for s, p, o in step
            ]) + ''.join([
                self._sparql_triples_part(indent=' ') if gp_in else ''
            ]),
@@ -688,19 +692,23 @@ def to_sparql_deep_narrow_path_inst_query(
    ):
        # TODO: Maybe use a limit
        res = "SELECT %(vtf)s (COUNT (?source) as ?cst) {\n" \
-              "%(val)s\n" \
+              "%(val)s" \
              "%(trip)s }\n" \
              "GROUP BY %(vtf)s\n" \
              "HAVING (COUNT (?source) > 0)" % {
            'vtf': ' '.join([var.n3() for var in hop]),
            'val': ''.join([
-                self._sparql_values_part(
-                    values=valueblocks[key], indent=' '
-                ) for key in valueblocks
+                'VALUES (%s) {\n%s }\n' % (
+                    ' '.join(var.n3() for var in valueblocks[key].keys()[0]),
+                    ''.join(['(%s)\n' %
+                             ' '.join(self.curify(v) for v in vt)
+                             for vt in valueblocks[key].values()[0]])
+                ) for key in valueblocks.keys()
            ]),
            'trip': ''.join([
-                step._sparql_triples_part() for step in steps
-                # TODO: don't use private method
+                '%s %s %s .\n' % (s.n3(), p.n3(), o.n3())
+                for step in steps
+                for s, p, o in step
            ]) + ''.join([
                self._sparql_triples_part(indent=' ') if gp_in else ''
            ]),
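Editor's note on the hunks above: the inline construction now renders each
valueblock straight into a SPARQL VALUES clause. A quick standalone check of
the text it produces (assumptions: plain rdflib n3() stands in for the repo's
curify(), which additionally applies namespace prefixes; the patch's
.keys()[0]/.values()[0] indexing implies Python 2, so the list(...) wrappers
below are added for Python 3 compatibility):

    from rdflib import URIRef, Variable

    SOURCE_VAR, TARGET_VAR = Variable('source'), Variable('target')
    valueblock = {(SOURCE_VAR, TARGET_VAR): [
        (URIRef('http://dbpedia.org/resource/Berlin'),
         URIRef('http://dbpedia.org/resource/Germany')),
    ]}
    val = 'VALUES (%s) {\n%s }\n' % (
        ' '.join(var.n3() for var in list(valueblock.keys())[0]),
        ''.join('(%s)\n' % ' '.join(v.n3() for v in vt)
                for vt in list(valueblock.values())[0]),
    )
    print(val)
    # VALUES (?source ?target) {
    # (<http://dbpedia.org/resource/Berlin> <http://dbpedia.org/resource/Germany>)
    #  }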
From 05ae8434cf2551daa0ef8f4df8b9d94cf2647f8d Mon Sep 17 00:00:00 2001
From: "philipp.neuer" 
Date: Thu, 6 Sep 2018 13:10:36 +0200
Subject: [PATCH 27/27] Changed the alpha/beta values for the path-length
 distribution and the probabilities for fix_var_mut and deep_narrow_mut

---
 config/defaults.py | 8 ++++----
 gp_learner.py      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/config/defaults.py b/config/defaults.py
index 75d61e9..c697437 100644
--- a/config/defaults.py
+++ b/config/defaults.py
@@ -84,16 +84,16 @@
 MUTPB_EN_OUT_LINK = 0.5  # probability to add an outgoing triple (otherwise in)
 MUTPB_AE = 0.2  # prob to try adding an edge between two nodes
 MUTPB_ID = 0.05  # prob to increase distance between source and target by 1 hop
-MUTPB_FV = 0.4  # prob to fix a variable (SPARQL)
+MUTPB_FV = 0.25  # prob to fix a variable (SPARQL)
 MUTPB_FV_RGTP_SAMPLE_N = 128  # sample <= n remaining GTPs to fix variables for
 MUTPB_FV_SAMPLE_MAXN = 32  # max n of instantiations to sample from top k
 MUTPB_FV_QUERY_LIMIT = 256  # SPARQL query limit for the top k instantiations
 MUTPB_SP = 0.05  # prob to simplify pattern (warning: can restrict exploration)
 # TODO: Lower the MUTPB_DN
-MUTPB_DN = 0.5  # prob to try adding a deep and narrow path to a pattern
+MUTPB_DN = 0.6  # prob to try adding a deep and narrow path to a pattern
 MUTPB_DN_MAX_HOPS = 10  # Max number of hops in the deep narrow path
-MUTPB_DN_MAX_HOPS_ALPHA = 2.  # alpha value in a length beta distribution
-MUTPB_DN_MAX_HOPS_BETA = 5.  # beta value in a length beta distribution
+MUTPB_DN_MAX_HOPS_ALPHA = 1.15  # alpha value in a length beta distribution
+MUTPB_DN_MAX_HOPS_BETA = 1.85  # beta value in a length beta distribution
 MUTPB_DN_AVG_DEG_LIMIT = 10  # Max avg. reachable Nodes
 MUTPB_DN_MAX_HOP_INST = 10  # Max number of hop instances for the next query/ies

diff --git a/gp_learner.py b/gp_learner.py
index 3765dad..d183978 100644
--- a/gp_learner.py
+++ b/gp_learner.py
@@ -737,7 +737,7 @@ def mutate_deep_narrow_path(
     # with default values the distribution is as follows:
     # PDF: 1: 14 %, 2: 27 %, 3: 25 %, 4: 17 %, 5: 10 %, 6: 5 %, 7: 1.5 %, ...
     # CDF: 1: 14 %, 2: 40 %, 3: 66 %, 4: 83 %, 5: 93 %, 6: 98 %, 7: 99.6 %, ...
-    n = int(random.betavariate(alpha, beta) * (max_hops-1) + 1)
+    n = int(random.betavariate(alpha, beta) * max_hops + 1)
     nodes = [SOURCE_VAR] + [Variable('n%d' % i) for i in range(n)] + [TARGET_VAR]
     hops = [Variable('p%d' % i) for i in range(n + 1)]
     if not directions:
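Editor's note on the last hunk: the old formula
int(random.betavariate(alpha, beta) * (max_hops-1) + 1) could only ever reach
max_hops-1 hops, while the new scaling by max_hops makes the full range
1..MUTPB_DN_MAX_HOPS reachable. The PDF/CDF percentages quoted in the context
comment were computed for the previous defaults (alpha=2., beta=5.) and no
longer hold. A standalone sketch (not part of the patch) to re-derive the
hop-length distribution empirically for the new defaults:

    import random
    from collections import Counter

    def hop_length_distribution(alpha=1.15, beta=1.85, max_hops=10,
                                samples=100000):
        """Estimate the PDF of n in percent for the given parameters."""
        counts = Counter(
            int(random.betavariate(alpha, beta) * max_hops + 1)
            for _ in range(samples)
        )
        return {n: round(100.0 * c / samples, 1)
                for n, c in sorted(counts.items())}

    print(hop_length_distribution())  # percentage per hop count, n = 1..10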