Skip to content

Commit 990a27f

Browse files
committed
added to_find_edge_var_for_narrow_path_query
1 parent 15b9de1 commit 990a27f

File tree

4 files changed

+115
-6
lines changed

4 files changed

+115
-6
lines changed

gp_learner.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -437,11 +437,12 @@ def mutate_deep_narrow_path(
437437
min_len=config.MUTPB_DN_MIN_LEN,
438438
max_len=config.MUTPB_DN_MAX_LEN,
439439
term_pb=config.MUTPB_DN_TERM_PB,
440+
pb_en_out_link=config.MUTPB_EN_OUT_LINK,
440441
):
441442
assert isinstance(child, GraphPattern)
442443
nodes = list(child.nodes)
443444
start_node = random.choice(nodes)
444-
# target_nodes = set(nodes) - {start_node}
445+
# target_nodes = set(nodes) - {start_node}
445446
gp = child
446447
hop = 0
447448
while True:
@@ -453,6 +454,9 @@ def mutate_deep_narrow_path(
453454
new_triple, var_node = _mutate_expand_node_helper(start_node)
454455
gp += [new_triple]
455456
start_node = var_node
457+
458+
# TODO: insert connection to a target node
459+
# TODO: fix edge or node ( to_count_var_over_values_query)
456460
return gp
457461

458462

graph_pattern.py

+88-2
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,10 @@
4141
TARGET_VAR = Variable('target')
4242
ASK_VAR = Variable('ask')
4343
COUNT_VAR = Variable('count')
44-
45-
44+
EDGE_VAR_COUNT = Variable('edge_count_var')
45+
NODE_VAR_COUNT = Variable('node_count_var')
46+
# SPARQL variable names cannot contain spaces: 'maximum node count' would be
# serialized by .n3() as '?maximum node count' and break every query using it.
MAX_NODE_COUNT = Variable('max_node_count')
47+
PRIO_VAR = Variable('priority')
4648
def gen_random_var():
4749
return Variable(RANDOM_VAR_PREFIX + ''.join(
4850
random.choice(string.ascii_letters + string.digits)
@@ -709,6 +711,90 @@ def to_count_var_over_values_query(self, var, vars_, values, limit):
709711
res += 'LIMIT %d\n' % limit
710712
return self._sparql_prefix(res)
711713

714+
def to_find_edge_var_for_narrow_path_query(self, edge_var, node_var,
                                           vars_, filter_node_count,
                                           filter_edge_count, limit_res,
                                           values=None):
    """Build a query ranking substitutions for edge_var on a narrow path.

    Meant to perform a query like this (variable names are illustrative,
    the real ones come from the module-level *_COUNT / PRIO_VAR constants):
    SELECT *
    {
      {
        SELECT
          ?edge_var
          (COUNT(*) AS ?edge_var_count)
          (MAX(?node_var_count) AS ?max_node_count)
          (COUNT(*)/AVG(?node_var_count) as ?prio_var)
        {
          SELECT DISTINCT
            ?source ?target ?edge_var (COUNT(?node_var) AS ?node_var_count)
          {
            VALUES (?source ?target) {
              (dbr:Adolescence dbr:Youth)
              (dbr:Adult dbr:Child)
              (dbr:Angel dbr:Heaven)
              (dbr:Arithmetic dbr:Mathematics)
            }
            ?node_var ?edge_var ?source .
            ?source dbo:wikiPageWikiLink ?target .
          }
        }
        GROUP BY ?edge_var
        ORDER BY DESC(?edge_var_count)
      }
      FILTER(?max_node_count < 10 && ?edge_var_count > 1)
    }
    ORDER BY DESC(?prio_var)
    LIMIT 32

    :param edge_var: Edge variable to find substitution for.
    :param node_var: Node variable to count.
    :param vars_: Iterable of vars to fix values for (e.g. ?source,
        ?target). Pass an ordered sequence: its order defines the column
        order of the VALUES rows.
    :param filter_node_count: Exclusive upper bound on the maximum node
        count per edge_var substitution.
    :param filter_edge_count: Exclusive lower bound on the number of rows
        per edge_var substitution.
    :param limit_res: Limit on the result size.
    :param values: Optional list of value rows for vars_; each row is a
        sequence of terms providing ``.n3()``, in the order of vars_. When
        omitted, the four hard-coded dbr: sample pairs from the example
        above are used (placeholder behaviour kept from the original
        implementation; those rows assume exactly two vars).
    :return: Query String (passed through self._sparql_prefix).
    """
    # Materialise once so the inner SELECT and the VALUES header are
    # guaranteed to list the variables in the same order, even if the
    # caller hands in a one-shot iterable.
    vars_ = list(vars_)
    vars_n3 = ' '.join(v.n3() for v in vars_)

    if values is None:
        # Placeholder rows. The original first row read
        # '(dbr: Adolescence dbr: Youth)', which is not a valid prefixed
        # name in SPARQL because of the spaces after the colons.
        value_rows = [
            '(dbr:Adolescence dbr:Youth)',
            '(dbr:Adult dbr:Child)',
            '(dbr:Angel dbr:Heaven)',
            '(dbr:Arithmetic dbr:Mathematics)',
        ]
    else:
        value_rows = [
            '(%s)' % ' '.join(term.n3() for term in row)
            for row in values
        ]

    edge_n3 = edge_var.n3()
    edge_count_n3 = EDGE_VAR_COUNT.n3()
    node_count_n3 = NODE_VAR_COUNT.n3()
    max_node_count_n3 = MAX_NODE_COUNT.n3()
    prio_n3 = PRIO_VAR.n3()

    # NOTE(review): the innermost SELECT projects plain variables next to
    # an aggregate without a GROUP BY, exactly as in the example query
    # above; confirm the target endpoint accepts this.
    query_lines = [
        'SELECT * WHERE {',
        ' {',
        '  SELECT %s (COUNT(*) AS %s) (MAX(%s) AS %s)'
        ' (COUNT(*)/AVG(%s) AS %s) WHERE {' % (
            edge_n3, edge_count_n3,
            node_count_n3, max_node_count_n3,
            node_count_n3, prio_n3),
        '   SELECT DISTINCT %s %s (COUNT(%s) AS %s) WHERE {' % (
            vars_n3, edge_n3, node_var.n3(), node_count_n3),
        '    VALUES (%s) {' % vars_n3,
    ]
    query_lines.extend('     ' + row for row in value_rows)
    query_lines.append('    }')
    # triples part: one line per triple of this graph pattern
    query_lines.extend(
        '    %s %s %s .' % (s.n3(), p.n3(), o.n3()) for s, p, o in self
    )
    query_lines += [
        '   }',
        '  }',
        '  GROUP BY %s' % edge_n3,
        '  ORDER BY DESC(%s)' % edge_count_n3,
        ' }',
        ' FILTER(%s < %d && %s > %d)' % (
            max_node_count_n3, filter_node_count,
            edge_count_n3, filter_edge_count),
        '}',
        'ORDER BY DESC(%s)' % prio_n3,
        'LIMIT %d' % limit_res,
    ]
    return self._sparql_prefix('\n'.join(query_lines))
797+
712798
def to_dict(self):
713799
return {
714800
'fitness': self.fitness.values if self.fitness.valid else (),

requirements.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ matplotlib==1.5.1
55
networkx==1.11
66
nose==1.3.7
77
numpy==1.11.0
8-
pygraphviz==1.3.1
8+
# pygraphviz==1.3.1
99
requests==2.9.1
1010
rdflib==4.2.1
1111
scikit-learn==0.17.1
12-
scipy==0.17.0
12+
scipy
1313
scoop==0.7.1.1
1414
six==1.10.0
1515
SPARQLWrapper==1.7.6

tests/test_gp_learner_offline.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,24 @@ def test_mutate_deep_narrow_path():
120120
print(child)
121121

122122

123+
def test_to_find_edge_var_for_narrow_path_query():
    """Smoke test: build the narrow-path edge query and print it.

    Builds a two-triple pattern (?node_variable ?edge_variable ?source,
    ?source wikilink ?target) and renders the query for manual inspection.
    """
    node_var = Variable('node_variable')
    edge_var = Variable('edge_variable')
    gp = GraphPattern([
        (node_var, edge_var, SOURCE_VAR),
        (SOURCE_VAR, wikilink, TARGET_VAR),
    ])
    filter_node_count = 10
    filter_edge_count = 1
    limit_res = 32
    # Ordered on purpose: the VALUES rows are (source, target) pairs, and a
    # set would make the column order of the VALUES header arbitrary.
    vars_ = (SOURCE_VAR, TARGET_VAR)
    res = gp.to_find_edge_var_for_narrow_path_query(
        edge_var, node_var, vars_,
        filter_node_count, filter_edge_count, limit_res,
    )
    print(gp)
    print(res)
139+
140+
123141
def test_simplify_pattern():
124142
gp = GraphPattern([(SOURCE_VAR, wikilink, TARGET_VAR)])
125143
res = mutate_simplify_pattern(gp)
@@ -286,4 +304,5 @@ def test_gtp_scores():
286304

287305

288306
if __name__ == '__main__':
289-
test_mutate_deep_narrow_path()
307+
# test_mutate_deep_narrow_path()
308+
test_to_find_edge_var_for_narrow_path_query()

0 commit comments

Comments
 (0)