-
Notifications
You must be signed in to change notification settings - Fork 15
WIP: deep narrow paths mutation #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 5 commits
8fbd1f1
db78db4
07d1f39
0837c10
f67c730
c908caf
adbc215
20e5b34
22a786e
91cbde0
9c3238a
6362dc8
1130da4
12a95ae
22ca6aa
126e84d
331e06f
49b5c4d
c0617ea
82cdacf
9117d05
d792d10
75bd1ea
72f2fee
e2e09a4
6deb0ad
05ae843
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,3 +7,6 @@ venv/ | |
# ignore py compiled etc. files | ||
*.pyc | ||
*.pyo | ||
|
||
# ignore .idea | ||
.idea/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,6 +89,9 @@ | |
MUTPB_FV_SAMPLE_MAXN = 32 # max n of instantiations to sample from top k | ||
MUTPB_FV_QUERY_LIMIT = 256 # SPARQL query limit for the top k instantiations | ||
MUTPB_SP = 0.05 # prob to simplify pattern (warning: can restrict exploration) | ||
MUTPB_DN = 0.5  # prob to try adding a deep and narrow path to a pattern
# Max steps in the deep narrow path.
# NOTE(review): a reviewer suggested the name MUTPB_DN_MAX_HOPS for this
# setting (a second suggestion was cut off in the review text).
MUTPB_DN_PS_MAX_N = 10
MUTPB_DN_AVG_LIMIT = 10  # Max avg. reachable Nodes
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
# fusion of target candidates: | ||
FUSION_SAMPLES_PER_CLASS = 500 # only use up to n training samples per class | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,6 +54,8 @@ | |
from gp_query import query_stats | ||
from gp_query import query_time_hard_exceeded | ||
from gp_query import query_time_soft_exceeded | ||
from gp_query import useful_path_query | ||
from gp_query import useful_path_inst_query | ||
from gp_query import variable_substitution_query | ||
from graph_pattern import canonicalize | ||
from graph_pattern import gen_random_var | ||
|
@@ -684,6 +686,121 @@ def mutate_fix_var( | |
] | ||
return res | ||
|
||
def mutate_deep_narrow(
        sparql,
        timeout,
        child,
        gtp_scores,
        dn_path_steps_max_n=config.MUTPB_DN_PS_MAX_N,
        direct=None,
        childin=False,
        limit=config.MUTPB_FV_QUERY_LIMIT,  # TODO: use the limit?
):
    """Try to extend ``child`` by a path from SOURCE_VAR to TARGET_VAR.

    A chain ``SOURCE_VAR, n0, ..., n<n-1>, TARGET_VAR`` connected by the
    predicate variables ``p0 .. p<n>`` is built. Candidate predicates for the
    hops are fixed step by step, alternating between the source end and the
    target end of the chain, via ``useful_path_query``. The fully fixed chain
    is then instantiated with ``useful_path_inst_query``.

    :param sparql: passed through to ``evaluate`` and the query functions.
    :param timeout: passed through to ``evaluate`` and the query functions.
    :param child: graph pattern to extend; evaluated first if it has no
        ``matching_node_pairs`` yet.
    :param gtp_scores: passed through to ``evaluate``.
    :param dn_path_steps_max_n: upper bound for the randomly drawn number of
        intermediate nodes (currently without effect, see the FIXME below).
    :param direct: optional sequence with one entry per hop: ``1`` makes the
        hop point from node ``i`` to node ``i + 1``, ``0`` picks a direction
        randomly, any other value reverses the hop. Ignored (all random) when
        ``None`` or of the wrong length. The argument itself is never
        modified.
    :param childin: forwarded as ``gp_in`` to the query functions.
    :param limit: currently unused.
    :return: list with one ``GraphPattern(child + path)`` per instantiation
        found, or ``[child]`` if the child has no matching node pairs or any
        of the queries returns nothing.
    """
    # NOTE(review): a reviewer asked for ``gtp_scores`` to come before
    # ``child``. The call in ``mutate`` that is part of this change passes
    # (sparql, timeout, gtp_scores, child), which does not match the
    # parameter order above -- one of the two needs to be aligned.
    # NOTE(review): a reviewer questioned the ``childin`` parameter ("?").

    # NOTE(review): ``matching_node_pairs`` "probably doesn't exist on uneval
    # children" -- confirm that the attribute is always present.
    if not child.matching_node_pairs:
        # TODO: do run/gen have to be passed here?
        ev = evaluate(sparql, timeout, gtp_scores, child)
        update_individuals([child], [ev])
    if not child.matching_node_pairs:
        return [child]

    # TODO: test which distribution works well
    # NOTE(review): "rather configurable beta distribution"
    n = random.choice(range(dn_path_steps_max_n)) + 1
    # FIXME: the drawn value is overridden here, so the path always has two
    # intermediate nodes and ``dn_path_steps_max_n`` has no effect on it.
    n = 2

    nodes = (
        [SOURCE_VAR] + [Variable('n%i' % i) for i in range(n)] + [TARGET_VAR]
    )
    hops = [Variable('p%i' % i) for i in range(n + 1)]

    # TODO: remove if direct should simply always be chosen randomly
    if direct is None or len(direct) != n + 1:
        logger.debug(
            'No direction chosen, or direction tuple with false length'
        )
        direct = [0] * (n + 1)
    else:
        # Work on a private list: entries equal to 0 are replaced below, which
        # must neither leak into the caller's sequence nor fail on a tuple.
        direct = list(direct)

    # one single-triple helper pattern per hop, oriented according to direct
    gp_helper = []
    for i in range(n + 1):
        if direct[i] == 0:
            direct[i] = random.choice([-1, 1])
        if direct[i] == 1:
            triple = (nodes[i], hops[i], nodes[i + 1])
        else:
            triple = (nodes[i + 1], hops[i], nodes[i])
        gp_helper.append(GraphPattern([triple]))

    # queries for the individual steps
    # NOTE(review): "10 magic number? use limit from above?"
    sample_max_n = 10
    valueblocks_s = {}
    valueblocks_t = {}
    half = n // 2
    for i in range(half + 1):
        if i < half:
            # Step from the source end: fix hop i, using the helper patterns
            # of hops 0..i and the hops already fixed from the source side.
            _, q_res = useful_path_query(
                sparql,
                timeout,
                child,
                hops[i],
                nodes[i + 1],
                valueblocks_s,
                gp_helper[:i + 1],
                SOURCE_VAR,
                gp_in=childin,
            )
            if not q_res:
                return [child]
            valueblocks_s[hops[i]] = _dn_sample_valueblock(
                hops[i], q_res, sample_max_n)
        if n - i > i:
            # Step from the target end: fix hop n - i, using the helper
            # patterns of hops n-i..n and the hops already fixed from the
            # target side.
            j = n - i
            _, q_res = useful_path_query(
                sparql,
                timeout,
                child,
                hops[j],
                nodes[j],
                valueblocks_t,
                gp_helper[j:],
                TARGET_VAR,
                gp_in=childin,
            )
            if not q_res:
                return [child]
            valueblocks_t[hops[j]] = _dn_sample_valueblock(
                hops[j], q_res, sample_max_n)

    # Query for the result: joint source/target block, so that only "proper"
    # paths are found.
    valueblocks = {}
    valueblocks.update(valueblocks_s)
    valueblocks.update(valueblocks_t)
    _, q_res = useful_path_inst_query(
        sparql,
        timeout,
        child,
        hops,
        valueblocks,
        gp_helper,
        gp_in=childin
    )
    if not q_res:
        return [child]

    res = []
    for inst in q_res:
        # same orientation per hop as in gp_helper, but with the predicate
        # variable replaced by the value found for it
        child_inst = GraphPattern([
            (nodes[i], inst[i], nodes[i + 1]) if direct[i] == 1
            else (nodes[i + 1], inst[i], nodes[i])
            for i in range(n + 1)
        ])
        res.append(GraphPattern(child + child_inst))
    return res


def _dn_sample_valueblock(hop_var, candidates, max_n):
    """Return a value block binding ``hop_var`` to <= ``max_n`` candidates.

    The candidates are drawn with ``random.sample`` and each one is wrapped
    into a 1-tuple, keyed by the 1-tuple ``(hop_var,)``.
    """
    return {
        (hop_var,): random.sample(
            [(cand,) for cand in candidates],
            min(max_n, len(candidates))
        )
    }
|
||
|
||
def mutate_simplify_pattern(gp): | ||
if len(gp) < 2: | ||
|
@@ -797,6 +914,7 @@ def mutate( | |
pb_mv=config.MUTPB_MV, | ||
pb_sp=config.MUTPB_SP, | ||
pb_sv=config.MUTPB_SV, | ||
pb_dn=config.MUTPB_DN, | ||
): | ||
# mutate patterns: | ||
# grow: select random identifier and convert them into a var (local) | ||
|
@@ -837,8 +955,14 @@ def mutate( | |
else: | ||
children = [child] | ||
|
||
|
||
# TODO: deep & narrow paths mutation | ||
helper = [] | ||
for child in children: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i'd actually only do this if not pb_fv, as otherwise we might have n*m children here... |
||
if random.random() < pb_dn: | ||
res = mutate_deep_narrow(sparql, timeout, gtp_scores, child) | ||
helper += res | ||
else: | ||
helper.append(child) | ||
children = helper | ||
|
||
children = { | ||
c if fit_to_live(c) else orig_child | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,6 +62,8 @@ def __init__(self): | |
self.ask_multi_query_count = 0 | ||
self.combined_ask_count_multi_query_count = 0 | ||
self.variable_substitution_query_count = 0 | ||
self.useful_path_query_count = 0 | ||
self.useful_path_inst_query_count = 0 | ||
self.predict_query_count = 0 | ||
self.count_query_count = 0 | ||
|
||
|
@@ -695,6 +697,145 @@ def _var_subst_chunk_result_ext(q_res, _sel_var_and_vars, _, **kwds): | |
|
||
def _var_subst_res_update(res, update, **_):
    """Add ``update`` to the accumulated result ``res`` using ``+=``.

    Nothing is returned, so the effect is only visible to the caller if
    ``res`` is mutated in place by ``+=`` (as a list is); any additional
    keyword arguments are accepted and ignored.
    """
    res += update
|
||
|
||
def useful_path_query(
        sparql,
        timeout,
        graph_pattern,
        var_to_fix,
        var_to_count,
        valueblocks,
        steps,
        startvar,
        avglimit=config.MUTPB_DN_AVG_LIMIT,
        gp_in=False,
        batch_size=None
):
    """Run the batched "useful path" query for one step of a path.

    The ``matching_node_pairs`` of ``graph_pattern`` serve as the values that
    ``_multi_query`` batches over, and each pair is mapped onto itself in the
    return value mapping. The remaining arguments are bundled into one tuple
    that ``_multi_query`` receives together with the ``_usef_path_*``
    callbacks.

    :return: the result of ``_multi_query`` (the caller added in this change
        unpacks it into two values and uses the second as the result list).
    """
    # NOTE(review): a reviewer asked for this function to be renamed; the
    # suggested name did not survive in the review text.
    _query_stats.useful_path_query_count += 1
    # TODO: maybe subtract 10 per 'fixed' variable from batch_size (because
    #   the value block is sent along with the query as well)
    # TODO: maybe intersect with the pairs that are not covered yet
    stps = graph_pattern.matching_node_pairs
    ret_val_mapping = {stp: [stp] for stp in stps}
    query_args = (
        var_to_fix, var_to_count, startvar, valueblocks, steps, avglimit, gp_in
    )
    return _multi_query(
        sparql, timeout, graph_pattern, stps,
        batch_size, query_args, stps, ret_val_mapping,
        _usef_path_res_init, _usef_path_chunk_q, _usef_path_chunk_result_ext,
        _usef_path_res_update
    )
|
||
|
||
# noinspection PyUnusedLocal | ||
def _usef_path_res_init(_, **kwds):
    """Return a new empty list as initial result; all arguments are ignored."""
    return list()
|
||
|
||
def _usef_path_chunk_q(gp, _vars_steps_and_stuff, values_chunk):
    """Build the useful-path SPARQL query for one chunk of value tuples.

    A value block for ``startvar`` is created from ``values_chunk``: the first
    element of every tuple is used if ``startvar`` is ``SOURCE_VAR``, the
    second element otherwise. The value blocks passed in via
    ``_vars_steps_and_stuff`` are merged on top of it, then everything is
    handed to ``gp.to_sparql_useful_path_query``.
    """
    (var_to_fix, var_to_count, startvar, extra_valueblocks, steps, avglimit,
     gp_in) = _vars_steps_and_stuff
    pos = 0 if startvar == SOURCE_VAR else 1
    blocks = {
        startvar: {(startvar,): [(pair[pos],) for pair in values_chunk]},
    }
    # applied last, so passed-in blocks win over the start block on key clash
    blocks.update(extra_valueblocks)
    return gp.to_sparql_useful_path_query(
        var_to_fix,
        var_to_count,
        blocks,
        steps,
        startvar,
        avglimit=avglimit,
        gp_in=gp_in
    )
|
||
|
||
# noinspection PyUnusedLocal | ||
def _usef_path_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds):
    """Extract the ``var_to_fix`` value of every result row of one chunk.

    The rows found under ``results -> bindings`` in ``q_res`` (none if that
    path is missing) are converted with
    ``sparql_json_result_bindings_to_rdflib``; the returned list holds one
    ``get_path(row, [var_to_fix])`` entry per converted row.
    """
    (var_to_fix, var_to_count, startvar, _valueblocks, steps, avglimit,
     gp_in) = _vars_steps_and_stuff
    rows = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[])
    )
    # TODO: think about whether the avg-outgoing values should somehow be
    #   returned as well
    return [get_path(row, [var_to_fix]) for row in rows]
|
||
|
||
def _usef_path_res_update(res, update, **_):
    """Append all entries of ``update`` to the result list ``res`` in place.

    Additional keyword arguments are accepted and ignored; nothing is
    returned.
    """
    res.extend(update)
|
||
|
||
def useful_path_inst_query(
        sparql,
        timeout,
        graph_pattern,
        hop,
        valueblocks,
        steps,
        gp_in=False,
        batch_size=None
):
    """Run the batched query that instantiates all hops of a path.

    The ``matching_node_pairs`` of ``graph_pattern`` serve as the values that
    ``_multi_query`` batches over, and each pair is mapped onto itself in the
    return value mapping. ``hop``, ``valueblocks``, ``steps`` and ``gp_in``
    are bundled into one tuple that ``_multi_query`` receives together with
    the ``_usef_path_inst_*`` callbacks.

    :return: the result of ``_multi_query`` (the caller added in this change
        unpacks it into two values and uses the second as the result list).
    """
    _query_stats.useful_path_inst_query_count += 1
    # TODO: maybe subtract 10 per 'fixed' variable from batch_size (because
    #   the value block is sent along with the query as well)
    # maybe intersect with the pairs that are not covered yet
    stps = graph_pattern.matching_node_pairs
    ret_val_mapping = {stp: [stp] for stp in stps}
    query_args = (hop, valueblocks, steps, gp_in)
    return _multi_query(
        sparql, timeout, graph_pattern, stps,
        batch_size, query_args, stps, ret_val_mapping,
        _usef_path_inst_res_init, _usef_path_inst_chunk_q,
        _usef_path_inst_chunk_result_ext, _usef_path_inst_res_update
    )
|
||
|
||
# noinspection PyUnusedLocal | ||
def _usef_path_inst_res_init(_, **kwds):
    """Return a new empty list as initial result; all arguments are ignored."""
    return list()
|
||
|
||
def _usef_path_inst_chunk_q(gp, _vars_steps_and_stuff, values_chunk):
    """Build the path instantiation SPARQL query for one chunk of values.

    ``values_chunk`` is put unchanged into a value block for the variable
    pair ``(SOURCE_VAR, TARGET_VAR)`` under the key ``'st'``. The value blocks
    passed in via ``_vars_steps_and_stuff`` are merged on top of it, then
    everything is handed to ``gp.to_sparql_useful_path_inst_query``.
    """
    hop, extra_valueblocks, steps, gp_in = _vars_steps_and_stuff
    blocks = {'st': {(SOURCE_VAR, TARGET_VAR): values_chunk}}
    # applied last, so passed-in blocks win over the 'st' block on key clash
    blocks.update(extra_valueblocks)
    return gp.to_sparql_useful_path_inst_query(
        hop, blocks, steps, gp_in=gp_in
    )
|
||
|
||
# noinspection PyUnusedLocal | ||
def _usef_path_inst_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds):
    """Extract the values of all ``hop`` variables from every result row.

    The rows found under ``results -> bindings`` in ``q_res`` (none if that
    path is missing) are converted with
    ``sparql_json_result_bindings_to_rdflib``; the returned list holds, per
    converted row, a list with ``get_path(row, [h])`` for each ``h`` in
    ``hop`` (in the order of ``hop``).
    """
    hop, _valueblocks, steps, gp_in = _vars_steps_and_stuff
    rows = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[])
    )
    return [[get_path(row, [h]) for h in hop] for row in rows]
|
||
|
||
def _usef_path_inst_res_update(res, update, **_):
    """Append all entries of ``update`` to the result list ``res`` in place.

    Additional keyword arguments are accepted and ignored; nothing is
    returned.
    """
    res.extend(update)
|
||
|
||
def generate_stps_from_gp(sparql, gp): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably should be a lot lower in final version