-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph.h
131 lines (99 loc) · 3.14 KB
/
graph.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#ifndef PSGRAPH_H
#define PSGRAPH_H
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include "khash.h"
#include "kseq.h"
#include "kvec.h"
KSTREAM_INIT(gzFile, gzread, 65536)
KHASH_MAP_INIT_INT(im, int)
/* Some assumptions I made:
* - graph is chopped (size of segments < 4096)
* - graph has less than INT_MAX nodes
* - graph is topological sorted
*/
typedef struct {
int idx; // identifier (as in gfa) - assuming integer
char *seq; // actual sequence
int l; // actual length
int c; // capacity
kvec_t(int) paths;
} seg_t;
typedef struct {
int v1;
int v2;
} link_t;
typedef struct path_t {
char *idx; // identifier (as in gfa)
int *vertices; // identifiers of the vertices along the path (graph space)
int l; // actual length
int capacity; // capacity
} path_t;
typedef struct graph_t {
char *fn; // file name
int nv; // number of vertices
int cv; // allocated vertices
seg_t **vertices; // vertices
khash_t(im) *
v_map; // mapping between gfa idx and internal idx (position on vertices)
int ne; // total number of edges
kvec_t(int) * out_edges; // outgoing edges
int oe_c;
kvec_t(int) * in_edges; // incoming edges
int ie_c;
int np; // number of paths
int cp; // allocated paths
path_t **paths; // paths
} graph_t;
/* Initialize a graph */
graph_t *init_graph(char *fn);
/* Dump graph in GFA format*/
void dump_gfa(graph_t *g, char *fn);
/* Destroy the graph */
void destroy_graph(graph_t *g);
/* Load all vertices from gfa */
int load_vertices(graph_t *g);
/* Load all edges from gfa */
int load_edges(graph_t *g);
/* Load all paths from gfa */
int load_paths(graph_t *g);
/* Extract subgraph in between vertices v1 and v2 */
// XXX: assuming topological sorting
graph_t *extract_subgraph(graph_t *g, int v1, int v2);
/* Make graph canonical */
graph_t *canonicalize(graph_t *g);
/* Get internal identifier from GFA identifier */
int get_iidx(graph_t *g, int v);
/* Return the segment given the gfa idx */
seg_t *get_vertex(graph_t *g, int v);
int is_source(graph_t *g, int v);
int is_sink(graph_t *g, int v);
int contains(graph_t *g, path_t *p, int v);
int get_father(graph_t *g, path_t *p, int v);
char get_label(graph_t *g, int v);
/* Extract subpath */
// XXX: this function init the returned path. Maybe we should init it outside
path_t *extract_subpath(graph_t *g, path_t *path, int x, int y);
/* Check if x and y are on at least one path */
int compatible(graph_t *g, int x, int y);
/* Initialize a segment */
seg_t *init_seg();
/* Destroy the segment */
void destroy_seg(seg_t *seg);
/* Initialize a path with capacity c*/
path_t *init_path(int c);
/* Add vertex v to path, reallocating it if needed */
void add_vertex(path_t *path, int v);
/* Get path sequence */
int get_sequence(graph_t *graph, path_t *path, char **pseq, int *pseq_c);
/* Destroy a path */
void destroy_path(path_t *p);
/* GFA reading utilities */
void gfa_parse_S(char *s, seg_t *ret);
void gfa_parse_L(char *s, int *idx1, int *idx2);
void gfa_parse_P(char *s, path_t *path);
void gfa_parse_W(char *s, path_t *path);
#endif