Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/common/assembly_graph/core/debruijn_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,11 @@ class DeBruijnDataMaster {
return EdgeData(!(data.nucls()));
}

// FIXME: support complex overlaps
// Returns the conjugate of the given vertex data.
// NOTE(review): this text is a rendered diff (its hunk header reports "4 additions & 1
// deletion"), so the unconditional return directly below is presumably the removed line
// and the guarded return after it is its replacement — confirm against the real header.
VertexData conjugate(const VertexData &data) const {
return data.clone();
// Data without a complex overlap, or with an empty link list, is returned as a clone.
if (!data.has_complex_overlap() or data.links().empty())
return data.clone();
// Any other data is rejected: the message states that conjugating complex overlap data
// is not implemented.
// NOTE(review): no `;` follows the macro and no value is returned on this path; whether
// that is acceptable depends on VERIFY_MSG's definition, which is not visible here.
VERIFY_MSG(false, "Conjugation of complex overlap data is not implemented")
}

size_t length(const EdgeData& data) const {
Expand Down
18 changes: 10 additions & 8 deletions src/common/assembly_graph/core/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,14 @@ class DeBruijnGraph: public omnigraph::ObservableGraph<DeBruijnDataMaster> {
}), links.end());
}

// Removes from vertex v's link list every link id whose stored link has e as its
// second element.
// NOTE(review): a textually identical copy of this method follows immediately; in this
// rendered diff the two presumably are the removed and the re-added version — TODO confirm.
void erase_links_with_outedge(VertexId v, EdgeId e) {
// Bound by reference, so the erase below edits the list held in the vertex data.
auto &links = data(v).links();
// remove_if moves the matching ids to the tail; erase then drops that tail.
links.erase(std::remove_if(links.begin(),
links.end(),
[this, &e](const LinkId &link_id) {
// Link ids are resolved through link_storage_ to reach the stored edge pair.
return link_storage_[link_id].link.second == e;
}), links.end());
}
// Removes from vertex v's link list every link id whose stored link has e as its
// second element.
// NOTE(review): a textually identical copy of this method appears directly before this
// one; in this rendered diff the two presumably are the removed and the re-added
// version — TODO confirm.
void erase_links_with_outedge(VertexId v, EdgeId e) {
// Bound by reference, so the erase below edits the list held in the vertex data.
auto &links = data(v).links();
// remove_if moves the matching ids to the tail; erase then drops that tail.
links.erase(std::remove_if(links.begin(),
links.end(),
[this, &e](const LinkId &link_id) {
// Link ids are resolved through link_storage_ to reach the stored edge pair.
return link_storage_[link_id].link.second == e;
}), links.end());
}

auto links(VertexId v) const {
return data(v).links();
Expand Down Expand Up @@ -141,6 +141,8 @@ class DeBruijnGraph: public omnigraph::ObservableGraph<DeBruijnDataMaster> {
VERIFY_MSG(false, "Link " << in.int_id() << " -> " << out.int_id() << " was not found for vertex " << v.int_id());
}

// Forwards `size` to link_storage_.reserve(); called by the binary loader with the
// stored link count before any links are added.
void lreserve(size_t size) { link_storage_.reserve(size); }

// Iterator accessors over link_storage_: begin/end for non-const access, followed by
// the const overload of begin.
auto link_begin() { return link_storage_.begin(); }
auto link_end() { return link_storage_.end(); }
auto link_begin() const { return link_storage_.begin(); }
Expand Down
79 changes: 70 additions & 9 deletions src/common/io/binary/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,49 @@ class GraphIO : public IOSingle<Graph> {

private:
void SaveImpl(BinOStream &str, const Graph &graph) override {
str << graph.vreserved() << graph.ereserved();
str << graph.vreserved() << graph.ereserved() << graph.link_size();

size_t vertex_cnt = graph.size();
str << vertex_cnt;

std::vector<bool> saved_vertices(graph.max_vid(), false);

// Writes one vertex record to `str`: the ids of v and of its conjugate and, only the
// first time either vertex of the pair is seen, the overlap information of v.
auto SaveVertex = [&](typename Graph::VertexId v) {
TRACE("Saving " << v.int_id() << " ~ " << graph.conjugate(v).int_id());
// The id pair is written on every call, including calls for an already-saved vertex.
str << v.int_id() << graph.conjugate(v).int_id();
if (saved_vertices[v.int_id()])
return;
TRACE("Saving link info");
// A flag precedes the payload so a reader can tell which of the two layouts follows.
bool complex = graph.is_complex(v);
str << complex;
if (!complex) {
// Non-complex vertex: one overlap value, queried with default-constructed edge ids.
unsigned ovl = unsigned(graph.link_length(v, typename Graph::EdgeId(), typename Graph::EdgeId()));
str << ovl;
} else {
// Complex vertex: the link count, then for each link both edges, the conjugates of
// both edges, and the overlap.
// NOTE(review): the count is written as whatever type size() returns, while the
// loading code shown for this file reads it into a `uint` — confirm the stream
// encodes both with the same width.
str << graph.links(v).size();
TRACE("Saving " << graph.links(v).size() << " links from " << v.int_id() << ", " << graph.conjugate(v).int_id());
for (const auto &link_id: graph.links(v)) {
const auto &link = graph.link(link_id);
auto first_e = link.link.first;
auto second_e = link.link.second;
str << first_e << second_e << graph.conjugate(first_e) << graph.conjugate(second_e) << link.overlap;
}
}
// Mark v and its conjugate so that neither has its link info written again.
saved_vertices[v.int_id()] = true;
saved_vertices[graph.conjugate(v).int_id()] = true;
};

for (auto v1 : graph) {
str << v1.int_id() << graph.conjugate(v1).int_id();
SaveVertex(v1);
for (auto e1 : graph.OutgoingEdges(v1)) {
auto e2 = graph.conjugate(e1);
if (e2 < e1)
continue;
str << e1.int_id() << e2.int_id()
<< graph.EdgeEnd(e1).int_id() << graph.EdgeStart(e2).int_id()
<< graph.EdgeNucls(e1);
str << e1.int_id() << e2.int_id();
//<< graph.EdgeEnd(e1).int_id() << graph.EdgeStart(e2).int_id() << graph.EdgeNucls(e1);

SaveVertex(graph.EdgeEnd(e1));
str << graph.EdgeNucls(e1);
}
str << (size_t)0; //null-term
}
Expand All @@ -47,9 +76,11 @@ class GraphIO : public IOSingle<Graph> {
void LoadImpl(BinIStream &str, Graph &graph) override {
graph.clear();

uint64_t max_vid, max_eid;
str >> max_vid >> max_eid;
uint64_t max_vid, max_eid, num_links;
str >> max_vid >> max_eid >> num_links;
graph.reserve(max_vid, max_eid);
TRACE("Reserving " << num_links << " links");
graph.lreserve(num_links);

size_t vertex_cnt;
str >> vertex_cnt;
Expand All @@ -58,9 +89,38 @@ class GraphIO : public IOSingle<Graph> {
if (graph.contains(typename Graph::VertexId(ids[0])))
return;
TRACE("Vertex " << ids[0] << " ~ " << ids[1] << " .");
auto new_id = graph.AddVertex(typename Graph::VertexData(graph.k()), ids[0], ids[1]);
bool complex;
typename Graph::VertexId new_id;
str >> complex;
TRACE("Complex: " << complex);
if (!complex) {
unsigned ovl;
str >> ovl;
new_id = graph.AddVertex(typename Graph::VertexData(ovl), ids[0], ids[1]);
} else {
uint link_count = 0;
str >> link_count;
std::vector<debruijn_graph::LinkId> link_ids;
std::vector<debruijn_graph::LinkId> conj_link_ids;
TRACE("Reading " << link_count << " links from " << ids[0] << ", " << ids[1]);
for (uint i = 0; i < link_count; ++i) {
typename Graph::EdgeId e1, e2, e1_conj, e2_conj;
unsigned ovl;
str >> e1 >> e2 >> e1_conj >> e2_conj >> ovl;
auto link_id = graph.add_link(e1, e2, ovl);
auto conj_link_id = graph.add_link(e2_conj, e1_conj, ovl);
link_ids.push_back(link_id);
conj_link_ids.push_back(conj_link_id);
}
std::vector<debruijn_graph::LinkId> empty_links;
new_id = graph.AddVertex(debruijn_graph::DeBruijnVertexData(empty_links), ids[0], ids[1]);
auto conj_id = graph.conjugate(new_id);
graph.add_links(new_id, link_ids);
graph.add_links(conj_id, conj_link_ids);
}
VERIFY(new_id == ids[0]);
VERIFY(graph.conjugate(new_id) == ids[1]);
TRACE("Added " << ids[0] << " ~ " << ids[1] << " .")
};

for (size_t i = 0; i < vertex_cnt; ++i) {
Expand All @@ -75,10 +135,11 @@ class GraphIO : public IOSingle<Graph> {
str >> edge_ids[1];
uint64_t end_ids[2];
Sequence seq;
str >> end_ids >> seq;
str >> end_ids;
TRACE("Edge " << edge_ids[0] << " : " << start_ids[0] << " -> "
<< end_ids[0] << " l = " << seq.size() << " ~ " << edge_ids[1]);
TryAddVertex(end_ids);
str >> seq;

auto new_id = graph.AddEdge(start_ids[0], end_ids[0],
typename Graph::EdgeData(seq), edge_ids[0], edge_ids[1]);
Expand Down
55 changes: 50 additions & 5 deletions src/test/debruijn/v_overlaps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "assembly_graph/core/construction_helper.hpp"
#include "assembly_graph/graph_support/v_overlaps_support.hpp"
#include "io/binary/graph.hpp"
#include "io/graph/gfa_reader.hpp"
#include "io/reads/file_reader.hpp"

Expand Down Expand Up @@ -134,17 +135,28 @@ void PerformSplits(debruijn_graph::Graph &graph, std::ifstream &ops_stream, cons
" are not incident, operations are not consistent with the graph");
VertexId new_vertex;
if (graph.is_complex(vertex)) {
LinkId split_link;
LinkId split_link, conj_split_link;
bool link_found = false;
bool conj_link_found = false;
for (auto &link_id: graph.links(vertex)) {
if (graph.link(link_id).link.first == in_edge and graph.link(link_id).link.second == out_edge) {
if (graph.link(link_id).link.first == in_edge && graph.link(link_id).link.second == out_edge) {
split_link = link_id;
link_found = true;
}
}
EXPECT_TRUE(link_found);
std::vector<LinkId> links({split_link});
new_vertex = helper.CreateVertex(debruijn_graph::DeBruijnVertexData(links));
for (auto &link_id: graph.links(graph.conjugate(vertex))) {
if (graph.link(link_id).link.first == graph.conjugate(out_edge) &&
graph.link(link_id).link.second == graph.conjugate(in_edge)) {
conj_split_link = link_id;
conj_link_found = true;
}
}

EXPECT_TRUE(link_found && conj_link_found);
std::vector<debruijn_graph::LinkId> empty;
new_vertex = helper.CreateVertex(debruijn_graph::DeBruijnVertexData(empty));
graph.add_link(new_vertex, split_link);
graph.add_link(graph.conjugate(new_vertex), conj_split_link);

graph.erase_links_with_outedge(vertex, out_edge);
graph.erase_links_with_inedge(vertex, in_edge);
Expand All @@ -165,9 +177,11 @@ void CheckPathOperations(debruijn_graph::Graph &graph,
const std::string &fasta_path,
const IdMapper &id_mapper) {
std::ifstream ops_stream(operations_path);
INFO("Performing splits");
PerformSplits(graph, ops_stream, id_mapper);
int num_merges = 0;
ops_stream >> num_merges;
INFO("Performed splits");

std::unordered_map<std::string, EdgeId> merged_id_map;
for (int i = 0; i < num_merges; ++i) {
Expand Down Expand Up @@ -227,9 +241,40 @@ void CheckGraphWithPaths(const std::filesystem::path &graph_basename) {
CheckPathOperations(graph, operations_path, fasta_path, *id_mapper);
}

// Round-trips a graph through binary I/O and re-validates it: reads <graph_basename>.gfa
// into a graph, saves the graph to path_to_save, loads it back into the same graph
// object, then runs CheckSegmentLen and CheckStructure against the stream opened on
// <graph_basename>.graph.
void CheckBinaryIO(const std::string &path_to_save,
const std::filesystem::path &graph_basename) {
// Both input paths are the basename with an extension appended.
auto gfa_path = graph_basename;
auto graph_path = graph_basename;
gfa_path += ".gfa";
graph_path += ".graph";

// NOTE(review): K is never read in this function.
size_t K = 55;
std::unique_ptr<io::IdMapper<std::string>> id_mapper(new io::IdMapper<std::string>());
gfa::GFAReader gfa_reader(gfa_path);
Graph graph(0);
// NOTE(review): to_graph is invoked twice on the same graph and id mapper, and the
// value captured in local_k is never read — one of the two calls looks like a
// leftover; confirm what a repeated to_graph call does before removing either.
gfa_reader.to_graph(graph, id_mapper.get());
size_t local_k = gfa_reader.to_graph(graph, id_mapper.get());

// Save, then load into the very same object, so the checks below see the loaded state.
io::binary::Save(path_to_save, graph);
io::binary::Load(path_to_save, graph);

std::ifstream graph_stream(graph_path.c_str());
// The first token of the .graph file is read into graph_name and not used afterwards.
std::string graph_name;
graph_stream >> graph_name;
CheckSegmentLen(graph_stream, graph, *id_mapper);
CheckStructure(graph_stream, graph, *id_mapper);
}

// Runs CheckGraphWithPaths on four graph fragments from the v_overlaps directory:
// bone, conjugate_bone, conjugate_triple and triple_repeat.
TEST(VariableOverlaps, BasicOperations) {
CheckGraphWithPaths("src/test/debruijn/graph_fragments/v_overlaps/bone");
CheckGraphWithPaths("src/test/debruijn/graph_fragments/v_overlaps/conjugate_bone");
CheckGraphWithPaths("src/test/debruijn/graph_fragments/v_overlaps/conjugate_triple");
CheckGraphWithPaths("src/test/debruijn/graph_fragments/v_overlaps/triple_repeat");
}

// Runs the binary save/load check on three graph fragments. Every call is given the
// same save path, so each one writes to the location used by the previous one.
// NOTE(review): conjugate_bone, which the BasicOperations test exercises, is not
// checked here — confirm whether that omission is intentional.
TEST(VariableOverlaps, BinaryIO) {
std::string save_path = "src/test/debruijn/graph_fragments/saves/test_save";
CheckBinaryIO(save_path, "src/test/debruijn/graph_fragments/v_overlaps/bone");
CheckBinaryIO(save_path, "src/test/debruijn/graph_fragments/v_overlaps/conjugate_triple");
CheckBinaryIO(save_path, "src/test/debruijn/graph_fragments/v_overlaps/triple_repeat");
}