Skip to content

Commit 3dc7f61

Browse files
authored
Add probability-based stepping to a node in network (#168)
This adds step (i.e., jump, teleport) from one node to another based on probability assigned to an edge. The code it uses is completely different, so a new function step is now available alongside of the existing travel function. Node to go to is selected based on edge probability in the new step function. Ignoring of nodes, implemented for the travel function, is not implemented for the step function (or not yet) because that would change the relative probabilities. Edges have probabilities, but they don't have costs. Probability can be any range (it is relative) but it cannot be negative. Edge probabilities are determined from the input CSV. Network type is now driven by a single string variable with three values. Using current names network_type = snap, step, travel.
1 parent d56b0ee commit 3dc7f61

File tree

5 files changed

+461
-34
lines changed

5 files changed

+461
-34
lines changed

include/pops/anthropogenic_kernel.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,11 @@ std::unique_ptr<KernelInterface<Generator>> create_anthro_kernel(
5959
else if (anthro_kernel == DispersalKernelType::Network) {
6060
using Kernel =
6161
DynamicWrapperKernel<NetworkDispersalKernel<RasterIndex>, Generator>;
62+
if (config.network_type == "step")
63+
return std::unique_ptr<Kernel>(new Kernel(network));
64+
bool snap = config.network_type == "snap" ? true : false;
6265
return std::unique_ptr<Kernel>(new Kernel(
63-
network, config.network_min_distance, config.network_max_distance));
66+
network, config.network_min_distance, config.network_max_distance, snap));
6467
}
6568
else if (config.deterministic) {
6669
using Kernel = DynamicWrapperKernel<

include/pops/config.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ class Config
7171
std::string anthro_kernel_type;
7272
double anthro_scale{0};
7373
std::string anthro_direction;
74+
std::string network_type; ///< travel, snap, step
7475
double network_min_distance{0};
7576
double network_max_distance{0};
76-
bool network_snap{false}; ///< Snap resulting location to closest node in network
7777
double anthro_kappa{0};
7878
double shape{1.0};
7979
// Treatments

include/pops/network.hpp

Lines changed: 181 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ class EdgeGeometry : public std::vector<RasterCell>
6464
return (this->size() - 1) * cost_per_cell_;
6565
}
6666

67+
void set_total_cost(double value)
68+
{
69+
total_cost_ = value;
70+
}
71+
6772
/** Get cost per cell for the segment (edge). */
6873
double cost_per_cell() const
6974
{
@@ -72,19 +77,27 @@ class EdgeGeometry : public std::vector<RasterCell>
7277
return cost_per_cell_;
7378
}
7479

75-
void set_total_cost(double value)
80+
void set_cost_per_cell(double value)
7681
{
77-
total_cost_ = value;
82+
cost_per_cell_ = value;
7883
}
7984

80-
void set_cost_per_cell(double value)
85+
/** Get probability of an edge */
86+
double probability() const
8187
{
82-
cost_per_cell_ = value;
88+
return probability_;
89+
}
90+
91+
/** Set probability of an edge */
92+
void set_probability(double value)
93+
{
94+
probability_ = value;
8395
}
8496

8597
private:
8698
double cost_per_cell_ = 0;
8799
double total_cost_ = 0;
100+
double probability_ = 0;
88101
};
89102

90103
/** Constant view of a edge geometry (to iterate a segment in either direction)
@@ -177,16 +190,14 @@ class Network
177190
* @param bbox Bounding box of the raster grid (in real world coordinates)
178191
* @param ew_res East-west resolution of the raster grid
179192
* @param ns_res North-south resolution of the raster grid
180-
* @param snap Snap result to the closest node
181193
*/
182-
Network(BBox<double> bbox, double ew_res, double ns_res, bool snap = false)
194+
Network(BBox<double> bbox, double ew_res, double ns_res)
183195
: bbox_(bbox),
184196
ew_res_(ew_res),
185197
ns_res_(ns_res),
186198
max_row_(0),
187199
max_col_(0),
188-
distance_per_cell_((ew_res + ns_res) / 2),
189-
snap_(snap)
200+
distance_per_cell_((ew_res + ns_res) / 2)
190201
{
191202
std::tie(max_row_, max_col_) = xy_to_row_col(bbox_.east, bbox_.south);
192203
}
@@ -443,14 +454,25 @@ class Network
443454
* All previously visited nodes are tracked and, if possible, excluded
444455
* from further traveling.
445456
*
457+
* The function assumes there is a node at the given *row* and *column*, i.e., that
458+
* the decision to call this function was based on the caller knowing there is a
459+
* node. If there is no node, an std::invalid_argument exception is thrown.
460+
* If there is more than one node at the given *row* and *column*, a random node is
461+
* picked and used for traveling.
462+
*
463+
* If *snap* is true, then results are snapped to the closest node, otherwise
464+
* result can be anywhere in between the nodes based on the edge geomerty (segment).
465+
*
446466
* @returns Final row and column pair
447467
*/
448468
template<typename Generator>
449469
std::tuple<int, int> travel(
450-
RasterIndex row, RasterIndex col, double distance, Generator& generator) const
470+
RasterIndex row,
471+
RasterIndex col,
472+
double distance,
473+
Generator& generator,
474+
bool snap = false) const
451475
{
452-
// We assume there is a node here, i.e., that we are made decision
453-
// to use this kernel knowing there is a node.
454476
auto node_id = get_random_node_at(row, col, generator);
455477
std::set<NodeId> visited_nodes;
456478
while (distance >= 0) {
@@ -472,7 +494,7 @@ class Network
472494
distance -= segment.cost();
473495
continue;
474496
}
475-
if (snap_) {
497+
if (snap) {
476498
if (distance < segment.cost() / 2) {
477499
// Less than half snaps to the start node.
478500
return segment.front();
@@ -487,6 +509,38 @@ class Network
487509
throw std::invalid_argument("Distance must be greater than or equal to zero");
488510
}
489511

512+
/**
513+
* Step to a different node in the network from given row and column.
514+
*
515+
* Returns any node of the nodes connected to the start node possibly based on the
516+
* edge probability if probability was assigned to the edges.
517+
*
518+
* If *num_steps* is greater than 1, multiple steps are perfomed and the last node
519+
* is returned. In each node, the probability of picking a specific connection is
520+
* either determined by the provided edge probabilities or is equal among the
521+
* connections. Consequently, previously visited nodes can be visited again. In
522+
* other words, for highly probable connections, most likely next step is a step
523+
* back to the starting node (or generally previous node for `num_steps >= 3`).
524+
*
525+
* The function assumes a node is at the *row*, *col* coordinates, i.e., that this
526+
* was either checked beforehand or otherwise ensured. If there is no node, an
527+
* std::invalid_argument exception is thrown.
528+
* If there is more than one node at the given *row* and *column*, a random node is
529+
* picked and used for traveling.
530+
*
531+
* @returns Destination row and column pair
532+
*/
533+
template<typename Generator>
534+
std::tuple<int, int> step(
535+
RasterIndex row, RasterIndex col, Generator& generator, int num_steps = 1) const
536+
{
537+
auto node_id = get_random_node_at(row, col, generator);
538+
for (int i = 0; i < num_steps; ++i) {
539+
node_id = next_probable_node(node_id, generator);
540+
}
541+
return get_node_row_col(node_id);
542+
}
543+
490544
/**
491545
* @brief Get all nodes as vector of all ids with their row and column.
492546
*
@@ -594,7 +648,7 @@ class Network
594648
stream << " distance_per_cell: " << distance_per_cell_ << "\n";
595649
std::set<std::pair<NodeId, NodeId>> edges;
596650
for (const auto& item : node_matrix_) {
597-
for (const auto& node_id : item.second) {
651+
for (const auto& node_id : item.second.second) {
598652
edges.emplace(item.first, node_id);
599653
}
600654
}
@@ -631,7 +685,8 @@ class Network
631685
* Connection in both directions is stored explicitly (see load_segments() source
632686
* code).
633687
*/
634-
using NodeMatrix = std::map<NodeId, std::vector<NodeId>>;
688+
using NodeMatrix =
689+
std::map<NodeId, std::pair<std::vector<double>, std::vector<NodeId>>>;
635690

636691
/** Smallest component of edge geometry */
637692
using Cell = std::pair<RasterIndex, RasterIndex>;
@@ -680,6 +735,48 @@ class Network
680735
}
681736
}
682737

738+
/**
739+
* @brief Convert string to probability
740+
*
741+
* @param text String with probability
742+
* @return Probability as number
743+
*/
744+
static double probability_from_text(const std::string& text)
745+
{
746+
double value{0};
747+
try {
748+
value = std::stod(text);
749+
}
750+
catch (const std::invalid_argument& err) {
751+
if (string_contains(text, '"') || string_contains(text, '\'')) {
752+
throw std::invalid_argument(
753+
std::string("Text for connection probabilty cannot contain quotes "
754+
"(only digits are allowed): ")
755+
+ text);
756+
}
757+
if (text.empty()) {
758+
throw std::invalid_argument(
759+
"Text for connection probabilty cannot be an empty string");
760+
}
761+
else {
762+
throw std::invalid_argument(
763+
std::string("Text cannot be converted to connection probabilty "
764+
"(only digits are allowed): ")
765+
+ text);
766+
}
767+
}
768+
catch (const std::out_of_range& err) {
769+
throw std::out_of_range(
770+
std::string("Numerical value too large for connection probabilty: ")
771+
+ text);
772+
}
773+
if (value < 0) {
774+
throw std::invalid_argument(
775+
std::string("Probability needs to be >=0, not ") + text);
776+
}
777+
return value;
778+
}
779+
683780
/**
684781
* @brief Convert string to cost
685782
*
@@ -715,8 +812,10 @@ class Network
715812
}
716813

717814
template<typename InputStream>
718-
static bool stream_has_cost_column(InputStream& stream, char delimeter)
815+
static std::pair<bool, bool> stream_has_columns(InputStream& stream, char delimeter)
719816
{
817+
bool has_cost{false};
818+
bool has_probability{false};
720819
// Get header to determine what is included.
721820
auto starting_position = stream.tellg();
722821
std::string line;
@@ -731,15 +830,29 @@ class Network
731830
stream.seekg(starting_position);
732831
break;
733832
}
734-
if (label == "cost") {
833+
if (label == "probability") {
834+
if (has_cost) {
835+
// Detailed check to give a more relevant error message.
836+
throw std::runtime_error(
837+
"The cost column must be after the probability column");
838+
}
735839
if (column_number != 3) {
736840
throw std::runtime_error(
737-
"The cost column must be the third column");
841+
"The probability column must be the third column");
738842
}
739-
return true;
843+
has_probability = true;
844+
continue;
845+
}
846+
if (label == "cost") {
847+
if (!(column_number == 3 || column_number == 4)) {
848+
throw std::runtime_error(
849+
"The cost column must be the third or fourth column");
850+
}
851+
has_cost = true;
852+
continue;
740853
}
741854
}
742-
return false;
855+
return {has_cost, has_probability};
743856
}
744857

745858
/**
@@ -752,7 +865,9 @@ class Network
752865
void load_segments(InputStream& stream)
753866
{
754867
char delimeter{','};
755-
bool has_cost = stream_has_cost_column(stream, delimeter);
868+
bool has_cost{false};
869+
bool has_probability{false};
870+
std::tie(has_cost, has_probability) = stream_has_columns(stream, delimeter);
756871

757872
std::string line;
758873
while (std::getline(stream, line)) {
@@ -775,6 +890,12 @@ class Network
775890
}
776891
Segment segment;
777892

893+
if (has_probability) {
894+
std::string probability_text;
895+
std::getline(line_stream, probability_text, delimeter);
896+
double connection_probability = probability_from_text(probability_text);
897+
segment.set_probability(connection_probability);
898+
}
778899
if (has_cost) {
779900
std::string cost_text;
780901
std::getline(line_stream, cost_text, delimeter);
@@ -842,8 +963,12 @@ class Network
842963
const auto& segment{node_segment.second};
843964
nodes_by_row_col_[segment.front()].insert(start_node_id);
844965
nodes_by_row_col_[segment.back()].insert(end_node_id);
845-
node_matrix_[start_node_id].push_back(end_node_id);
846-
node_matrix_[end_node_id].push_back(start_node_id);
966+
if (has_probability) {
967+
node_matrix_[start_node_id].first.push_back(segment.probability());
968+
node_matrix_[end_node_id].first.push_back(segment.probability());
969+
}
970+
node_matrix_[start_node_id].second.push_back(end_node_id);
971+
node_matrix_[end_node_id].second.push_back(start_node_id);
847972
}
848973
}
849974

@@ -881,7 +1006,7 @@ class Network
8811006
*/
8821007
const std::vector<NodeId>& nodes_connected_to(NodeId node) const
8831008
{
884-
return node_matrix_.at(node);
1009+
return node_matrix_.at(node).second;
8851010
}
8861011

8871012
/**
@@ -929,13 +1054,45 @@ class Network
9291054
return pick_random_item(nodes, generator);
9301055
}
9311056

1057+
/**
1058+
* @brief Pick a probable node from the given node.
1059+
*
1060+
* If there is more than one edge leading from the given node, a random node is
1061+
* picked based on edge probabilities. If there are no probabilities assigned, a
1062+
* random node is picked. If there are no edges leading from the given node, the
1063+
* node itself is returned.
1064+
*/
1065+
template<typename Generator>
1066+
NodeId next_probable_node(NodeId node, Generator& generator) const
1067+
{
1068+
// Get all candidate nodes.
1069+
const auto& record{node_matrix_.at(node)};
1070+
const auto& probabilities{record.first};
1071+
const auto& nodes{record.second};
1072+
1073+
// Resolve disconnected node and dead end cases.
1074+
auto num_nodes = nodes.size();
1075+
if (!num_nodes)
1076+
return node;
1077+
else if (num_nodes == 1)
1078+
return nodes[0];
1079+
1080+
// Pick nodes based on edge probabilities if they are available.
1081+
if (!probabilities.empty()) {
1082+
std::discrete_distribution<int> dd{
1083+
probabilities.begin(), probabilities.end()};
1084+
return nodes.at(dd(generator));
1085+
}
1086+
// Pick a connected node with equal edge probabilities.
1087+
return pick_random_item(nodes, generator);
1088+
}
1089+
9321090
BBox<double> bbox_; ///< Bounding box of the network grid in real world coordinates
9331091
double ew_res_; ///< East-west resolution of the grid
9341092
double ns_res_; ///< North-south resolution of the grid
9351093
RasterIndex max_row_; ///< Maximum row index in the grid
9361094
RasterIndex max_col_; ///< Maximum column index in the grid
9371095
double distance_per_cell_; ///< Distance to travel through one cell (cost)
938-
bool snap_ = false;
9391096
/** Node IDs stored by row and column (multiple nodes per cell) */
9401097
std::map<std::pair<RasterIndex, RasterIndex>, std::set<NodeId>> nodes_by_row_col_;
9411098
NodeMatrix node_matrix_; ///< List of node neighbors by node ID (edges)

0 commit comments

Comments
 (0)