Skip to content

Handle set_index properly in to_adjlist #511

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions libpysal/weights/tests/test_adjlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,45 @@ def test_map(self):
np.testing.assert_allclose(
data, mapped["_".join(("subtract", name))].values
)

def test_sort(self):
    """Check that ``to_adjlist`` honors ``sort_joins`` when ids come from the index.

    Rook weights are built from the ``us48.shp`` example with ``STATE_FIPS``
    as the dataframe index. The first five focal ids of the unsorted and
    sorted adjacency lists are compared against known values.
    """
    from libpysal import examples
    from libpysal.weights import Rook

    states = geopandas.read_file(examples.get_path("us48.shp"))
    rook = Rook.from_dataframe(states.set_index("STATE_FIPS"), use_index=True)

    # Build both adjacency lists up front: unsorted first, then sorted.
    head_unsorted = rook.to_adjlist(sort_joins=False).focal.values[:5]
    head_sorted = rook.to_adjlist(sort_joins=True).focal.values[:5]

    expected_unsorted = np.array(["53", "53", "30", "30", "30"])
    expected_sorted = np.array(["01", "01", "01", "01", "04"])

    np.testing.assert_array_equal(head_unsorted, expected_unsorted)
    np.testing.assert_array_equal(head_sorted, expected_sorted)

def test_ids(self):
    """Check that custom integer ids are carried into the adjacency list.

    The first rows of the ``columbus.dbf`` example are given ids starting
    at 3 via the ``my_id`` column. Every id in ``range(3, 8)`` must appear
    among both the focal and neighbor values, and no other value may
    appear in either column.
    """
    df = geopandas.read_file(examples.get_path("columbus.dbf")).head()
    df["my_id"] = range(3, len(df) + 3)
    W = weights.Queen.from_dataframe(df, ids="my_id")
    W_adj = W.to_adjlist(drop_islands=True)

    expected_ids = set(range(3, len(df) + 3))
    # ``x in series`` tests the Series *index*, not its values, so compare
    # against the actual column values instead.
    focal_ids = set(W_adj.focal.tolist())
    neighbor_ids = set(W_adj.neighbor.tolist())

    for i in range(3, 8):
        assert i in focal_ids
        assert i in neighbor_ids
    for i in focal_ids:
        assert i in expected_ids
    for i in neighbor_ids:
        assert i in expected_ids

def test_str_ids(self):
    """Check that custom string ids are carried into the adjacency list.

    The first rows of the ``columbus.dbf`` example are labelled with snake
    names via the ``my_str_id`` column. Every name must appear among both
    the focal and neighbor values, and no other value may appear in either
    column.
    """
    df = geopandas.read_file(examples.get_path("columbus.dbf")).head()
    snakes = ["mamba", "boa", "python", "rattlesnake", "cobra"]
    df["my_str_id"] = snakes
    W = weights.Queen.from_dataframe(df, ids="my_str_id")
    W_adj = W.to_adjlist(drop_islands=True)

    for i in snakes:
        # These comparisons were previously bare expressions and therefore
        # never checked anything; assert them explicitly.
        assert (W_adj.focal == i).any()
        assert (W_adj.neighbor == i).any()
    for i in W_adj.focal:
        assert i in snakes
    for i in W_adj.neighbor:
        assert i in snakes
12 changes: 11 additions & 1 deletion libpysal/weights/weights.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ def to_adjlist(
focal_col="focal",
neighbor_col="neighbor",
weight_col="weight",
sort_joins=False,
):
"""
Compute an adjacency list representation of a weights object.
Expand All @@ -416,6 +417,10 @@ def to_adjlist(
Name of the column in which to store "destination" node ids.
weight_col : str
Name of the column in which to store weight information.
sort_joins : bool
Whether or not to lexicographically sort the adjacency
list by (focal_col, neighbor_col). Default is False.

"""
try:
import pandas
Expand All @@ -432,6 +437,9 @@ def to_adjlist(

links = []
focal_ix, neighbor_ix = self.sparse.nonzero()
idxs = np.array(list(self.neighbors.keys()))
focal_ix = idxs[focal_ix]
neighbor_ix = idxs[neighbor_ix]
Comment on lines +440 to +442
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need to do this if there are no custom ids set. I would add

self.ids = ids

to the W.__init__ and then have this done conditionally after

if self.ids:
    ...

You can potentially also use the ids directly here, but that does not matter much:

        if self.ids:
            ids = np.asarray(self.ids)
            focal_ix = ids.take(focal_ix)
            neighbor_ix = ids.take(neighbor_ix)

Using keys from neighbours is probably the safer option anyway.

weights = self.sparse.data
adjlist = pandas.DataFrame(
{focal_col: focal_ix, neighbor_col: neighbor_ix, weight_col: weights}
Expand All @@ -443,7 +451,9 @@ def to_adjlist(
{focal_col: self.islands, neighbor_col: self.islands, weight_col: 0}
)
adjlist = pandas.concat((adjlist, island_adjlist)).reset_index(drop=True)
return adjlist.sort_values([focal_col, neighbor_col])
if sort_joins:
return adjlist.sort_values([focal_col, neighbor_col])
return adjlist

def to_networkx(self):
"""Convert a weights object to a ``networkx`` graph.
Expand Down