Skip to content

Commit b195ea7

Browse files
committed
sundry bugfixes
1 parent 851d113 commit b195ea7

File tree

6 files changed

+128
-22
lines changed

6 files changed

+128
-22
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
DESCRIPTION = ('Bibliographic network and corpus analysis for historians')
88
LICENSE = 'GNU GPL 3'
99
URL = 'http://diging.github.io/tethne/'
10-
VERSION = '0.7.0-beta'
10+
VERSION = '0.6.4-beta'
1111

1212
PACKAGES = [ 'tethne',
1313
'tethne.analyze',

tethne/analyze/collection.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,22 @@ def algorithm(G, method, **kwargs):
7373
raise(ValueError("No such method in networkx."))
7474
else:
7575
for k, g in G.graphs.iteritems():
76-
r = networkx.__dict__[method](g, **kwargs)
77-
for elem, value in r.iteritems():
78-
try:
79-
results[elem][k] = value
80-
except KeyError:
81-
results[elem] = { k: value }
82-
networkx.set_node_attributes(g, method, r) # [#61510128]
76+
try:
77+
r = networkx.__dict__[method](g, **kwargs)
78+
except:
79+
r = 0.
80+
# Some methods return a value for each node.
81+
if type(r) is dict and len(r) == len(g.nodes()):
82+
for elem, value in r.iteritems():
83+
try:
84+
results[elem][k] = value
85+
except KeyError:
86+
results[elem] = { k: value }
87+
# Update the nodes in the graph with the results.
88+
networkx.set_node_attributes(g, method, r) # [#61510128]
89+
# Other methods return other kinds of values.
90+
else:
91+
results[k] = r
8392
return results
8493

8594
def delta(G, attribute):

tethne/analyze/corpus.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import networkx
1616
import numpy
1717
from ..networks.helpers import top_cited
18-
from ..classes import GraphCollection
1918

2019
import matplotlib.pyplot as plt
2120
import matplotlib.patches as mpatches
@@ -93,7 +92,7 @@ def _top_features(corpus, feature, topn=20, perslice=False, axis='date'):
9392
top = [ counts.keys()[c] for c in cvalues.argsort()[-topn:][::-1] ]
9493
return top
9594

96-
def plot_burstness(corpus, feature, k=5, topn=20, perslice=False,
95+
def plot_burstness(corpus, feature=None, k=5, topn=20, perslice=False,
9796
flist=None, normalize=True, fig=None, **kwargs):
9897
"""
9998
Generate a figure depicting burstness profiles for ``feature``.
@@ -145,6 +144,9 @@ def plot_burstness(corpus, feature, k=5, topn=20, perslice=False,
145144
:align: center
146145
147146
"""
147+
if feature is None:
148+
raise ValueError('No feature specified.')
149+
148150
B = burstness(corpus, feature, k=k, topn=topn, perslice=perslice,
149151
flist=flist, normalize=normalize, **kwargs)
150152

@@ -201,7 +203,7 @@ def plot_burstness(corpus, feature, k=5, topn=20, perslice=False,
201203

202204
return fig
203205

204-
def burstness(corpus, feature, k=5, topn=20, perslice=False,
206+
def burstness(corpus, feature=None, k=5, topn=20, perslice=False,
205207
flist=None, normalize=True, **kwargs):
206208
"""
207209
Estimate burstness profile for the ``topn`` features (or ``flist``) in
@@ -248,6 +250,9 @@ def burstness(corpus, feature, k=5, topn=20, perslice=False,
248250
([1990, 1991, 1992, 1993], [0., 0.4, 0.6, 0.])
249251
250252
"""
253+
254+
if feature is None:
255+
raise ValueError('No feature specified.')
251256

252257
if flist is None:
253258
top = _top_features(corpus, feature, topn=topn, perslice=perslice)
@@ -267,7 +272,7 @@ def burstness(corpus, feature, k=5, topn=20, perslice=False,
267272
normalize=normalize, **kwargs)
268273
return B
269274

270-
def feature_burstness(corpus, feature, findex, k=5, normalize=True, **kwargs):
275+
def feature_burstness(corpus, feature=None, findex=None, k=5, normalize=True, **kwargs):
271276
"""
272277
Estimate burstness profile for a feature over the ``'date'`` axis.
273278
@@ -287,6 +292,9 @@ def feature_burstness(corpus, feature, findex, k=5, normalize=True, **kwargs):
287292
Parameters for burstness automaton HMM.
288293
"""
289294

295+
if feature is None:
296+
raise ValueError('No feature specified.')
297+
290298
# Get time-intervals between occurrences.
291299
last = min(corpus.axes['date'].keys())-1
292300
dates = [last] # Pad start.
@@ -339,7 +347,7 @@ def feature_burstness(corpus, feature, findex, k=5, normalize=True, **kwargs):
339347
return D, [ A_[d] for d in D ]
340348

341349

342-
def plot_sigma(G, corpus, feature, topn=20, sort_by='max', perslice=False,
350+
def plot_sigma(G=None, corpus=None, feature=None, topn=20, sort_by='max', perslice=False,
343351
flist=None, fig=None, **kwargs):
344352
"""
345353
Plot sigma values for the ``topn`` most influential nodes.
@@ -393,6 +401,16 @@ def plot_sigma(G, corpus, feature, topn=20, sort_by='max', perslice=False,
393401
:width: 600
394402
:align: center
395403
"""
404+
if G is None:
405+
raise ValueError('No GraphCollection specified.')
406+
407+
408+
if feature is None:
409+
raise ValueError('No feature specified.')
410+
411+
if corpus is None:
412+
raise ValueError('No corpus specified.')
413+
396414
G = sigma(G, corpus, feature)
397415
nodes = G.nodes()
398416

@@ -491,7 +509,7 @@ def plot_sigma(G, corpus, feature, topn=20, sort_by='max', perslice=False,
491509
rect = mpatches.Rectangle( xy, width, height, fill=True,
492510
linewidth=0.0 )
493511
rect.set_facecolor(color)
494-
rect.set_alpha(state + 0.1)
512+
rect.set_alpha(min(state + 0.1, 1.0))
495513
ax.add_patch(rect)
496514

497515
ax.set_ylabel( G.node_index[node], rotation=0,
@@ -500,9 +518,9 @@ def plot_sigma(G, corpus, feature, topn=20, sort_by='max', perslice=False,
500518

501519
plt.subplots_adjust(left=0.5)
502520
fig.tight_layout(h_pad=0.25)
503-
return fig, G
521+
return fig
504522

505-
def sigma(G, corpus, feature, **kwargs):
523+
def sigma(G, corpus=None, feature=None, **kwargs):
506524
"""
507525
Calculate sigma (from `Chen 2009 <http://arxiv.org/pdf/0904.1439.pdf>`_) for
508526
all of the nodes in a :class:`.GraphCollection`\.
@@ -564,6 +582,12 @@ def sigma(G, corpus, feature, **kwargs):
564582
565583
"""
566584

585+
if feature is None:
586+
raise ValueError('No feature specified.')
587+
588+
if corpus is None:
589+
raise ValueError('No corpus specified.')
590+
567591
nodes = G.node_lookup.keys()
568592

569593
B = burstness(corpus, feature, flist=nodes, **kwargs)

tethne/classes/corpus.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import matplotlib
1313
from paper import Paper
1414
from collections import Counter
15+
from ..analyze import corpus as az_corpus
1516
from nltk.corpus import stopwords
1617
import scipy
1718

@@ -154,6 +155,15 @@ def __init__(self, papers, features=None, index_by='ayjid',
154155
index_citation_by=index_citation_by,
155156
exclude=exclude, filt=filt )
156157

158+
def analyze(self, method, **kwargs):
159+
try:
160+
method_callable = az_corpus.__dict__[method]
161+
except KeyError:
162+
raise ValueError('No such method.')
163+
164+
return method_callable(corpus=self, **kwargs)
165+
166+
157167
def index( self, papers, features=None, index_by='ayjid',
158168
index_citation_by='ayjid', exclude=set([]),
159169
filt=None, stem=False ):
@@ -1204,6 +1214,8 @@ def distribution(self, x_axis, y_axis=None):
12041214
y_size = 1
12051215
shape = (x_size, y_size)
12061216
logger.debug('distribution shape: {0}'.format(shape))
1217+
1218+
# Construct the sparse matrix.
12071219
I = []
12081220
J = []
12091221
K = []
@@ -1223,8 +1235,10 @@ def distribution(self, x_axis, y_axis=None):
12231235
K.append(k)
12241236

12251237
# TODO: Move away from SciPy, to facilitate PyPy compatibility?
1226-
dist = np.array(scipy.sparse.coo_matrix((K, (I,J)), shape=shape).todense())
1227-
1238+
csr = scipy.sparse.coo_matrix((K, (I,J)), shape=shape).tocsr()
1239+
nonzero = csr.nonzero()
1240+
dist = np.array(csr.todense())
1241+
12281242
return dist
12291243

12301244
# TODO: Merge this with :func:`.distribution`
@@ -1320,7 +1334,12 @@ def feature_distribution(self, featureset, feature, x_axis, y_axis=None,
13201334
fvalues = self.features[featureset]['features']
13211335

13221336
def _get_value(papers):
1323-
vtuples = [ fv for p in papers for fv in fvalues[p] ]
1337+
vtuples = []
1338+
for p in papers:
1339+
if p in fvalues:
1340+
vtuples += fvalues[p]
1341+
1342+
# vtuples = [ fv for p in papers for fv in fvalues[p] ]
13241343
values = [ v for f,v in vtuples if f == findex ]
13251344

13261345
if mode == 'counts':

tethne/classes/graphcollection.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import networkx
1212
import numpy as np
1313
import matplotlib.pyplot as plt
14+
from ..analyze.collection import algorithm
1415

1516
import warnings
1617

@@ -121,7 +122,57 @@ def __delitem__(self, key):
121122

122123
def __len__(self):
123124
return len(self.graphs)
124-
125+
126+
def analyze(self, method, **kwargs):
127+
"""
128+
Apply a ``method`` from NetworkX to all ``networkx.Graph`` objects in the
129+
:class:`.GraphCollection` ``G``.
130+
131+
For options, see the `list of algorithms
132+
<http://networkx.github.io/documentation/networkx-1.9/reference/algorithms.html>`_
133+
in the NetworkX documentation. Not all of these have been tested.
134+
135+
Parameters
136+
----------
137+
G : :class:`.GraphCollection`
138+
The :class:`.GraphCollection` to analyze. The specified method will be
139+
applied to each graph in ``G``.
140+
method : string
141+
Name of a method in NetworkX to execute on graph collection.
142+
**kwargs
143+
A list of keyword arguments that should correspond to the parameters
144+
of the specified method.
145+
146+
Returns
147+
-------
148+
results : dict
149+
Indexed by element (node or edge) and graph index (e.g. ``date``).
150+
151+
Raises
152+
------
153+
ValueError
154+
If no such method exists.
155+
156+
Examples
157+
--------
158+
159+
*Betweenness centrality:* (``G`` is a :class:`.GraphCollection`\)
160+
161+
.. code-block:: python
162+
163+
>>> from tethne.analyze import collection
164+
>>> BC = collection.algorithm(G, 'betweenness_centrality')
165+
>>> print BC[0]
166+
{1999: 0.010101651117889644,
167+
2000: 0.0008689093723107329,
168+
2001: 0.010504898852426189,
169+
2002: 0.009338654511194512,
170+
2003: 0.007519105636349891}
171+
172+
"""
173+
results = algorithm(self, method, **kwargs)
174+
return results
175+
125176
def build(self, corpus, axis, node_type, graph_type, method_kwargs={},
126177
**kwargs):
127178
"""
@@ -471,7 +522,10 @@ def attr_distribution(self, attr='weight', etype='edge', stat=np.mean):
471522
# Ignore warnings; will handle NaNs below.
472523
with warnings.catch_warnings():
473524
warnings.simplefilter('ignore')
474-
v = stat(A)
525+
try:
526+
v = stat(A)
527+
except ValueError: # Raised by max with empty sequence.
528+
v = 0.
475529

476530
if np.isnan(v):
477531
v = 0.

tethne/networks/authors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def author_papers(papers, node_id='ayjid', paper_attribs=[], **kwargs):
8383

8484
return author_papers_graph
8585

86-
def institutions(papers, threshold=1, edge_attrbs=['ayjid'],
86+
def institutions(papers, threshold=1, edge_attrbs=[],
8787
node_attribs=['authors'], geocode=False, **kwargs):
8888
"""
8989
Generates an institutional network based on coauthorship.

0 commit comments

Comments
 (0)