Skip to content

Commit f119b54

Browse files
guoyongzhiguo-yong-zhi
guoyongzhi
authored and committed
add example embedding
1 parent 50e42b8 commit f119b54

File tree

2 files changed

+54
-2
lines changed

2 files changed

+54
-2
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "WordCloud"
22
uuid = "6385f0a0-cb03-45b6-9089-4e0acc74b26b"
33
authors = ["guoyongzhi <momoshanghan@163.com>"]
4-
version = "0.6.8"
4+
version = "0.6.9"
55

66
[deps]
77
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
@@ -20,4 +20,4 @@ ImageMagick = "1"
2020
ImageTransformations = "0.5, 0.6, 0.7, 0.8"
2121
Luxor = "2.8"
2222
julia = "1.2"
23-
Stuffing = "0.1, 0.2, 0.3"
23+
Stuffing = "0.3"

examples/embedding.jl

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#md# The positions of words can be initialized with pre-trained word vectors.
2+
#md# ### Words
3+
using WordCloud
4+
# Extra stop words to exclude in addition to the built-in English list.
stwords = ["us", "will"];
# Count the words of the bundled speech, ignoring the union (`∪`) of the built-in
# and the extra stop words. The `do` block closes the file once it has been read.
speechpath = pkgdir(WordCloud)*"/res/Barack Obama's First Inaugural Address.txt"
words_weights = open(speechpath) do io
    processtext(io, stopwords=WordCloud.stopwords_en ∪ stwords)
end
# The result is splatted into `zip`, i.e. it is treated as parallel collections
# (presumably words and weights) that are paired up into a word => weight Dict.
words_weights = Dict(zip(words_weights...))
7+
#md# ### Embeddings
8+
using Embeddings
9+
using TSne
10+
# Pre-trained English GloVe table, loaded once for the whole script.
const embtable = load_embeddings(GloVe{:en})
# Reverse lookup: vocabulary word => its position in `embtable.vocab`; that
# position is used below as the column index into `embtable.embeddings`.
const get_word_index = Dict(
    token => col for (col, token) in enumerate(embtable.vocab)
)

"""
    get_embedding(word)

Return a copy of the column of `embtable.embeddings` that belongs to `word`.
Throws a `KeyError` when `word` is not a key of `get_word_index`.
"""
get_embedding(word) = embtable.embeddings[:, get_word_index[word]]
17+
# Look up a vector for every word, trying the exact spelling first and the
# lowercase spelling second; words with no vector are dropped from the cloud.
wordvec = Dict()
# Iterate over a snapshot of the keys: entries of `words_weights` are removed
# inside the loop, and a Dict must not be mutated while it is being iterated.
for k in collect(keys(words_weights))
    if haskey(get_word_index, k)
        wordvec[k] = get_embedding(k)
    elseif haskey(get_word_index, lowercase(k))
        wordvec[k] = get_embedding(lowercase(k))
    else
        pop!(words_weights, k)
        println("remove ", k)
    end
end
# Stack the vectors as rows (one row per word, in the iteration order of
# `wordvec`) and project them with t-SNE; the result is indexed as two columns
# further down in the script.
embedded = tsne(hcat(values(wordvec)...)', 2)
29+
#md# ### WordCloud
30+
# Pick a random color scheme, then build the cloud on a 1000×1000 elliptical
# mask with a transparent background and a fill color derived from the scheme.
sc = WordCloud.randomscheme()
wc = wordcloud(
    words_weights;
    mask=shape(
        ellipse, 1000, 1000;
        backgroundcolor=(0, 0, 0, 0),
        color=WordCloud.chooseabgcolor(sc),
    ),
    colors=sc,
    # NOTE(review): presumably limits construction to image initialisation so
    # that positions can be assigned manually afterwards — confirm.
    run=initimages!,
)
37+
38+
# Map the t-SNE coordinates into mask coordinates.
pos = embedded
# Centroid of the point set (a 1×2 row), subtracted so the cloud is centred.
center = sum(pos, dims=1) / size(pos, 1)
pos = pos .- center
# Largest distance from the centroid. It is measured AFTER centring, so that
# dividing by 2r keeps every point within half a mask extent of the centre.
r = maximum(sqrt.(pos[:, 1] .^ 2 .+ pos[:, 2] .^ 2))
# A single point (or all-coincident points) gives r == 0; avoid dividing by zero.
r = r > 0 ? r : one(r)
pos = pos ./ 2r
# NOTE(review): `size(wc.mask)` is applied column-wise as the extent of each
# coordinate; confirm the dimension order if the mask is ever non-square.
sz = collect(size(wc.mask))'
# Scale to the mask size, shift the origin to the mask centre, snap to pixels.
pos = round.(Int, pos .* sz .+ sz ./ 2)
44+
45+
# Assign each word with an embedding its computed position; `setcenter!` is
# passed as the positioning type (presumably the position is the word's centre).
setpositions!(wc, collect(keys(wordvec)), eachrow(pos); type=setcenter!)
# NOTE(review): presumably marks the words as already placed so that
# `generate!` starts from these positions — confirm against WordCloud docs.
setstate!(wc, :placement!)
generate!(wc; patient=-1)
println("results are saved to embedding.png")
paint(wc, "embedding.png")
# Leave the cloud as the value of the script's last expression.
wc
51+
#eval# runexample(:embedding)
52+
#md# ![](embedding.png)

0 commit comments

Comments
 (0)